diff options
Diffstat (limited to 'sys/contrib/openzfs/module/icp')
102 files changed, 49777 insertions, 24668 deletions
diff --git a/sys/contrib/openzfs/module/icp/Makefile.in b/sys/contrib/openzfs/module/icp/Makefile.in deleted file mode 100644 index 858c5a610c26..000000000000 --- a/sys/contrib/openzfs/module/icp/Makefile.in +++ /dev/null @@ -1,101 +0,0 @@ -ifneq ($(KBUILD_EXTMOD),) -src = @abs_srcdir@ -obj = @abs_builddir@ -icp_include = $(src)/include -else -icp_include = $(srctree)/$(src)/include -endif - -MODULE := icp - -obj-$(CONFIG_ZFS) := $(MODULE).o - -asflags-y := -I$(icp_include) -ccflags-y := -I$(icp_include) - -$(MODULE)-objs += illumos-crypto.o -$(MODULE)-objs += api/kcf_cipher.o -$(MODULE)-objs += api/kcf_digest.o -$(MODULE)-objs += api/kcf_mac.o -$(MODULE)-objs += api/kcf_miscapi.o -$(MODULE)-objs += api/kcf_ctxops.o -$(MODULE)-objs += core/kcf_callprov.o -$(MODULE)-objs += core/kcf_prov_tabs.o -$(MODULE)-objs += core/kcf_sched.o -$(MODULE)-objs += core/kcf_mech_tabs.o -$(MODULE)-objs += core/kcf_prov_lib.o -$(MODULE)-objs += spi/kcf_spi.o -$(MODULE)-objs += io/aes.o -$(MODULE)-objs += io/edonr_mod.o -$(MODULE)-objs += io/sha1_mod.o -$(MODULE)-objs += io/sha2_mod.o -$(MODULE)-objs += io/skein_mod.o -$(MODULE)-objs += os/modhash.o -$(MODULE)-objs += os/modconf.o -$(MODULE)-objs += algs/modes/cbc.o -$(MODULE)-objs += algs/modes/ccm.o -$(MODULE)-objs += algs/modes/ctr.o -$(MODULE)-objs += algs/modes/ecb.o -$(MODULE)-objs += algs/modes/gcm_generic.o -$(MODULE)-objs += algs/modes/gcm.o -$(MODULE)-objs += algs/modes/modes.o -$(MODULE)-objs += algs/aes/aes_impl_generic.o -$(MODULE)-objs += algs/aes/aes_impl.o -$(MODULE)-objs += algs/aes/aes_modes.o -$(MODULE)-objs += algs/edonr/edonr.o -$(MODULE)-objs += algs/sha1/sha1.o -$(MODULE)-objs += algs/sha2/sha2.o -$(MODULE)-objs += algs/skein/skein.o -$(MODULE)-objs += algs/skein/skein_block.o -$(MODULE)-objs += algs/skein/skein_iv.o - -$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/aes/aeskey.o -$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/aes/aes_amd64.o -$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/aes/aes_aesni.o 
-$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/modes/gcm_pclmulqdq.o -$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/modes/aesni-gcm-x86_64.o -$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/modes/ghash-x86_64.o -$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/sha1/sha1-x86_64.o -$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/sha2/sha256_impl.o -$(MODULE)-$(CONFIG_X86_64) += asm-x86_64/sha2/sha512_impl.o - -$(MODULE)-$(CONFIG_X86) += algs/modes/gcm_pclmulqdq.o -$(MODULE)-$(CONFIG_X86) += algs/aes/aes_impl_aesni.o -$(MODULE)-$(CONFIG_X86) += algs/aes/aes_impl_x86-64.o - -# Suppress objtool "can't find jump dest instruction at" warnings. They -# are caused by the constants which are defined in the text section of the -# assembly file using .byte instructions (e.g. bswap_mask). The objtool -# utility tries to interpret them as opcodes and obviously fails doing so. -OBJECT_FILES_NON_STANDARD_aesni-gcm-x86_64.o := y -OBJECT_FILES_NON_STANDARD_ghash-x86_64.o := y -# Suppress objtool "unsupported stack pointer realignment" warnings. We are -# not using a DRAP register while aligning the stack to a 64 byte boundary. -# See #6950 for the reasoning. -OBJECT_FILES_NON_STANDARD_sha1-x86_64.o := y -OBJECT_FILES_NON_STANDARD_sha256_impl.o := y -OBJECT_FILES_NON_STANDARD_sha512_impl.o := y - -ICP_DIRS = \ - api \ - core \ - spi \ - io \ - os \ - algs \ - algs/aes \ - algs/edonr \ - algs/modes \ - algs/sha1 \ - algs/sha2 \ - algs/skein \ - asm-x86_64 \ - asm-x86_64/aes \ - asm-x86_64/modes \ - asm-x86_64/sha1 \ - asm-x86_64/sha2 \ - asm-i386 \ - asm-generic - -all: - mkdir -p $(ICP_DIRS) diff --git a/sys/contrib/openzfs/module/icp/algs/aes/aes_impl.c b/sys/contrib/openzfs/module/icp/algs/aes/aes_impl.c index 037be0db60d7..9daa975226fe 100644 --- a/sys/contrib/openzfs/module/icp/algs/aes/aes_impl.c +++ b/sys/contrib/openzfs/module/icp/algs/aes/aes_impl.c @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. 
* * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * @@ -47,7 +47,7 @@ aes_init_keysched(const uint8_t *cipherKey, uint_t keyBits, void *keysched) union { uint64_t ka64[4]; uint32_t ka32[8]; - } keyarr; + } keyarr; switch (keyBits) { case 128: @@ -81,7 +81,7 @@ aes_init_keysched(const uint8_t *cipherKey, uint_t keyBits, void *keysched) keyarr.ka64[i] = *((uint64_t *)&cipherKey[j]); } } else { - bcopy(cipherKey, keyarr.ka32, keysize); + memcpy(keyarr.ka32, cipherKey, keysize); } } else { /* byte swap */ @@ -132,7 +132,7 @@ aes_encrypt_block(const void *ks, const uint8_t *pt, uint8_t *ct) buffer[2] = htonl(*(uint32_t *)(void *)&pt[8]); buffer[3] = htonl(*(uint32_t *)(void *)&pt[12]); } else - bcopy(pt, &buffer, AES_BLOCK_LEN); + memcpy(&buffer, pt, AES_BLOCK_LEN); ops->encrypt(&ksch->encr_ks.ks32[0], ksch->nr, buffer, buffer); @@ -143,7 +143,7 @@ aes_encrypt_block(const void *ks, const uint8_t *pt, uint8_t *ct) *(uint32_t *)(void *)&ct[8] = htonl(buffer[2]); *(uint32_t *)(void *)&ct[12] = htonl(buffer[3]); } else - bcopy(&buffer, ct, AES_BLOCK_LEN); + memcpy(ct, &buffer, AES_BLOCK_LEN); } return (CRYPTO_SUCCESS); } @@ -179,7 +179,7 @@ aes_decrypt_block(const void *ks, const uint8_t *ct, uint8_t *pt) buffer[2] = htonl(*(uint32_t *)(void *)&ct[8]); buffer[3] = htonl(*(uint32_t *)(void *)&ct[12]); } else - bcopy(ct, &buffer, AES_BLOCK_LEN); + memcpy(&buffer, ct, AES_BLOCK_LEN); ops->decrypt(&ksch->decr_ks.ks32[0], ksch->nr, buffer, buffer); @@ -190,7 +190,7 @@ aes_decrypt_block(const void *ks, const uint8_t *ct, uint8_t *pt) *(uint32_t *)(void *)&pt[8] = htonl(buffer[2]); *(uint32_t *)(void *)&pt[12] = htonl(buffer[3]); } else - bcopy(&buffer, pt, AES_BLOCK_LEN); + memcpy(pt, &buffer, AES_BLOCK_LEN); } return (CRYPTO_SUCCESS); } @@ -206,13 +206,12 @@ 
aes_decrypt_block(const void *ks, const uint8_t *ct, uint8_t *pt) * size Size of key schedule allocated, in bytes * kmflag Flag passed to kmem_alloc(9F); ignored in userland. */ -/* ARGSUSED */ void * aes_alloc_keysched(size_t *size, int kmflag) { aes_key_t *keysched; - keysched = (aes_key_t *)kmem_alloc(sizeof (aes_key_t), kmflag); + keysched = kmem_alloc(sizeof (aes_key_t), kmflag); if (keysched != NULL) { *size = sizeof (aes_key_t); return (keysched); @@ -226,7 +225,7 @@ static aes_impl_ops_t aes_fastest_impl = { }; /* All compiled in implementations */ -const aes_impl_ops_t *aes_all_impl[] = { +static const aes_impl_ops_t *aes_all_impl[] = { &aes_generic_impl, #if defined(__x86_64) &aes_x86_64_impl, @@ -338,7 +337,7 @@ aes_impl_init(void) } static const struct { - char *name; + const char *name; uint32_t sel; } aes_impl_opts[] = { { "cycle", IMPL_CYCLE }, @@ -425,13 +424,15 @@ icp_aes_impl_get(char *buffer, zfs_kernel_param_t *kp) /* list mandatory options */ for (i = 0; i < ARRAY_SIZE(aes_impl_opts); i++) { fmt = (impl == aes_impl_opts[i].sel) ? "[%s] " : "%s "; - cnt += sprintf(buffer + cnt, fmt, aes_impl_opts[i].name); + cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt, + aes_impl_opts[i].name); } /* list all supported implementations */ for (i = 0; i < aes_supp_impl_cnt; i++) { fmt = (i == impl) ? "[%s] " : "%s "; - cnt += sprintf(buffer + cnt, fmt, aes_supp_impl[i]->name); + cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt, + aes_supp_impl[i]->name); } return (cnt); diff --git a/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_aesni.c b/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_aesni.c index 4b5eefd71b17..61085214c77b 100644 --- a/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_aesni.c +++ b/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_aesni.c @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. 
* * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * @@ -26,15 +26,16 @@ #include <sys/simd.h> #include <sys/types.h> +#include <sys/asm_linkage.h> /* These functions are used to execute AES-NI instructions: */ -extern int rijndael_key_setup_enc_intel(uint32_t rk[], +extern ASMABI int rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[], uint64_t keyBits); -extern int rijndael_key_setup_dec_intel(uint32_t rk[], +extern ASMABI int rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[], uint64_t keyBits); -extern void aes_encrypt_intel(const uint32_t rk[], int Nr, +extern ASMABI void aes_encrypt_intel(const uint32_t rk[], int Nr, const uint32_t pt[4], uint32_t ct[4]); -extern void aes_decrypt_intel(const uint32_t rk[], int Nr, +extern ASMABI void aes_decrypt_intel(const uint32_t rk[], int Nr, const uint32_t ct[4], uint32_t pt[4]); diff --git a/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_generic.c b/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_generic.c index 427c096c6ab3..ae13c0b85578 100644 --- a/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_generic.c +++ b/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_generic.c @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. 
* diff --git a/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_x86-64.c b/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_x86-64.c index 19f8fd5012cf..f4f206a00935 100644 --- a/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_x86-64.c +++ b/sys/contrib/openzfs/module/icp/algs/aes/aes_impl_x86-64.c @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * diff --git a/sys/contrib/openzfs/module/icp/algs/aes/aes_modes.c b/sys/contrib/openzfs/module/icp/algs/aes/aes_modes.c index 9e4b498fffcb..6a25496d050e 100644 --- a/sys/contrib/openzfs/module/icp/algs/aes/aes_modes.c +++ b/sys/contrib/openzfs/module/icp/algs/aes/aes_modes.c @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * diff --git a/sys/contrib/openzfs/module/icp/algs/blake3/blake3.c b/sys/contrib/openzfs/module/icp/algs/blake3/blake3.c new file mode 100644 index 000000000000..0bab7a3a7593 --- /dev/null +++ b/sys/contrib/openzfs/module/icp/algs/blake3/blake3.c @@ -0,0 +1,731 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3 + * Copyright (c) 2019-2020 Samuel Neves and Jack O'Connor + * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de> + */ + +#include <sys/simd.h> +#include <sys/zfs_context.h> +#include <sys/blake3.h> + +#include "blake3_impl.h" + +/* + * We need 1056 byte stack for blake3_compress_subtree_wide() + * - we define this pragma to make gcc happy + */ +#if defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wframe-larger-than=" +#endif + +/* internal used */ +typedef struct { + uint32_t input_cv[8]; + uint64_t counter; + uint8_t block[BLAKE3_BLOCK_LEN]; + uint8_t block_len; + uint8_t flags; +} output_t; + +/* internal flags */ +enum blake3_flags { + CHUNK_START = 1 << 0, + CHUNK_END = 1 << 1, + PARENT = 1 << 2, + ROOT = 1 << 3, + KEYED_HASH = 1 << 4, + DERIVE_KEY_CONTEXT = 1 << 5, + DERIVE_KEY_MATERIAL = 1 << 6, +}; + +/* internal start */ +static void chunk_state_init(blake3_chunk_state_t *ctx, + const uint32_t key[8], uint8_t flags) +{ + memcpy(ctx->cv, key, BLAKE3_KEY_LEN); + ctx->chunk_counter = 0; + memset(ctx->buf, 0, BLAKE3_BLOCK_LEN); + ctx->buf_len = 0; + ctx->blocks_compressed = 0; + ctx->flags = flags; +} + +static void chunk_state_reset(blake3_chunk_state_t *ctx, + const uint32_t key[8], uint64_t chunk_counter) +{ + memcpy(ctx->cv, key, BLAKE3_KEY_LEN); + ctx->chunk_counter = chunk_counter; + ctx->blocks_compressed = 0; + memset(ctx->buf, 0, BLAKE3_BLOCK_LEN); + ctx->buf_len = 0; +} + +static size_t 
chunk_state_len(const blake3_chunk_state_t *ctx) +{ + return (BLAKE3_BLOCK_LEN * (size_t)ctx->blocks_compressed) + + ((size_t)ctx->buf_len); +} + +static size_t chunk_state_fill_buf(blake3_chunk_state_t *ctx, + const uint8_t *input, size_t input_len) +{ + size_t take = BLAKE3_BLOCK_LEN - ((size_t)ctx->buf_len); + if (take > input_len) { + take = input_len; + } + uint8_t *dest = ctx->buf + ((size_t)ctx->buf_len); + memcpy(dest, input, take); + ctx->buf_len += (uint8_t)take; + return (take); +} + +static uint8_t chunk_state_maybe_start_flag(const blake3_chunk_state_t *ctx) +{ + if (ctx->blocks_compressed == 0) { + return (CHUNK_START); + } else { + return (0); + } +} + +static output_t make_output(const uint32_t input_cv[8], + const uint8_t *block, uint8_t block_len, + uint64_t counter, uint8_t flags) +{ + output_t ret; + memcpy(ret.input_cv, input_cv, 32); + memcpy(ret.block, block, BLAKE3_BLOCK_LEN); + ret.block_len = block_len; + ret.counter = counter; + ret.flags = flags; + return (ret); +} + +/* + * Chaining values within a given chunk (specifically the compress_in_place + * interface) are represented as words. This avoids unnecessary bytes<->words + * conversion overhead in the portable implementation. However, the hash_many + * interface handles both user input and parent node blocks, so it accepts + * bytes. For that reason, chaining values in the CV stack are represented as + * bytes. 
+ */ +static void output_chaining_value(const blake3_ops_t *ops, + const output_t *ctx, uint8_t cv[32]) +{ + uint32_t cv_words[8]; + memcpy(cv_words, ctx->input_cv, 32); + ops->compress_in_place(cv_words, ctx->block, ctx->block_len, + ctx->counter, ctx->flags); + store_cv_words(cv, cv_words); +} + +static void output_root_bytes(const blake3_ops_t *ops, const output_t *ctx, + uint64_t seek, uint8_t *out, size_t out_len) +{ + uint64_t output_block_counter = seek / 64; + size_t offset_within_block = seek % 64; + uint8_t wide_buf[64]; + while (out_len > 0) { + ops->compress_xof(ctx->input_cv, ctx->block, ctx->block_len, + output_block_counter, ctx->flags | ROOT, wide_buf); + size_t available_bytes = 64 - offset_within_block; + size_t memcpy_len; + if (out_len > available_bytes) { + memcpy_len = available_bytes; + } else { + memcpy_len = out_len; + } + memcpy(out, wide_buf + offset_within_block, memcpy_len); + out += memcpy_len; + out_len -= memcpy_len; + output_block_counter += 1; + offset_within_block = 0; + } +} + +static void chunk_state_update(const blake3_ops_t *ops, + blake3_chunk_state_t *ctx, const uint8_t *input, size_t input_len) +{ + if (ctx->buf_len > 0) { + size_t take = chunk_state_fill_buf(ctx, input, input_len); + input += take; + input_len -= take; + if (input_len > 0) { + ops->compress_in_place(ctx->cv, ctx->buf, + BLAKE3_BLOCK_LEN, ctx->chunk_counter, + ctx->flags|chunk_state_maybe_start_flag(ctx)); + ctx->blocks_compressed += 1; + ctx->buf_len = 0; + memset(ctx->buf, 0, BLAKE3_BLOCK_LEN); + } + } + + while (input_len > BLAKE3_BLOCK_LEN) { + ops->compress_in_place(ctx->cv, input, BLAKE3_BLOCK_LEN, + ctx->chunk_counter, + ctx->flags|chunk_state_maybe_start_flag(ctx)); + ctx->blocks_compressed += 1; + input += BLAKE3_BLOCK_LEN; + input_len -= BLAKE3_BLOCK_LEN; + } + + chunk_state_fill_buf(ctx, input, input_len); +} + +static output_t chunk_state_output(const blake3_chunk_state_t *ctx) +{ + uint8_t block_flags = + ctx->flags | 
chunk_state_maybe_start_flag(ctx) | CHUNK_END; + return (make_output(ctx->cv, ctx->buf, ctx->buf_len, ctx->chunk_counter, + block_flags)); +} + +static output_t parent_output(const uint8_t block[BLAKE3_BLOCK_LEN], + const uint32_t key[8], uint8_t flags) +{ + return (make_output(key, block, BLAKE3_BLOCK_LEN, 0, flags | PARENT)); +} + +/* + * Given some input larger than one chunk, return the number of bytes that + * should go in the left subtree. This is the largest power-of-2 number of + * chunks that leaves at least 1 byte for the right subtree. + */ +static size_t left_len(size_t content_len) +{ + /* + * Subtract 1 to reserve at least one byte for the right side. + * content_len + * should always be greater than BLAKE3_CHUNK_LEN. + */ + size_t full_chunks = (content_len - 1) / BLAKE3_CHUNK_LEN; + return (round_down_to_power_of_2(full_chunks) * BLAKE3_CHUNK_LEN); +} + +/* + * Use SIMD parallelism to hash up to MAX_SIMD_DEGREE chunks at the same time + * on a single thread. Write out the chunk chaining values and return the + * number of chunks hashed. These chunks are never the root and never empty; + * those cases use a different codepath. + */ +static size_t compress_chunks_parallel(const blake3_ops_t *ops, + const uint8_t *input, size_t input_len, const uint32_t key[8], + uint64_t chunk_counter, uint8_t flags, uint8_t *out) +{ + const uint8_t *chunks_array[MAX_SIMD_DEGREE]; + size_t input_position = 0; + size_t chunks_array_len = 0; + while (input_len - input_position >= BLAKE3_CHUNK_LEN) { + chunks_array[chunks_array_len] = &input[input_position]; + input_position += BLAKE3_CHUNK_LEN; + chunks_array_len += 1; + } + + ops->hash_many(chunks_array, chunks_array_len, BLAKE3_CHUNK_LEN / + BLAKE3_BLOCK_LEN, key, chunk_counter, B_TRUE, flags, CHUNK_START, + CHUNK_END, out); + + /* + * Hash the remaining partial chunk, if there is one. Note that the + * empty chunk (meaning the empty message) is a different codepath. 
+ */ + if (input_len > input_position) { + uint64_t counter = chunk_counter + (uint64_t)chunks_array_len; + blake3_chunk_state_t chunk_state; + chunk_state_init(&chunk_state, key, flags); + chunk_state.chunk_counter = counter; + chunk_state_update(ops, &chunk_state, &input[input_position], + input_len - input_position); + output_t output = chunk_state_output(&chunk_state); + output_chaining_value(ops, &output, &out[chunks_array_len * + BLAKE3_OUT_LEN]); + return (chunks_array_len + 1); + } else { + return (chunks_array_len); + } +} + +/* + * Use SIMD parallelism to hash up to MAX_SIMD_DEGREE parents at the same time + * on a single thread. Write out the parent chaining values and return the + * number of parents hashed. (If there's an odd input chaining value left over, + * return it as an additional output.) These parents are never the root and + * never empty; those cases use a different codepath. + */ +static size_t compress_parents_parallel(const blake3_ops_t *ops, + const uint8_t *child_chaining_values, size_t num_chaining_values, + const uint32_t key[8], uint8_t flags, uint8_t *out) +{ + const uint8_t *parents_array[MAX_SIMD_DEGREE_OR_2] = {0}; + size_t parents_array_len = 0; + + while (num_chaining_values - (2 * parents_array_len) >= 2) { + parents_array[parents_array_len] = &child_chaining_values[2 * + parents_array_len * BLAKE3_OUT_LEN]; + parents_array_len += 1; + } + + ops->hash_many(parents_array, parents_array_len, 1, key, 0, B_FALSE, + flags | PARENT, 0, 0, out); + + /* If there's an odd child left over, it becomes an output. */ + if (num_chaining_values > 2 * parents_array_len) { + memcpy(&out[parents_array_len * BLAKE3_OUT_LEN], + &child_chaining_values[2 * parents_array_len * + BLAKE3_OUT_LEN], BLAKE3_OUT_LEN); + return (parents_array_len + 1); + } else { + return (parents_array_len); + } +} + +/* + * The wide helper function returns (writes out) an array of chaining values + * and returns the length of that array. 
The number of chaining values returned + * is the dynamically detected SIMD degree, at most MAX_SIMD_DEGREE. Or fewer, + * if the input is shorter than that many chunks. The reason for maintaining a + * wide array of chaining values going back up the tree, is to allow the + * implementation to hash as many parents in parallel as possible. + * + * As a special case when the SIMD degree is 1, this function will still return + * at least 2 outputs. This guarantees that this function doesn't perform the + * root compression. (If it did, it would use the wrong flags, and also we + * wouldn't be able to implement extendable output.) Note that this function is + * not used when the whole input is only 1 chunk long; that's a different + * codepath. + * + * Why not just have the caller split the input on the first update(), instead + * of implementing this special rule? Because we don't want to limit SIMD or + * multi-threading parallelism for that update(). + */ +static size_t blake3_compress_subtree_wide(const blake3_ops_t *ops, + const uint8_t *input, size_t input_len, const uint32_t key[8], + uint64_t chunk_counter, uint8_t flags, uint8_t *out) +{ + /* + * Note that the single chunk case does *not* bump the SIMD degree up + * to 2 when it is 1. If this implementation adds multi-threading in + * the future, this gives us the option of multi-threading even the + * 2-chunk case, which can help performance on smaller platforms. + */ + if (input_len <= (size_t)(ops->degree * BLAKE3_CHUNK_LEN)) { + return (compress_chunks_parallel(ops, input, input_len, key, + chunk_counter, flags, out)); + } + + + /* + * With more than simd_degree chunks, we need to recurse. Start by + * dividing the input into left and right subtrees. (Note that this is + * only optimal as long as the SIMD degree is a power of 2. If we ever + * get a SIMD degree of 3 or something, we'll need a more complicated + * strategy.) 
+ */ + size_t left_input_len = left_len(input_len); + size_t right_input_len = input_len - left_input_len; + const uint8_t *right_input = &input[left_input_len]; + uint64_t right_chunk_counter = chunk_counter + + (uint64_t)(left_input_len / BLAKE3_CHUNK_LEN); + + /* + * Make space for the child outputs. Here we use MAX_SIMD_DEGREE_OR_2 + * to account for the special case of returning 2 outputs when the + * SIMD degree is 1. + */ + uint8_t cv_array[2 * MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN]; + size_t degree = ops->degree; + if (left_input_len > BLAKE3_CHUNK_LEN && degree == 1) { + + /* + * The special case: We always use a degree of at least two, + * to make sure there are two outputs. Except, as noted above, + * at the chunk level, where we allow degree=1. (Note that the + * 1-chunk-input case is a different codepath.) + */ + degree = 2; + } + uint8_t *right_cvs = &cv_array[degree * BLAKE3_OUT_LEN]; + + /* + * Recurse! If this implementation adds multi-threading support in the + * future, this is where it will go. + */ + size_t left_n = blake3_compress_subtree_wide(ops, input, left_input_len, + key, chunk_counter, flags, cv_array); + size_t right_n = blake3_compress_subtree_wide(ops, right_input, + right_input_len, key, right_chunk_counter, flags, right_cvs); + + /* + * The special case again. If simd_degree=1, then we'll have left_n=1 + * and right_n=1. Rather than compressing them into a single output, + * return them directly, to make sure we always have at least two + * outputs. + */ + if (left_n == 1) { + memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN); + return (2); + } + + /* Otherwise, do one layer of parent node compression. */ + size_t num_chaining_values = left_n + right_n; + return compress_parents_parallel(ops, cv_array, + num_chaining_values, key, flags, out); +} + +/* + * Hash a subtree with compress_subtree_wide(), and then condense the resulting + * list of chaining values down to a single parent node. Don't compress that + * last parent node, however. 
Instead, return its message bytes (the + * concatenated chaining values of its children). This is necessary when the + * first call to update() supplies a complete subtree, because the topmost + * parent node of that subtree could end up being the root. It's also necessary + * for extended output in the general case. + * + * As with compress_subtree_wide(), this function is not used on inputs of 1 + * chunk or less. That's a different codepath. + */ +static void compress_subtree_to_parent_node(const blake3_ops_t *ops, + const uint8_t *input, size_t input_len, const uint32_t key[8], + uint64_t chunk_counter, uint8_t flags, uint8_t out[2 * BLAKE3_OUT_LEN]) +{ + uint8_t cv_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN]; + size_t num_cvs = blake3_compress_subtree_wide(ops, input, input_len, + key, chunk_counter, flags, cv_array); + + /* + * If MAX_SIMD_DEGREE is greater than 2 and there's enough input, + * compress_subtree_wide() returns more than 2 chaining values. Condense + * them into 2 by forming parent nodes repeatedly. + */ + uint8_t out_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN / 2]; + while (num_cvs > 2) { + num_cvs = compress_parents_parallel(ops, cv_array, num_cvs, key, + flags, out_array); + memcpy(cv_array, out_array, num_cvs * BLAKE3_OUT_LEN); + } + memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN); +} + +static void hasher_init_base(BLAKE3_CTX *ctx, const uint32_t key[8], + uint8_t flags) +{ + memcpy(ctx->key, key, BLAKE3_KEY_LEN); + chunk_state_init(&ctx->chunk, key, flags); + ctx->cv_stack_len = 0; + ctx->ops = blake3_get_ops(); +} + +/* + * As described in hasher_push_cv() below, we do "lazy merging", delaying + * merges until right before the next CV is about to be added. This is + * different from the reference implementation. Another difference is that we + * aren't always merging 1 chunk at a time. Instead, each CV might represent + * any power-of-two number of chunks, as long as the smaller-above-larger + * stack order is maintained. 
Instead of the "count the trailing 0-bits" + * algorithm described in the spec, we use a "count the total number of + * 1-bits" variant that doesn't require us to retain the subtree size of the + * CV on top of the stack. The principle is the same: each CV that should + * remain in the stack is represented by a 1-bit in the total number of chunks + * (or bytes) so far. + */ +static void hasher_merge_cv_stack(BLAKE3_CTX *ctx, uint64_t total_len) +{ + size_t post_merge_stack_len = (size_t)popcnt(total_len); + while (ctx->cv_stack_len > post_merge_stack_len) { + uint8_t *parent_node = + &ctx->cv_stack[(ctx->cv_stack_len - 2) * BLAKE3_OUT_LEN]; + output_t output = + parent_output(parent_node, ctx->key, ctx->chunk.flags); + output_chaining_value(ctx->ops, &output, parent_node); + ctx->cv_stack_len -= 1; + } +} + +/* + * In reference_impl.rs, we merge the new CV with existing CVs from the stack + * before pushing it. We can do that because we know more input is coming, so + * we know none of the merges are root. + * + * This setting is different. We want to feed as much input as possible to + * compress_subtree_wide(), without setting aside anything for the chunk_state. + * If the user gives us 64 KiB, we want to parallelize over all 64 KiB at once + * as a single subtree, if at all possible. + * + * This leads to two problems: + * 1) This 64 KiB input might be the only call that ever gets made to update. + * In this case, the root node of the 64 KiB subtree would be the root node + * of the whole tree, and it would need to be ROOT finalized. We can't + * compress it until we know. + * 2) This 64 KiB input might complete a larger tree, whose root node is + * similarly going to be the root of the whole tree. For example, maybe + * we have 192 KiB (that is, 128 + 64) hashed so far. We can't compress the + * node at the root of the 256 KiB subtree until we know how to finalize it. + * + * The second problem is solved with "lazy merging". 
That is, when we're about + * to add a CV to the stack, we don't merge it with anything first, as the + * reference impl does. Instead we do merges using the *previous* CV that was + * added, which is sitting on top of the stack, and we put the new CV + * (unmerged) on top of the stack afterwards. This guarantees that we never + * merge the root node until finalize(). + * + * Solving the first problem requires an additional tool, + * compress_subtree_to_parent_node(). That function always returns the top + * *two* chaining values of the subtree it's compressing. We then do lazy + * merging with each of them separately, so that the second CV will always + * remain unmerged. (That also helps us support extendable output when we're + * hashing an input all-at-once.) + */ +static void hasher_push_cv(BLAKE3_CTX *ctx, uint8_t new_cv[BLAKE3_OUT_LEN], + uint64_t chunk_counter) +{ + hasher_merge_cv_stack(ctx, chunk_counter); + memcpy(&ctx->cv_stack[ctx->cv_stack_len * BLAKE3_OUT_LEN], new_cv, + BLAKE3_OUT_LEN); + ctx->cv_stack_len += 1; +} + +/* Start an unkeyed hash: hands BLAKE3_IV and 0 to hasher_init_base(). */ void +Blake3_Init(BLAKE3_CTX *ctx) +{ + hasher_init_base(ctx, BLAKE3_IV, 0); +} + +/* Start a keyed hash: the key bytes are unpacked into eight 32-bit words and handed to hasher_init_base() together with KEYED_HASH. */ void +Blake3_InitKeyed(BLAKE3_CTX *ctx, const uint8_t key[BLAKE3_KEY_LEN]) +{ + uint32_t key_words[8]; + load_key_words(key, key_words); + hasher_init_base(ctx, key_words, KEYED_HASH); +} + +/* Absorb input_len bytes of input: finish any partially filled chunk, hash whole power-of-2 subtrees, then buffer what is left in ctx->chunk. */ static void +Blake3_Update2(BLAKE3_CTX *ctx, const void *input, size_t input_len) +{ + /* + * Explicitly checking for zero avoids causing UB by passing a null + * pointer to memcpy. This comes up in practice with things like: + * std::vector<uint8_t> v; + * blake3_hasher_update(&hasher, v.data(), v.size()); + */ + if (input_len == 0) { + return; + } + + const uint8_t *input_bytes = (const uint8_t *)input; + + /* + * If we have some partial chunk bytes in the internal chunk_state, we + * need to finish that chunk first. 
+ */ + if (chunk_state_len(&ctx->chunk) > 0) { + size_t take = BLAKE3_CHUNK_LEN - chunk_state_len(&ctx->chunk); + if (take > input_len) { + take = input_len; + } + chunk_state_update(ctx->ops, &ctx->chunk, input_bytes, take); + input_bytes += take; + input_len -= take; + /* + * If we've filled the current chunk and there's more coming, + * finalize this chunk and proceed. In this case we know it's + * not the root. + */ + if (input_len > 0) { + output_t output = chunk_state_output(&ctx->chunk); + uint8_t chunk_cv[32]; + output_chaining_value(ctx->ops, &output, chunk_cv); + hasher_push_cv(ctx, chunk_cv, ctx->chunk.chunk_counter); + chunk_state_reset(&ctx->chunk, ctx->key, + ctx->chunk.chunk_counter + 1); + } else { + return; + } + } + + /* + * Now the chunk_state is clear, and we have more input. If there's + * more than a single chunk (so, definitely not the root chunk), hash + * the largest whole subtree we can, with the full benefits of SIMD + * (and maybe in the future, multi-threading) parallelism. Two + * restrictions: + * - The subtree has to be a power-of-2 number of chunks. Only + * subtrees along the right edge can be incomplete, and we don't know + * where the right edge is going to be until we get to finalize(). + * - The subtree must evenly divide the total number of chunks up + * until this point (if total is not 0). If the current incomplete + * subtree is only waiting for 1 more chunk, we can't hash a subtree + * of 4 chunks. We have to complete the current subtree first. + * Because we might need to break up the input to form powers of 2, or + * to evenly divide what we already have, this part runs in a loop. + */ + while (input_len > BLAKE3_CHUNK_LEN) { + size_t subtree_len = round_down_to_power_of_2(input_len); + uint64_t count_so_far = + ctx->chunk.chunk_counter * BLAKE3_CHUNK_LEN; + /* + * Shrink the subtree_len until it evenly divides the count so + * far. 
We know that subtree_len itself is a power of 2, so we + * can use a bitmasking trick instead of an actual remainder + * operation. (Note that if the caller consistently passes + * power-of-2 inputs of the same size, as is hopefully + * typical, this loop condition will always fail, and + * subtree_len will always be the full length of the input.) + * + * An aside: We don't have to shrink subtree_len quite this + * much. For example, if count_so_far is 1, we could pass 2 + * chunks to compress_subtree_to_parent_node. Since we'll get + * 2 CVs back, we'll still get the right answer in the end, + * and we might get to use 2-way SIMD parallelism. The problem + * with this optimization is that it gets us stuck always + * hashing 2 chunks. The total number of chunks will remain + * odd, and we'll never graduate to higher degrees of + * parallelism. See + * https://github.com/BLAKE3-team/BLAKE3/issues/69. + */ + while ((((uint64_t)(subtree_len - 1)) & count_so_far) != 0) { + subtree_len /= 2; + } + /* + * The shrunken subtree_len might now be 1 chunk long. If so, + * hash that one chunk by itself. Otherwise, compress the + * subtree into a pair of CVs. + */ + uint64_t subtree_chunks = subtree_len / BLAKE3_CHUNK_LEN; + if (subtree_len <= BLAKE3_CHUNK_LEN) { + blake3_chunk_state_t chunk_state; + chunk_state_init(&chunk_state, ctx->key, + ctx->chunk.flags); + chunk_state.chunk_counter = ctx->chunk.chunk_counter; + chunk_state_update(ctx->ops, &chunk_state, input_bytes, + subtree_len); + output_t output = chunk_state_output(&chunk_state); + uint8_t cv[BLAKE3_OUT_LEN]; + output_chaining_value(ctx->ops, &output, cv); + hasher_push_cv(ctx, cv, chunk_state.chunk_counter); + } else { + /* + * This is the high-performance happy path, though + * getting here depends on the caller giving us a long + * enough input. 
+ */ + uint8_t cv_pair[2 * BLAKE3_OUT_LEN]; + compress_subtree_to_parent_node(ctx->ops, input_bytes, + subtree_len, ctx->key, ctx-> chunk.chunk_counter, + ctx->chunk.flags, cv_pair); + hasher_push_cv(ctx, cv_pair, ctx->chunk.chunk_counter); + hasher_push_cv(ctx, &cv_pair[BLAKE3_OUT_LEN], + ctx->chunk.chunk_counter + (subtree_chunks / 2)); + } + ctx->chunk.chunk_counter += subtree_chunks; + input_bytes += subtree_len; + input_len -= subtree_len; + } + + /* + * If there's any remaining input less than a full chunk, add it to + * the chunk state. In that case, also do a final merge loop to make + * sure the subtree stack doesn't contain any unmerged pairs. The + * remaining input means we know these merges are non-root. This merge + * loop isn't strictly necessary here, because hasher_push_cv + * already does its own merge loop, but it simplifies + * Blake3_FinalSeek below. + */ + if (input_len > 0) { + chunk_state_update(ctx->ops, &ctx->chunk, input_bytes, + input_len); + hasher_merge_cv_stack(ctx, ctx->chunk.chunk_counter); + } +} + +/* Public update entry point: forwards input to Blake3_Update2() in pieces of at most block_max (64 KiB) bytes. */ void +Blake3_Update(BLAKE3_CTX *ctx, const void *input, size_t todo) +{ + size_t done = 0; + const uint8_t *data = input; + const size_t block_max = 1024 * 64; + + /* max feed buffer to leave the stack size small */ + while (todo != 0) { + size_t block = (todo >= block_max) ? block_max : todo; + Blake3_Update2(ctx, data + done, block); + done += block; + todo -= block; + } +} + +/* Write BLAKE3_OUT_LEN bytes of output to out; equivalent to Blake3_FinalSeek() with seek 0. */ void +Blake3_Final(const BLAKE3_CTX *ctx, uint8_t *out) +{ + Blake3_FinalSeek(ctx, 0, out, BLAKE3_OUT_LEN); +} + +/* Write out_len bytes of root output to out without modifying ctx; seek is forwarded unchanged to output_root_bytes(). 
*/ void +Blake3_FinalSeek(const BLAKE3_CTX *ctx, uint64_t seek, uint8_t *out, + size_t out_len) +{ + /* + * Explicitly checking for zero avoids causing UB by passing a null + * pointer to memcpy. This comes up in practice with things like: + * std::vector<uint8_t> v; + * blake3_hasher_finalize(&hasher, v.data(), v.size()); + */ + if (out_len == 0) { + return; + } + /* If the subtree stack is empty, then the current chunk is the root. 
*/ + if (ctx->cv_stack_len == 0) { + output_t output = chunk_state_output(&ctx->chunk); + output_root_bytes(ctx->ops, &output, seek, out, out_len); + return; + } + /* + * If there are any bytes in the chunk state, finalize that chunk and + * do a roll-up merge between that chunk hash and every subtree in the + * stack. In this case, the extra merge loop at the end of + * Blake3_Update2 guarantees that none of the subtrees in the + * stack need to be merged with each other first. Otherwise, if there + * are no bytes in the chunk state, then the top of the stack is a + * chunk hash, and we start the merge from that. + */ + output_t output; + size_t cvs_remaining; + if (chunk_state_len(&ctx->chunk) > 0) { + cvs_remaining = ctx->cv_stack_len; + output = chunk_state_output(&ctx->chunk); + } else { + /* There are always at least 2 CVs in the stack in this case. */ + cvs_remaining = ctx->cv_stack_len - 2; + output = parent_output(&ctx->cv_stack[cvs_remaining * 32], + ctx->key, ctx->chunk.flags); + } + while (cvs_remaining > 0) { + cvs_remaining -= 1; + uint8_t parent_block[BLAKE3_BLOCK_LEN]; + memcpy(parent_block, &ctx->cv_stack[cvs_remaining * 32], 32); + output_chaining_value(ctx->ops, &output, &parent_block[32]); + output = parent_output(parent_block, ctx->key, + ctx->chunk.flags); + } + output_root_bytes(ctx->ops, &output, seek, out, out_len); +} diff --git a/sys/contrib/openzfs/module/icp/algs/blake3/blake3_generic.c b/sys/contrib/openzfs/module/icp/algs/blake3/blake3_generic.c new file mode 100644 index 000000000000..fbe184969672 --- /dev/null +++ b/sys/contrib/openzfs/module/icp/algs/blake3/blake3_generic.c @@ -0,0 +1,204 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3 + * Copyright (c) 2019-2020 Samuel Neves and Jack O'Connor + * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de> + */ + +#include <sys/simd.h> +#include <sys/zfs_context.h> +#include "blake3_impl.h" + +/* Rotate the 32-bit value x right by n bits. */ #define rotr32(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) +/* Mixing step: updates state[a], state[b], state[c] and state[d] from message words x and y using additions, XORs and right-rotations by 16, 12, 8 and 7. */ static inline void g(uint32_t *state, size_t a, size_t b, size_t c, size_t d, + uint32_t x, uint32_t y) +{ + state[a] = state[a] + state[b] + x; + state[d] = rotr32(state[d] ^ state[a], 16); + state[c] = state[c] + state[d]; + state[b] = rotr32(state[b] ^ state[c], 12); + state[a] = state[a] + state[b] + y; + state[d] = rotr32(state[d] ^ state[a], 8); + state[c] = state[c] + state[d]; + state[b] = rotr32(state[b] ^ state[c], 7); +} + +/* One round: eight g() calls whose message words are picked through BLAKE3_MSG_SCHEDULE[round]. */ static inline void round_fn(uint32_t state[16], const uint32_t *msg, + size_t round) +{ + /* Select the message schedule based on the round. */ + const uint8_t *schedule = BLAKE3_MSG_SCHEDULE[round]; + + /* Mix the columns. */ + g(state, 0, 4, 8, 12, msg[schedule[0]], msg[schedule[1]]); + g(state, 1, 5, 9, 13, msg[schedule[2]], msg[schedule[3]]); + g(state, 2, 6, 10, 14, msg[schedule[4]], msg[schedule[5]]); + g(state, 3, 7, 11, 15, msg[schedule[6]], msg[schedule[7]]); + + /* Mix the rows. 
*/ + g(state, 0, 5, 10, 15, msg[schedule[8]], msg[schedule[9]]); + g(state, 1, 6, 11, 12, msg[schedule[10]], msg[schedule[11]]); + g(state, 2, 7, 8, 13, msg[schedule[12]], msg[schedule[13]]); + g(state, 3, 4, 9, 14, msg[schedule[14]], msg[schedule[15]]); +} + +/* Fill state[16] for one block: load the block as sixteen 32-bit words via load32(), seed the state from cv, BLAKE3_IV[0..3], the two counter halves, block_len and flags, then run rounds 0 through 6. */ static inline void compress_pre(uint32_t state[16], const uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, + uint64_t counter, uint8_t flags) +{ + uint32_t block_words[16]; + block_words[0] = load32(block + 4 * 0); + block_words[1] = load32(block + 4 * 1); + block_words[2] = load32(block + 4 * 2); + block_words[3] = load32(block + 4 * 3); + block_words[4] = load32(block + 4 * 4); + block_words[5] = load32(block + 4 * 5); + block_words[6] = load32(block + 4 * 6); + block_words[7] = load32(block + 4 * 7); + block_words[8] = load32(block + 4 * 8); + block_words[9] = load32(block + 4 * 9); + block_words[10] = load32(block + 4 * 10); + block_words[11] = load32(block + 4 * 11); + block_words[12] = load32(block + 4 * 12); + block_words[13] = load32(block + 4 * 13); + block_words[14] = load32(block + 4 * 14); + block_words[15] = load32(block + 4 * 15); + + state[0] = cv[0]; + state[1] = cv[1]; + state[2] = cv[2]; + state[3] = cv[3]; + state[4] = cv[4]; + state[5] = cv[5]; + state[6] = cv[6]; + state[7] = cv[7]; + state[8] = BLAKE3_IV[0]; + state[9] = BLAKE3_IV[1]; + state[10] = BLAKE3_IV[2]; + state[11] = BLAKE3_IV[3]; + state[12] = counter_low(counter); + state[13] = counter_high(counter); + state[14] = (uint32_t)block_len; + state[15] = (uint32_t)flags; + + round_fn(state, &block_words[0], 0); + round_fn(state, &block_words[0], 1); + round_fn(state, &block_words[0], 2); + round_fn(state, &block_words[0], 3); + round_fn(state, &block_words[0], 4); + round_fn(state, &block_words[0], 5); + round_fn(state, &block_words[0], 6); +} + +/* Compress one block and overwrite cv with the new chaining value, cv[i] = state[i] ^ state[i + 8]. */ static inline void blake3_compress_in_place_generic(uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, + uint64_t counter, uint8_t flags) 
+{ + uint32_t state[16]; + compress_pre(state, cv, block, block_len, counter, flags); + cv[0] = state[0] ^ state[8]; + cv[1] = state[1] ^ state[9]; + cv[2] = state[2] ^ state[10]; + cv[3] = state[3] ^ state[11]; + cv[4] = state[4] ^ state[12]; + cv[5] = state[5] ^ state[13]; + cv[6] = state[6] ^ state[14]; + cv[7] = state[7] ^ state[15]; +} + +/* Chain `blocks` consecutive BLAKE3_BLOCK_LEN-byte blocks of input, starting from key, and store the final CV in out; flags_start is OR-ed into the first block only and flags_end into the last block only. */ static inline void hash_one_generic(const uint8_t *input, size_t blocks, + const uint32_t key[8], uint64_t counter, uint8_t flags, + uint8_t flags_start, uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]) +{ + uint32_t cv[8]; + memcpy(cv, key, BLAKE3_KEY_LEN); + uint8_t block_flags = flags | flags_start; + while (blocks > 0) { + if (blocks == 1) { + block_flags |= flags_end; + } + blake3_compress_in_place_generic(cv, input, BLAKE3_BLOCK_LEN, + counter, block_flags); + input = &input[BLAKE3_BLOCK_LEN]; + blocks -= 1; + block_flags = flags; + } + store_cv_words(out, cv); +} + +/* Compress one block and emit all 64 output bytes: words 0-7 are state[i] ^ state[i + 8], words 8-15 are state[i + 8] ^ cv[i]; cv itself is left unchanged. */ static inline void blake3_compress_xof_generic(const uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, + uint64_t counter, uint8_t flags, uint8_t out[64]) +{ + uint32_t state[16]; + compress_pre(state, cv, block, block_len, counter, flags); + + store32(&out[0 * 4], state[0] ^ state[8]); + store32(&out[1 * 4], state[1] ^ state[9]); + store32(&out[2 * 4], state[2] ^ state[10]); + store32(&out[3 * 4], state[3] ^ state[11]); + store32(&out[4 * 4], state[4] ^ state[12]); + store32(&out[5 * 4], state[5] ^ state[13]); + store32(&out[6 * 4], state[6] ^ state[14]); + store32(&out[7 * 4], state[7] ^ state[15]); + store32(&out[8 * 4], state[8] ^ cv[0]); + store32(&out[9 * 4], state[9] ^ cv[1]); + store32(&out[10 * 4], state[10] ^ cv[2]); + store32(&out[11 * 4], state[11] ^ cv[3]); + store32(&out[12 * 4], state[12] ^ cv[4]); + store32(&out[13 * 4], state[13] ^ cv[5]); + store32(&out[14 * 4], state[14] ^ cv[6]); + store32(&out[15 * 4], state[15] ^ cv[7]); +} + +/* Run hash_one_generic() once per input, writing BLAKE3_OUT_LEN bytes of out for each; counter advances by one per input only when increment_counter is set. */ static inline void blake3_hash_many_generic(const uint8_t * const *inputs, + size_t 
num_inputs, size_t blocks, const uint32_t key[8], uint64_t counter, + boolean_t increment_counter, uint8_t flags, uint8_t flags_start, + uint8_t flags_end, uint8_t *out) +{ + while (num_inputs > 0) { + hash_one_generic(inputs[0], blocks, key, counter, flags, + flags_start, flags_end, out); + if (increment_counter) { + counter += 1; + } + inputs += 1; + num_inputs -= 1; + out = &out[BLAKE3_OUT_LEN]; + } +} + +/* the generic implementation is always okay */ +static boolean_t blake3_is_supported(void) +{ + return (B_TRUE); +} + +/* Ops table for the portable C implementation defined in this file. */ const blake3_ops_t blake3_generic_impl = { + .compress_in_place = blake3_compress_in_place_generic, + .compress_xof = blake3_compress_xof_generic, + .hash_many = blake3_hash_many_generic, + .is_supported = blake3_is_supported, + .degree = 4, + .name = "generic" +}; diff --git a/sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.c b/sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.c new file mode 100644 index 000000000000..5684b4ff1a97 --- /dev/null +++ b/sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.c @@ -0,0 +1,407 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de> + */ + +#include <sys/simd.h> +#include <sys/zfs_context.h> +#include <sys/zfs_impl.h> +#include <sys/blake3.h> + +#include "blake3_impl.h" + +/* USE_SIMD gates the assembly-backed "sse2" and "sse41" entries below: aarch64, x86_64 with HAVE_SSE2, or little-endian PPC64, unless OMIT_SIMD is defined. */ #if !defined(OMIT_SIMD) && (defined(__aarch64__) || \ + (defined(__x86_64) && defined(HAVE_SSE2)) || \ + (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))) +#define USE_SIMD +#endif + +#ifdef USE_SIMD +extern void ASMABI zfs_blake3_compress_in_place_sse2(uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, + uint64_t counter, uint8_t flags); + +extern void ASMABI zfs_blake3_compress_xof_sse2(const uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, + uint64_t counter, uint8_t flags, uint8_t out[64]); + +extern void ASMABI zfs_blake3_hash_many_sse2(const uint8_t * const *inputs, + size_t num_inputs, size_t blocks, const uint32_t key[8], + uint64_t counter, boolean_t increment_counter, uint8_t flags, + uint8_t flags_start, uint8_t flags_end, uint8_t *out); + +/* Each static wrapper in this file brackets its zfs_blake3_* assembly routine with kfpu_begin()/kfpu_end(). */ static void blake3_compress_in_place_sse2(uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, + uint64_t counter, uint8_t flags) { + kfpu_begin(); + zfs_blake3_compress_in_place_sse2(cv, block, block_len, counter, + flags); + kfpu_end(); +} + +static void blake3_compress_xof_sse2(const uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, + uint64_t counter, uint8_t flags, uint8_t out[64]) { + kfpu_begin(); + zfs_blake3_compress_xof_sse2(cv, block, block_len, counter, flags, + out); + kfpu_end(); +} + +static void blake3_hash_many_sse2(const uint8_t * const *inputs, + size_t num_inputs, size_t blocks, const uint32_t key[8], + uint64_t counter, boolean_t increment_counter, 
uint8_t flags, + uint8_t flags_start, uint8_t flags_end, uint8_t *out) { + kfpu_begin(); + zfs_blake3_hash_many_sse2(inputs, num_inputs, blocks, key, counter, + increment_counter, flags, flags_start, flags_end, out); + kfpu_end(); +} + +/* Availability of the "sse2" entry: kfpu_allowed() plus SSE2 on x86_64 or VSX on PPC64; any other architecture needs only kfpu_allowed(). */ static boolean_t blake3_is_sse2_supported(void) +{ +#if defined(__x86_64) + return (kfpu_allowed() && zfs_sse2_available()); +#elif defined(__PPC64__) + return (kfpu_allowed() && zfs_vsx_available()); +#else + return (kfpu_allowed()); +#endif +} + +const blake3_ops_t blake3_sse2_impl = { + .compress_in_place = blake3_compress_in_place_sse2, + .compress_xof = blake3_compress_xof_sse2, + .hash_many = blake3_hash_many_sse2, + .is_supported = blake3_is_sse2_supported, + .degree = 4, + .name = "sse2" +}; +#endif + +#ifdef USE_SIMD + +extern void ASMABI zfs_blake3_compress_in_place_sse41(uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, + uint64_t counter, uint8_t flags); + +extern void ASMABI zfs_blake3_compress_xof_sse41(const uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, + uint64_t counter, uint8_t flags, uint8_t out[64]); + +extern void ASMABI zfs_blake3_hash_many_sse41(const uint8_t * const *inputs, + size_t num_inputs, size_t blocks, const uint32_t key[8], + uint64_t counter, boolean_t increment_counter, uint8_t flags, + uint8_t flags_start, uint8_t flags_end, uint8_t *out); + +static void blake3_compress_in_place_sse41(uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, + uint64_t counter, uint8_t flags) { + kfpu_begin(); + zfs_blake3_compress_in_place_sse41(cv, block, block_len, counter, + flags); + kfpu_end(); +} + +static void blake3_compress_xof_sse41(const uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, + uint64_t counter, uint8_t flags, uint8_t out[64]) { + kfpu_begin(); + zfs_blake3_compress_xof_sse41(cv, block, block_len, counter, flags, + out); + kfpu_end(); +} + +static void blake3_hash_many_sse41(const uint8_t * 
const *inputs, + size_t num_inputs, size_t blocks, const uint32_t key[8], + uint64_t counter, boolean_t increment_counter, uint8_t flags, + uint8_t flags_start, uint8_t flags_end, uint8_t *out) { + kfpu_begin(); + zfs_blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter, + increment_counter, flags, flags_start, flags_end, out); + kfpu_end(); +} + +/* Availability of the "sse41" entry: same shape as the sse2 check, but tests SSE4.1 on x86_64. */ static boolean_t blake3_is_sse41_supported(void) +{ +#if defined(__x86_64) + return (kfpu_allowed() && zfs_sse4_1_available()); +#elif defined(__PPC64__) + return (kfpu_allowed() && zfs_vsx_available()); +#else + return (kfpu_allowed()); +#endif +} + +const blake3_ops_t blake3_sse41_impl = { + .compress_in_place = blake3_compress_in_place_sse41, + .compress_xof = blake3_compress_xof_sse41, + .hash_many = blake3_hash_many_sse41, + .is_supported = blake3_is_sse41_supported, + .degree = 4, + .name = "sse41" +}; +#endif + +#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2) +extern void ASMABI zfs_blake3_hash_many_avx2(const uint8_t * const *inputs, + size_t num_inputs, size_t blocks, const uint32_t key[8], + uint64_t counter, boolean_t increment_counter, uint8_t flags, + uint8_t flags_start, uint8_t flags_end, uint8_t *out); + +static void blake3_hash_many_avx2(const uint8_t * const *inputs, + size_t num_inputs, size_t blocks, const uint32_t key[8], + uint64_t counter, boolean_t increment_counter, uint8_t flags, + uint8_t flags_start, uint8_t flags_end, uint8_t *out) { + kfpu_begin(); + zfs_blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter, + increment_counter, flags, flags_start, flags_end, out); + kfpu_end(); +} + +static boolean_t blake3_is_avx2_supported(void) +{ + return (kfpu_allowed() && zfs_sse4_1_available() && + zfs_avx2_available()); +} + +/* AVX2 supplies hash_many only; both single-block entry points reuse the SSE4.1 wrappers, which is why the support check also requires SSE4.1. */ const blake3_ops_t +blake3_avx2_impl = { + .compress_in_place = blake3_compress_in_place_sse41, + .compress_xof = blake3_compress_xof_sse41, + .hash_many = blake3_hash_many_avx2, + .is_supported = blake3_is_avx2_supported, + .degree = 8, + 
.name = "avx2" +}; +#endif + +#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL) +extern void ASMABI zfs_blake3_compress_in_place_avx512(uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, + uint64_t counter, uint8_t flags); + +extern void ASMABI zfs_blake3_compress_xof_avx512(const uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, + uint64_t counter, uint8_t flags, uint8_t out[64]); + +extern void ASMABI zfs_blake3_hash_many_avx512(const uint8_t * const *inputs, + size_t num_inputs, size_t blocks, const uint32_t key[8], + uint64_t counter, boolean_t increment_counter, uint8_t flags, + uint8_t flags_start, uint8_t flags_end, uint8_t *out); + +static void blake3_compress_in_place_avx512(uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, + uint64_t counter, uint8_t flags) { + kfpu_begin(); + zfs_blake3_compress_in_place_avx512(cv, block, block_len, counter, + flags); + kfpu_end(); +} + +static void blake3_compress_xof_avx512(const uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, + uint64_t counter, uint8_t flags, uint8_t out[64]) { + kfpu_begin(); + zfs_blake3_compress_xof_avx512(cv, block, block_len, counter, flags, + out); + kfpu_end(); +} + +static void blake3_hash_many_avx512(const uint8_t * const *inputs, + size_t num_inputs, size_t blocks, const uint32_t key[8], + uint64_t counter, boolean_t increment_counter, uint8_t flags, + uint8_t flags_start, uint8_t flags_end, uint8_t *out) { + kfpu_begin(); + zfs_blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter, + increment_counter, flags, flags_start, flags_end, out); + kfpu_end(); +} + +/* Availability of the "avx512" entry: kfpu_allowed() plus both AVX512F and AVX512VL. */ static boolean_t blake3_is_avx512_supported(void) +{ + return (kfpu_allowed() && zfs_avx512f_available() && + zfs_avx512vl_available()); +} + +const blake3_ops_t blake3_avx512_impl = { + .compress_in_place = blake3_compress_in_place_avx512, + .compress_xof = blake3_compress_xof_avx512, + .hash_many 
= blake3_hash_many_avx512, + .is_supported = blake3_is_avx512_supported, + .degree = 16, + .name = "avx512" +}; +#endif + +extern const blake3_ops_t blake3_generic_impl; + +/* + * Every implementation compiled into this build, generic first. This is the + * compile-time list handed to generic_impl.c through IMPL_ARRAY; it is not + * filtered by the is_supported() callbacks. + */ +static const blake3_ops_t *const blake3_impls[] = { + &blake3_generic_impl, +#ifdef USE_SIMD +#if defined(__aarch64__) || \ + (defined(__x86_64) && defined(HAVE_SSE2)) || \ + (defined(__PPC64__) && defined(__LITTLE_ENDIAN__)) + &blake3_sse2_impl, +#endif +#if defined(__aarch64__) || \ + (defined(__x86_64) && defined(HAVE_SSE4_1)) || \ + (defined(__PPC64__) && defined(__LITTLE_ENDIAN__)) + &blake3_sse41_impl, +#endif +#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2) + &blake3_avx2_impl, +#endif +#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL) + &blake3_avx512_impl, +#endif +#endif +}; + +/* use the generic implementation functions */ +#define IMPL_NAME "blake3" +#define IMPL_OPS_T blake3_ops_t +#define IMPL_ARRAY blake3_impls +#define IMPL_GET_OPS blake3_get_ops +#define ZFS_IMPL_OPS zfs_blake3_ops +#include <generic_impl.c> + +#ifdef _KERNEL +void **blake3_per_cpu_ctx; + +/* Allocate the pointer table and one BLAKE3_CTX per slot, max_ncpus slots in total. */ +void +blake3_per_cpu_ctx_init(void) +{ + /* + * Create "The Godfather" ptr to hold all blake3 ctx + */ + blake3_per_cpu_ctx = kmem_alloc(max_ncpus * sizeof (void *), KM_SLEEP); + for (int i = 0; i < max_ncpus; i++) { + blake3_per_cpu_ctx[i] = kmem_alloc(sizeof (BLAKE3_CTX), + KM_SLEEP); + } +} + +/* Zero and free every per-slot BLAKE3_CTX, then zero and free the pointer table. */ +void +blake3_per_cpu_ctx_fini(void) +{ + for (int i = 0; i < max_ncpus; i++) { + memset(blake3_per_cpu_ctx[i], 0, sizeof (BLAKE3_CTX)); + kmem_free(blake3_per_cpu_ctx[i], sizeof (BLAKE3_CTX)); + } + memset(blake3_per_cpu_ctx, 0, max_ncpus * sizeof (void *)); + kmem_free(blake3_per_cpu_ctx, max_ncpus * sizeof (void *)); +} + +/* printf format for one list entry: bracketed when i is the selected implementation. */ +#define IMPL_FMT(impl, i) (((impl) == (i)) ? 
"[%s] " : "%s ") + +#if defined(__linux__) + +/* + * Module-parameter read handler: prints "cycle", "fastest" and the name of + * each supported implementation into buffer, bracketing the entry that is + * currently selected. Returns the accumulated kmem_scnprintf() count. + */ +static int +blake3_param_get(char *buffer, zfs_kernel_param_t *unused) +{ + const uint32_t impl = IMPL_READ(generic_impl_chosen); + const char *fmt; + int cnt = 0; + + (void) unused; + + /* cycling */ + fmt = IMPL_FMT(impl, IMPL_CYCLE); + cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt, "cycle"); + + /* list fastest */ + fmt = IMPL_FMT(impl, IMPL_FASTEST); + cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt, "fastest"); + + /* + * list all supported implementations + * + * The loop bound is generic_supp_impls_cnt, so the names must come + * from generic_supp_impls[] as well. Indexing blake3_impls[] (the + * compile-time list) prints the wrong names whenever an earlier + * entry is compiled in but not supported on this machine. + */ + generic_impl_init(); + for (uint32_t i = 0; i < generic_supp_impls_cnt; ++i) { + fmt = IMPL_FMT(impl, i); + cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt, + generic_supp_impls[i]->name); + } + + return (cnt); +} + +/* Module-parameter write handler: selects the implementation named by val. */ +static int +blake3_param_set(const char *val, zfs_kernel_param_t *unused) +{ + (void) unused; + return (generic_impl_setname(val)); +} + +#elif defined(__FreeBSD__) + +#include <sys/sbuf.h> + +/* + * sysctl handler. A request without new data lists "cycle", "fastest" and + * the supported implementations; otherwise the supplied string is passed to + * generic_impl_setname(). + */ +static int +blake3_param(ZFS_MODULE_PARAM_ARGS) +{ + int err; + + generic_impl_init(); + if (req->newptr == NULL) { + const uint32_t impl = IMPL_READ(generic_impl_chosen); + const int init_buflen = 64; + const char *fmt; + struct sbuf *s; + + s = sbuf_new_for_sysctl(NULL, NULL, init_buflen, req); + + /* cycling */ + fmt = IMPL_FMT(impl, IMPL_CYCLE); + (void) sbuf_printf(s, fmt, "cycle"); + + /* list fastest */ + fmt = IMPL_FMT(impl, IMPL_FASTEST); + (void) sbuf_printf(s, fmt, "fastest"); + + /* list all supported implementations */ + for (uint32_t i = 0; i < generic_supp_impls_cnt; ++i) { + fmt = IMPL_FMT(impl, i); + (void) sbuf_printf(s, fmt, generic_supp_impls[i]->name); + } + + err = sbuf_finish(s); + sbuf_delete(s); + + return (err); + } + + /* + * Start from an empty string: sysctl_handle_string() copies the + * buffer's current contents out as the old value, so it must not be + * handed uninitialized stack memory. + */ + char buf[16] = ""; + + err = sysctl_handle_string(oidp, buf, sizeof (buf), req); + if (err) { + return (err); + } + + return (-generic_impl_setname(buf)); +} +#endif + +#undef IMPL_FMT + +ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs, zfs_, blake3_impl, + blake3_param_set, blake3_param_get, ZMOD_RW, \ + "Select BLAKE3 implementation."); 
+#endif diff --git a/sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.h b/sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.h new file mode 100644 index 000000000000..90d508fac08f --- /dev/null +++ b/sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.h @@ -0,0 +1,191 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3 + * Copyright (c) 2019-2020 Samuel Neves and Jack O'Connor + * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de> + */ + +#ifndef BLAKE3_IMPL_H +#define BLAKE3_IMPL_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include <sys/blake3.h> +#include <sys/simd.h> +#include <sys/asm_linkage.h> + +/* + * Methods used to define BLAKE3 assembler implementations + */ +typedef void (*blake3_compress_in_place_f)(uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], + uint8_t block_len, uint64_t counter, + uint8_t flags); + +typedef void (*blake3_compress_xof_f)(const uint32_t cv[8], + const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, + uint64_t counter, uint8_t flags, uint8_t out[64]); + +typedef void (*blake3_hash_many_f)(const uint8_t * const *inputs, + size_t num_inputs, 
size_t blocks, const uint32_t key[8], + uint64_t counter, boolean_t increment_counter, uint8_t flags, + uint8_t flags_start, uint8_t flags_end, uint8_t *out); + +typedef boolean_t (*blake3_is_supported_f)(void); + +/* Function table describing one BLAKE3 implementation: three hashing entry points, a runtime availability check, plus a degree and a display name. */ typedef struct { + blake3_compress_in_place_f compress_in_place; + blake3_compress_xof_f compress_xof; + blake3_hash_many_f hash_many; + blake3_is_supported_f is_supported; + int degree; + const char *name; +} blake3_ops_t; + +/* return selected BLAKE3 implementation ops */ +extern const blake3_ops_t *blake3_get_ops(void); + +#if defined(__x86_64) +#define MAX_SIMD_DEGREE 16 +#else +#define MAX_SIMD_DEGREE 4 +#endif + +#define MAX_SIMD_DEGREE_OR_2 (MAX_SIMD_DEGREE > 2 ? MAX_SIMD_DEGREE : 2) + +static const uint32_t BLAKE3_IV[8] = { + 0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL, + 0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL}; + +/* Message-word order for each of the 7 rounds; row r is indexed by the round number. */ static const uint8_t BLAKE3_MSG_SCHEDULE[7][16] = { + {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15}, + {2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8}, + {3, 4, 10, 12, 13, 2, 7, 14, 6, 5, 9, 0, 11, 15, 8, 1}, + {10, 7, 12, 9, 14, 3, 13, 15, 4, 0, 11, 2, 5, 8, 1, 6}, + {12, 13, 9, 11, 15, 10, 14, 8, 7, 2, 5, 3, 0, 1, 6, 4}, + {9, 14, 11, 5, 8, 12, 15, 1, 13, 3, 0, 10, 2, 6, 4, 7}, + {11, 15, 5, 0, 1, 9, 8, 6, 14, 10, 2, 12, 3, 4, 7, 13}, +}; + +/* Find index of the highest set bit */ +static inline unsigned int highest_one(uint64_t x) { +#if defined(__GNUC__) || defined(__clang__) + return (63 ^ __builtin_clzll(x)); +#elif defined(_MSC_VER) && defined(IS_X86_64) + unsigned long index; + _BitScanReverse64(&index, x); + return (index); +#elif defined(_MSC_VER) && defined(IS_X86_32) + if (x >> 32) { + unsigned long index; + _BitScanReverse(&index, x >> 32); + return (32 + index); + } else { + unsigned long index; + _BitScanReverse(&index, x); + return (index); + } +#else + unsigned int c = 0; + if (x & 0xffffffff00000000ULL) { x >>= 32; c += 32; } + if (x & 0x00000000ffff0000ULL) { x >>= 16; c += 
16; } + if (x & 0x000000000000ff00ULL) { x >>= 8; c += 8; } + if (x & 0x00000000000000f0ULL) { x >>= 4; c += 4; } + if (x & 0x000000000000000cULL) { x >>= 2; c += 2; } + if (x & 0x0000000000000002ULL) { c += 1; } + return (c); +#endif +} + +/* Count the number of 1 bits. */ +static inline unsigned int popcnt(uint64_t x) { + unsigned int count = 0; + + while (x != 0) { + count += 1; + x &= x - 1; + } + + return (count); +} + +/* + * Largest power of two less than or equal to x. + * As a special case, returns 1 when x is 0. + */ +static inline uint64_t round_down_to_power_of_2(uint64_t x) { + return (1ULL << highest_one(x | 1)); +} + +/* Low 32 bits of counter. */ static inline uint32_t counter_low(uint64_t counter) { + return ((uint32_t)counter); +} + +/* High 32 bits of counter. */ static inline uint32_t counter_high(uint64_t counter) { + return ((uint32_t)(counter >> 32)); +} + +/* Read a 32-bit little-endian value from src, assembled byte by byte. */ static inline uint32_t load32(const void *src) { + const uint8_t *p = (const uint8_t *)src; + return ((uint32_t)(p[0]) << 0) | ((uint32_t)(p[1]) << 8) | + ((uint32_t)(p[2]) << 16) | ((uint32_t)(p[3]) << 24); +} + +/* Unpack the key bytes into eight little-endian 32-bit words. */ static inline void load_key_words(const uint8_t key[BLAKE3_KEY_LEN], + uint32_t key_words[8]) { + key_words[0] = load32(&key[0 * 4]); + key_words[1] = load32(&key[1 * 4]); + key_words[2] = load32(&key[2 * 4]); + key_words[3] = load32(&key[3 * 4]); + key_words[4] = load32(&key[4 * 4]); + key_words[5] = load32(&key[5 * 4]); + key_words[6] = load32(&key[6 * 4]); + key_words[7] = load32(&key[7 * 4]); +} + +/* Write w to dst as four bytes, least significant byte first. */ static inline void store32(void *dst, uint32_t w) { + uint8_t *p = (uint8_t *)dst; + p[0] = (uint8_t)(w >> 0); + p[1] = (uint8_t)(w >> 8); + p[2] = (uint8_t)(w >> 16); + p[3] = (uint8_t)(w >> 24); +} + +/* Serialize the eight CV words into 32 bytes using store32(). */ static inline void store_cv_words(uint8_t bytes_out[32], uint32_t cv_words[8]) { + store32(&bytes_out[0 * 4], cv_words[0]); + store32(&bytes_out[1 * 4], cv_words[1]); + store32(&bytes_out[2 * 4], cv_words[2]); + store32(&bytes_out[3 * 4], cv_words[3]); + store32(&bytes_out[4 * 4], cv_words[4]); + store32(&bytes_out[5 * 4], cv_words[5]); + 
store32(&bytes_out[6 * 4], cv_words[6]); + store32(&bytes_out[7 * 4], cv_words[7]); +} + +#ifdef __cplusplus +} +#endif + +#endif /* BLAKE3_IMPL_H */ diff --git a/sys/contrib/openzfs/module/icp/algs/edonr/edonr.c b/sys/contrib/openzfs/module/icp/algs/edonr/edonr.c index ee96e692ef00..d17a40cefcb8 100644 --- a/sys/contrib/openzfs/module/icp/algs/edonr/edonr.c +++ b/sys/contrib/openzfs/module/icp/algs/edonr/edonr.c @@ -1,6 +1,4 @@ /* - * IDI,NTNU - * * CDDL HEADER START * * The contents of this file are subject to the terms of the @@ -19,72 +17,44 @@ * information: Portions Copyright [yyyy] [name of copyright owner] * * CDDL HEADER END - * - * Copyright (C) 2009, 2010, Jorn Amundsen <jorn.amundsen@ntnu.no> - * Tweaked Edon-R implementation for SUPERCOP, based on NIST API. - * - * $Id: edonr.c 517 2013-02-17 20:34:39Z joern $ */ + /* - * Portions copyright (c) 2013, Saso Kiselkov, All rights reserved + * Based on Edon-R implementation for SUPERCOP, based on NIST API. + * Copyright (c) 2009, 2010, Jørn Amundsen <jorn.amundsen@ntnu.no> + * Copyright (c) 2013 Saso Kiselkov, All rights reserved + * Copyright (c) 2023 Tino Reichardt <milky-zfs@mcmilk.de> */ -#include <sys/strings.h> +#include <sys/zfs_context.h> +#include <sys/string.h> #include <sys/edonr.h> -#include <sys/debug.h> - -/* big endian support, provides no-op's if run on little endian hosts */ -#include "edonr_byteorder.h" -#define hashState224(x) ((x)->pipe->p256) -#define hashState256(x) ((x)->pipe->p256) -#define hashState384(x) ((x)->pipe->p512) -#define hashState512(x) ((x)->pipe->p512) - -/* shift and rotate shortcuts */ -#define shl(x, n) ((x) << n) -#define shr(x, n) ((x) >> n) - -#define rotl32(x, n) (((x) << (n)) | ((x) >> (32 - (n)))) -#define rotr32(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) - -#define rotl64(x, n) (((x) << (n)) | ((x) >> (64 - (n)))) -#define rotr64(x, n) (((x) >> (n)) | ((x) << (64 - (n)))) - -#if !defined(__C99_RESTRICT) -#define restrict /* restrict */ +/* + * We need 1196 
byte stack for Q512() on i386 + * - we define this pragma to make gcc happy + */ +#if defined(__GNUC__) && defined(_ILP32) +#pragma GCC diagnostic ignored "-Wframe-larger-than=" #endif -#define EDONR_VALID_HASHBITLEN(x) \ - ((x) == 512 || (x) == 384 || (x) == 256 || (x) == 224) +/* + * Insert compiler memory barriers to reduce stack frame size. + */ +#define MEMORY_BARRIER asm volatile("" ::: "memory"); -/* EdonR224 initial double chaining pipe */ -static const uint32_t i224p2[16] = { - 0x00010203ul, 0x04050607ul, 0x08090a0bul, 0x0c0d0e0ful, - 0x10111213ul, 0x14151617ul, 0x18191a1bul, 0x1c1d1e1ful, - 0x20212223ul, 0x24252627ul, 0x28292a2bul, 0x2c2d2e2ful, - 0x30313233ul, 0x34353637ul, 0x38393a3bul, 0x3c3d3e3ful, -}; +#if defined(_ZFS_BIG_ENDIAN) +#define ld_swap64(s, d) (d = __builtin_bswap64(*(s))) +#define st_swap64(s, d) (*(d) = __builtin_bswap64(s)) +#else +#define ld_swap64(s, d) (d = *(s)) +#define st_swap64(s, d) (*(d) = s) +#endif -/* EdonR256 initial double chaining pipe */ -static const uint32_t i256p2[16] = { - 0x40414243ul, 0x44454647ul, 0x48494a4bul, 0x4c4d4e4ful, - 0x50515253ul, 0x54555657ul, 0x58595a5bul, 0x5c5d5e5ful, - 0x60616263ul, 0x64656667ul, 0x68696a6bul, 0x6c6d6e6ful, - 0x70717273ul, 0x74757677ul, 0x78797a7bul, 0x7c7d7e7ful, -}; +#define hashState512(x) ((x)->pipe->p512) -/* EdonR384 initial double chaining pipe */ -static const uint64_t i384p2[16] = { - 0x0001020304050607ull, 0x08090a0b0c0d0e0full, - 0x1011121314151617ull, 0x18191a1b1c1d1e1full, - 0x2021222324252627ull, 0x28292a2b2c2d2e2full, - 0x3031323334353637ull, 0x38393a3b3c3d3e3full, - 0x4041424344454647ull, 0x48494a4b4c4d4e4full, - 0x5051525354555657ull, 0x58595a5b5c5d5e5full, - 0x6061626364656667ull, 0x68696a6b6c6d6e6full, - 0x7071727374757677ull, 0x78797a7b7c7d7e7full -}; +/* rotate shortcuts */ +#define rotl64(x, n) (((x) << (n)) | ((x) >> (64 - (n)))) /* EdonR512 initial double chaining pipe */ static const uint64_t i512p2[16] = { @@ -98,294 +68,66 @@ static const uint64_t 
i512p2[16] = { 0xf0f1f2f3f4f5f6f7ull, 0xf8f9fafbfcfdfeffull }; -/* - * First Latin Square - * 0 7 1 3 2 4 6 5 - * 4 1 7 6 3 0 5 2 - * 7 0 4 2 5 3 1 6 - * 1 4 0 5 6 2 7 3 - * 2 3 6 7 1 5 0 4 - * 5 2 3 1 7 6 4 0 - * 3 6 5 0 4 7 2 1 - * 6 5 2 4 0 1 3 7 - */ -#define LS1_256(c, x0, x1, x2, x3, x4, x5, x6, x7) \ -{ \ - uint32_t x04, x17, x23, x56, x07, x26; \ - x04 = x0+x4, x17 = x1+x7, x07 = x04+x17; \ - s0 = c + x07 + x2; \ - s1 = rotl32(x07 + x3, 4); \ - s2 = rotl32(x07 + x6, 8); \ - x23 = x2 + x3; \ - s5 = rotl32(x04 + x23 + x5, 22); \ - x56 = x5 + x6; \ - s6 = rotl32(x17 + x56 + x0, 24); \ - x26 = x23+x56; \ - s3 = rotl32(x26 + x7, 13); \ - s4 = rotl32(x26 + x1, 17); \ - s7 = rotl32(x26 + x4, 29); \ -} - -#define LS1_512(c, x0, x1, x2, x3, x4, x5, x6, x7) \ -{ \ - uint64_t x04, x17, x23, x56, x07, x26; \ - x04 = x0+x4, x17 = x1+x7, x07 = x04+x17; \ - s0 = c + x07 + x2; \ - s1 = rotl64(x07 + x3, 5); \ - s2 = rotl64(x07 + x6, 15); \ - x23 = x2 + x3; \ - s5 = rotl64(x04 + x23 + x5, 40); \ - x56 = x5 + x6; \ - s6 = rotl64(x17 + x56 + x0, 50); \ - x26 = x23+x56; \ - s3 = rotl64(x26 + x7, 22); \ - s4 = rotl64(x26 + x1, 31); \ - s7 = rotl64(x26 + x4, 59); \ -} - -/* - * Second Orthogonal Latin Square - * 0 4 2 3 1 6 5 7 - * 7 6 3 2 5 4 1 0 - * 5 3 1 6 0 2 7 4 - * 1 0 5 4 3 7 2 6 - * 2 1 0 7 4 5 6 3 - * 3 5 7 0 6 1 4 2 - * 4 7 6 1 2 0 3 5 - * 6 2 4 5 7 3 0 1 - */ -#define LS2_256(c, y0, y1, y2, y3, y4, y5, y6, y7) \ -{ \ - uint32_t y01, y25, y34, y67, y04, y05, y27, y37; \ - y01 = y0+y1, y25 = y2+y5, y05 = y01+y25; \ - t0 = ~c + y05 + y7; \ - t2 = rotl32(y05 + y3, 9); \ - y34 = y3+y4, y04 = y01+y34; \ - t1 = rotl32(y04 + y6, 5); \ - t4 = rotl32(y04 + y5, 15); \ - y67 = y6+y7, y37 = y34+y67; \ - t3 = rotl32(y37 + y2, 11); \ - t7 = rotl32(y37 + y0, 27); \ - y27 = y25+y67; \ - t5 = rotl32(y27 + y4, 20); \ - t6 = rotl32(y27 + y1, 25); \ -} - -#define LS2_512(c, y0, y1, y2, y3, y4, y5, y6, y7) \ -{ \ - uint64_t y01, y25, y34, y67, y04, y05, y27, y37; \ - y01 = y0+y1, y25 = 
y2+y5, y05 = y01+y25; \ - t0 = ~c + y05 + y7; \ - t2 = rotl64(y05 + y3, 19); \ - y34 = y3+y4, y04 = y01+y34; \ - t1 = rotl64(y04 + y6, 10); \ - t4 = rotl64(y04 + y5, 36); \ - y67 = y6+y7, y37 = y34+y67; \ - t3 = rotl64(y37 + y2, 29); \ - t7 = rotl64(y37 + y0, 55); \ - y27 = y25+y67; \ - t5 = rotl64(y27 + y4, 44); \ - t6 = rotl64(y27 + y1, 48); \ +#define LS1_512(x0, x1, x2, x3, x4, x5, x6, x7) \ +{ \ + MEMORY_BARRIER \ + z1 = x0 + x4, z2 = x1 + x7; z5 = z1 + z2; \ + s0 = 0xaaaaaaaaaaaaaaaaull + z5 + x2; \ + s1 = rotl64(z5 + x3, 5); \ + s2 = rotl64(z5 + x6, 15); z3 = x2 + x3; \ + s5 = rotl64(z1 + z3 + x5, 40); z4 = x5 + x6; \ + s6 = rotl64(z2 + z4 + x0, 50); z6 = z3 + z4; \ + s3 = rotl64(z6 + x7, 22); \ + s4 = rotl64(z6 + x1, 31); \ + s7 = rotl64(z6 + x4, 59); \ } -#define quasi_exform256(r0, r1, r2, r3, r4, r5, r6, r7) \ -{ \ - uint32_t s04, s17, s23, s56, t01, t25, t34, t67; \ - s04 = s0 ^ s4, t01 = t0 ^ t1; \ - r0 = (s04 ^ s1) + (t01 ^ t5); \ - t67 = t6 ^ t7; \ - r1 = (s04 ^ s7) + (t2 ^ t67); \ - s23 = s2 ^ s3; \ - r7 = (s23 ^ s5) + (t4 ^ t67); \ - t34 = t3 ^ t4; \ - r3 = (s23 ^ s4) + (t0 ^ t34); \ - s56 = s5 ^ s6; \ - r5 = (s3 ^ s56) + (t34 ^ t6); \ - t25 = t2 ^ t5; \ - r6 = (s2 ^ s56) + (t25 ^ t7); \ - s17 = s1 ^ s7; \ - r4 = (s0 ^ s17) + (t1 ^ t25); \ - r2 = (s17 ^ s6) + (t01 ^ t3); \ +#define LS2_512(y0, y1, y2, y3, y4, y5, y6, y7) \ +{ \ + z1 = y0 + y1, z2 = y2 + y5; z6 = z1 + z2; \ + t0 = ~0xaaaaaaaaaaaaaaaaull + z6 + y7; \ + t2 = rotl64(z6 + y3, 19); \ + z3 = y3 + y4, z5 = z1 + z3; \ + t1 = rotl64(z5 + y6, 10); \ + t4 = rotl64(z5 + y5, 36); \ + z4 = y6 + y7, z8 = z3 + z4; \ + t3 = rotl64(z8 + y2, 29); \ + t7 = rotl64(z8 + y0, 55); z7 = z2 + z4; \ + t5 = rotl64(z7 + y4, 44); \ + t6 = rotl64(z7 + y1, 48); \ } -#define quasi_exform512(r0, r1, r2, r3, r4, r5, r6, r7) \ -{ \ - uint64_t s04, s17, s23, s56, t01, t25, t34, t67; \ - s04 = s0 ^ s4, t01 = t0 ^ t1; \ - r0 = (s04 ^ s1) + (t01 ^ t5); \ - t67 = t6 ^ t7; \ - r1 = (s04 ^ s7) + (t2 ^ t67); \ - s23 = s2 ^ 
s3; \ - r7 = (s23 ^ s5) + (t4 ^ t67); \ - t34 = t3 ^ t4; \ - r3 = (s23 ^ s4) + (t0 ^ t34); \ - s56 = s5 ^ s6; \ - r5 = (s3 ^ s56) + (t34 ^ t6); \ - t25 = t2 ^ t5; \ - r6 = (s2 ^ s56) + (t25 ^ t7); \ - s17 = s1 ^ s7; \ - r4 = (s0 ^ s17) + (t1 ^ t25); \ - r2 = (s17 ^ s6) + (t01 ^ t3); \ -} - -static size_t -Q256(size_t bitlen, const uint32_t *data, uint32_t *restrict p) -{ - size_t bl; - - for (bl = bitlen; bl >= EdonR256_BLOCK_BITSIZE; - bl -= EdonR256_BLOCK_BITSIZE, data += 16) { - uint32_t s0, s1, s2, s3, s4, s5, s6, s7, t0, t1, t2, t3, t4, - t5, t6, t7; - uint32_t p0, p1, p2, p3, p4, p5, p6, p7, q0, q1, q2, q3, q4, - q5, q6, q7; - const uint32_t defix = 0xaaaaaaaa; -#if defined(MACHINE_IS_BIG_ENDIAN) - uint32_t swp0, swp1, swp2, swp3, swp4, swp5, swp6, swp7, swp8, - swp9, swp10, swp11, swp12, swp13, swp14, swp15; -#define d(j) swp ## j -#define s32(j) ld_swap32((uint32_t *)data + j, swp ## j) -#else -#define d(j) data[j] -#endif - - /* First row of quasigroup e-transformations */ -#if defined(MACHINE_IS_BIG_ENDIAN) - s32(8); - s32(9); - s32(10); - s32(11); - s32(12); - s32(13); - s32(14); - s32(15); -#endif - LS1_256(defix, d(15), d(14), d(13), d(12), d(11), d(10), d(9), - d(8)); -#if defined(MACHINE_IS_BIG_ENDIAN) - s32(0); - s32(1); - s32(2); - s32(3); - s32(4); - s32(5); - s32(6); - s32(7); -#undef s32 -#endif - LS2_256(defix, d(0), d(1), d(2), d(3), d(4), d(5), d(6), d(7)); - quasi_exform256(p0, p1, p2, p3, p4, p5, p6, p7); - - LS1_256(defix, p0, p1, p2, p3, p4, p5, p6, p7); - LS2_256(defix, d(8), d(9), d(10), d(11), d(12), d(13), d(14), - d(15)); - quasi_exform256(q0, q1, q2, q3, q4, q5, q6, q7); - - /* Second row of quasigroup e-transformations */ - LS1_256(defix, p[8], p[9], p[10], p[11], p[12], p[13], p[14], - p[15]); - LS2_256(defix, p0, p1, p2, p3, p4, p5, p6, p7); - quasi_exform256(p0, p1, p2, p3, p4, p5, p6, p7); - - LS1_256(defix, p0, p1, p2, p3, p4, p5, p6, p7); - LS2_256(defix, q0, q1, q2, q3, q4, q5, q6, q7); - quasi_exform256(q0, q1, q2, q3, q4, 
q5, q6, q7); - - /* Third row of quasigroup e-transformations */ - LS1_256(defix, p0, p1, p2, p3, p4, p5, p6, p7); - LS2_256(defix, p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7]); - quasi_exform256(p0, p1, p2, p3, p4, p5, p6, p7); - - LS1_256(defix, q0, q1, q2, q3, q4, q5, q6, q7); - LS2_256(defix, p0, p1, p2, p3, p4, p5, p6, p7); - quasi_exform256(q0, q1, q2, q3, q4, q5, q6, q7); - - /* Fourth row of quasigroup e-transformations */ - LS1_256(defix, d(7), d(6), d(5), d(4), d(3), d(2), d(1), d(0)); - LS2_256(defix, p0, p1, p2, p3, p4, p5, p6, p7); - quasi_exform256(p0, p1, p2, p3, p4, p5, p6, p7); - - LS1_256(defix, p0, p1, p2, p3, p4, p5, p6, p7); - LS2_256(defix, q0, q1, q2, q3, q4, q5, q6, q7); - quasi_exform256(q0, q1, q2, q3, q4, q5, q6, q7); - - /* Edon-R tweak on the original SHA-3 Edon-R submission. */ - p[0] ^= d(8) ^ p0; - p[1] ^= d(9) ^ p1; - p[2] ^= d(10) ^ p2; - p[3] ^= d(11) ^ p3; - p[4] ^= d(12) ^ p4; - p[5] ^= d(13) ^ p5; - p[6] ^= d(14) ^ p6; - p[7] ^= d(15) ^ p7; - p[8] ^= d(0) ^ q0; - p[9] ^= d(1) ^ q1; - p[10] ^= d(2) ^ q2; - p[11] ^= d(3) ^ q3; - p[12] ^= d(4) ^ q4; - p[13] ^= d(5) ^ q5; - p[14] ^= d(6) ^ q6; - p[15] ^= d(7) ^ q7; - } - -#undef d - return (bitlen - bl); +#define QEF_512(r0, r1, r2, r3, r4, r5, r6, r7) \ +{ \ + z1 = s0 ^ s4, z5 = t0 ^ t1; \ + r0 = (z1 ^ s1) + (z5 ^ t5); z8 = t6 ^ t7; \ + r1 = (z1 ^ s7) + (t2 ^ z8); z3 = s2 ^ s3; \ + r7 = (z3 ^ s5) + (t4 ^ z8); z7 = t3 ^ t4; \ + r3 = (z3 ^ s4) + (t0 ^ z7); z4 = s5 ^ s6; \ + r5 = (s3 ^ z4) + (z7 ^ t6); z6 = t2 ^ t5; \ + r6 = (s2 ^ z4) + (z6 ^ t7); z2 = s1 ^ s7; \ + r4 = (s0 ^ z2) + (t1 ^ z6); \ + r2 = (z2 ^ s6) + (z5 ^ t3); \ } -/* - * Why is this #pragma here? - * - * Checksum functions like this one can go over the stack frame size check - * Linux imposes on 32-bit platforms (-Wframe-larger-than=1024). We can - * safely ignore the compiler error since we know that in OpenZFS, that - * the function will be called from a worker thread that won't be using - * much stack. 
The only function that goes over the 1k limit is Q512(), - * which only goes over it by a hair (1248 bytes on ARM32). - */ -#include <sys/isa_defs.h> /* for _ILP32 */ -#ifdef _ILP32 /* We're 32-bit, assume small stack frames */ -#pragma GCC diagnostic ignored "-Wframe-larger-than=" -#endif - -#if defined(__IBMC__) && defined(_AIX) && defined(__64BIT__) static inline size_t -#else -static size_t -#endif -Q512(size_t bitlen, const uint64_t *data, uint64_t *restrict p) +Q512(size_t bitlen, const uint64_t *data, uint64_t *p) { size_t bl; for (bl = bitlen; bl >= EdonR512_BLOCK_BITSIZE; bl -= EdonR512_BLOCK_BITSIZE, data += 16) { - uint64_t s0, s1, s2, s3, s4, s5, s6, s7, t0, t1, t2, t3, t4, - t5, t6, t7; - uint64_t p0, p1, p2, p3, p4, p5, p6, p7, q0, q1, q2, q3, q4, - q5, q6, q7; - const uint64_t defix = 0xaaaaaaaaaaaaaaaaull; -#if defined(MACHINE_IS_BIG_ENDIAN) - uint64_t swp0, swp1, swp2, swp3, swp4, swp5, swp6, swp7, swp8, - swp9, swp10, swp11, swp12, swp13, swp14, swp15; + uint64_t q0, q1, q2, q3, q4, q5, q6, q7; + uint64_t p0, p1, p2, p3, p4, p5, p6, p7; + uint64_t s0, s1, s2, s3, s4, s5, s6, s7; + uint64_t t0, t1, t2, t3, t4, t5, t6, t7; + uint64_t z1, z2, z3, z4, z5, z6, z7, z8; + +#if defined(_ZFS_BIG_ENDIAN) + uint64_t swp0, swp1, swp2, swp3, swp4, swp5, swp6, swp7, + swp8, swp9, swp10, swp11, swp12, swp13, swp14, swp15; #define d(j) swp##j #define s64(j) ld_swap64((uint64_t *)data+j, swp##j) -#else -#define d(j) data[j] -#endif - - /* First row of quasigroup e-transformations */ -#if defined(MACHINE_IS_BIG_ENDIAN) - s64(8); - s64(9); - s64(10); - s64(11); - s64(12); - s64(13); - s64(14); - s64(15); -#endif - LS1_512(defix, d(15), d(14), d(13), d(12), d(11), d(10), d(9), - d(8)); -#if defined(MACHINE_IS_BIG_ENDIAN) s64(0); s64(1); s64(2); @@ -394,43 +136,53 @@ Q512(size_t bitlen, const uint64_t *data, uint64_t *restrict p) s64(5); s64(6); s64(7); -#undef s64 + s64(8); + s64(9); + s64(10); + s64(11); + s64(12); + s64(13); + s64(14); + s64(15); +#else +#define 
d(j) data[j] #endif - LS2_512(defix, d(0), d(1), d(2), d(3), d(4), d(5), d(6), d(7)); - quasi_exform512(p0, p1, p2, p3, p4, p5, p6, p7); - LS1_512(defix, p0, p1, p2, p3, p4, p5, p6, p7); - LS2_512(defix, d(8), d(9), d(10), d(11), d(12), d(13), d(14), - d(15)); - quasi_exform512(q0, q1, q2, q3, q4, q5, q6, q7); + /* First row of quasigroup e-transformations */ + LS1_512(d(15), d(14), d(13), d(12), d(11), d(10), d(9), d(8)); + LS2_512(d(0), d(1), d(2), d(3), d(4), d(5), d(6), d(7)); + QEF_512(p0, p1, p2, p3, p4, p5, p6, p7); + + LS1_512(p0, p1, p2, p3, p4, p5, p6, p7); + LS2_512(d(8), d(9), d(10), d(11), d(12), d(13), d(14), d(15)); + QEF_512(q0, q1, q2, q3, q4, q5, q6, q7); /* Second row of quasigroup e-transformations */ - LS1_512(defix, p[8], p[9], p[10], p[11], p[12], p[13], p[14], - p[15]); - LS2_512(defix, p0, p1, p2, p3, p4, p5, p6, p7); - quasi_exform512(p0, p1, p2, p3, p4, p5, p6, p7); + LS1_512(p[8], p[9], p[10], p[11], p[12], p[13], p[14], p[15]); + LS2_512(p0, p1, p2, p3, p4, p5, p6, p7); + QEF_512(p0, p1, p2, p3, p4, p5, p6, p7); - LS1_512(defix, p0, p1, p2, p3, p4, p5, p6, p7); - LS2_512(defix, q0, q1, q2, q3, q4, q5, q6, q7); - quasi_exform512(q0, q1, q2, q3, q4, q5, q6, q7); + LS1_512(p0, p1, p2, p3, p4, p5, p6, p7); + LS2_512(q0, q1, q2, q3, q4, q5, q6, q7); + QEF_512(q0, q1, q2, q3, q4, q5, q6, q7); /* Third row of quasigroup e-transformations */ - LS1_512(defix, p0, p1, p2, p3, p4, p5, p6, p7); - LS2_512(defix, p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7]); - quasi_exform512(p0, p1, p2, p3, p4, p5, p6, p7); + LS1_512(p0, p1, p2, p3, p4, p5, p6, p7); + LS2_512(p[0], p[1], p[2], p[3], p[4], p[5], p[6], p[7]); + QEF_512(p0, p1, p2, p3, p4, p5, p6, p7); - LS1_512(defix, q0, q1, q2, q3, q4, q5, q6, q7); - LS2_512(defix, p0, p1, p2, p3, p4, p5, p6, p7); - quasi_exform512(q0, q1, q2, q3, q4, q5, q6, q7); + LS1_512(q0, q1, q2, q3, q4, q5, q6, q7); + LS2_512(p0, p1, p2, p3, p4, p5, p6, p7); + QEF_512(q0, q1, q2, q3, q4, q5, q6, q7); /* Fourth row of 
quasigroup e-transformations */ - LS1_512(defix, d(7), d(6), d(5), d(4), d(3), d(2), d(1), d(0)); - LS2_512(defix, p0, p1, p2, p3, p4, p5, p6, p7); - quasi_exform512(p0, p1, p2, p3, p4, p5, p6, p7); + LS1_512(d(7), d(6), d(5), d(4), d(3), d(2), d(1), d(0)); + LS2_512(p0, p1, p2, p3, p4, p5, p6, p7); + QEF_512(p0, p1, p2, p3, p4, p5, p6, p7); - LS1_512(defix, p0, p1, p2, p3, p4, p5, p6, p7); - LS2_512(defix, q0, q1, q2, q3, q4, q5, q6, q7); - quasi_exform512(q0, q1, q2, q3, q4, q5, q6, q7); + LS1_512(p0, p1, p2, p3, p4, p5, p6, p7); + LS2_512(q0, q1, q2, q3, q4, q5, q6, q7); + QEF_512(q0, q1, q2, q3, q4, q5, q6, q7); /* Edon-R tweak on the original SHA-3 Edon-R submission. */ p[0] ^= d(8) ^ p0; @@ -451,289 +203,115 @@ Q512(size_t bitlen, const uint64_t *data, uint64_t *restrict p) p[15] ^= d(7) ^ q7; } +#undef s64 #undef d return (bitlen - bl); } void -EdonRInit(EdonRState *state, size_t hashbitlen) +EdonRInit(EdonRState *state) { - ASSERT(EDONR_VALID_HASHBITLEN(hashbitlen)); - switch (hashbitlen) { - case 224: - state->hashbitlen = 224; - state->bits_processed = 0; - state->unprocessed_bits = 0; - bcopy(i224p2, hashState224(state)->DoublePipe, - 16 * sizeof (uint32_t)); - break; - - case 256: - state->hashbitlen = 256; - state->bits_processed = 0; - state->unprocessed_bits = 0; - bcopy(i256p2, hashState256(state)->DoublePipe, - 16 * sizeof (uint32_t)); - break; - - case 384: - state->hashbitlen = 384; - state->bits_processed = 0; - state->unprocessed_bits = 0; - bcopy(i384p2, hashState384(state)->DoublePipe, - 16 * sizeof (uint64_t)); - break; - - case 512: - state->hashbitlen = 512; - state->bits_processed = 0; - state->unprocessed_bits = 0; - bcopy(i512p2, hashState224(state)->DoublePipe, - 16 * sizeof (uint64_t)); - break; - } + state->bits_processed = 0; + state->unprocessed_bits = 0; + memcpy(hashState512(state)->DoublePipe, i512p2, sizeof (i512p2)); } - void EdonRUpdate(EdonRState *state, const uint8_t *data, size_t databitlen) { - uint32_t *data32; uint64_t 
*data64; - size_t bits_processed; - ASSERT(EDONR_VALID_HASHBITLEN(state->hashbitlen)); - switch (state->hashbitlen) { - case 224: - case 256: - if (state->unprocessed_bits > 0) { - /* LastBytes = databitlen / 8 */ - int LastBytes = (int)databitlen >> 3; - - ASSERT(state->unprocessed_bits + databitlen <= - EdonR256_BLOCK_SIZE * 8); - - bcopy(data, hashState256(state)->LastPart - + (state->unprocessed_bits >> 3), LastBytes); - state->unprocessed_bits += (int)databitlen; - databitlen = state->unprocessed_bits; - /* LINTED E_BAD_PTR_CAST_ALIGN */ - data32 = (uint32_t *)hashState256(state)->LastPart; - } else - /* LINTED E_BAD_PTR_CAST_ALIGN */ - data32 = (uint32_t *)data; - - bits_processed = Q256(databitlen, data32, - hashState256(state)->DoublePipe); - state->bits_processed += bits_processed; - databitlen -= bits_processed; - state->unprocessed_bits = (int)databitlen; - if (databitlen > 0) { - /* LastBytes = Ceil(databitlen / 8) */ - int LastBytes = - ((~(((-(int)databitlen) >> 3) & 0x01ff)) + - 1) & 0x01ff; - - data32 += bits_processed >> 5; /* byte size update */ - bcopy(data32, hashState256(state)->LastPart, LastBytes); - } - break; - - case 384: - case 512: - if (state->unprocessed_bits > 0) { - /* LastBytes = databitlen / 8 */ - int LastBytes = (int)databitlen >> 3; - - ASSERT(state->unprocessed_bits + databitlen <= - EdonR512_BLOCK_SIZE * 8); - - bcopy(data, hashState512(state)->LastPart - + (state->unprocessed_bits >> 3), LastBytes); - state->unprocessed_bits += (int)databitlen; - databitlen = state->unprocessed_bits; - /* LINTED E_BAD_PTR_CAST_ALIGN */ - data64 = (uint64_t *)hashState512(state)->LastPart; - } else - /* LINTED E_BAD_PTR_CAST_ALIGN */ - data64 = (uint64_t *)data; - - bits_processed = Q512(databitlen, data64, - hashState512(state)->DoublePipe); - state->bits_processed += bits_processed; - databitlen -= bits_processed; - state->unprocessed_bits = (int)databitlen; - if (databitlen > 0) { - /* LastBytes = Ceil(databitlen / 8) */ - int LastBytes = - 
((~(((-(int)databitlen) >> 3) & 0x03ff)) + - 1) & 0x03ff; - - data64 += bits_processed >> 6; /* byte size update */ - bcopy(data64, hashState512(state)->LastPart, LastBytes); - } - break; + if (state->unprocessed_bits > 0) { + /* LastBytes = databitlen / 8 */ + int LastBytes = (int)databitlen >> 3; + + ASSERT(state->unprocessed_bits + databitlen <= + EdonR512_BLOCK_SIZE * 8); + + memcpy(hashState512(state)->LastPart + + (state->unprocessed_bits >> 3), data, LastBytes); + state->unprocessed_bits += (int)databitlen; + databitlen = state->unprocessed_bits; + /* LINTED E_BAD_PTR_CAST_ALIGN */ + data64 = (uint64_t *)hashState512(state)->LastPart; + } else + /* LINTED E_BAD_PTR_CAST_ALIGN */ + data64 = (uint64_t *)data; + + bits_processed = Q512(databitlen, data64, + hashState512(state)->DoublePipe); + state->bits_processed += bits_processed; + databitlen -= bits_processed; + state->unprocessed_bits = (int)databitlen; + if (databitlen > 0) { + /* LastBytes = Ceil(databitlen / 8) */ + int LastBytes = ((~(((-(int)databitlen) >> 3) & 0x03ff)) + 1) \ + & 0x03ff; + + data64 += bits_processed >> 6; /* byte size update */ + memmove(hashState512(state)->LastPart, data64, LastBytes); } } void EdonRFinal(EdonRState *state, uint8_t *hashval) { - uint32_t *data32; uint64_t *data64, num_bits; - size_t databitlen; int LastByte, PadOnePosition; num_bits = state->bits_processed + state->unprocessed_bits; - ASSERT(EDONR_VALID_HASHBITLEN(state->hashbitlen)); - switch (state->hashbitlen) { - case 224: - case 256: - LastByte = (int)state->unprocessed_bits >> 3; - PadOnePosition = 7 - (state->unprocessed_bits & 0x07); - hashState256(state)->LastPart[LastByte] = - (hashState256(state)->LastPart[LastByte] - & (0xff << (PadOnePosition + 1))) ^ - (0x01 << PadOnePosition); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - data64 = (uint64_t *)hashState256(state)->LastPart; - - if (state->unprocessed_bits < 448) { - (void) memset((hashState256(state)->LastPart) + - LastByte + 1, 0x00, - EdonR256_BLOCK_SIZE - 
LastByte - 9); - databitlen = EdonR256_BLOCK_SIZE * 8; -#if defined(MACHINE_IS_BIG_ENDIAN) - st_swap64(num_bits, data64 + 7); -#else - data64[7] = num_bits; -#endif - } else { - (void) memset((hashState256(state)->LastPart) + - LastByte + 1, 0x00, - EdonR256_BLOCK_SIZE * 2 - LastByte - 9); - databitlen = EdonR256_BLOCK_SIZE * 16; -#if defined(MACHINE_IS_BIG_ENDIAN) - st_swap64(num_bits, data64 + 15); -#else - data64[15] = num_bits; -#endif - } - - /* LINTED E_BAD_PTR_CAST_ALIGN */ - data32 = (uint32_t *)hashState256(state)->LastPart; - state->bits_processed += Q256(databitlen, data32, - hashState256(state)->DoublePipe); - break; - - case 384: - case 512: - LastByte = (int)state->unprocessed_bits >> 3; - PadOnePosition = 7 - (state->unprocessed_bits & 0x07); - hashState512(state)->LastPart[LastByte] = - (hashState512(state)->LastPart[LastByte] - & (0xff << (PadOnePosition + 1))) ^ - (0x01 << PadOnePosition); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - data64 = (uint64_t *)hashState512(state)->LastPart; - - if (state->unprocessed_bits < 960) { - (void) memset((hashState512(state)->LastPart) + - LastByte + 1, 0x00, - EdonR512_BLOCK_SIZE - LastByte - 9); - databitlen = EdonR512_BLOCK_SIZE * 8; -#if defined(MACHINE_IS_BIG_ENDIAN) - st_swap64(num_bits, data64 + 15); + LastByte = (int)state->unprocessed_bits >> 3; + PadOnePosition = 7 - (state->unprocessed_bits & 0x07); + hashState512(state)->LastPart[LastByte] = + (hashState512(state)->LastPart[LastByte] \ + & (0xff << (PadOnePosition + 1))) ^ (0x01 << PadOnePosition); + /* LINTED E_BAD_PTR_CAST_ALIGN */ + data64 = (uint64_t *)hashState512(state)->LastPart; + + if (state->unprocessed_bits < 960) { + memset((hashState512(state)->LastPart) + + LastByte + 1, 0x00, EdonR512_BLOCK_SIZE - LastByte - 9); + databitlen = EdonR512_BLOCK_SIZE * 8; +#if defined(_ZFS_BIG_ENDIAN) + st_swap64(num_bits, data64 + 15); #else - data64[15] = num_bits; + data64[15] = num_bits; #endif - } else { - (void) memset((hashState512(state)->LastPart) + - 
LastByte + 1, 0x00, - EdonR512_BLOCK_SIZE * 2 - LastByte - 9); - databitlen = EdonR512_BLOCK_SIZE * 16; -#if defined(MACHINE_IS_BIG_ENDIAN) - st_swap64(num_bits, data64 + 31); + } else { + memset((hashState512(state)->LastPart) + LastByte + 1, + 0x00, EdonR512_BLOCK_SIZE * 2 - LastByte - 9); + databitlen = EdonR512_BLOCK_SIZE * 16; +#if defined(_ZFS_BIG_ENDIAN) + st_swap64(num_bits, data64 + 31); #else - data64[31] = num_bits; + data64[31] = num_bits; #endif - } - - state->bits_processed += Q512(databitlen, data64, - hashState512(state)->DoublePipe); - break; } - switch (state->hashbitlen) { - case 224: { -#if defined(MACHINE_IS_BIG_ENDIAN) - uint32_t *d32 = (uint32_t *)hashval; - uint32_t *s32 = hashState224(state)->DoublePipe + 9; - int j; + state->bits_processed += Q512(databitlen, data64, + hashState512(state)->DoublePipe); - for (j = 0; j < EdonR224_DIGEST_SIZE >> 2; j++) - st_swap32(s32[j], d32 + j); -#else - bcopy(hashState256(state)->DoublePipe + 9, hashval, - EdonR224_DIGEST_SIZE); -#endif - break; - } - case 256: { -#if defined(MACHINE_IS_BIG_ENDIAN) - uint32_t *d32 = (uint32_t *)hashval; - uint32_t *s32 = hashState224(state)->DoublePipe + 8; - int j; - - for (j = 0; j < EdonR256_DIGEST_SIZE >> 2; j++) - st_swap32(s32[j], d32 + j); -#else - bcopy(hashState256(state)->DoublePipe + 8, hashval, - EdonR256_DIGEST_SIZE); -#endif - break; - } - case 384: { -#if defined(MACHINE_IS_BIG_ENDIAN) - uint64_t *d64 = (uint64_t *)hashval; - uint64_t *s64 = hashState384(state)->DoublePipe + 10; - int j; - - for (j = 0; j < EdonR384_DIGEST_SIZE >> 3; j++) - st_swap64(s64[j], d64 + j); -#else - bcopy(hashState384(state)->DoublePipe + 10, hashval, - EdonR384_DIGEST_SIZE); -#endif - break; - } - case 512: { -#if defined(MACHINE_IS_BIG_ENDIAN) - uint64_t *d64 = (uint64_t *)hashval; - uint64_t *s64 = hashState512(state)->DoublePipe + 8; - int j; - - for (j = 0; j < EdonR512_DIGEST_SIZE >> 3; j++) - st_swap64(s64[j], d64 + j); +#if defined(_ZFS_BIG_ENDIAN) + data64 = (uint64_t 
*)hashval; + uint64_t *s64 = hashState512(state)->DoublePipe + 8; + int j; + + for (j = 0; j < EdonR512_DIGEST_SIZE >> 3; j++) + st_swap64(s64[j], data64 + j); #else - bcopy(hashState512(state)->DoublePipe + 8, hashval, - EdonR512_DIGEST_SIZE); + memcpy(hashval, hashState512(state)->DoublePipe + 8, + EdonR512_DIGEST_SIZE); #endif - break; - } - } } - void -EdonRHash(size_t hashbitlen, const uint8_t *data, size_t databitlen, - uint8_t *hashval) +EdonRHash(const uint8_t *data, size_t databitlen, uint8_t *hashval) { EdonRState state; - EdonRInit(&state, hashbitlen); + EdonRInit(&state); EdonRUpdate(&state, data, databitlen); EdonRFinal(&state, hashval); } diff --git a/sys/contrib/openzfs/module/icp/algs/edonr/edonr_byteorder.h b/sys/contrib/openzfs/module/icp/algs/edonr/edonr_byteorder.h deleted file mode 100644 index 2b5d48287f26..000000000000 --- a/sys/contrib/openzfs/module/icp/algs/edonr/edonr_byteorder.h +++ /dev/null @@ -1,216 +0,0 @@ -/* - * IDI,NTNU - * - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://opensource.org/licenses/CDDL-1.0. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - * - * Copyright (C) 2009, 2010, Jorn Amundsen <jorn.amundsen@ntnu.no> - * - * C header file to determine compile machine byte order. Take care when cross - * compiling. 
- * - * $Id: byteorder.h 517 2013-02-17 20:34:39Z joern $ - */ -/* - * Portions copyright (c) 2013, Saso Kiselkov, All rights reserved - */ - -#ifndef _CRYPTO_EDONR_BYTEORDER_H -#define _CRYPTO_EDONR_BYTEORDER_H - -#include <sys/sysmacros.h> -#include <sys/param.h> - -#if defined(__BYTE_ORDER) -#if (__BYTE_ORDER == __BIG_ENDIAN) -#define MACHINE_IS_BIG_ENDIAN -#elif (__BYTE_ORDER == __LITTLE_ENDIAN) -#define MACHINE_IS_LITTLE_ENDIAN -#endif -#elif defined(BYTE_ORDER) -#if (BYTE_ORDER == BIG_ENDIAN) -#define MACHINE_IS_BIG_ENDIAN -#elif (BYTE_ORDER == LITTLE_ENDIAN) -#define MACHINE_IS_LITTLE_ENDIAN -#endif -#endif /* __BYTE_ORDER || BYTE_ORDER */ - -#if !defined(MACHINE_IS_BIG_ENDIAN) && !defined(MACHINE_IS_LITTLE_ENDIAN) -#if defined(_ZFS_BIG_ENDIAN) || defined(_MIPSEB) -#define MACHINE_IS_BIG_ENDIAN -#endif -#if defined(_ZFS_LITTLE_ENDIAN) || defined(_MIPSEL) -#define MACHINE_IS_LITTLE_ENDIAN -#endif -#endif /* !MACHINE_IS_BIG_ENDIAN && !MACHINE_IS_LITTLE_ENDIAN */ - -#if !defined(MACHINE_IS_BIG_ENDIAN) && !defined(MACHINE_IS_LITTLE_ENDIAN) -#error unknown machine byte sex -#endif - -#define BYTEORDER_INCLUDED - -#if defined(MACHINE_IS_BIG_ENDIAN) -/* - * Byte swapping macros for big endian architectures and compilers, - * add as appropriate for other architectures and/or compilers. 
- * - * ld_swap64(src,dst) : uint64_t dst = *(src) - * st_swap64(src,dst) : *(dst) = uint64_t src - */ - -#if defined(__PPC__) || defined(_ARCH_PPC) - -#if defined(__64BIT__) -#if defined(_ARCH_PWR7) -#define aix_ld_swap64(s64, d64)\ - __asm__("ldbrx %0,0,%1" : "=r"(d64) : "r"(s64)) -#define aix_st_swap64(s64, d64)\ - __asm__ volatile("stdbrx %1,0,%0" : : "r"(d64), "r"(s64)) -#else -#define aix_ld_swap64(s64, d64) \ -{ \ - uint64_t *s4 = 0, h; /* initialize to zero for gcc warning */ \ - \ - __asm__("addi %0,%3,4;lwbrx %1,0,%3;lwbrx %2,0,%0;rldimi %1,%2,32,0"\ - : "+r"(s4), "=r"(d64), "=r"(h) : "b"(s64)); \ -} - -#define aix_st_swap64(s64, d64) \ -{ \ - uint64_t *s4 = 0, h; /* initialize to zero for gcc warning */ \ - h = (s64) >> 32; \ - __asm__ volatile("addi %0,%3,4;stwbrx %1,0,%3;stwbrx %2,0,%0" \ - : "+r"(s4) : "r"(s64), "r"(h), "b"(d64)); \ -} -#endif /* 64BIT && PWR7 */ -#else -#define aix_ld_swap64(s64, d64) \ -{ \ - uint32_t *s4 = 0, h, l; /* initialize to zero for gcc warning */\ - __asm__("addi %0,%3,4;lwbrx %1,0,%3;lwbrx %2,0,%0" \ - : "+r"(s4), "=r"(l), "=r"(h) : "b"(s64)); \ - d64 = ((uint64_t)h<<32) | l; \ -} - -#define aix_st_swap64(s64, d64) \ -{ \ - uint32_t *s4 = 0, h, l; /* initialize to zero for gcc warning */\ - l = (s64) & 0xfffffffful, h = (s64) >> 32; \ - __asm__ volatile("addi %0,%3,4;stwbrx %1,0,%3;stwbrx %2,0,%0" \ - : "+r"(s4) : "r"(l), "r"(h), "b"(d64)); \ -} -#endif /* __64BIT__ */ -#define aix_ld_swap32(s32, d32)\ - __asm__("lwbrx %0,0,%1" : "=r"(d32) : "r"(s32)) -#define aix_st_swap32(s32, d32)\ - __asm__ volatile("stwbrx %1,0,%0" : : "r"(d32), "r"(s32)) -#define ld_swap32(s, d) aix_ld_swap32(s, d) -#define st_swap32(s, d) aix_st_swap32(s, d) -#define ld_swap64(s, d) aix_ld_swap64(s, d) -#define st_swap64(s, d) aix_st_swap64(s, d) -#endif /* __PPC__ || _ARCH_PPC */ - -#if defined(__sparc) -#if !defined(__arch64__) && !defined(__sparcv8) && defined(__sparcv9) -#define __arch64__ -#endif -#if defined(__GNUC__) || (defined(__SUNPRO_C) 
&& __SUNPRO_C > 0x590) -/* need Sun Studio C 5.10 and above for GNU inline assembly */ -#if defined(__arch64__) -#define sparc_ld_swap64(s64, d64) \ - __asm__("ldxa [%1]0x88,%0" : "=r"(d64) : "r"(s64)) -#define sparc_st_swap64(s64, d64) \ - __asm__ volatile("stxa %0,[%1]0x88" : : "r"(s64), "r"(d64)) -#define st_swap64(s, d) sparc_st_swap64(s, d) -#else -#define sparc_ld_swap64(s64, d64) \ -{ \ - uint32_t *s4, h, l; \ - __asm__("add %3,4,%0\n\tlda [%3]0x88,%1\n\tlda [%0]0x88,%2" \ - : "+r"(s4), "=r"(l), "=r"(h) : "r"(s64)); \ - d64 = ((uint64_t)h<<32) | l; \ -} -#define sparc_st_swap64(s64, d64) \ -{ \ - uint32_t *s4, h, l; \ - l = (s64) & 0xfffffffful, h = (s64) >> 32; \ - __asm__ volatile("add %3,4,%0\n\tsta %1,[%3]0x88\n\tsta %2,[%0]0x88"\ - : "+r"(s4) : "r"(l), "r"(h), "r"(d64)); \ -} -#endif /* sparc64 */ -#define sparc_ld_swap32(s32, d32)\ - __asm__("lda [%1]0x88,%0" : "=r"(d32) : "r"(s32)) -#define sparc_st_swap32(s32, d32)\ - __asm__ volatile("sta %0,[%1]0x88" : : "r"(s32), "r"(d32)) -#define ld_swap32(s, d) sparc_ld_swap32(s, d) -#define st_swap32(s, d) sparc_st_swap32(s, d) -#define ld_swap64(s, d) sparc_ld_swap64(s, d) -#define st_swap64(s, d) sparc_st_swap64(s, d) -#endif /* GCC || Sun Studio C > 5.9 */ -#endif /* sparc */ - -/* GCC fallback */ -#if ((__GNUC__ >= 4) || defined(__PGIC__)) && !defined(ld_swap32) -#define ld_swap32(s, d) (d = __builtin_bswap32(*(s))) -#define st_swap32(s, d) (*(d) = __builtin_bswap32(s)) -#endif /* GCC4/PGIC && !swap32 */ -#if ((__GNUC__ >= 4) || defined(__PGIC__)) && !defined(ld_swap64) -#define ld_swap64(s, d) (d = __builtin_bswap64(*(s))) -#define st_swap64(s, d) (*(d) = __builtin_bswap64(s)) -#endif /* GCC4/PGIC && !swap64 */ - -/* generic fallback */ -#if !defined(ld_swap32) -#define ld_swap32(s, d) \ - (d = (*(s) >> 24) | (*(s) >> 8 & 0xff00) | \ - (*(s) << 8 & 0xff0000) | (*(s) << 24)) -#define st_swap32(s, d) \ - (*(d) = ((s) >> 24) | ((s) >> 8 & 0xff00) | \ - ((s) << 8 & 0xff0000) | ((s) << 24)) -#endif -#if 
!defined(ld_swap64) -#define ld_swap64(s, d) \ - (d = (*(s) >> 56) | (*(s) >> 40 & 0xff00) | \ - (*(s) >> 24 & 0xff0000) | (*(s) >> 8 & 0xff000000) | \ - (*(s) & 0xff000000) << 8 | (*(s) & 0xff0000) << 24 | \ - (*(s) & 0xff00) << 40 | *(s) << 56) -#define st_swap64(s, d) \ - (*(d) = ((s) >> 56) | ((s) >> 40 & 0xff00) | \ - ((s) >> 24 & 0xff0000) | ((s) >> 8 & 0xff000000) | \ - ((s) & 0xff000000) << 8 | ((s) & 0xff0000) << 24 | \ - ((s) & 0xff00) << 40 | (s) << 56) -#endif - -#endif /* MACHINE_IS_BIG_ENDIAN */ - - -#if defined(MACHINE_IS_LITTLE_ENDIAN) -/* replace swaps with simple assignments on little endian systems */ -#undef ld_swap32 -#undef st_swap32 -#define ld_swap32(s, d) (d = *(s)) -#define st_swap32(s, d) (*(d) = s) -#undef ld_swap64 -#undef st_swap64 -#define ld_swap64(s, d) (d = *(s)) -#define st_swap64(s, d) (*(d) = s) -#endif /* MACHINE_IS_LITTLE_ENDIAN */ - -#endif /* _CRYPTO_EDONR_BYTEORDER_H */ diff --git a/sys/contrib/openzfs/module/icp/algs/modes/cbc.c b/sys/contrib/openzfs/module/icp/algs/modes/cbc.c index 85864f56dead..d0219fb24c49 100644 --- a/sys/contrib/openzfs/module/icp/algs/modes/cbc.c +++ b/sys/contrib/openzfs/module/icp/algs/modes/cbc.c @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. 
* @@ -51,8 +51,8 @@ cbc_encrypt_contiguous_blocks(cbc_ctx_t *ctx, char *data, size_t length, if (length + ctx->cbc_remainder_len < block_size) { /* accumulate bytes here and return */ - bcopy(datap, - (uint8_t *)ctx->cbc_remainder + ctx->cbc_remainder_len, + memcpy((uint8_t *)ctx->cbc_remainder + ctx->cbc_remainder_len, + datap, length); ctx->cbc_remainder_len += length; ctx->cbc_copy_to = datap; @@ -70,8 +70,8 @@ cbc_encrypt_contiguous_blocks(cbc_ctx_t *ctx, char *data, size_t length, if (need > remainder) return (CRYPTO_DATA_LEN_RANGE); - bcopy(datap, &((uint8_t *)ctx->cbc_remainder) - [ctx->cbc_remainder_len], need); + memcpy(&((uint8_t *)ctx->cbc_remainder) + [ctx->cbc_remainder_len], datap, need); blockp = (uint8_t *)ctx->cbc_remainder; } else { @@ -91,10 +91,10 @@ cbc_encrypt_contiguous_blocks(cbc_ctx_t *ctx, char *data, size_t length, if (out_data_1_len == block_size) { copy_block(lastp, out_data_1); } else { - bcopy(lastp, out_data_1, out_data_1_len); + memcpy(out_data_1, lastp, out_data_1_len); if (out_data_2 != NULL) { - bcopy(lastp + out_data_1_len, - out_data_2, + memcpy(out_data_2, + lastp + out_data_1_len, block_size - out_data_1_len); } } @@ -113,7 +113,7 @@ cbc_encrypt_contiguous_blocks(cbc_ctx_t *ctx, char *data, size_t length, /* Incomplete last block. */ if (remainder > 0 && remainder < block_size) { - bcopy(datap, ctx->cbc_remainder, remainder); + memcpy(ctx->cbc_remainder, datap, remainder); ctx->cbc_remainder_len = remainder; ctx->cbc_copy_to = datap; goto out; @@ -137,7 +137,6 @@ out: #define OTHER(a, ctx) \ (((a) == (ctx)->cbc_lastblock) ? 
(ctx)->cbc_iv : (ctx)->cbc_lastblock) -/* ARGSUSED */ int cbc_decrypt_contiguous_blocks(cbc_ctx_t *ctx, char *data, size_t length, crypto_data_t *out, size_t block_size, @@ -158,8 +157,8 @@ cbc_decrypt_contiguous_blocks(cbc_ctx_t *ctx, char *data, size_t length, if (length + ctx->cbc_remainder_len < block_size) { /* accumulate bytes here and return */ - bcopy(datap, - (uint8_t *)ctx->cbc_remainder + ctx->cbc_remainder_len, + memcpy((uint8_t *)ctx->cbc_remainder + ctx->cbc_remainder_len, + datap, length); ctx->cbc_remainder_len += length; ctx->cbc_copy_to = datap; @@ -177,8 +176,8 @@ cbc_decrypt_contiguous_blocks(cbc_ctx_t *ctx, char *data, size_t length, if (need > remainder) return (CRYPTO_ENCRYPTED_DATA_LEN_RANGE); - bcopy(datap, &((uint8_t *)ctx->cbc_remainder) - [ctx->cbc_remainder_len], need); + memcpy(&((uint8_t *)ctx->cbc_remainder) + [ctx->cbc_remainder_len], datap, need); blockp = (uint8_t *)ctx->cbc_remainder; } else { @@ -204,9 +203,9 @@ cbc_decrypt_contiguous_blocks(cbc_ctx_t *ctx, char *data, size_t length, crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1, &out_data_1_len, &out_data_2, block_size); - bcopy(blockp, out_data_1, out_data_1_len); + memcpy(out_data_1, blockp, out_data_1_len); if (out_data_2 != NULL) { - bcopy(blockp + out_data_1_len, out_data_2, + memcpy(out_data_2, blockp + out_data_1_len, block_size - out_data_1_len); } @@ -225,7 +224,7 @@ cbc_decrypt_contiguous_blocks(cbc_ctx_t *ctx, char *data, size_t length, /* Incomplete last block. */ if (remainder > 0 && remainder < block_size) { - bcopy(datap, ctx->cbc_remainder, remainder); + memcpy(ctx->cbc_remainder, datap, remainder); ctx->cbc_remainder_len = remainder; ctx->cbc_lastp = lastp; ctx->cbc_copy_to = datap; @@ -243,23 +242,15 @@ int cbc_init_ctx(cbc_ctx_t *cbc_ctx, char *param, size_t param_len, size_t block_size, void (*copy_block)(uint8_t *, uint64_t *)) { - /* - * Copy IV into context. 
- * - * If cm_param == NULL then the IV comes from the - * cd_miscdata field in the crypto_data structure. - */ - if (param != NULL) { - ASSERT(param_len == block_size); - copy_block((uchar_t *)param, cbc_ctx->cbc_iv); - } + /* Copy IV into context. */ + ASSERT3P(param, !=, NULL); + ASSERT3U(param_len, ==, block_size); + + copy_block((uchar_t *)param, cbc_ctx->cbc_iv); - cbc_ctx->cbc_lastp = (uint8_t *)&cbc_ctx->cbc_iv[0]; - cbc_ctx->cbc_flags |= CBC_MODE; return (CRYPTO_SUCCESS); } -/* ARGSUSED */ void * cbc_alloc_ctx(int kmflag) { diff --git a/sys/contrib/openzfs/module/icp/algs/modes/ccm.c b/sys/contrib/openzfs/module/icp/algs/modes/ccm.c index 5d6507c49db1..1371676d6e68 100644 --- a/sys/contrib/openzfs/module/icp/algs/modes/ccm.c +++ b/sys/contrib/openzfs/module/icp/algs/modes/ccm.c @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. 
* @@ -59,15 +59,14 @@ ccm_mode_encrypt_contiguous_blocks(ccm_ctx_t *ctx, char *data, size_t length, if (length + ctx->ccm_remainder_len < block_size) { /* accumulate bytes here and return */ - bcopy(datap, - (uint8_t *)ctx->ccm_remainder + ctx->ccm_remainder_len, + memcpy((uint8_t *)ctx->ccm_remainder + ctx->ccm_remainder_len, + datap, length); ctx->ccm_remainder_len += length; ctx->ccm_copy_to = datap; return (CRYPTO_SUCCESS); } - lastp = (uint8_t *)ctx->ccm_cb; crypto_init_ptrs(out, &iov_or_mp, &offset); mac_buf = (uint8_t *)ctx->ccm_mac_buf; @@ -80,8 +79,8 @@ ccm_mode_encrypt_contiguous_blocks(ccm_ctx_t *ctx, char *data, size_t length, if (need > remainder) return (CRYPTO_DATA_LEN_RANGE); - bcopy(datap, &((uint8_t *)ctx->ccm_remainder) - [ctx->ccm_remainder_len], need); + memcpy(&((uint8_t *)ctx->ccm_remainder) + [ctx->ccm_remainder_len], datap, need); blockp = (uint8_t *)ctx->ccm_remainder; } else { @@ -132,10 +131,10 @@ ccm_mode_encrypt_contiguous_blocks(ccm_ctx_t *ctx, char *data, size_t length, if (out_data_1_len == block_size) { copy_block(lastp, out_data_1); } else { - bcopy(lastp, out_data_1, out_data_1_len); + memcpy(out_data_1, lastp, out_data_1_len); if (out_data_2 != NULL) { - bcopy(lastp + out_data_1_len, - out_data_2, + memcpy(out_data_2, + lastp + out_data_1_len, block_size - out_data_1_len); } } @@ -154,7 +153,7 @@ ccm_mode_encrypt_contiguous_blocks(ccm_ctx_t *ctx, char *data, size_t length, /* Incomplete last block. 
*/ if (remainder > 0 && remainder < block_size) { - bcopy(datap, ctx->ccm_remainder, remainder); + memcpy(ctx->ccm_remainder, datap, remainder); ctx->ccm_remainder_len = remainder; ctx->ccm_copy_to = datap; goto out; @@ -190,7 +189,6 @@ calculate_ccm_mac(ccm_ctx_t *ctx, uint8_t *ccm_mac, } } -/* ARGSUSED */ int ccm_encrypt_final(ccm_ctx_t *ctx, crypto_data_t *out, size_t block_size, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), @@ -225,10 +223,10 @@ ccm_encrypt_final(ccm_ctx_t *ctx, crypto_data_t *out, size_t block_size, /* ccm_mac_input_buf is not used for encryption */ macp = (uint8_t *)ctx->ccm_mac_input_buf; - bzero(macp, block_size); + memset(macp, 0, block_size); /* copy remainder to temporary buffer */ - bcopy(ctx->ccm_remainder, macp, ctx->ccm_remainder_len); + memcpy(macp, ctx->ccm_remainder, ctx->ccm_remainder_len); /* calculate the CBC MAC */ xor_block(macp, mac_buf); @@ -255,33 +253,32 @@ ccm_encrypt_final(ccm_ctx_t *ctx, crypto_data_t *out, size_t block_size, ctx->ccm_remainder_len + ctx->ccm_mac_len); if (ctx->ccm_remainder_len > 0) { - /* copy temporary block to where it belongs */ if (out_data_2 == NULL) { /* everything will fit in out_data_1 */ - bcopy(macp, out_data_1, ctx->ccm_remainder_len); - bcopy(ccm_mac_p, out_data_1 + ctx->ccm_remainder_len, + memcpy(out_data_1, macp, ctx->ccm_remainder_len); + memcpy(out_data_1 + ctx->ccm_remainder_len, ccm_mac_p, ctx->ccm_mac_len); } else { - if (out_data_1_len < ctx->ccm_remainder_len) { - size_t data_2_len_used; - bcopy(macp, out_data_1, out_data_1_len); + memcpy(out_data_1, macp, out_data_1_len); data_2_len_used = ctx->ccm_remainder_len - out_data_1_len; - bcopy((uint8_t *)macp + out_data_1_len, - out_data_2, data_2_len_used); - bcopy(ccm_mac_p, out_data_2 + data_2_len_used, + memcpy(out_data_2, + (uint8_t *)macp + out_data_1_len, + data_2_len_used); + memcpy(out_data_2 + data_2_len_used, + ccm_mac_p, ctx->ccm_mac_len); } else { - bcopy(macp, out_data_1, out_data_1_len); + 
memcpy(out_data_1, macp, out_data_1_len); if (out_data_1_len == ctx->ccm_remainder_len) { /* mac will be in out_data_2 */ - bcopy(ccm_mac_p, out_data_2, + memcpy(out_data_2, ccm_mac_p, ctx->ccm_mac_len); } else { size_t len_not_used = out_data_1_len - @@ -291,11 +288,11 @@ ccm_encrypt_final(ccm_ctx_t *ctx, crypto_data_t *out, size_t block_size, * out_data_1, part of the mac will be * in out_data_2 */ - bcopy(ccm_mac_p, - out_data_1 + ctx->ccm_remainder_len, - len_not_used); - bcopy(ccm_mac_p + len_not_used, - out_data_2, + memcpy(out_data_1 + + ctx->ccm_remainder_len, + ccm_mac_p, len_not_used); + memcpy(out_data_2, + ccm_mac_p + len_not_used, ctx->ccm_mac_len - len_not_used); } @@ -303,9 +300,9 @@ ccm_encrypt_final(ccm_ctx_t *ctx, crypto_data_t *out, size_t block_size, } } else { /* copy block to where it belongs */ - bcopy(ccm_mac_p, out_data_1, out_data_1_len); + memcpy(out_data_1, ccm_mac_p, out_data_1_len); if (out_data_2 != NULL) { - bcopy(ccm_mac_p + out_data_1_len, out_data_2, + memcpy(out_data_2, ccm_mac_p + out_data_1_len, block_size - out_data_1_len); } } @@ -342,7 +339,6 @@ ccm_decrypt_incomplete_block(ccm_ctx_t *ctx, * returned to the caller. 
It will be returned when decrypt_final() is * called if the MAC matches */ -/* ARGSUSED */ int ccm_mode_decrypt_contiguous_blocks(ccm_ctx_t *ctx, char *data, size_t length, crypto_data_t *out, size_t block_size, @@ -350,6 +346,7 @@ ccm_mode_decrypt_contiguous_blocks(ccm_ctx_t *ctx, char *data, size_t length, void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { + (void) out; size_t remainder = length; size_t need = 0; uint8_t *datap = (uint8_t *)data; @@ -373,7 +370,7 @@ ccm_mode_decrypt_contiguous_blocks(ccm_ctx_t *ctx, char *data, size_t length, } tmp = (uint8_t *)ctx->ccm_mac_input_buf; - bcopy(datap, tmp + pm_len, length); + memcpy(tmp + pm_len, datap, length); ctx->ccm_processed_mac_len += length; return (CRYPTO_SUCCESS); @@ -406,15 +403,15 @@ ccm_mode_decrypt_contiguous_blocks(ccm_ctx_t *ctx, char *data, size_t length, mac_len = length - pt_part; ctx->ccm_processed_mac_len = mac_len; - bcopy(data + pt_part, ctx->ccm_mac_input_buf, mac_len); + memcpy(ctx->ccm_mac_input_buf, data + pt_part, mac_len); if (pt_part + ctx->ccm_remainder_len < block_size) { /* * since this is last of the ciphertext, will * just decrypt with it here */ - bcopy(datap, &((uint8_t *)ctx->ccm_remainder) - [ctx->ccm_remainder_len], pt_part); + memcpy(&((uint8_t *)ctx->ccm_remainder) + [ctx->ccm_remainder_len], datap, pt_part); ctx->ccm_remainder_len += pt_part; ccm_decrypt_incomplete_block(ctx, encrypt_block); ctx->ccm_processed_data_len += ctx->ccm_remainder_len; @@ -425,9 +422,9 @@ ccm_mode_decrypt_contiguous_blocks(ccm_ctx_t *ctx, char *data, size_t length, length = pt_part; } } else if (length + ctx->ccm_remainder_len < block_size) { - /* accumulate bytes here and return */ - bcopy(datap, - (uint8_t *)ctx->ccm_remainder + ctx->ccm_remainder_len, + /* accumulate bytes here and return */ + memcpy((uint8_t *)ctx->ccm_remainder + ctx->ccm_remainder_len, + datap, length); ctx->ccm_remainder_len += length; ctx->ccm_copy_to = datap; @@ -442,8 +439,8 @@ 
ccm_mode_decrypt_contiguous_blocks(ccm_ctx_t *ctx, char *data, size_t length, if (need > remainder) return (CRYPTO_ENCRYPTED_DATA_LEN_RANGE); - bcopy(datap, &((uint8_t *)ctx->ccm_remainder) - [ctx->ccm_remainder_len], need); + memcpy(&((uint8_t *)ctx->ccm_remainder) + [ctx->ccm_remainder_len], datap, need); blockp = (uint8_t *)ctx->ccm_remainder; } else { @@ -493,7 +490,7 @@ ccm_mode_decrypt_contiguous_blocks(ccm_ctx_t *ctx, char *data, size_t length, /* Incomplete last block */ if (remainder > 0 && remainder < block_size) { - bcopy(datap, ctx->ccm_remainder, remainder); + memcpy(ctx->ccm_remainder, datap, remainder); ctx->ccm_remainder_len = remainder; ctx->ccm_copy_to = datap; if (ctx->ccm_processed_mac_len > 0) { @@ -540,10 +537,9 @@ ccm_decrypt_final(ccm_ctx_t *ctx, crypto_data_t *out, size_t block_size, macp = (uint8_t *)ctx->ccm_tmp; while (mac_remain > 0) { - if (mac_remain < block_size) { - bzero(macp, block_size); - bcopy(pt, macp, mac_remain); + memset(macp, 0, block_size); + memcpy(macp, pt, mac_remain); mac_remain = 0; } else { copy_block(pt, macp); @@ -561,7 +557,7 @@ ccm_decrypt_final(ccm_ctx_t *ctx, crypto_data_t *out, size_t block_size, calculate_ccm_mac((ccm_ctx_t *)ctx, ccm_mac_p, encrypt_block); /* compare the input CCM MAC value with what we calculated */ - if (bcmp(ctx->ccm_mac_input_buf, ccm_mac_p, ctx->ccm_mac_len)) { + if (memcmp(ctx->ccm_mac_input_buf, ccm_mac_p, ctx->ccm_mac_len)) { /* They don't match */ return (CRYPTO_INVALID_MAC); } else { @@ -655,13 +651,13 @@ ccm_format_initial_blocks(uchar_t *nonce, ulong_t nonceSize, b0[0] = (have_adata << 6) | (((t - 2) / 2) << 3) | (q - 1); /* copy the nonce value into b0 */ - bcopy(nonce, &(b0[1]), nonceSize); + memcpy(&(b0[1]), nonce, nonceSize); /* store the length of the payload into b0 */ - bzero(&(b0[1+nonceSize]), q); + memset(&(b0[1+nonceSize]), 0, q); payloadSize = aes_ctx->ccm_data_len; - limit = 8 < q ? 
8 : q; + limit = MIN(8, q); for (i = 0, j = 0, k = 15; i < limit; i++, j += 8, k--) { b0[k] = (uint8_t)((payloadSize >> j) & 0xFF); @@ -674,9 +670,9 @@ ccm_format_initial_blocks(uchar_t *nonce, ulong_t nonceSize, cb[0] = 0x07 & (q-1); /* first byte */ /* copy the nonce value into the counter block */ - bcopy(nonce, &(cb[1]), nonceSize); + memcpy(&(cb[1]), nonce, nonceSize); - bzero(&(cb[1+nonceSize]), q); + memset(&(cb[1+nonceSize]), 0, q); /* Create the mask for the counter field based on the size of nonce */ q <<= 3; @@ -783,7 +779,7 @@ ccm_init(ccm_ctx_t *ctx, unsigned char *nonce, size_t nonce_len, /* The IV for CBC MAC for AES CCM mode is always zero */ ivp = (uint8_t *)ctx->ccm_tmp; - bzero(ivp, block_size); + memset(ivp, 0, block_size); xor_block(ivp, mac_buf); @@ -801,14 +797,14 @@ ccm_init(ccm_ctx_t *ctx, unsigned char *nonce, size_t nonce_len, /* 1st block: it contains encoded associated data, and some data */ authp = (uint8_t *)ctx->ccm_tmp; - bzero(authp, block_size); - bcopy(encoded_a, authp, encoded_a_len); + memset(authp, 0, block_size); + memcpy(authp, encoded_a, encoded_a_len); processed = block_size - encoded_a_len; if (processed > auth_data_len) { /* in case auth_data is very small */ processed = auth_data_len; } - bcopy(auth_data, authp+encoded_a_len, processed); + memcpy(authp+encoded_a_len, auth_data, processed); /* xor with previous buffer */ xor_block(authp, mac_buf); encrypt_block(ctx->ccm_keysched, mac_buf, mac_buf); @@ -824,8 +820,8 @@ ccm_init(ccm_ctx_t *ctx, unsigned char *nonce, size_t nonce_len, * There's not a block full of data, pad rest of * buffer with zero */ - bzero(authp, block_size); - bcopy(&(auth_data[processed]), authp, remainder); + memset(authp, 0, block_size); + memcpy(authp, &(auth_data[processed]), remainder); datap = (uint8_t *)authp; remainder = 0; } else { diff --git a/sys/contrib/openzfs/module/icp/algs/modes/ctr.c b/sys/contrib/openzfs/module/icp/algs/modes/ctr.c index 0188bdd395ff..db6b1c71d5cd 100644 --- 
a/sys/contrib/openzfs/module/icp/algs/modes/ctr.c +++ b/sys/contrib/openzfs/module/icp/algs/modes/ctr.c @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * @@ -52,15 +52,14 @@ ctr_mode_contiguous_blocks(ctr_ctx_t *ctx, char *data, size_t length, if (length + ctx->ctr_remainder_len < block_size) { /* accumulate bytes here and return */ - bcopy(datap, - (uint8_t *)ctx->ctr_remainder + ctx->ctr_remainder_len, + memcpy((uint8_t *)ctx->ctr_remainder + ctx->ctr_remainder_len, + datap, length); ctx->ctr_remainder_len += length; ctx->ctr_copy_to = datap; return (CRYPTO_SUCCESS); } - lastp = (uint8_t *)ctx->ctr_cb; crypto_init_ptrs(out, &iov_or_mp, &offset); do { @@ -71,8 +70,8 @@ ctr_mode_contiguous_blocks(ctr_ctx_t *ctx, char *data, size_t length, if (need > remainder) return (CRYPTO_DATA_LEN_RANGE); - bcopy(datap, &((uint8_t *)ctx->ctr_remainder) - [ctx->ctr_remainder_len], need); + memcpy(&((uint8_t *)ctx->ctr_remainder) + [ctx->ctr_remainder_len], datap, need); blockp = (uint8_t *)ctx->ctr_remainder; } else { @@ -114,9 +113,9 @@ ctr_mode_contiguous_blocks(ctr_ctx_t *ctx, char *data, size_t length, &out_data_1_len, &out_data_2, block_size); /* copy block to where it belongs */ - bcopy(lastp, out_data_1, out_data_1_len); + memcpy(out_data_1, lastp, out_data_1_len); if (out_data_2 != NULL) { - bcopy(lastp + out_data_1_len, out_data_2, + memcpy(out_data_2, lastp + out_data_1_len, block_size - out_data_1_len); } /* update offset */ @@ -134,7 +133,7 @@ ctr_mode_contiguous_blocks(ctr_ctx_t *ctx, char *data, size_t length, /* Incomplete last block. 
*/ if (remainder > 0 && remainder < block_size) { - bcopy(datap, ctx->ctr_remainder, remainder); + memcpy(ctx->ctr_remainder, datap, remainder); ctx->ctr_remainder_len = remainder; ctx->ctr_copy_to = datap; goto out; @@ -176,10 +175,11 @@ ctr_mode_final(ctr_ctx_t *ctx, crypto_data_t *out, crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1, &out_data_1_len, &out_data_2, ctx->ctr_remainder_len); - bcopy(p, out_data_1, out_data_1_len); + memcpy(out_data_1, p, out_data_1_len); if (out_data_2 != NULL) { - bcopy((uint8_t *)p + out_data_1_len, - out_data_2, ctx->ctr_remainder_len - out_data_1_len); + memcpy(out_data_2, + (uint8_t *)p + out_data_1_len, + ctx->ctr_remainder_len - out_data_1_len); } out->cd_offset += ctx->ctr_remainder_len; ctx->ctr_remainder_len = 0; @@ -214,7 +214,6 @@ ctr_init_ctx(ctr_ctx_t *ctr_ctx, ulong_t count, uint8_t *cb, return (CRYPTO_SUCCESS); } -/* ARGSUSED */ void * ctr_alloc_ctx(int kmflag) { diff --git a/sys/contrib/openzfs/module/icp/algs/modes/ecb.c b/sys/contrib/openzfs/module/icp/algs/modes/ecb.c index 025f5825cf04..e2d8e71c161c 100644 --- a/sys/contrib/openzfs/module/icp/algs/modes/ecb.c +++ b/sys/contrib/openzfs/module/icp/algs/modes/ecb.c @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. 
* @@ -49,8 +49,8 @@ ecb_cipher_contiguous_blocks(ecb_ctx_t *ctx, char *data, size_t length, if (length + ctx->ecb_remainder_len < block_size) { /* accumulate bytes here and return */ - bcopy(datap, - (uint8_t *)ctx->ecb_remainder + ctx->ecb_remainder_len, + memcpy((uint8_t *)ctx->ecb_remainder + ctx->ecb_remainder_len, + datap, length); ctx->ecb_remainder_len += length; ctx->ecb_copy_to = datap; @@ -68,8 +68,8 @@ ecb_cipher_contiguous_blocks(ecb_ctx_t *ctx, char *data, size_t length, if (need > remainder) return (CRYPTO_DATA_LEN_RANGE); - bcopy(datap, &((uint8_t *)ctx->ecb_remainder) - [ctx->ecb_remainder_len], need); + memcpy(&((uint8_t *)ctx->ecb_remainder) + [ctx->ecb_remainder_len], datap, need); blockp = (uint8_t *)ctx->ecb_remainder; } else { @@ -81,9 +81,9 @@ ecb_cipher_contiguous_blocks(ecb_ctx_t *ctx, char *data, size_t length, &out_data_1_len, &out_data_2, block_size); /* copy block to where it belongs */ - bcopy(lastp, out_data_1, out_data_1_len); + memcpy(out_data_1, lastp, out_data_1_len); if (out_data_2 != NULL) { - bcopy(lastp + out_data_1_len, out_data_2, + memcpy(out_data_2, lastp + out_data_1_len, block_size - out_data_1_len); } /* update offset */ @@ -101,7 +101,7 @@ ecb_cipher_contiguous_blocks(ecb_ctx_t *ctx, char *data, size_t length, /* Incomplete last block. */ if (remainder > 0 && remainder < block_size) { - bcopy(datap, ctx->ecb_remainder, remainder); + memcpy(ctx->ecb_remainder, datap, remainder); ctx->ecb_remainder_len = remainder; ctx->ecb_copy_to = datap; goto out; @@ -114,7 +114,6 @@ out: return (CRYPTO_SUCCESS); } -/* ARGSUSED */ void * ecb_alloc_ctx(int kmflag) { diff --git a/sys/contrib/openzfs/module/icp/algs/modes/gcm.c b/sys/contrib/openzfs/module/icp/algs/modes/gcm.c index 7332834cbe37..dd8db6f97460 100644 --- a/sys/contrib/openzfs/module/icp/algs/modes/gcm.c +++ b/sys/contrib/openzfs/module/icp/algs/modes/gcm.c @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. 
* * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * @@ -23,6 +23,7 @@ */ #include <sys/zfs_context.h> +#include <sys/cmn_err.h> #include <modes/modes.h> #include <sys/crypto/common.h> #include <sys/crypto/icp.h> @@ -49,6 +50,11 @@ static uint32_t icp_gcm_impl = IMPL_FASTEST; static uint32_t user_sel_impl = IMPL_FASTEST; +static inline int gcm_init_ctx_impl(boolean_t, gcm_ctx_t *, char *, size_t, + int (*)(const void *, const uint8_t *, uint8_t *), + void (*)(uint8_t *, uint8_t *), + void (*)(uint8_t *, uint8_t *)); + #ifdef CAN_USE_GCM_ASM /* Does the architecture we run on support the MOVBE instruction? */ boolean_t gcm_avx_can_use_movbe = B_FALSE; @@ -59,7 +65,7 @@ boolean_t gcm_avx_can_use_movbe = B_FALSE; static boolean_t gcm_use_avx = B_FALSE; #define GCM_IMPL_USE_AVX (*(volatile boolean_t *)&gcm_use_avx) -extern boolean_t atomic_toggle_boolean_nv(volatile boolean_t *); +extern boolean_t ASMABI atomic_toggle_boolean_nv(volatile boolean_t *); static inline boolean_t gcm_avx_will_work(void); static inline void gcm_set_avx(boolean_t); @@ -71,7 +77,7 @@ static int gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *, char *, size_t, static int gcm_encrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t); static int gcm_decrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t); -static int gcm_init_avx(gcm_ctx_t *, unsigned char *, size_t, unsigned char *, +static int gcm_init_avx(gcm_ctx_t *, const uint8_t *, size_t, const uint8_t *, size_t, size_t); #endif /* ifdef CAN_USE_GCM_ASM */ @@ -108,8 +114,8 @@ gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length, if (length + ctx->gcm_remainder_len < block_size) { /* accumulate bytes here and return */ - bcopy(datap, - (uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len, + 
memcpy((uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len, + datap, length); ctx->gcm_remainder_len += length; if (ctx->gcm_copy_to == NULL) { @@ -118,7 +124,6 @@ gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length, return (CRYPTO_SUCCESS); } - lastp = (uint8_t *)ctx->gcm_cb; crypto_init_ptrs(out, &iov_or_mp, &offset); gops = gcm_impl_get_ops(); @@ -130,8 +135,8 @@ gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length, if (need > remainder) return (CRYPTO_DATA_LEN_RANGE); - bcopy(datap, &((uint8_t *)ctx->gcm_remainder) - [ctx->gcm_remainder_len], need); + memcpy(&((uint8_t *)ctx->gcm_remainder) + [ctx->gcm_remainder_len], datap, need); blockp = (uint8_t *)ctx->gcm_remainder; } else { @@ -162,10 +167,10 @@ gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length, if (out_data_1_len == block_size) { copy_block(lastp, out_data_1); } else { - bcopy(lastp, out_data_1, out_data_1_len); + memcpy(out_data_1, lastp, out_data_1_len); if (out_data_2 != NULL) { - bcopy(lastp + out_data_1_len, - out_data_2, + memcpy(out_data_2, + lastp + out_data_1_len, block_size - out_data_1_len); } } @@ -187,7 +192,7 @@ gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length, /* Incomplete last block. 
*/ if (remainder > 0 && remainder < block_size) { - bcopy(datap, ctx->gcm_remainder, remainder); + memcpy(ctx->gcm_remainder, datap, remainder); ctx->gcm_remainder_len = remainder; ctx->gcm_copy_to = datap; goto out; @@ -199,13 +204,13 @@ out: return (CRYPTO_SUCCESS); } -/* ARGSUSED */ int gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { + (void) copy_block; #ifdef CAN_USE_GCM_ASM if (ctx->gcm_use_avx == B_TRUE) return (gcm_encrypt_final_avx(ctx, out, block_size)); @@ -245,7 +250,7 @@ gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size, (uint8_t *)ctx->gcm_tmp); macp = (uint8_t *)ctx->gcm_remainder; - bzero(macp + ctx->gcm_remainder_len, + memset(macp + ctx->gcm_remainder_len, 0, block_size - ctx->gcm_remainder_len); /* XOR with counter block */ @@ -309,8 +314,8 @@ gcm_decrypt_incomplete_block(gcm_ctx_t *ctx, size_t block_size, size_t index, counterp = (uint8_t *)ctx->gcm_tmp; /* authentication tag */ - bzero((uint8_t *)ctx->gcm_tmp, block_size); - bcopy(datap, (uint8_t *)ctx->gcm_tmp, ctx->gcm_remainder_len); + memset((uint8_t *)ctx->gcm_tmp, 0, block_size); + memcpy((uint8_t *)ctx->gcm_tmp, datap, ctx->gcm_remainder_len); /* add ciphertext to the hash */ GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gcm_impl_get_ops()); @@ -324,7 +329,6 @@ gcm_decrypt_incomplete_block(gcm_ctx_t *ctx, size_t block_size, size_t index, } } -/* ARGSUSED */ int gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length, crypto_data_t *out, size_t block_size, @@ -332,6 +336,8 @@ gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length, void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { + (void) out, (void) block_size, (void) encrypt_block, (void) copy_block, + (void) xor_block; size_t new_len; uint8_t *new; @@ -341,17 +347,23 @@ 
gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length, */ if (length > 0) { new_len = ctx->gcm_pt_buf_len + length; - new = vmem_alloc(new_len, ctx->gcm_kmflag); + new = vmem_alloc(new_len, KM_SLEEP); if (new == NULL) { vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len); ctx->gcm_pt_buf = NULL; return (CRYPTO_HOST_MEMORY); } - bcopy(ctx->gcm_pt_buf, new, ctx->gcm_pt_buf_len); - vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len); + + if (ctx->gcm_pt_buf != NULL) { + memcpy(new, ctx->gcm_pt_buf, ctx->gcm_pt_buf_len); + vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len); + } else { + ASSERT0(ctx->gcm_pt_buf_len); + } + ctx->gcm_pt_buf = new; ctx->gcm_pt_buf_len = new_len; - bcopy(data, &ctx->gcm_pt_buf[ctx->gcm_processed_data_len], + memcpy(&ctx->gcm_pt_buf[ctx->gcm_processed_data_len], data, length); ctx->gcm_processed_data_len += length; } @@ -390,7 +402,7 @@ gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size, while (remainder > 0) { /* Incomplete last block */ if (remainder < block_size) { - bcopy(blockp, ctx->gcm_remainder, remainder); + memcpy(ctx->gcm_remainder, blockp, remainder); ctx->gcm_remainder_len = remainder; /* * not expecting anymore ciphertext, just @@ -431,7 +443,7 @@ out: xor_block((uint8_t *)ctx->gcm_J0, ghash); /* compare the input authentication tag with what we calculated */ - if (bcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) { + if (memcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) { /* They don't match */ return (CRYPTO_INVALID_MAC); } else { @@ -472,7 +484,7 @@ gcm_validate_args(CK_AES_GCM_PARAMS *gcm_param) } static void -gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len, +gcm_format_initial_blocks(const uint8_t *iv, ulong_t iv_len, gcm_ctx_t *ctx, size_t block_size, void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) @@ -488,7 +500,7 @@ gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len, ghash = (uint8_t *)ctx->gcm_ghash; cb = (uint8_t 
*)ctx->gcm_cb; if (iv_len == 12) { - bcopy(iv, cb, 12); + memcpy(cb, iv, 12); cb[12] = 0; cb[13] = 0; cb[14] = 0; @@ -499,8 +511,8 @@ gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len, /* GHASH the IV */ do { if (remainder < block_size) { - bzero(cb, block_size); - bcopy(&(iv[processed]), cb, remainder); + memset(cb, 0, block_size); + memcpy(cb, &(iv[processed]), remainder); datap = (uint8_t *)cb; remainder = 0; } else { @@ -521,8 +533,8 @@ gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len, } static int -gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len, - unsigned char *auth_data, size_t auth_data_len, size_t block_size, +gcm_init(gcm_ctx_t *ctx, const uint8_t *iv, size_t iv_len, + const uint8_t *auth_data, size_t auth_data_len, size_t block_size, int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) @@ -532,7 +544,7 @@ gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len, size_t remainder, processed; /* encrypt zero block to get subkey H */ - bzero(ctx->gcm_H, sizeof (ctx->gcm_H)); + memset(ctx->gcm_H, 0, sizeof (ctx->gcm_H)); encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_H, (uint8_t *)ctx->gcm_H); @@ -542,8 +554,8 @@ gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len, gops = gcm_impl_get_ops(); authp = (uint8_t *)ctx->gcm_tmp; ghash = (uint8_t *)ctx->gcm_ghash; - bzero(authp, block_size); - bzero(ghash, block_size); + memset(authp, 0, block_size); + memset(ghash, 0, block_size); processed = 0; remainder = auth_data_len; @@ -553,8 +565,15 @@ gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len, * There's not a block full of data, pad rest of * buffer with zero */ - bzero(authp, block_size); - bcopy(&(auth_data[processed]), authp, remainder); + + if (auth_data != NULL) { + memset(authp, 0, block_size); + memcpy(authp, &(auth_data[processed]), + remainder); + } else { + ASSERT0(remainder); + } + datap = (uint8_t *)authp; remainder = 
0; } else { @@ -574,8 +593,6 @@ gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len, /* * The following function is called at encrypt or decrypt init time * for AES GCM mode. - * - * Init the GCM context struct. Handle the cycle and avx implementations here. */ int gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size, @@ -583,31 +600,75 @@ gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size, void (*copy_block)(uint8_t *, uint8_t *), void (*xor_block)(uint8_t *, uint8_t *)) { - int rv; + return (gcm_init_ctx_impl(B_FALSE, gcm_ctx, param, block_size, + encrypt_block, copy_block, xor_block)); +} + +/* + * The following function is called at encrypt or decrypt init time + * for AES GMAC mode. + */ +int +gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size, + int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), + void (*copy_block)(uint8_t *, uint8_t *), + void (*xor_block)(uint8_t *, uint8_t *)) +{ + return (gcm_init_ctx_impl(B_TRUE, gcm_ctx, param, block_size, + encrypt_block, copy_block, xor_block)); +} + +/* + * Init the GCM context struct. Handle the cycle and avx implementations here. + * Initialization of a GMAC context differs slightly from a GCM context. + */ +static inline int +gcm_init_ctx_impl(boolean_t gmac_mode, gcm_ctx_t *gcm_ctx, char *param, + size_t block_size, int (*encrypt_block)(const void *, const uint8_t *, + uint8_t *), void (*copy_block)(uint8_t *, uint8_t *), + void (*xor_block)(uint8_t *, uint8_t *)) +{ CK_AES_GCM_PARAMS *gcm_param; + int rv = CRYPTO_SUCCESS; + size_t tag_len, iv_len; if (param != NULL) { gcm_param = (CK_AES_GCM_PARAMS *)(void *)param; - if ((rv = gcm_validate_args(gcm_param)) != 0) { - return (rv); - } + if (gmac_mode == B_FALSE) { + /* GCM mode. 
*/ + if ((rv = gcm_validate_args(gcm_param)) != 0) { + return (rv); + } + gcm_ctx->gcm_flags |= GCM_MODE; - gcm_ctx->gcm_tag_len = gcm_param->ulTagBits; - gcm_ctx->gcm_tag_len >>= 3; + size_t tbits = gcm_param->ulTagBits; + tag_len = CRYPTO_BITS2BYTES(tbits); + iv_len = gcm_param->ulIvLen; + } else { + /* GMAC mode. */ + gcm_ctx->gcm_flags |= GMAC_MODE; + tag_len = CRYPTO_BITS2BYTES(AES_GMAC_TAG_BITS); + iv_len = AES_GMAC_IV_LEN; + } + gcm_ctx->gcm_tag_len = tag_len; gcm_ctx->gcm_processed_data_len = 0; /* these values are in bits */ gcm_ctx->gcm_len_a_len_c[0] = htonll(CRYPTO_BYTES2BITS(gcm_param->ulAADLen)); - - rv = CRYPTO_SUCCESS; - gcm_ctx->gcm_flags |= GCM_MODE; } else { return (CRYPTO_MECHANISM_PARAM_INVALID); } + const uint8_t *iv = (const uint8_t *)gcm_param->pIv; + const uint8_t *aad = (const uint8_t *)gcm_param->pAAD; + size_t aad_len = gcm_param->ulAADLen; + #ifdef CAN_USE_GCM_ASM + boolean_t needs_bswap = + ((aes_key_t *)gcm_ctx->gcm_keysched)->ops->needs_byteswap; + if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) { gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX; } else { @@ -616,96 +677,41 @@ gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size, * non-avx contexts alternately. */ gcm_ctx->gcm_use_avx = gcm_toggle_avx(); - /* - * We don't handle byte swapped key schedules in the avx - * code path. - */ - aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched; - if (ks->ops->needs_byteswap == B_TRUE) { + + /* The avx impl. doesn't handle byte swapped key schedules. */ + if (gcm_ctx->gcm_use_avx == B_TRUE && needs_bswap == B_TRUE) { gcm_ctx->gcm_use_avx = B_FALSE; } - /* Use the MOVBE and the BSWAP variants alternately. */ - if (gcm_ctx->gcm_use_avx == B_TRUE && + /* + * If this is a GCM context, use the MOVBE and the BSWAP + * variants alternately. GMAC contexts code paths do not + * use the MOVBE instruction. 
+ */ + if (gcm_ctx->gcm_use_avx == B_TRUE && gmac_mode == B_FALSE && zfs_movbe_available() == B_TRUE) { (void) atomic_toggle_boolean_nv( (volatile boolean_t *)&gcm_avx_can_use_movbe); } } - /* Allocate Htab memory as needed. */ - if (gcm_ctx->gcm_use_avx == B_TRUE) { - size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx); - - if (htab_len == 0) { - return (CRYPTO_MECHANISM_PARAM_INVALID); - } - gcm_ctx->gcm_htab_len = htab_len; - gcm_ctx->gcm_Htable = - (uint64_t *)kmem_alloc(htab_len, gcm_ctx->gcm_kmflag); - - if (gcm_ctx->gcm_Htable == NULL) { - return (CRYPTO_HOST_MEMORY); - } - } - /* Avx and non avx context initialization differs from here on. */ - if (gcm_ctx->gcm_use_avx == B_FALSE) { -#endif /* ifdef CAN_USE_GCM_ASM */ - if (gcm_init(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen, - gcm_param->pAAD, gcm_param->ulAADLen, block_size, - encrypt_block, copy_block, xor_block) != 0) { - rv = CRYPTO_MECHANISM_PARAM_INVALID; - } -#ifdef CAN_USE_GCM_ASM - } else { - if (gcm_init_avx(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen, - gcm_param->pAAD, gcm_param->ulAADLen, block_size) != 0) { - rv = CRYPTO_MECHANISM_PARAM_INVALID; - } - } -#endif /* ifdef CAN_USE_GCM_ASM */ - - return (rv); -} - -int -gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size, - int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), - void (*copy_block)(uint8_t *, uint8_t *), - void (*xor_block)(uint8_t *, uint8_t *)) -{ - int rv; - CK_AES_GMAC_PARAMS *gmac_param; - - if (param != NULL) { - gmac_param = (CK_AES_GMAC_PARAMS *)(void *)param; - - gcm_ctx->gcm_tag_len = CRYPTO_BITS2BYTES(AES_GMAC_TAG_BITS); - gcm_ctx->gcm_processed_data_len = 0; - - /* these values are in bits */ - gcm_ctx->gcm_len_a_len_c[0] - = htonll(CRYPTO_BYTES2BITS(gmac_param->ulAADLen)); - - rv = CRYPTO_SUCCESS; - gcm_ctx->gcm_flags |= GMAC_MODE; - } else { - return (CRYPTO_MECHANISM_PARAM_INVALID); - } - -#ifdef CAN_USE_GCM_ASM /* - * Handle the "cycle" implementation by creating avx and non avx - 
* contexts alternately. + * We don't handle byte swapped key schedules in the avx code path, + * still they could be created by the aes generic implementation. + * Make sure not to use them since we'll corrupt data if we do. */ - if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) { - gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX; - } else { - gcm_ctx->gcm_use_avx = gcm_toggle_avx(); - } - /* We don't handle byte swapped key schedules in the avx code path. */ - aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched; - if (ks->ops->needs_byteswap == B_TRUE) { + if (gcm_ctx->gcm_use_avx == B_TRUE && needs_bswap == B_TRUE) { gcm_ctx->gcm_use_avx = B_FALSE; + + cmn_err_once(CE_WARN, + "ICP: Can't use the aes generic or cycle implementations " + "in combination with the gcm avx implementation!"); + cmn_err_once(CE_WARN, + "ICP: Falling back to a compatible implementation, " + "aes-gcm performance will likely be degraded."); + cmn_err_once(CE_WARN, + "ICP: Choose at least the x86_64 aes implementation to " + "restore performance."); } + /* Allocate Htab memory as needed. */ if (gcm_ctx->gcm_use_avx == B_TRUE) { size_t htab_len = gcm_simd_get_htab_size(gcm_ctx->gcm_use_avx); @@ -715,25 +721,23 @@ gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size, } gcm_ctx->gcm_htab_len = htab_len; gcm_ctx->gcm_Htable = - (uint64_t *)kmem_alloc(htab_len, gcm_ctx->gcm_kmflag); + kmem_alloc(htab_len, KM_SLEEP); if (gcm_ctx->gcm_Htable == NULL) { return (CRYPTO_HOST_MEMORY); } } - /* Avx and non avx context initialization differs from here on. 
*/ if (gcm_ctx->gcm_use_avx == B_FALSE) { -#endif /* ifdef CAN_USE_GCM_ASM */ - if (gcm_init(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN, - gmac_param->pAAD, gmac_param->ulAADLen, block_size, - encrypt_block, copy_block, xor_block) != 0) { +#endif /* ifdef CAN_USE_GCM_ASM */ + if (gcm_init(gcm_ctx, iv, iv_len, aad, aad_len, block_size, + encrypt_block, copy_block, xor_block) != CRYPTO_SUCCESS) { rv = CRYPTO_MECHANISM_PARAM_INVALID; } #ifdef CAN_USE_GCM_ASM } else { - if (gcm_init_avx(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN, - gmac_param->pAAD, gmac_param->ulAADLen, block_size) != 0) { + if (gcm_init_avx(gcm_ctx, iv, iv_len, aad, aad_len, + block_size) != CRYPTO_SUCCESS) { rv = CRYPTO_MECHANISM_PARAM_INVALID; } } @@ -766,19 +770,13 @@ gmac_alloc_ctx(int kmflag) return (gcm_ctx); } -void -gcm_set_kmflag(gcm_ctx_t *ctx, int kmflag) -{ - ctx->gcm_kmflag = kmflag; -} - /* GCM implementation that contains the fastest methods */ static gcm_impl_ops_t gcm_fastest_impl = { .name = "fastest" }; /* All compiled in implementations */ -const gcm_impl_ops_t *gcm_all_impl[] = { +static const gcm_impl_ops_t *gcm_all_impl[] = { &gcm_generic_impl, #if defined(__x86_64) && defined(HAVE_PCLMULQDQ) &gcm_pclmulqdq_impl, @@ -798,7 +796,7 @@ static gcm_impl_ops_t *gcm_supp_impl[ARRAY_SIZE(gcm_all_impl)]; * fallback to the fastest generic implementation. */ const gcm_impl_ops_t * -gcm_impl_get_ops() +gcm_impl_get_ops(void) { if (!kfpu_allowed()) return (&gcm_generic_impl); @@ -899,7 +897,7 @@ gcm_impl_init(void) } static const struct { - char *name; + const char *name; uint32_t sel; } gcm_impl_opts[] = { { "cycle", IMPL_CYCLE }, @@ -1013,13 +1011,15 @@ icp_gcm_impl_get(char *buffer, zfs_kernel_param_t *kp) } #endif fmt = (impl == gcm_impl_opts[i].sel) ? 
"[%s] " : "%s "; - cnt += sprintf(buffer + cnt, fmt, gcm_impl_opts[i].name); + cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt, + gcm_impl_opts[i].name); } /* list all supported implementations */ for (i = 0; i < gcm_supp_impl_cnt; i++) { fmt = (i == impl) ? "[%s] " : "%s "; - cnt += sprintf(buffer + cnt, fmt, gcm_supp_impl[i]->name); + cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt, + gcm_supp_impl[i]->name); } return (cnt); @@ -1045,9 +1045,6 @@ MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation."); #define GCM_AVX_MAX_CHUNK_SIZE \ (((128*1024)/GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES) -/* Get the chunk size module parameter. */ -#define GCM_CHUNK_SIZE_READ *(volatile uint32_t *) &gcm_avx_chunk_size - /* Clear the FPU registers since they hold sensitive internal state. */ #define clear_fpu_regs() clear_fpu_regs_avx() #define GHASH_AVX(ctx, in, len) \ @@ -1056,6 +1053,9 @@ MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation."); #define gcm_incr_counter_block(ctx) gcm_incr_counter_block_by(ctx, 1) +/* Get the chunk size module parameter. */ +#define GCM_CHUNK_SIZE_READ *(volatile uint32_t *) &gcm_avx_chunk_size + /* * Module parameter: number of bytes to process at once while owning the FPU. 
* Rounded down to the next GCM_AVX_MIN_DECRYPT_BYTES byte boundary and is @@ -1064,19 +1064,19 @@ MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation."); static uint32_t gcm_avx_chunk_size = ((32 * 1024) / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES; -extern void clear_fpu_regs_avx(void); -extern void gcm_xor_avx(const uint8_t *src, uint8_t *dst); -extern void aes_encrypt_intel(const uint32_t rk[], int nr, +extern void ASMABI clear_fpu_regs_avx(void); +extern void ASMABI gcm_xor_avx(const uint8_t *src, uint8_t *dst); +extern void ASMABI aes_encrypt_intel(const uint32_t rk[], int nr, const uint32_t pt[4], uint32_t ct[4]); -extern void gcm_init_htab_avx(uint64_t *Htable, const uint64_t H[2]); -extern void gcm_ghash_avx(uint64_t ghash[2], const uint64_t *Htable, +extern void ASMABI gcm_init_htab_avx(uint64_t *Htable, const uint64_t H[2]); +extern void ASMABI gcm_ghash_avx(uint64_t ghash[2], const uint64_t *Htable, const uint8_t *in, size_t len); -extern size_t aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t, +extern size_t ASMABI aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t, const void *, uint64_t *, uint64_t *); -extern size_t aesni_gcm_decrypt(const uint8_t *, uint8_t *, size_t, +extern size_t ASMABI aesni_gcm_decrypt(const uint8_t *, uint8_t *, size_t, const void *, uint64_t *, uint64_t *); static inline boolean_t @@ -1118,24 +1118,6 @@ gcm_simd_get_htab_size(boolean_t simd_mode) } } -/* - * Clear sensitive data in the context. - * - * ctx->gcm_remainder may contain a plaintext remainder. ctx->gcm_H and - * ctx->gcm_Htable contain the hash sub key which protects authentication. - * - * Although extremely unlikely, ctx->gcm_J0 and ctx->gcm_tmp could be used for - * a known plaintext attack, they consists of the IV and the first and last - * counter respectively. If they should be cleared is debatable. 
- */ -static inline void -gcm_clear_ctx(gcm_ctx_t *ctx) -{ - bzero(ctx->gcm_remainder, sizeof (ctx->gcm_remainder)); - bzero(ctx->gcm_H, sizeof (ctx->gcm_H)); - bzero(ctx->gcm_J0, sizeof (ctx->gcm_J0)); - bzero(ctx->gcm_tmp, sizeof (ctx->gcm_tmp)); -} /* Increment the GCM counter block by n. */ static inline void @@ -1171,6 +1153,8 @@ gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data, int rv = CRYPTO_SUCCESS; ASSERT(block_size == GCM_BLOCK_LEN); + ASSERT3S(((aes_key_t *)ctx->gcm_keysched)->ops->needs_byteswap, ==, + B_FALSE); /* * If the last call left an incomplete block, try to fill * it first. @@ -1179,8 +1163,8 @@ gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data, need = block_size - ctx->gcm_remainder_len; if (length < need) { /* Accumulate bytes here and return. */ - bcopy(datap, (uint8_t *)ctx->gcm_remainder + - ctx->gcm_remainder_len, length); + memcpy((uint8_t *)ctx->gcm_remainder + + ctx->gcm_remainder_len, datap, length); ctx->gcm_remainder_len += length; if (ctx->gcm_copy_to == NULL) { @@ -1189,8 +1173,8 @@ gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data, return (CRYPTO_SUCCESS); } else { /* Complete incomplete block. */ - bcopy(datap, (uint8_t *)ctx->gcm_remainder + - ctx->gcm_remainder_len, need); + memcpy((uint8_t *)ctx->gcm_remainder + + ctx->gcm_remainder_len, datap, need); ctx->gcm_copy_to = NULL; } @@ -1198,7 +1182,7 @@ gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data, /* Allocate a buffer to encrypt to if there is enough input. */ if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) { - ct_buf = vmem_alloc(chunk_size, ctx->gcm_kmflag); + ct_buf = vmem_alloc(chunk_size, KM_SLEEP); if (ct_buf == NULL) { return (CRYPTO_HOST_MEMORY); } @@ -1268,7 +1252,7 @@ gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data, /* Less than GCM_AVX_MIN_ENCRYPT_BYTES remain, operate on blocks. 
*/ while (bleft > 0) { if (bleft < block_size) { - bcopy(datap, ctx->gcm_remainder, bleft); + memcpy(ctx->gcm_remainder, datap, bleft); ctx->gcm_remainder_len = bleft; ctx->gcm_copy_to = datap; goto out; @@ -1315,6 +1299,8 @@ gcm_encrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size) int rv; ASSERT(block_size == GCM_BLOCK_LEN); + ASSERT3S(((aes_key_t *)ctx->gcm_keysched)->ops->needs_byteswap, ==, + B_FALSE); if (out->cd_length < (rem_len + ctx->gcm_tag_len)) { return (CRYPTO_DATA_LEN_RANGE); @@ -1327,7 +1313,7 @@ gcm_encrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size) const uint32_t *cb = (uint32_t *)ctx->gcm_cb; aes_encrypt_intel(keysched, aes_rounds, cb, (uint32_t *)tmp); - bzero(remainder + rem_len, block_size - rem_len); + memset(remainder + rem_len, 0, block_size - rem_len); for (int i = 0; i < rem_len; i++) { remainder[i] ^= tmp[i]; } @@ -1358,8 +1344,6 @@ gcm_encrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size) return (rv); out->cd_offset += ctx->gcm_tag_len; - /* Clear sensitive data in the context before returning. */ - gcm_clear_ctx(ctx); return (CRYPTO_SUCCESS); } @@ -1372,6 +1356,8 @@ gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size) { ASSERT3U(ctx->gcm_processed_data_len, ==, ctx->gcm_pt_buf_len); ASSERT3U(block_size, ==, 16); + ASSERT3S(((aes_key_t *)ctx->gcm_keysched)->ops->needs_byteswap, ==, + B_FALSE); size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ; size_t pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len; @@ -1423,8 +1409,8 @@ gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size) if (bleft < block_size) { uint8_t *lastb = (uint8_t *)ctx->gcm_remainder; - bzero(lastb, block_size); - bcopy(datap, lastb, bleft); + memset(lastb, 0, block_size); + memcpy(lastb, datap, bleft); /* The GCM processing. 
*/ GHASH_AVX(ctx, lastb, block_size); aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp); @@ -1460,7 +1446,7 @@ gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size) kfpu_end(); /* Compare the input authentication tag with what we calculated. */ - if (bcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) { + if (memcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) { /* They don't match. */ return (CRYPTO_INVALID_MAC); } @@ -1469,7 +1455,6 @@ gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size) return (rv); } out->cd_offset += pt_len; - gcm_clear_ctx(ctx); return (CRYPTO_SUCCESS); } @@ -1478,22 +1463,24 @@ gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size) * initial counter block. */ static int -gcm_init_avx(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len, - unsigned char *auth_data, size_t auth_data_len, size_t block_size) +gcm_init_avx(gcm_ctx_t *ctx, const uint8_t *iv, size_t iv_len, + const uint8_t *auth_data, size_t auth_data_len, size_t block_size) { uint8_t *cb = (uint8_t *)ctx->gcm_cb; uint64_t *H = ctx->gcm_H; const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32; int aes_rounds = ((aes_key_t *)ctx->gcm_keysched)->nr; - uint8_t *datap = auth_data; + const uint8_t *datap = auth_data; size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ; size_t bleft; ASSERT(block_size == GCM_BLOCK_LEN); + ASSERT3S(((aes_key_t *)ctx->gcm_keysched)->ops->needs_byteswap, ==, + B_FALSE); /* Init H (encrypt zero block) and create the initial counter block. 
*/ - bzero(ctx->gcm_ghash, sizeof (ctx->gcm_ghash)); - bzero(H, sizeof (ctx->gcm_H)); + memset(ctx->gcm_ghash, 0, sizeof (ctx->gcm_ghash)); + memset(H, 0, sizeof (ctx->gcm_H)); kfpu_begin(); aes_encrypt_intel(keysched, aes_rounds, (const uint32_t *)H, (uint32_t *)H); @@ -1501,13 +1488,13 @@ gcm_init_avx(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len, gcm_init_htab_avx(ctx->gcm_Htable, H); if (iv_len == 12) { - bcopy(iv, cb, 12); + memcpy(cb, iv, 12); cb[12] = 0; cb[13] = 0; cb[14] = 0; cb[15] = 1; /* We need the ICB later. */ - bcopy(cb, ctx->gcm_J0, sizeof (ctx->gcm_J0)); + memcpy(ctx->gcm_J0, cb, sizeof (ctx->gcm_J0)); } else { /* * Most consumers use 12 byte IVs, so it's OK to use the @@ -1545,8 +1532,8 @@ gcm_init_avx(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len, /* Zero pad and hash incomplete last block. */ uint8_t *authp = (uint8_t *)ctx->gcm_tmp; - bzero(authp, block_size); - bcopy(datap, authp, incomp); + memset(authp, 0, block_size); + memcpy(authp, datap, incomp); GHASH_AVX(ctx, authp, block_size); } } diff --git a/sys/contrib/openzfs/module/icp/algs/modes/gcm_generic.c b/sys/contrib/openzfs/module/icp/algs/modes/gcm_generic.c index 16b57998a92f..84e26d09cdcf 100644 --- a/sys/contrib/openzfs/module/icp/algs/modes/gcm_generic.c +++ b/sys/contrib/openzfs/module/icp/algs/modes/gcm_generic.c @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. 
* diff --git a/sys/contrib/openzfs/module/icp/algs/modes/gcm_pclmulqdq.c b/sys/contrib/openzfs/module/icp/algs/modes/gcm_pclmulqdq.c index 05920115ce86..737d2e47ecb7 100644 --- a/sys/contrib/openzfs/module/icp/algs/modes/gcm_pclmulqdq.c +++ b/sys/contrib/openzfs/module/icp/algs/modes/gcm_pclmulqdq.c @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * @@ -26,9 +26,10 @@ #include <sys/types.h> #include <sys/simd.h> +#include <sys/asm_linkage.h> /* These functions are used to execute pclmulqdq based assembly methods */ -extern void gcm_mul_pclmulqdq(uint64_t *, uint64_t *, uint64_t *); +extern void ASMABI gcm_mul_pclmulqdq(uint64_t *, uint64_t *, uint64_t *); #include <modes/gcm_impl.h> diff --git a/sys/contrib/openzfs/module/icp/algs/modes/modes.c b/sys/contrib/openzfs/module/icp/algs/modes/modes.c index 59743c7d6829..6f6649b3b58b 100644 --- a/sys/contrib/openzfs/module/icp/algs/modes/modes.c +++ b/sys/contrib/openzfs/module/icp/algs/modes/modes.c @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. 
* @@ -106,8 +106,10 @@ crypto_get_ptrs(crypto_data_t *out, void **iov_or_mp, offset_t *current_offset, } else { /* one block spans two iovecs */ *out_data_1_len = iov_len - offset; - if (vec_idx == zfs_uio_iovcnt(uio)) + if (vec_idx == zfs_uio_iovcnt(uio)) { + *out_data_2 = NULL; return; + } vec_idx++; zfs_uio_iov_at_index(uio, vec_idx, &iov_base, &iov_len); *out_data_2 = (uint8_t *)iov_base; @@ -148,18 +150,47 @@ crypto_free_mode_ctx(void *ctx) case GCM_MODE: case GMAC_MODE: - if (((gcm_ctx_t *)ctx)->gcm_pt_buf != NULL) - vmem_free(((gcm_ctx_t *)ctx)->gcm_pt_buf, - ((gcm_ctx_t *)ctx)->gcm_pt_buf_len); - -#ifdef CAN_USE_GCM_ASM - if (((gcm_ctx_t *)ctx)->gcm_Htable != NULL) { - gcm_ctx_t *gcm_ctx = (gcm_ctx_t *)ctx; - bzero(gcm_ctx->gcm_Htable, gcm_ctx->gcm_htab_len); - kmem_free(gcm_ctx->gcm_Htable, gcm_ctx->gcm_htab_len); - } -#endif - + gcm_clear_ctx((gcm_ctx_t *)ctx); kmem_free(ctx, sizeof (gcm_ctx_t)); } } + +static void * +explicit_memset(void *s, int c, size_t n) +{ + memset(s, c, n); + __asm__ __volatile__("" :: "r"(s) : "memory"); + return (s); +} + +/* + * Clear sensitive data in the context and free allocated memory. + * + * ctx->gcm_remainder may contain a plaintext remainder. ctx->gcm_H and + * ctx->gcm_Htable contain the hash sub key which protects authentication. + * ctx->gcm_pt_buf contains the plaintext result of decryption. + * + * Although extremely unlikely, ctx->gcm_J0 and ctx->gcm_tmp could be used for + * a known plaintext attack, they consist of the IV and the first and last + * counter respectively. If they should be cleared is debatable. 
+ */ +void +gcm_clear_ctx(gcm_ctx_t *ctx) +{ + explicit_memset(ctx->gcm_remainder, 0, sizeof (ctx->gcm_remainder)); + explicit_memset(ctx->gcm_H, 0, sizeof (ctx->gcm_H)); +#if defined(CAN_USE_GCM_ASM) + if (ctx->gcm_use_avx == B_TRUE) { + ASSERT3P(ctx->gcm_Htable, !=, NULL); + memset(ctx->gcm_Htable, 0, ctx->gcm_htab_len); + kmem_free(ctx->gcm_Htable, ctx->gcm_htab_len); + } +#endif + if (ctx->gcm_pt_buf != NULL) { + memset(ctx->gcm_pt_buf, 0, ctx->gcm_pt_buf_len); + vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len); + } + /* Optional */ + explicit_memset(ctx->gcm_J0, 0, sizeof (ctx->gcm_J0)); + explicit_memset(ctx->gcm_tmp, 0, sizeof (ctx->gcm_tmp)); +} diff --git a/sys/contrib/openzfs/module/icp/algs/sha1/sha1.c b/sys/contrib/openzfs/module/icp/algs/sha1/sha1.c deleted file mode 100644 index da34222c8fc3..000000000000 --- a/sys/contrib/openzfs/module/icp/algs/sha1/sha1.c +++ /dev/null @@ -1,835 +0,0 @@ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * The basic framework for this code came from the reference - * implementation for MD5. That implementation is Copyright (C) - * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved. - * - * License to copy and use this software is granted provided that it - * is identified as the "RSA Data Security, Inc. MD5 Message-Digest - * Algorithm" in all material mentioning or referencing this software - * or this function. - * - * License is also granted to make and use derivative works provided - * that such works are identified as "derived from the RSA Data - * Security, Inc. MD5 Message-Digest Algorithm" in all material - * mentioning or referencing the derived work. - * - * RSA Data Security, Inc. makes no representations concerning either - * the merchantability of this software or the suitability of this - * software for any particular purpose. It is provided "as is" - * without express or implied warranty of any kind. 
- * - * These notices must be retained in any copies of any part of this - * documentation and/or software. - * - * NOTE: Cleaned-up and optimized, version of SHA1, based on the FIPS 180-1 - * standard, available at http://www.itl.nist.gov/fipspubs/fip180-1.htm - * Not as fast as one would like -- further optimizations are encouraged - * and appreciated. - */ - -#include <sys/zfs_context.h> -#include <sha1/sha1.h> -#include <sha1/sha1_consts.h> - -#ifdef _LITTLE_ENDIAN -#include <sys/byteorder.h> -#define HAVE_HTONL -#endif - -#define _RESTRICT_KYWD - -static void Encode(uint8_t *, const uint32_t *, size_t); - -#if defined(__sparc) - -#define SHA1_TRANSFORM(ctx, in) \ - SHA1Transform((ctx)->state[0], (ctx)->state[1], (ctx)->state[2], \ - (ctx)->state[3], (ctx)->state[4], (ctx), (in)) - -static void SHA1Transform(uint32_t, uint32_t, uint32_t, uint32_t, uint32_t, - SHA1_CTX *, const uint8_t *); - -#elif defined(__amd64) - -#define SHA1_TRANSFORM(ctx, in) sha1_block_data_order((ctx), (in), 1) -#define SHA1_TRANSFORM_BLOCKS(ctx, in, num) sha1_block_data_order((ctx), \ - (in), (num)) - -void sha1_block_data_order(SHA1_CTX *ctx, const void *inpp, size_t num_blocks); - -#else - -#define SHA1_TRANSFORM(ctx, in) SHA1Transform((ctx), (in)) - -static void SHA1Transform(SHA1_CTX *, const uint8_t *); - -#endif - - -static uint8_t PADDING[64] = { 0x80, /* all zeros */ }; - -/* - * F, G, and H are the basic SHA1 functions. - */ -#define F(b, c, d) (((b) & (c)) | ((~b) & (d))) -#define G(b, c, d) ((b) ^ (c) ^ (d)) -#define H(b, c, d) (((b) & (c)) | (((b)|(c)) & (d))) - -/* - * SHA1Init() - * - * purpose: initializes the sha1 context and begins and sha1 digest operation - * input: SHA1_CTX * : the context to initializes. - * output: void - */ - -void -SHA1Init(SHA1_CTX *ctx) -{ - ctx->count[0] = ctx->count[1] = 0; - - /* - * load magic initialization constants. Tell lint - * that these constants are unsigned by using U. 
- */ - - ctx->state[0] = 0x67452301U; - ctx->state[1] = 0xefcdab89U; - ctx->state[2] = 0x98badcfeU; - ctx->state[3] = 0x10325476U; - ctx->state[4] = 0xc3d2e1f0U; -} - -void -SHA1Update(SHA1_CTX *ctx, const void *inptr, size_t input_len) -{ - uint32_t i, buf_index, buf_len; - const uint8_t *input = inptr; -#if defined(__amd64) - uint32_t block_count; -#endif /* __amd64 */ - - /* check for noop */ - if (input_len == 0) - return; - - /* compute number of bytes mod 64 */ - buf_index = (ctx->count[1] >> 3) & 0x3F; - - /* update number of bits */ - if ((ctx->count[1] += (input_len << 3)) < (input_len << 3)) - ctx->count[0]++; - - ctx->count[0] += (input_len >> 29); - - buf_len = 64 - buf_index; - - /* transform as many times as possible */ - i = 0; - if (input_len >= buf_len) { - - /* - * general optimization: - * - * only do initial bcopy() and SHA1Transform() if - * buf_index != 0. if buf_index == 0, we're just - * wasting our time doing the bcopy() since there - * wasn't any data left over from a previous call to - * SHA1Update(). - */ - - if (buf_index) { - bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len); - SHA1_TRANSFORM(ctx, ctx->buf_un.buf8); - i = buf_len; - } - -#if !defined(__amd64) - for (; i + 63 < input_len; i += 64) - SHA1_TRANSFORM(ctx, &input[i]); -#else - block_count = (input_len - i) >> 6; - if (block_count > 0) { - SHA1_TRANSFORM_BLOCKS(ctx, &input[i], block_count); - i += block_count << 6; - } -#endif /* !__amd64 */ - - /* - * general optimization: - * - * if i and input_len are the same, return now instead - * of calling bcopy(), since the bcopy() in this case - * will be an expensive nop. - */ - - if (input_len == i) - return; - - buf_index = 0; - } - - /* buffer remaining input */ - bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i); -} - -/* - * SHA1Final() - * - * purpose: ends an sha1 digest operation, finalizing the message digest and - * zeroing the context. - * input: uchar_t * : A buffer to store the digest. 
- * : The function actually uses void* because many - * : callers pass things other than uchar_t here. - * SHA1_CTX * : the context to finalize, save, and zero - * output: void - */ - -void -SHA1Final(void *digest, SHA1_CTX *ctx) -{ - uint8_t bitcount_be[sizeof (ctx->count)]; - uint32_t index = (ctx->count[1] >> 3) & 0x3f; - - /* store bit count, big endian */ - Encode(bitcount_be, ctx->count, sizeof (bitcount_be)); - - /* pad out to 56 mod 64 */ - SHA1Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index); - - /* append length (before padding) */ - SHA1Update(ctx, bitcount_be, sizeof (bitcount_be)); - - /* store state in digest */ - Encode(digest, ctx->state, sizeof (ctx->state)); - - /* zeroize sensitive information */ - bzero(ctx, sizeof (*ctx)); -} - - -#if !defined(__amd64) - -typedef uint32_t sha1word; - -/* - * sparc optimization: - * - * on the sparc, we can load big endian 32-bit data easily. note that - * special care must be taken to ensure the address is 32-bit aligned. - * in the interest of speed, we don't check to make sure, since - * careful programming can guarantee this for us. - */ - -#if defined(_ZFS_BIG_ENDIAN) -#define LOAD_BIG_32(addr) (*(uint32_t *)(addr)) - -#elif defined(HAVE_HTONL) -#define LOAD_BIG_32(addr) htonl(*((uint32_t *)(addr))) - -#else -#define LOAD_BIG_32(addr) BE_32(*((uint32_t *)(addr))) -#endif /* _BIG_ENDIAN */ - -/* - * SHA1Transform() - */ -#if defined(W_ARRAY) -#define W(n) w[n] -#else /* !defined(W_ARRAY) */ -#define W(n) w_ ## n -#endif /* !defined(W_ARRAY) */ - -/* - * ROTATE_LEFT rotates x left n bits. 
- */ - -#if defined(__GNUC__) && defined(_LP64) -static __inline__ uint64_t -ROTATE_LEFT(uint64_t value, uint32_t n) -{ - uint32_t t32; - - t32 = (uint32_t)value; - return ((t32 << n) | (t32 >> (32 - n))); -} - -#else - -#define ROTATE_LEFT(x, n) \ - (((x) << (n)) | ((x) >> ((sizeof (x) * NBBY)-(n)))) - -#endif - -#if defined(__sparc) - - -/* - * sparc register window optimization: - * - * `a', `b', `c', `d', and `e' are passed into SHA1Transform - * explicitly since it increases the number of registers available to - * the compiler. under this scheme, these variables can be held in - * %i0 - %i4, which leaves more local and out registers available. - * - * purpose: sha1 transformation -- updates the digest based on `block' - * input: uint32_t : bytes 1 - 4 of the digest - * uint32_t : bytes 5 - 8 of the digest - * uint32_t : bytes 9 - 12 of the digest - * uint32_t : bytes 12 - 16 of the digest - * uint32_t : bytes 16 - 20 of the digest - * SHA1_CTX * : the context to update - * uint8_t [64]: the block to use to update the digest - * output: void - */ - - -void -SHA1Transform(uint32_t a, uint32_t b, uint32_t c, uint32_t d, uint32_t e, - SHA1_CTX *ctx, const uint8_t blk[64]) -{ - /* - * sparc optimization: - * - * while it is somewhat counter-intuitive, on sparc, it is - * more efficient to place all the constants used in this - * function in an array and load the values out of the array - * than to manually load the constants. this is because - * setting a register to a 32-bit value takes two ops in most - * cases: a `sethi' and an `or', but loading a 32-bit value - * from memory only takes one `ld' (or `lduw' on v9). while - * this increases memory usage, the compiler can find enough - * other things to do while waiting to keep the pipeline does - * not stall. additionally, it is likely that many of these - * constants are cached so that later accesses do not even go - * out to the bus. 
- * - * this array is declared `static' to keep the compiler from - * having to bcopy() this array onto the stack frame of - * SHA1Transform() each time it is called -- which is - * unacceptably expensive. - * - * the `const' is to ensure that callers are good citizens and - * do not try to munge the array. since these routines are - * going to be called from inside multithreaded kernelland, - * this is a good safety check. -- `sha1_consts' will end up in - * .rodata. - * - * unfortunately, loading from an array in this manner hurts - * performance under Intel. So, there is a macro, - * SHA1_CONST(), used in SHA1Transform(), that either expands to - * a reference to this array, or to the actual constant, - * depending on what platform this code is compiled for. - */ - - - static const uint32_t sha1_consts[] = { - SHA1_CONST_0, SHA1_CONST_1, SHA1_CONST_2, SHA1_CONST_3 - }; - - - /* - * general optimization: - * - * use individual integers instead of using an array. this is a - * win, although the amount it wins by seems to vary quite a bit. - */ - - - uint32_t w_0, w_1, w_2, w_3, w_4, w_5, w_6, w_7; - uint32_t w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15; - - - /* - * sparc optimization: - * - * if `block' is already aligned on a 4-byte boundary, use - * LOAD_BIG_32() directly. otherwise, bcopy() into a - * buffer that *is* aligned on a 4-byte boundary and then do - * the LOAD_BIG_32() on that buffer. benchmarks have shown - * that using the bcopy() is better than loading the bytes - * individually and doing the endian-swap by hand. - * - * even though it's quite tempting to assign to do: - * - * blk = bcopy(ctx->buf_un.buf32, blk, sizeof (ctx->buf_un.buf32)); - * - * and only have one set of LOAD_BIG_32()'s, the compiler - * *does not* like that, so please resist the urge. - */ - - - if ((uintptr_t)blk & 0x3) { /* not 4-byte aligned? 
*/ - bcopy(blk, ctx->buf_un.buf32, sizeof (ctx->buf_un.buf32)); - w_15 = LOAD_BIG_32(ctx->buf_un.buf32 + 15); - w_14 = LOAD_BIG_32(ctx->buf_un.buf32 + 14); - w_13 = LOAD_BIG_32(ctx->buf_un.buf32 + 13); - w_12 = LOAD_BIG_32(ctx->buf_un.buf32 + 12); - w_11 = LOAD_BIG_32(ctx->buf_un.buf32 + 11); - w_10 = LOAD_BIG_32(ctx->buf_un.buf32 + 10); - w_9 = LOAD_BIG_32(ctx->buf_un.buf32 + 9); - w_8 = LOAD_BIG_32(ctx->buf_un.buf32 + 8); - w_7 = LOAD_BIG_32(ctx->buf_un.buf32 + 7); - w_6 = LOAD_BIG_32(ctx->buf_un.buf32 + 6); - w_5 = LOAD_BIG_32(ctx->buf_un.buf32 + 5); - w_4 = LOAD_BIG_32(ctx->buf_un.buf32 + 4); - w_3 = LOAD_BIG_32(ctx->buf_un.buf32 + 3); - w_2 = LOAD_BIG_32(ctx->buf_un.buf32 + 2); - w_1 = LOAD_BIG_32(ctx->buf_un.buf32 + 1); - w_0 = LOAD_BIG_32(ctx->buf_un.buf32 + 0); - } else { - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w_15 = LOAD_BIG_32(blk + 60); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w_14 = LOAD_BIG_32(blk + 56); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w_13 = LOAD_BIG_32(blk + 52); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w_12 = LOAD_BIG_32(blk + 48); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w_11 = LOAD_BIG_32(blk + 44); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w_10 = LOAD_BIG_32(blk + 40); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w_9 = LOAD_BIG_32(blk + 36); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w_8 = LOAD_BIG_32(blk + 32); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w_7 = LOAD_BIG_32(blk + 28); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w_6 = LOAD_BIG_32(blk + 24); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w_5 = LOAD_BIG_32(blk + 20); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w_4 = LOAD_BIG_32(blk + 16); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w_3 = LOAD_BIG_32(blk + 12); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w_2 = LOAD_BIG_32(blk + 8); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w_1 = LOAD_BIG_32(blk + 4); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w_0 = LOAD_BIG_32(blk + 0); - } -#else /* !defined(__sparc) */ - -void /* CSTYLED */ -SHA1Transform(SHA1_CTX *ctx, const uint8_t blk[64]) -{ - /* CSTYLED */ - 
sha1word a = ctx->state[0]; - sha1word b = ctx->state[1]; - sha1word c = ctx->state[2]; - sha1word d = ctx->state[3]; - sha1word e = ctx->state[4]; - -#if defined(W_ARRAY) - sha1word w[16]; -#else /* !defined(W_ARRAY) */ - sha1word w_0, w_1, w_2, w_3, w_4, w_5, w_6, w_7; - sha1word w_8, w_9, w_10, w_11, w_12, w_13, w_14, w_15; -#endif /* !defined(W_ARRAY) */ - - W(0) = LOAD_BIG_32((void *)(blk + 0)); - W(1) = LOAD_BIG_32((void *)(blk + 4)); - W(2) = LOAD_BIG_32((void *)(blk + 8)); - W(3) = LOAD_BIG_32((void *)(blk + 12)); - W(4) = LOAD_BIG_32((void *)(blk + 16)); - W(5) = LOAD_BIG_32((void *)(blk + 20)); - W(6) = LOAD_BIG_32((void *)(blk + 24)); - W(7) = LOAD_BIG_32((void *)(blk + 28)); - W(8) = LOAD_BIG_32((void *)(blk + 32)); - W(9) = LOAD_BIG_32((void *)(blk + 36)); - W(10) = LOAD_BIG_32((void *)(blk + 40)); - W(11) = LOAD_BIG_32((void *)(blk + 44)); - W(12) = LOAD_BIG_32((void *)(blk + 48)); - W(13) = LOAD_BIG_32((void *)(blk + 52)); - W(14) = LOAD_BIG_32((void *)(blk + 56)); - W(15) = LOAD_BIG_32((void *)(blk + 60)); - -#endif /* !defined(__sparc) */ - - /* - * general optimization: - * - * even though this approach is described in the standard as - * being slower algorithmically, it is 30-40% faster than the - * "faster" version under SPARC, because this version has more - * of the constraints specified at compile-time and uses fewer - * variables (and therefore has better register utilization) - * than its "speedier" brother. (i've tried both, trust me) - * - * for either method given in the spec, there is an "assignment" - * phase where the following takes place: - * - * tmp = (main_computation); - * e = d; d = c; c = rotate_left(b, 30); b = a; a = tmp; - * - * we can make the algorithm go faster by not doing this work, - * but just pretending that `d' is now `e', etc. this works - * really well and obviates the need for a temporary variable. 
- * however, we still explicitly perform the rotate action, - * since it is cheaper on SPARC to do it once than to have to - * do it over and over again. - */ - - /* round 1 */ - e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(0) + SHA1_CONST(0); /* 0 */ - b = ROTATE_LEFT(b, 30); - - d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(1) + SHA1_CONST(0); /* 1 */ - a = ROTATE_LEFT(a, 30); - - c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(2) + SHA1_CONST(0); /* 2 */ - e = ROTATE_LEFT(e, 30); - - b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(3) + SHA1_CONST(0); /* 3 */ - d = ROTATE_LEFT(d, 30); - - a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(4) + SHA1_CONST(0); /* 4 */ - c = ROTATE_LEFT(c, 30); - - e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(5) + SHA1_CONST(0); /* 5 */ - b = ROTATE_LEFT(b, 30); - - d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(6) + SHA1_CONST(0); /* 6 */ - a = ROTATE_LEFT(a, 30); - - c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(7) + SHA1_CONST(0); /* 7 */ - e = ROTATE_LEFT(e, 30); - - b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(8) + SHA1_CONST(0); /* 8 */ - d = ROTATE_LEFT(d, 30); - - a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(9) + SHA1_CONST(0); /* 9 */ - c = ROTATE_LEFT(c, 30); - - e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(10) + SHA1_CONST(0); /* 10 */ - b = ROTATE_LEFT(b, 30); - - d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(11) + SHA1_CONST(0); /* 11 */ - a = ROTATE_LEFT(a, 30); - - c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(12) + SHA1_CONST(0); /* 12 */ - e = ROTATE_LEFT(e, 30); - - b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(13) + SHA1_CONST(0); /* 13 */ - d = ROTATE_LEFT(d, 30); - - a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(14) + SHA1_CONST(0); /* 14 */ - c = ROTATE_LEFT(c, 30); - - e = ROTATE_LEFT(a, 5) + F(b, c, d) + e + W(15) + SHA1_CONST(0); /* 15 */ - b = ROTATE_LEFT(b, 30); - - W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1); /* 16 */ - d = ROTATE_LEFT(e, 5) + F(a, b, c) + d + W(0) + SHA1_CONST(0); - a = ROTATE_LEFT(a, 30); - - W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ 
W(3) ^ W(1)), 1); /* 17 */ - c = ROTATE_LEFT(d, 5) + F(e, a, b) + c + W(1) + SHA1_CONST(0); - e = ROTATE_LEFT(e, 30); - - W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1); /* 18 */ - b = ROTATE_LEFT(c, 5) + F(d, e, a) + b + W(2) + SHA1_CONST(0); - d = ROTATE_LEFT(d, 30); - - W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1); /* 19 */ - a = ROTATE_LEFT(b, 5) + F(c, d, e) + a + W(3) + SHA1_CONST(0); - c = ROTATE_LEFT(c, 30); - - /* round 2 */ - W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1); /* 20 */ - e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(4) + SHA1_CONST(1); - b = ROTATE_LEFT(b, 30); - - W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1); /* 21 */ - d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(5) + SHA1_CONST(1); - a = ROTATE_LEFT(a, 30); - - W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1); /* 22 */ - c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(6) + SHA1_CONST(1); - e = ROTATE_LEFT(e, 30); - - W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1); /* 23 */ - b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(7) + SHA1_CONST(1); - d = ROTATE_LEFT(d, 30); - - W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1); /* 24 */ - a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(8) + SHA1_CONST(1); - c = ROTATE_LEFT(c, 30); - - W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1); /* 25 */ - e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(9) + SHA1_CONST(1); - b = ROTATE_LEFT(b, 30); - - W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1); /* 26 */ - d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(10) + SHA1_CONST(1); - a = ROTATE_LEFT(a, 30); - - W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1); /* 27 */ - c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(11) + SHA1_CONST(1); - e = ROTATE_LEFT(e, 30); - - W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1); /* 28 */ - b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(12) + SHA1_CONST(1); - d = ROTATE_LEFT(d, 30); - - W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 29 */ - a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(13) + 
SHA1_CONST(1); - c = ROTATE_LEFT(c, 30); - - W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1); /* 30 */ - e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(14) + SHA1_CONST(1); - b = ROTATE_LEFT(b, 30); - - W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1); /* 31 */ - d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(15) + SHA1_CONST(1); - a = ROTATE_LEFT(a, 30); - - W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1); /* 32 */ - c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(0) + SHA1_CONST(1); - e = ROTATE_LEFT(e, 30); - - W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1); /* 33 */ - b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(1) + SHA1_CONST(1); - d = ROTATE_LEFT(d, 30); - - W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1); /* 34 */ - a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(2) + SHA1_CONST(1); - c = ROTATE_LEFT(c, 30); - - W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1); /* 35 */ - e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(3) + SHA1_CONST(1); - b = ROTATE_LEFT(b, 30); - - W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1); /* 36 */ - d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(4) + SHA1_CONST(1); - a = ROTATE_LEFT(a, 30); - - W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1); /* 37 */ - c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(5) + SHA1_CONST(1); - e = ROTATE_LEFT(e, 30); - - W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1); /* 38 */ - b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(6) + SHA1_CONST(1); - d = ROTATE_LEFT(d, 30); - - W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1); /* 39 */ - a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(7) + SHA1_CONST(1); - c = ROTATE_LEFT(c, 30); - - /* round 3 */ - W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1); /* 40 */ - e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(8) + SHA1_CONST(2); - b = ROTATE_LEFT(b, 30); - - W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1); /* 41 */ - d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(9) + SHA1_CONST(2); - a = ROTATE_LEFT(a, 30); - - W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ 
W(10)), 1); /* 42 */ - c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(10) + SHA1_CONST(2); - e = ROTATE_LEFT(e, 30); - - W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1); /* 43 */ - b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(11) + SHA1_CONST(2); - d = ROTATE_LEFT(d, 30); - - W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1); /* 44 */ - a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(12) + SHA1_CONST(2); - c = ROTATE_LEFT(c, 30); - - W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 45 */ - e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(13) + SHA1_CONST(2); - b = ROTATE_LEFT(b, 30); - - W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1); /* 46 */ - d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(14) + SHA1_CONST(2); - a = ROTATE_LEFT(a, 30); - - W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1); /* 47 */ - c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(15) + SHA1_CONST(2); - e = ROTATE_LEFT(e, 30); - - W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1); /* 48 */ - b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(0) + SHA1_CONST(2); - d = ROTATE_LEFT(d, 30); - - W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1); /* 49 */ - a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(1) + SHA1_CONST(2); - c = ROTATE_LEFT(c, 30); - - W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1); /* 50 */ - e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(2) + SHA1_CONST(2); - b = ROTATE_LEFT(b, 30); - - W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1); /* 51 */ - d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(3) + SHA1_CONST(2); - a = ROTATE_LEFT(a, 30); - - W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1); /* 52 */ - c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(4) + SHA1_CONST(2); - e = ROTATE_LEFT(e, 30); - - W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1); /* 53 */ - b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(5) + SHA1_CONST(2); - d = ROTATE_LEFT(d, 30); - - W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1); /* 54 */ - a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(6) + SHA1_CONST(2); - c = 
ROTATE_LEFT(c, 30); - - W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1); /* 55 */ - e = ROTATE_LEFT(a, 5) + H(b, c, d) + e + W(7) + SHA1_CONST(2); - b = ROTATE_LEFT(b, 30); - - W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1); /* 56 */ - d = ROTATE_LEFT(e, 5) + H(a, b, c) + d + W(8) + SHA1_CONST(2); - a = ROTATE_LEFT(a, 30); - - W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1); /* 57 */ - c = ROTATE_LEFT(d, 5) + H(e, a, b) + c + W(9) + SHA1_CONST(2); - e = ROTATE_LEFT(e, 30); - - W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1); /* 58 */ - b = ROTATE_LEFT(c, 5) + H(d, e, a) + b + W(10) + SHA1_CONST(2); - d = ROTATE_LEFT(d, 30); - - W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1); /* 59 */ - a = ROTATE_LEFT(b, 5) + H(c, d, e) + a + W(11) + SHA1_CONST(2); - c = ROTATE_LEFT(c, 30); - - /* round 4 */ - W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1); /* 60 */ - e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(12) + SHA1_CONST(3); - b = ROTATE_LEFT(b, 30); - - W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 61 */ - d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(13) + SHA1_CONST(3); - a = ROTATE_LEFT(a, 30); - - W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1); /* 62 */ - c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(14) + SHA1_CONST(3); - e = ROTATE_LEFT(e, 30); - - W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1); /* 63 */ - b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(15) + SHA1_CONST(3); - d = ROTATE_LEFT(d, 30); - - W(0) = ROTATE_LEFT((W(13) ^ W(8) ^ W(2) ^ W(0)), 1); /* 64 */ - a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(0) + SHA1_CONST(3); - c = ROTATE_LEFT(c, 30); - - W(1) = ROTATE_LEFT((W(14) ^ W(9) ^ W(3) ^ W(1)), 1); /* 65 */ - e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(1) + SHA1_CONST(3); - b = ROTATE_LEFT(b, 30); - - W(2) = ROTATE_LEFT((W(15) ^ W(10) ^ W(4) ^ W(2)), 1); /* 66 */ - d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(2) + SHA1_CONST(3); - a = ROTATE_LEFT(a, 30); - - W(3) = ROTATE_LEFT((W(0) ^ W(11) ^ W(5) ^ W(3)), 1); /* 
67 */ - c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(3) + SHA1_CONST(3); - e = ROTATE_LEFT(e, 30); - - W(4) = ROTATE_LEFT((W(1) ^ W(12) ^ W(6) ^ W(4)), 1); /* 68 */ - b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(4) + SHA1_CONST(3); - d = ROTATE_LEFT(d, 30); - - W(5) = ROTATE_LEFT((W(2) ^ W(13) ^ W(7) ^ W(5)), 1); /* 69 */ - a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(5) + SHA1_CONST(3); - c = ROTATE_LEFT(c, 30); - - W(6) = ROTATE_LEFT((W(3) ^ W(14) ^ W(8) ^ W(6)), 1); /* 70 */ - e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(6) + SHA1_CONST(3); - b = ROTATE_LEFT(b, 30); - - W(7) = ROTATE_LEFT((W(4) ^ W(15) ^ W(9) ^ W(7)), 1); /* 71 */ - d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(7) + SHA1_CONST(3); - a = ROTATE_LEFT(a, 30); - - W(8) = ROTATE_LEFT((W(5) ^ W(0) ^ W(10) ^ W(8)), 1); /* 72 */ - c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(8) + SHA1_CONST(3); - e = ROTATE_LEFT(e, 30); - - W(9) = ROTATE_LEFT((W(6) ^ W(1) ^ W(11) ^ W(9)), 1); /* 73 */ - b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(9) + SHA1_CONST(3); - d = ROTATE_LEFT(d, 30); - - W(10) = ROTATE_LEFT((W(7) ^ W(2) ^ W(12) ^ W(10)), 1); /* 74 */ - a = ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(10) + SHA1_CONST(3); - c = ROTATE_LEFT(c, 30); - - W(11) = ROTATE_LEFT((W(8) ^ W(3) ^ W(13) ^ W(11)), 1); /* 75 */ - e = ROTATE_LEFT(a, 5) + G(b, c, d) + e + W(11) + SHA1_CONST(3); - b = ROTATE_LEFT(b, 30); - - W(12) = ROTATE_LEFT((W(9) ^ W(4) ^ W(14) ^ W(12)), 1); /* 76 */ - d = ROTATE_LEFT(e, 5) + G(a, b, c) + d + W(12) + SHA1_CONST(3); - a = ROTATE_LEFT(a, 30); - - W(13) = ROTATE_LEFT((W(10) ^ W(5) ^ W(15) ^ W(13)), 1); /* 77 */ - c = ROTATE_LEFT(d, 5) + G(e, a, b) + c + W(13) + SHA1_CONST(3); - e = ROTATE_LEFT(e, 30); - - W(14) = ROTATE_LEFT((W(11) ^ W(6) ^ W(0) ^ W(14)), 1); /* 78 */ - b = ROTATE_LEFT(c, 5) + G(d, e, a) + b + W(14) + SHA1_CONST(3); - d = ROTATE_LEFT(d, 30); - - W(15) = ROTATE_LEFT((W(12) ^ W(7) ^ W(1) ^ W(15)), 1); /* 79 */ - - ctx->state[0] += ROTATE_LEFT(b, 5) + G(c, d, e) + a + W(15) + - SHA1_CONST(3); - 
ctx->state[1] += b; - ctx->state[2] += ROTATE_LEFT(c, 30); - ctx->state[3] += d; - ctx->state[4] += e; - - /* zeroize sensitive information */ - W(0) = W(1) = W(2) = W(3) = W(4) = W(5) = W(6) = W(7) = W(8) = 0; - W(9) = W(10) = W(11) = W(12) = W(13) = W(14) = W(15) = 0; -} -#endif /* !__amd64 */ - - -/* - * Encode() - * - * purpose: to convert a list of numbers from little endian to big endian - * input: uint8_t * : place to store the converted big endian numbers - * uint32_t * : place to get numbers to convert from - * size_t : the length of the input in bytes - * output: void - */ - -static void -Encode(uint8_t *_RESTRICT_KYWD output, const uint32_t *_RESTRICT_KYWD input, - size_t len) -{ - size_t i, j; - -#if defined(__sparc) - if (IS_P2ALIGNED(output, sizeof (uint32_t))) { - for (i = 0, j = 0; j < len; i++, j += 4) { - /* LINTED E_BAD_PTR_CAST_ALIGN */ - *((uint32_t *)(output + j)) = input[i]; - } - } else { -#endif /* little endian -- will work on big endian, but slowly */ - - for (i = 0, j = 0; j < len; i++, j += 4) { - output[j] = (input[i] >> 24) & 0xff; - output[j + 1] = (input[i] >> 16) & 0xff; - output[j + 2] = (input[i] >> 8) & 0xff; - output[j + 3] = input[i] & 0xff; - } -#if defined(__sparc) - } -#endif -} diff --git a/sys/contrib/openzfs/module/icp/algs/sha2/sha2.c b/sys/contrib/openzfs/module/icp/algs/sha2/sha2.c deleted file mode 100644 index 75f6a3c1af4b..000000000000 --- a/sys/contrib/openzfs/module/icp/algs/sha2/sha2.c +++ /dev/null @@ -1,956 +0,0 @@ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ -/* - * Copyright 2013 Saso Kiselkov. All rights reserved. - */ - -/* - * The basic framework for this code came from the reference - * implementation for MD5. That implementation is Copyright (C) - * 1991-2, RSA Data Security, Inc. Created 1991. All rights reserved. - * - * License to copy and use this software is granted provided that it - * is identified as the "RSA Data Security, Inc. 
MD5 Message-Digest - * Algorithm" in all material mentioning or referencing this software - * or this function. - * - * License is also granted to make and use derivative works provided - * that such works are identified as "derived from the RSA Data - * Security, Inc. MD5 Message-Digest Algorithm" in all material - * mentioning or referencing the derived work. - * - * RSA Data Security, Inc. makes no representations concerning either - * the merchantability of this software or the suitability of this - * software for any particular purpose. It is provided "as is" - * without express or implied warranty of any kind. - * - * These notices must be retained in any copies of any part of this - * documentation and/or software. - * - * NOTE: Cleaned-up and optimized, version of SHA2, based on the FIPS 180-2 - * standard, available at - * http://csrc.nist.gov/publications/fips/fips180-2/fips180-2.pdf - * Not as fast as one would like -- further optimizations are encouraged - * and appreciated. 
- */ - -#include <sys/zfs_context.h> -#define _SHA2_IMPL -#include <sys/sha2.h> -#include <sha2/sha2_consts.h> - -#define _RESTRICT_KYWD - -#ifdef _ZFS_LITTLE_ENDIAN -#include <sys/byteorder.h> -#define HAVE_HTONL -#endif -#include <sys/isa_defs.h> /* for _ILP32 */ - -static void Encode(uint8_t *, uint32_t *, size_t); -static void Encode64(uint8_t *, uint64_t *, size_t); - -/* userspace only supports the generic version */ -#if defined(__amd64) && defined(_KERNEL) -#define SHA512Transform(ctx, in) SHA512TransformBlocks((ctx), (in), 1) -#define SHA256Transform(ctx, in) SHA256TransformBlocks((ctx), (in), 1) - -void SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num); -void SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num); - -#else -static void SHA256Transform(SHA2_CTX *, const uint8_t *); -static void SHA512Transform(SHA2_CTX *, const uint8_t *); -#endif /* __amd64 && _KERNEL */ - -static uint8_t PADDING[128] = { 0x80, /* all zeros */ }; - -/* - * The low-level checksum routines use a lot of stack space. On systems where - * small stacks are enforced (like 32-bit kernel builds), insert compiler memory - * barriers to reduce stack frame size. This can reduce the SHA512Transform() - * stack frame usage from 3k to <1k on ARM32, for example. - */ -#if defined(_ILP32) || defined(__powerpc) /* small stack */ -#define SMALL_STACK_MEMORY_BARRIER asm volatile("": : :"memory"); -#else -#define SMALL_STACK_MEMORY_BARRIER -#endif - -/* Ch and Maj are the basic SHA2 functions. */ -#define Ch(b, c, d) (((b) & (c)) ^ ((~b) & (d))) -#define Maj(b, c, d) (((b) & (c)) ^ ((b) & (d)) ^ ((c) & (d))) - -/* Rotates x right n bits. 
*/ -#define ROTR(x, n) \ - (((x) >> (n)) | ((x) << ((sizeof (x) * NBBY)-(n)))) - -/* Shift x right n bits */ -#define SHR(x, n) ((x) >> (n)) - -/* SHA256 Functions */ -#define BIGSIGMA0_256(x) (ROTR((x), 2) ^ ROTR((x), 13) ^ ROTR((x), 22)) -#define BIGSIGMA1_256(x) (ROTR((x), 6) ^ ROTR((x), 11) ^ ROTR((x), 25)) -#define SIGMA0_256(x) (ROTR((x), 7) ^ ROTR((x), 18) ^ SHR((x), 3)) -#define SIGMA1_256(x) (ROTR((x), 17) ^ ROTR((x), 19) ^ SHR((x), 10)) - -#define SHA256ROUND(a, b, c, d, e, f, g, h, i, w) \ - T1 = h + BIGSIGMA1_256(e) + Ch(e, f, g) + SHA256_CONST(i) + w; \ - d += T1; \ - T2 = BIGSIGMA0_256(a) + Maj(a, b, c); \ - h = T1 + T2 - -/* SHA384/512 Functions */ -#define BIGSIGMA0(x) (ROTR((x), 28) ^ ROTR((x), 34) ^ ROTR((x), 39)) -#define BIGSIGMA1(x) (ROTR((x), 14) ^ ROTR((x), 18) ^ ROTR((x), 41)) -#define SIGMA0(x) (ROTR((x), 1) ^ ROTR((x), 8) ^ SHR((x), 7)) -#define SIGMA1(x) (ROTR((x), 19) ^ ROTR((x), 61) ^ SHR((x), 6)) -#define SHA512ROUND(a, b, c, d, e, f, g, h, i, w) \ - T1 = h + BIGSIGMA1(e) + Ch(e, f, g) + SHA512_CONST(i) + w; \ - d += T1; \ - T2 = BIGSIGMA0(a) + Maj(a, b, c); \ - h = T1 + T2; \ - SMALL_STACK_MEMORY_BARRIER; - -/* - * sparc optimization: - * - * on the sparc, we can load big endian 32-bit data easily. note that - * special care must be taken to ensure the address is 32-bit aligned. - * in the interest of speed, we don't check to make sure, since - * careful programming can guarantee this for us. 
- */ - -#if defined(_ZFS_BIG_ENDIAN) -#define LOAD_BIG_32(addr) (*(uint32_t *)(addr)) -#define LOAD_BIG_64(addr) (*(uint64_t *)(addr)) - -#elif defined(HAVE_HTONL) -#define LOAD_BIG_32(addr) htonl(*((uint32_t *)(addr))) -#define LOAD_BIG_64(addr) htonll(*((uint64_t *)(addr))) - -#else -/* little endian -- will work on big endian, but slowly */ -#define LOAD_BIG_32(addr) \ - (((addr)[0] << 24) | ((addr)[1] << 16) | ((addr)[2] << 8) | (addr)[3]) -#define LOAD_BIG_64(addr) \ - (((uint64_t)(addr)[0] << 56) | ((uint64_t)(addr)[1] << 48) | \ - ((uint64_t)(addr)[2] << 40) | ((uint64_t)(addr)[3] << 32) | \ - ((uint64_t)(addr)[4] << 24) | ((uint64_t)(addr)[5] << 16) | \ - ((uint64_t)(addr)[6] << 8) | (uint64_t)(addr)[7]) -#endif /* _BIG_ENDIAN */ - - -#if !defined(__amd64) || !defined(_KERNEL) -/* SHA256 Transform */ - -static void -SHA256Transform(SHA2_CTX *ctx, const uint8_t *blk) -{ - uint32_t a = ctx->state.s32[0]; - uint32_t b = ctx->state.s32[1]; - uint32_t c = ctx->state.s32[2]; - uint32_t d = ctx->state.s32[3]; - uint32_t e = ctx->state.s32[4]; - uint32_t f = ctx->state.s32[5]; - uint32_t g = ctx->state.s32[6]; - uint32_t h = ctx->state.s32[7]; - - uint32_t w0, w1, w2, w3, w4, w5, w6, w7; - uint32_t w8, w9, w10, w11, w12, w13, w14, w15; - uint32_t T1, T2; - -#if defined(__sparc) - static const uint32_t sha256_consts[] = { - SHA256_CONST_0, SHA256_CONST_1, SHA256_CONST_2, - SHA256_CONST_3, SHA256_CONST_4, SHA256_CONST_5, - SHA256_CONST_6, SHA256_CONST_7, SHA256_CONST_8, - SHA256_CONST_9, SHA256_CONST_10, SHA256_CONST_11, - SHA256_CONST_12, SHA256_CONST_13, SHA256_CONST_14, - SHA256_CONST_15, SHA256_CONST_16, SHA256_CONST_17, - SHA256_CONST_18, SHA256_CONST_19, SHA256_CONST_20, - SHA256_CONST_21, SHA256_CONST_22, SHA256_CONST_23, - SHA256_CONST_24, SHA256_CONST_25, SHA256_CONST_26, - SHA256_CONST_27, SHA256_CONST_28, SHA256_CONST_29, - SHA256_CONST_30, SHA256_CONST_31, SHA256_CONST_32, - SHA256_CONST_33, SHA256_CONST_34, SHA256_CONST_35, - SHA256_CONST_36, 
SHA256_CONST_37, SHA256_CONST_38, - SHA256_CONST_39, SHA256_CONST_40, SHA256_CONST_41, - SHA256_CONST_42, SHA256_CONST_43, SHA256_CONST_44, - SHA256_CONST_45, SHA256_CONST_46, SHA256_CONST_47, - SHA256_CONST_48, SHA256_CONST_49, SHA256_CONST_50, - SHA256_CONST_51, SHA256_CONST_52, SHA256_CONST_53, - SHA256_CONST_54, SHA256_CONST_55, SHA256_CONST_56, - SHA256_CONST_57, SHA256_CONST_58, SHA256_CONST_59, - SHA256_CONST_60, SHA256_CONST_61, SHA256_CONST_62, - SHA256_CONST_63 - }; -#endif /* __sparc */ - - if ((uintptr_t)blk & 0x3) { /* not 4-byte aligned? */ - bcopy(blk, ctx->buf_un.buf32, sizeof (ctx->buf_un.buf32)); - blk = (uint8_t *)ctx->buf_un.buf32; - } - - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w0 = LOAD_BIG_32(blk + 4 * 0); - SHA256ROUND(a, b, c, d, e, f, g, h, 0, w0); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w1 = LOAD_BIG_32(blk + 4 * 1); - SHA256ROUND(h, a, b, c, d, e, f, g, 1, w1); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w2 = LOAD_BIG_32(blk + 4 * 2); - SHA256ROUND(g, h, a, b, c, d, e, f, 2, w2); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w3 = LOAD_BIG_32(blk + 4 * 3); - SHA256ROUND(f, g, h, a, b, c, d, e, 3, w3); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w4 = LOAD_BIG_32(blk + 4 * 4); - SHA256ROUND(e, f, g, h, a, b, c, d, 4, w4); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w5 = LOAD_BIG_32(blk + 4 * 5); - SHA256ROUND(d, e, f, g, h, a, b, c, 5, w5); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w6 = LOAD_BIG_32(blk + 4 * 6); - SHA256ROUND(c, d, e, f, g, h, a, b, 6, w6); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w7 = LOAD_BIG_32(blk + 4 * 7); - SHA256ROUND(b, c, d, e, f, g, h, a, 7, w7); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w8 = LOAD_BIG_32(blk + 4 * 8); - SHA256ROUND(a, b, c, d, e, f, g, h, 8, w8); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w9 = LOAD_BIG_32(blk + 4 * 9); - SHA256ROUND(h, a, b, c, d, e, f, g, 9, w9); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w10 = LOAD_BIG_32(blk + 4 * 10); - SHA256ROUND(g, h, a, b, c, d, e, f, 10, w10); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w11 = LOAD_BIG_32(blk + 4 
* 11); - SHA256ROUND(f, g, h, a, b, c, d, e, 11, w11); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w12 = LOAD_BIG_32(blk + 4 * 12); - SHA256ROUND(e, f, g, h, a, b, c, d, 12, w12); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w13 = LOAD_BIG_32(blk + 4 * 13); - SHA256ROUND(d, e, f, g, h, a, b, c, 13, w13); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w14 = LOAD_BIG_32(blk + 4 * 14); - SHA256ROUND(c, d, e, f, g, h, a, b, 14, w14); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w15 = LOAD_BIG_32(blk + 4 * 15); - SHA256ROUND(b, c, d, e, f, g, h, a, 15, w15); - - w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0; - SHA256ROUND(a, b, c, d, e, f, g, h, 16, w0); - w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1; - SHA256ROUND(h, a, b, c, d, e, f, g, 17, w1); - w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2; - SHA256ROUND(g, h, a, b, c, d, e, f, 18, w2); - w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3; - SHA256ROUND(f, g, h, a, b, c, d, e, 19, w3); - w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4; - SHA256ROUND(e, f, g, h, a, b, c, d, 20, w4); - w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5; - SHA256ROUND(d, e, f, g, h, a, b, c, 21, w5); - w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6; - SHA256ROUND(c, d, e, f, g, h, a, b, 22, w6); - w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7; - SHA256ROUND(b, c, d, e, f, g, h, a, 23, w7); - w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8; - SHA256ROUND(a, b, c, d, e, f, g, h, 24, w8); - w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9; - SHA256ROUND(h, a, b, c, d, e, f, g, 25, w9); - w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10; - SHA256ROUND(g, h, a, b, c, d, e, f, 26, w10); - w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11; - SHA256ROUND(f, g, h, a, b, c, d, e, 27, w11); - w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12; - SHA256ROUND(e, f, g, h, a, b, c, d, 28, w12); - w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13; - SHA256ROUND(d, e, f, g, h, a, b, c, 29, w13); - w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14; - SHA256ROUND(c, d, e, f, 
g, h, a, b, 30, w14); - w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15; - SHA256ROUND(b, c, d, e, f, g, h, a, 31, w15); - - w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0; - SHA256ROUND(a, b, c, d, e, f, g, h, 32, w0); - w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1; - SHA256ROUND(h, a, b, c, d, e, f, g, 33, w1); - w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2; - SHA256ROUND(g, h, a, b, c, d, e, f, 34, w2); - w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3; - SHA256ROUND(f, g, h, a, b, c, d, e, 35, w3); - w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4; - SHA256ROUND(e, f, g, h, a, b, c, d, 36, w4); - w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5; - SHA256ROUND(d, e, f, g, h, a, b, c, 37, w5); - w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6; - SHA256ROUND(c, d, e, f, g, h, a, b, 38, w6); - w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7; - SHA256ROUND(b, c, d, e, f, g, h, a, 39, w7); - w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8; - SHA256ROUND(a, b, c, d, e, f, g, h, 40, w8); - w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9; - SHA256ROUND(h, a, b, c, d, e, f, g, 41, w9); - w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10; - SHA256ROUND(g, h, a, b, c, d, e, f, 42, w10); - w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11; - SHA256ROUND(f, g, h, a, b, c, d, e, 43, w11); - w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12; - SHA256ROUND(e, f, g, h, a, b, c, d, 44, w12); - w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13; - SHA256ROUND(d, e, f, g, h, a, b, c, 45, w13); - w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14; - SHA256ROUND(c, d, e, f, g, h, a, b, 46, w14); - w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15; - SHA256ROUND(b, c, d, e, f, g, h, a, 47, w15); - - w0 = SIGMA1_256(w14) + w9 + SIGMA0_256(w1) + w0; - SHA256ROUND(a, b, c, d, e, f, g, h, 48, w0); - w1 = SIGMA1_256(w15) + w10 + SIGMA0_256(w2) + w1; - SHA256ROUND(h, a, b, c, d, e, f, g, 49, w1); - w2 = SIGMA1_256(w0) + w11 + SIGMA0_256(w3) + w2; - SHA256ROUND(g, h, a, b, c, d, e, 
f, 50, w2); - w3 = SIGMA1_256(w1) + w12 + SIGMA0_256(w4) + w3; - SHA256ROUND(f, g, h, a, b, c, d, e, 51, w3); - w4 = SIGMA1_256(w2) + w13 + SIGMA0_256(w5) + w4; - SHA256ROUND(e, f, g, h, a, b, c, d, 52, w4); - w5 = SIGMA1_256(w3) + w14 + SIGMA0_256(w6) + w5; - SHA256ROUND(d, e, f, g, h, a, b, c, 53, w5); - w6 = SIGMA1_256(w4) + w15 + SIGMA0_256(w7) + w6; - SHA256ROUND(c, d, e, f, g, h, a, b, 54, w6); - w7 = SIGMA1_256(w5) + w0 + SIGMA0_256(w8) + w7; - SHA256ROUND(b, c, d, e, f, g, h, a, 55, w7); - w8 = SIGMA1_256(w6) + w1 + SIGMA0_256(w9) + w8; - SHA256ROUND(a, b, c, d, e, f, g, h, 56, w8); - w9 = SIGMA1_256(w7) + w2 + SIGMA0_256(w10) + w9; - SHA256ROUND(h, a, b, c, d, e, f, g, 57, w9); - w10 = SIGMA1_256(w8) + w3 + SIGMA0_256(w11) + w10; - SHA256ROUND(g, h, a, b, c, d, e, f, 58, w10); - w11 = SIGMA1_256(w9) + w4 + SIGMA0_256(w12) + w11; - SHA256ROUND(f, g, h, a, b, c, d, e, 59, w11); - w12 = SIGMA1_256(w10) + w5 + SIGMA0_256(w13) + w12; - SHA256ROUND(e, f, g, h, a, b, c, d, 60, w12); - w13 = SIGMA1_256(w11) + w6 + SIGMA0_256(w14) + w13; - SHA256ROUND(d, e, f, g, h, a, b, c, 61, w13); - w14 = SIGMA1_256(w12) + w7 + SIGMA0_256(w15) + w14; - SHA256ROUND(c, d, e, f, g, h, a, b, 62, w14); - w15 = SIGMA1_256(w13) + w8 + SIGMA0_256(w0) + w15; - SHA256ROUND(b, c, d, e, f, g, h, a, 63, w15); - - ctx->state.s32[0] += a; - ctx->state.s32[1] += b; - ctx->state.s32[2] += c; - ctx->state.s32[3] += d; - ctx->state.s32[4] += e; - ctx->state.s32[5] += f; - ctx->state.s32[6] += g; - ctx->state.s32[7] += h; -} - - -/* SHA384 and SHA512 Transform */ - -static void -SHA512Transform(SHA2_CTX *ctx, const uint8_t *blk) -{ - - uint64_t a = ctx->state.s64[0]; - uint64_t b = ctx->state.s64[1]; - uint64_t c = ctx->state.s64[2]; - uint64_t d = ctx->state.s64[3]; - uint64_t e = ctx->state.s64[4]; - uint64_t f = ctx->state.s64[5]; - uint64_t g = ctx->state.s64[6]; - uint64_t h = ctx->state.s64[7]; - - uint64_t w0, w1, w2, w3, w4, w5, w6, w7; - uint64_t w8, w9, w10, w11, w12, w13, w14, w15; - 
uint64_t T1, T2; - -#if defined(__sparc) - static const uint64_t sha512_consts[] = { - SHA512_CONST_0, SHA512_CONST_1, SHA512_CONST_2, - SHA512_CONST_3, SHA512_CONST_4, SHA512_CONST_5, - SHA512_CONST_6, SHA512_CONST_7, SHA512_CONST_8, - SHA512_CONST_9, SHA512_CONST_10, SHA512_CONST_11, - SHA512_CONST_12, SHA512_CONST_13, SHA512_CONST_14, - SHA512_CONST_15, SHA512_CONST_16, SHA512_CONST_17, - SHA512_CONST_18, SHA512_CONST_19, SHA512_CONST_20, - SHA512_CONST_21, SHA512_CONST_22, SHA512_CONST_23, - SHA512_CONST_24, SHA512_CONST_25, SHA512_CONST_26, - SHA512_CONST_27, SHA512_CONST_28, SHA512_CONST_29, - SHA512_CONST_30, SHA512_CONST_31, SHA512_CONST_32, - SHA512_CONST_33, SHA512_CONST_34, SHA512_CONST_35, - SHA512_CONST_36, SHA512_CONST_37, SHA512_CONST_38, - SHA512_CONST_39, SHA512_CONST_40, SHA512_CONST_41, - SHA512_CONST_42, SHA512_CONST_43, SHA512_CONST_44, - SHA512_CONST_45, SHA512_CONST_46, SHA512_CONST_47, - SHA512_CONST_48, SHA512_CONST_49, SHA512_CONST_50, - SHA512_CONST_51, SHA512_CONST_52, SHA512_CONST_53, - SHA512_CONST_54, SHA512_CONST_55, SHA512_CONST_56, - SHA512_CONST_57, SHA512_CONST_58, SHA512_CONST_59, - SHA512_CONST_60, SHA512_CONST_61, SHA512_CONST_62, - SHA512_CONST_63, SHA512_CONST_64, SHA512_CONST_65, - SHA512_CONST_66, SHA512_CONST_67, SHA512_CONST_68, - SHA512_CONST_69, SHA512_CONST_70, SHA512_CONST_71, - SHA512_CONST_72, SHA512_CONST_73, SHA512_CONST_74, - SHA512_CONST_75, SHA512_CONST_76, SHA512_CONST_77, - SHA512_CONST_78, SHA512_CONST_79 - }; -#endif /* __sparc */ - - - if ((uintptr_t)blk & 0x7) { /* not 8-byte aligned? 
*/ - bcopy(blk, ctx->buf_un.buf64, sizeof (ctx->buf_un.buf64)); - blk = (uint8_t *)ctx->buf_un.buf64; - } - - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w0 = LOAD_BIG_64(blk + 8 * 0); - SHA512ROUND(a, b, c, d, e, f, g, h, 0, w0); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w1 = LOAD_BIG_64(blk + 8 * 1); - SHA512ROUND(h, a, b, c, d, e, f, g, 1, w1); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w2 = LOAD_BIG_64(blk + 8 * 2); - SHA512ROUND(g, h, a, b, c, d, e, f, 2, w2); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w3 = LOAD_BIG_64(blk + 8 * 3); - SHA512ROUND(f, g, h, a, b, c, d, e, 3, w3); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w4 = LOAD_BIG_64(blk + 8 * 4); - SHA512ROUND(e, f, g, h, a, b, c, d, 4, w4); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w5 = LOAD_BIG_64(blk + 8 * 5); - SHA512ROUND(d, e, f, g, h, a, b, c, 5, w5); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w6 = LOAD_BIG_64(blk + 8 * 6); - SHA512ROUND(c, d, e, f, g, h, a, b, 6, w6); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w7 = LOAD_BIG_64(blk + 8 * 7); - SHA512ROUND(b, c, d, e, f, g, h, a, 7, w7); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w8 = LOAD_BIG_64(blk + 8 * 8); - SHA512ROUND(a, b, c, d, e, f, g, h, 8, w8); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w9 = LOAD_BIG_64(blk + 8 * 9); - SHA512ROUND(h, a, b, c, d, e, f, g, 9, w9); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w10 = LOAD_BIG_64(blk + 8 * 10); - SHA512ROUND(g, h, a, b, c, d, e, f, 10, w10); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w11 = LOAD_BIG_64(blk + 8 * 11); - SHA512ROUND(f, g, h, a, b, c, d, e, 11, w11); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w12 = LOAD_BIG_64(blk + 8 * 12); - SHA512ROUND(e, f, g, h, a, b, c, d, 12, w12); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w13 = LOAD_BIG_64(blk + 8 * 13); - SHA512ROUND(d, e, f, g, h, a, b, c, 13, w13); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w14 = LOAD_BIG_64(blk + 8 * 14); - SHA512ROUND(c, d, e, f, g, h, a, b, 14, w14); - /* LINTED E_BAD_PTR_CAST_ALIGN */ - w15 = LOAD_BIG_64(blk + 8 * 15); - SHA512ROUND(b, c, d, e, f, g, h, a, 15, w15); - - w0 = SIGMA1(w14) + w9 + 
SIGMA0(w1) + w0; - SHA512ROUND(a, b, c, d, e, f, g, h, 16, w0); - w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1; - SHA512ROUND(h, a, b, c, d, e, f, g, 17, w1); - w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2; - SHA512ROUND(g, h, a, b, c, d, e, f, 18, w2); - w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3; - SHA512ROUND(f, g, h, a, b, c, d, e, 19, w3); - w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4; - SHA512ROUND(e, f, g, h, a, b, c, d, 20, w4); - w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5; - SHA512ROUND(d, e, f, g, h, a, b, c, 21, w5); - w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6; - SHA512ROUND(c, d, e, f, g, h, a, b, 22, w6); - w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7; - SHA512ROUND(b, c, d, e, f, g, h, a, 23, w7); - w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8; - SHA512ROUND(a, b, c, d, e, f, g, h, 24, w8); - w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9; - SHA512ROUND(h, a, b, c, d, e, f, g, 25, w9); - w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10; - SHA512ROUND(g, h, a, b, c, d, e, f, 26, w10); - w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11; - SHA512ROUND(f, g, h, a, b, c, d, e, 27, w11); - w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12; - SHA512ROUND(e, f, g, h, a, b, c, d, 28, w12); - w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13; - SHA512ROUND(d, e, f, g, h, a, b, c, 29, w13); - w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14; - SHA512ROUND(c, d, e, f, g, h, a, b, 30, w14); - w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15; - SHA512ROUND(b, c, d, e, f, g, h, a, 31, w15); - - w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0; - SHA512ROUND(a, b, c, d, e, f, g, h, 32, w0); - w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1; - SHA512ROUND(h, a, b, c, d, e, f, g, 33, w1); - w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2; - SHA512ROUND(g, h, a, b, c, d, e, f, 34, w2); - w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3; - SHA512ROUND(f, g, h, a, b, c, d, e, 35, w3); - w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4; - SHA512ROUND(e, f, g, h, a, b, c, d, 36, w4); - w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5; - SHA512ROUND(d, e, f, g, h, a, b, c, 37, w5); - w6 = SIGMA1(w4) + 
w15 + SIGMA0(w7) + w6; - SHA512ROUND(c, d, e, f, g, h, a, b, 38, w6); - w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7; - SHA512ROUND(b, c, d, e, f, g, h, a, 39, w7); - w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8; - SHA512ROUND(a, b, c, d, e, f, g, h, 40, w8); - w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9; - SHA512ROUND(h, a, b, c, d, e, f, g, 41, w9); - w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10; - SHA512ROUND(g, h, a, b, c, d, e, f, 42, w10); - w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11; - SHA512ROUND(f, g, h, a, b, c, d, e, 43, w11); - w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12; - SHA512ROUND(e, f, g, h, a, b, c, d, 44, w12); - w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13; - SHA512ROUND(d, e, f, g, h, a, b, c, 45, w13); - w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14; - SHA512ROUND(c, d, e, f, g, h, a, b, 46, w14); - w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15; - SHA512ROUND(b, c, d, e, f, g, h, a, 47, w15); - - w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0; - SHA512ROUND(a, b, c, d, e, f, g, h, 48, w0); - w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1; - SHA512ROUND(h, a, b, c, d, e, f, g, 49, w1); - w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2; - SHA512ROUND(g, h, a, b, c, d, e, f, 50, w2); - w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3; - SHA512ROUND(f, g, h, a, b, c, d, e, 51, w3); - w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4; - SHA512ROUND(e, f, g, h, a, b, c, d, 52, w4); - w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5; - SHA512ROUND(d, e, f, g, h, a, b, c, 53, w5); - w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6; - SHA512ROUND(c, d, e, f, g, h, a, b, 54, w6); - w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7; - SHA512ROUND(b, c, d, e, f, g, h, a, 55, w7); - w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8; - SHA512ROUND(a, b, c, d, e, f, g, h, 56, w8); - w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9; - SHA512ROUND(h, a, b, c, d, e, f, g, 57, w9); - w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10; - SHA512ROUND(g, h, a, b, c, d, e, f, 58, w10); - w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11; - SHA512ROUND(f, g, h, a, b, c, d, e, 59, w11); - w12 = 
SIGMA1(w10) + w5 + SIGMA0(w13) + w12; - SHA512ROUND(e, f, g, h, a, b, c, d, 60, w12); - w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13; - SHA512ROUND(d, e, f, g, h, a, b, c, 61, w13); - w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14; - SHA512ROUND(c, d, e, f, g, h, a, b, 62, w14); - w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15; - SHA512ROUND(b, c, d, e, f, g, h, a, 63, w15); - - w0 = SIGMA1(w14) + w9 + SIGMA0(w1) + w0; - SHA512ROUND(a, b, c, d, e, f, g, h, 64, w0); - w1 = SIGMA1(w15) + w10 + SIGMA0(w2) + w1; - SHA512ROUND(h, a, b, c, d, e, f, g, 65, w1); - w2 = SIGMA1(w0) + w11 + SIGMA0(w3) + w2; - SHA512ROUND(g, h, a, b, c, d, e, f, 66, w2); - w3 = SIGMA1(w1) + w12 + SIGMA0(w4) + w3; - SHA512ROUND(f, g, h, a, b, c, d, e, 67, w3); - w4 = SIGMA1(w2) + w13 + SIGMA0(w5) + w4; - SHA512ROUND(e, f, g, h, a, b, c, d, 68, w4); - w5 = SIGMA1(w3) + w14 + SIGMA0(w6) + w5; - SHA512ROUND(d, e, f, g, h, a, b, c, 69, w5); - w6 = SIGMA1(w4) + w15 + SIGMA0(w7) + w6; - SHA512ROUND(c, d, e, f, g, h, a, b, 70, w6); - w7 = SIGMA1(w5) + w0 + SIGMA0(w8) + w7; - SHA512ROUND(b, c, d, e, f, g, h, a, 71, w7); - w8 = SIGMA1(w6) + w1 + SIGMA0(w9) + w8; - SHA512ROUND(a, b, c, d, e, f, g, h, 72, w8); - w9 = SIGMA1(w7) + w2 + SIGMA0(w10) + w9; - SHA512ROUND(h, a, b, c, d, e, f, g, 73, w9); - w10 = SIGMA1(w8) + w3 + SIGMA0(w11) + w10; - SHA512ROUND(g, h, a, b, c, d, e, f, 74, w10); - w11 = SIGMA1(w9) + w4 + SIGMA0(w12) + w11; - SHA512ROUND(f, g, h, a, b, c, d, e, 75, w11); - w12 = SIGMA1(w10) + w5 + SIGMA0(w13) + w12; - SHA512ROUND(e, f, g, h, a, b, c, d, 76, w12); - w13 = SIGMA1(w11) + w6 + SIGMA0(w14) + w13; - SHA512ROUND(d, e, f, g, h, a, b, c, 77, w13); - w14 = SIGMA1(w12) + w7 + SIGMA0(w15) + w14; - SHA512ROUND(c, d, e, f, g, h, a, b, 78, w14); - w15 = SIGMA1(w13) + w8 + SIGMA0(w0) + w15; - SHA512ROUND(b, c, d, e, f, g, h, a, 79, w15); - - ctx->state.s64[0] += a; - ctx->state.s64[1] += b; - ctx->state.s64[2] += c; - ctx->state.s64[3] += d; - ctx->state.s64[4] += e; - ctx->state.s64[5] += f; - 
ctx->state.s64[6] += g; - ctx->state.s64[7] += h; - -} -#endif /* !__amd64 || !_KERNEL */ - - -/* - * Encode() - * - * purpose: to convert a list of numbers from little endian to big endian - * input: uint8_t * : place to store the converted big endian numbers - * uint32_t * : place to get numbers to convert from - * size_t : the length of the input in bytes - * output: void - */ - -static void -Encode(uint8_t *_RESTRICT_KYWD output, uint32_t *_RESTRICT_KYWD input, - size_t len) -{ - size_t i, j; - -#if defined(__sparc) - if (IS_P2ALIGNED(output, sizeof (uint32_t))) { - for (i = 0, j = 0; j < len; i++, j += 4) { - /* LINTED E_BAD_PTR_CAST_ALIGN */ - *((uint32_t *)(output + j)) = input[i]; - } - } else { -#endif /* little endian -- will work on big endian, but slowly */ - for (i = 0, j = 0; j < len; i++, j += 4) { - output[j] = (input[i] >> 24) & 0xff; - output[j + 1] = (input[i] >> 16) & 0xff; - output[j + 2] = (input[i] >> 8) & 0xff; - output[j + 3] = input[i] & 0xff; - } -#if defined(__sparc) - } -#endif -} - -static void -Encode64(uint8_t *_RESTRICT_KYWD output, uint64_t *_RESTRICT_KYWD input, - size_t len) -{ - size_t i, j; - -#if defined(__sparc) - if (IS_P2ALIGNED(output, sizeof (uint64_t))) { - for (i = 0, j = 0; j < len; i++, j += 8) { - /* LINTED E_BAD_PTR_CAST_ALIGN */ - *((uint64_t *)(output + j)) = input[i]; - } - } else { -#endif /* little endian -- will work on big endian, but slowly */ - for (i = 0, j = 0; j < len; i++, j += 8) { - - output[j] = (input[i] >> 56) & 0xff; - output[j + 1] = (input[i] >> 48) & 0xff; - output[j + 2] = (input[i] >> 40) & 0xff; - output[j + 3] = (input[i] >> 32) & 0xff; - output[j + 4] = (input[i] >> 24) & 0xff; - output[j + 5] = (input[i] >> 16) & 0xff; - output[j + 6] = (input[i] >> 8) & 0xff; - output[j + 7] = input[i] & 0xff; - } -#if defined(__sparc) - } -#endif -} - - -void -SHA2Init(uint64_t mech, SHA2_CTX *ctx) -{ - - switch (mech) { - case SHA256_MECH_INFO_TYPE: - case SHA256_HMAC_MECH_INFO_TYPE: - case 
SHA256_HMAC_GEN_MECH_INFO_TYPE: - ctx->state.s32[0] = 0x6a09e667U; - ctx->state.s32[1] = 0xbb67ae85U; - ctx->state.s32[2] = 0x3c6ef372U; - ctx->state.s32[3] = 0xa54ff53aU; - ctx->state.s32[4] = 0x510e527fU; - ctx->state.s32[5] = 0x9b05688cU; - ctx->state.s32[6] = 0x1f83d9abU; - ctx->state.s32[7] = 0x5be0cd19U; - break; - case SHA384_MECH_INFO_TYPE: - case SHA384_HMAC_MECH_INFO_TYPE: - case SHA384_HMAC_GEN_MECH_INFO_TYPE: - ctx->state.s64[0] = 0xcbbb9d5dc1059ed8ULL; - ctx->state.s64[1] = 0x629a292a367cd507ULL; - ctx->state.s64[2] = 0x9159015a3070dd17ULL; - ctx->state.s64[3] = 0x152fecd8f70e5939ULL; - ctx->state.s64[4] = 0x67332667ffc00b31ULL; - ctx->state.s64[5] = 0x8eb44a8768581511ULL; - ctx->state.s64[6] = 0xdb0c2e0d64f98fa7ULL; - ctx->state.s64[7] = 0x47b5481dbefa4fa4ULL; - break; - case SHA512_MECH_INFO_TYPE: - case SHA512_HMAC_MECH_INFO_TYPE: - case SHA512_HMAC_GEN_MECH_INFO_TYPE: - ctx->state.s64[0] = 0x6a09e667f3bcc908ULL; - ctx->state.s64[1] = 0xbb67ae8584caa73bULL; - ctx->state.s64[2] = 0x3c6ef372fe94f82bULL; - ctx->state.s64[3] = 0xa54ff53a5f1d36f1ULL; - ctx->state.s64[4] = 0x510e527fade682d1ULL; - ctx->state.s64[5] = 0x9b05688c2b3e6c1fULL; - ctx->state.s64[6] = 0x1f83d9abfb41bd6bULL; - ctx->state.s64[7] = 0x5be0cd19137e2179ULL; - break; - case SHA512_224_MECH_INFO_TYPE: - ctx->state.s64[0] = 0x8C3D37C819544DA2ULL; - ctx->state.s64[1] = 0x73E1996689DCD4D6ULL; - ctx->state.s64[2] = 0x1DFAB7AE32FF9C82ULL; - ctx->state.s64[3] = 0x679DD514582F9FCFULL; - ctx->state.s64[4] = 0x0F6D2B697BD44DA8ULL; - ctx->state.s64[5] = 0x77E36F7304C48942ULL; - ctx->state.s64[6] = 0x3F9D85A86A1D36C8ULL; - ctx->state.s64[7] = 0x1112E6AD91D692A1ULL; - break; - case SHA512_256_MECH_INFO_TYPE: - ctx->state.s64[0] = 0x22312194FC2BF72CULL; - ctx->state.s64[1] = 0x9F555FA3C84C64C2ULL; - ctx->state.s64[2] = 0x2393B86B6F53B151ULL; - ctx->state.s64[3] = 0x963877195940EABDULL; - ctx->state.s64[4] = 0x96283EE2A88EFFE3ULL; - ctx->state.s64[5] = 0xBE5E1E2553863992ULL; - ctx->state.s64[6] = 
0x2B0199FC2C85B8AAULL; - ctx->state.s64[7] = 0x0EB72DDC81C52CA2ULL; - break; -#ifdef _KERNEL - default: - cmn_err(CE_PANIC, - "sha2_init: failed to find a supported algorithm: 0x%x", - (uint32_t)mech); - -#endif /* _KERNEL */ - } - - ctx->algotype = (uint32_t)mech; - ctx->count.c64[0] = ctx->count.c64[1] = 0; -} - -#ifndef _KERNEL - -// #pragma inline(SHA256Init, SHA384Init, SHA512Init) -void -SHA256Init(SHA256_CTX *ctx) -{ - SHA2Init(SHA256, ctx); -} - -void -SHA384Init(SHA384_CTX *ctx) -{ - SHA2Init(SHA384, ctx); -} - -void -SHA512Init(SHA512_CTX *ctx) -{ - SHA2Init(SHA512, ctx); -} - -#endif /* _KERNEL */ - -/* - * SHA2Update() - * - * purpose: continues an sha2 digest operation, using the message block - * to update the context. - * input: SHA2_CTX * : the context to update - * void * : the message block - * size_t : the length of the message block, in bytes - * output: void - */ - -void -SHA2Update(SHA2_CTX *ctx, const void *inptr, size_t input_len) -{ - uint32_t i, buf_index, buf_len, buf_limit; - const uint8_t *input = inptr; - uint32_t algotype = ctx->algotype; - - /* check for noop */ - if (input_len == 0) - return; - - if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) { - buf_limit = 64; - - /* compute number of bytes mod 64 */ - buf_index = (ctx->count.c32[1] >> 3) & 0x3F; - - /* update number of bits */ - if ((ctx->count.c32[1] += (input_len << 3)) < (input_len << 3)) - ctx->count.c32[0]++; - - ctx->count.c32[0] += (input_len >> 29); - - } else { - buf_limit = 128; - - /* compute number of bytes mod 128 */ - buf_index = (ctx->count.c64[1] >> 3) & 0x7F; - - /* update number of bits */ - if ((ctx->count.c64[1] += (input_len << 3)) < (input_len << 3)) - ctx->count.c64[0]++; - - ctx->count.c64[0] += (input_len >> 29); - } - - buf_len = buf_limit - buf_index; - - /* transform as many times as possible */ - i = 0; - if (input_len >= buf_len) { - - /* - * general optimization: - * - * only do initial bcopy() and SHA2Transform() if - * buf_index != 0. 
if buf_index == 0, we're just - * wasting our time doing the bcopy() since there - * wasn't any data left over from a previous call to - * SHA2Update(). - */ - if (buf_index) { - bcopy(input, &ctx->buf_un.buf8[buf_index], buf_len); - if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) - SHA256Transform(ctx, ctx->buf_un.buf8); - else - SHA512Transform(ctx, ctx->buf_un.buf8); - - i = buf_len; - } - -#if !defined(__amd64) || !defined(_KERNEL) - if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) { - for (; i + buf_limit - 1 < input_len; i += buf_limit) { - SHA256Transform(ctx, &input[i]); - } - } else { - for (; i + buf_limit - 1 < input_len; i += buf_limit) { - SHA512Transform(ctx, &input[i]); - } - } - -#else - uint32_t block_count; - if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) { - block_count = (input_len - i) >> 6; - if (block_count > 0) { - SHA256TransformBlocks(ctx, &input[i], - block_count); - i += block_count << 6; - } - } else { - block_count = (input_len - i) >> 7; - if (block_count > 0) { - SHA512TransformBlocks(ctx, &input[i], - block_count); - i += block_count << 7; - } - } -#endif /* !__amd64 || !_KERNEL */ - - /* - * general optimization: - * - * if i and input_len are the same, return now instead - * of calling bcopy(), since the bcopy() in this case - * will be an expensive noop. - */ - - if (input_len == i) - return; - - buf_index = 0; - } - - /* buffer remaining input */ - bcopy(&input[i], &ctx->buf_un.buf8[buf_index], input_len - i); -} - - -/* - * SHA2Final() - * - * purpose: ends an sha2 digest operation, finalizing the message digest and - * zeroing the context. - * input: uchar_t * : a buffer to store the digest - * : The function actually uses void* because many - * : callers pass things other than uchar_t here. 
- * SHA2_CTX * : the context to finalize, save, and zero - * output: void - */ - -void -SHA2Final(void *digest, SHA2_CTX *ctx) -{ - uint8_t bitcount_be[sizeof (ctx->count.c32)]; - uint8_t bitcount_be64[sizeof (ctx->count.c64)]; - uint32_t index; - uint32_t algotype = ctx->algotype; - - if (algotype <= SHA256_HMAC_GEN_MECH_INFO_TYPE) { - index = (ctx->count.c32[1] >> 3) & 0x3f; - Encode(bitcount_be, ctx->count.c32, sizeof (bitcount_be)); - SHA2Update(ctx, PADDING, ((index < 56) ? 56 : 120) - index); - SHA2Update(ctx, bitcount_be, sizeof (bitcount_be)); - Encode(digest, ctx->state.s32, sizeof (ctx->state.s32)); - } else { - index = (ctx->count.c64[1] >> 3) & 0x7f; - Encode64(bitcount_be64, ctx->count.c64, - sizeof (bitcount_be64)); - SHA2Update(ctx, PADDING, ((index < 112) ? 112 : 240) - index); - SHA2Update(ctx, bitcount_be64, sizeof (bitcount_be64)); - if (algotype <= SHA384_HMAC_GEN_MECH_INFO_TYPE) { - ctx->state.s64[6] = ctx->state.s64[7] = 0; - Encode64(digest, ctx->state.s64, - sizeof (uint64_t) * 6); - } else if (algotype == SHA512_224_MECH_INFO_TYPE) { - uint8_t last[sizeof (uint64_t)]; - /* - * Since SHA-512/224 doesn't align well to 64-bit - * boundaries, we must do the encoding in three steps: - * 1) encode the three 64-bit words that fit neatly - * 2) encode the last 64-bit word to a temp buffer - * 3) chop out the lower 32-bits from the temp buffer - * and append them to the digest - */ - Encode64(digest, ctx->state.s64, sizeof (uint64_t) * 3); - Encode64(last, &ctx->state.s64[3], sizeof (uint64_t)); - bcopy(last, (uint8_t *)digest + 24, 4); - } else if (algotype == SHA512_256_MECH_INFO_TYPE) { - Encode64(digest, ctx->state.s64, sizeof (uint64_t) * 4); - } else { - Encode64(digest, ctx->state.s64, - sizeof (ctx->state.s64)); - } - } - - /* zeroize sensitive information */ - bzero(ctx, sizeof (*ctx)); -} - -#ifdef _KERNEL -EXPORT_SYMBOL(SHA2Init); -EXPORT_SYMBOL(SHA2Update); -EXPORT_SYMBOL(SHA2Final); -#endif diff --git 
a/sys/contrib/openzfs/module/icp/algs/sha2/sha256_impl.c b/sys/contrib/openzfs/module/icp/algs/sha2/sha256_impl.c new file mode 100644 index 000000000000..0f24319511d7 --- /dev/null +++ b/sys/contrib/openzfs/module/icp/algs/sha2/sha256_impl.c @@ -0,0 +1,313 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de> + */ + +#include <sys/simd.h> +#include <sys/zfs_context.h> +#include <sys/zfs_impl.h> +#include <sys/sha2.h> + +#include <sha2/sha2_impl.h> +#include <sys/asm_linkage.h> + +#define TF(E, N) \ + extern void ASMABI E(uint32_t s[8], const void *, size_t); \ + static inline void N(uint32_t s[8], const void *d, size_t b) { \ + kfpu_begin(); E(s, d, b); kfpu_end(); \ +} + +/* some implementation is always okay */ +static inline boolean_t sha2_is_supported(void) +{ + return (B_TRUE); +} + +#if defined(__x86_64) + +/* Users of ASMABI requires all calls to be from wrappers */ +extern void ASMABI +zfs_sha256_transform_x64(uint32_t s[8], const void *, size_t); + +static inline void +tf_sha256_transform_x64(uint32_t s[8], const void *d, size_t b) +{ + zfs_sha256_transform_x64(s, d, b); +} + +const sha256_ops_t sha256_x64_impl = { 
+ .is_supported = sha2_is_supported, + .transform = tf_sha256_transform_x64, + .name = "x64" +}; + +#if defined(HAVE_SSSE3) +static boolean_t sha2_have_ssse3(void) +{ + return (kfpu_allowed() && zfs_ssse3_available()); +} + +TF(zfs_sha256_transform_ssse3, tf_sha256_ssse3); +const sha256_ops_t sha256_ssse3_impl = { + .is_supported = sha2_have_ssse3, + .transform = tf_sha256_ssse3, + .name = "ssse3" +}; +#endif + +#if defined(HAVE_AVX) +static boolean_t sha2_have_avx(void) +{ + return (kfpu_allowed() && zfs_avx_available()); +} + +TF(zfs_sha256_transform_avx, tf_sha256_avx); +const sha256_ops_t sha256_avx_impl = { + .is_supported = sha2_have_avx, + .transform = tf_sha256_avx, + .name = "avx" +}; +#endif + +#if defined(HAVE_AVX2) +static boolean_t sha2_have_avx2(void) +{ + return (kfpu_allowed() && zfs_avx2_available()); +} + +TF(zfs_sha256_transform_avx2, tf_sha256_avx2); +const sha256_ops_t sha256_avx2_impl = { + .is_supported = sha2_have_avx2, + .transform = tf_sha256_avx2, + .name = "avx2" +}; +#endif + +#if defined(HAVE_SSE4_1) +static boolean_t sha2_have_shani(void) +{ + return (kfpu_allowed() && zfs_sse4_1_available() && \ + zfs_shani_available()); +} + +TF(zfs_sha256_transform_shani, tf_sha256_shani); +const sha256_ops_t sha256_shani_impl = { + .is_supported = sha2_have_shani, + .transform = tf_sha256_shani, + .name = "shani" +}; +#endif + +#elif defined(__aarch64__) || defined(__arm__) +extern void zfs_sha256_block_armv7(uint32_t s[8], const void *, size_t); +const sha256_ops_t sha256_armv7_impl = { + .is_supported = sha2_is_supported, + .transform = zfs_sha256_block_armv7, + .name = "armv7" +}; + +#if __ARM_ARCH > 6 +static boolean_t sha256_have_neon(void) +{ + return (kfpu_allowed() && zfs_neon_available()); +} + +static boolean_t sha256_have_armv8ce(void) +{ + return (kfpu_allowed() && zfs_sha256_available()); +} + +TF(zfs_sha256_block_neon, tf_sha256_neon); +const sha256_ops_t sha256_neon_impl = { + .is_supported = sha256_have_neon, + .transform = 
tf_sha256_neon, + .name = "neon" +}; + +TF(zfs_sha256_block_armv8, tf_sha256_armv8ce); +const sha256_ops_t sha256_armv8_impl = { + .is_supported = sha256_have_armv8ce, + .transform = tf_sha256_armv8ce, + .name = "armv8-ce" +}; +#endif + +#elif defined(__PPC64__) +static boolean_t sha256_have_isa207(void) +{ + return (kfpu_allowed() && zfs_isa207_available()); +} + +TF(zfs_sha256_ppc, tf_sha256_ppc); +const sha256_ops_t sha256_ppc_impl = { + .is_supported = sha2_is_supported, + .transform = tf_sha256_ppc, + .name = "ppc" +}; + +TF(zfs_sha256_power8, tf_sha256_power8); +const sha256_ops_t sha256_power8_impl = { + .is_supported = sha256_have_isa207, + .transform = tf_sha256_power8, + .name = "power8" +}; +#endif /* __PPC64__ */ + +/* the two generic ones */ +extern const sha256_ops_t sha256_generic_impl; + +/* array with all sha256 implementations */ +static const sha256_ops_t *const sha256_impls[] = { + &sha256_generic_impl, +#if defined(__x86_64) + &sha256_x64_impl, +#endif +#if defined(__x86_64) && defined(HAVE_SSSE3) + &sha256_ssse3_impl, +#endif +#if defined(__x86_64) && defined(HAVE_AVX) + &sha256_avx_impl, +#endif +#if defined(__x86_64) && defined(HAVE_AVX2) + &sha256_avx2_impl, +#endif +#if defined(__x86_64) && defined(HAVE_SSE4_1) + &sha256_shani_impl, +#endif +#if defined(__aarch64__) || defined(__arm__) + &sha256_armv7_impl, +#if __ARM_ARCH > 6 + &sha256_neon_impl, + &sha256_armv8_impl, +#endif +#endif +#if defined(__PPC64__) + &sha256_ppc_impl, + &sha256_power8_impl, +#endif /* __PPC64__ */ +}; + +/* use the generic implementation functions */ +#define IMPL_NAME "sha256" +#define IMPL_OPS_T sha256_ops_t +#define IMPL_ARRAY sha256_impls +#define IMPL_GET_OPS sha256_get_ops +#define ZFS_IMPL_OPS zfs_sha256_ops +#include <generic_impl.c> + +#ifdef _KERNEL + +#define IMPL_FMT(impl, i) (((impl) == (i)) ? 
"[%s] " : "%s ") + +#if defined(__linux__) + +static int +sha256_param_get(char *buffer, zfs_kernel_param_t *unused) +{ + const uint32_t impl = IMPL_READ(generic_impl_chosen); + char *fmt; + int cnt = 0; + + /* cycling */ + fmt = IMPL_FMT(impl, IMPL_CYCLE); + cnt += sprintf(buffer + cnt, fmt, "cycle"); + + /* list fastest */ + fmt = IMPL_FMT(impl, IMPL_FASTEST); + cnt += sprintf(buffer + cnt, fmt, "fastest"); + + /* list all supported implementations */ + generic_impl_init(); + for (uint32_t i = 0; i < generic_supp_impls_cnt; ++i) { + fmt = IMPL_FMT(impl, i); + cnt += sprintf(buffer + cnt, fmt, + generic_supp_impls[i]->name); + } + + return (cnt); +} + +static int +sha256_param_set(const char *val, zfs_kernel_param_t *unused) +{ + (void) unused; + return (generic_impl_setname(val)); +} + +#elif defined(__FreeBSD__) + +#include <sys/sbuf.h> + +static int +sha256_param(ZFS_MODULE_PARAM_ARGS) +{ + int err; + + generic_impl_init(); + if (req->newptr == NULL) { + const uint32_t impl = IMPL_READ(generic_impl_chosen); + const int init_buflen = 64; + const char *fmt; + struct sbuf *s; + + s = sbuf_new_for_sysctl(NULL, NULL, init_buflen, req); + + /* cycling */ + fmt = IMPL_FMT(impl, IMPL_CYCLE); + (void) sbuf_printf(s, fmt, "cycle"); + + /* list fastest */ + fmt = IMPL_FMT(impl, IMPL_FASTEST); + (void) sbuf_printf(s, fmt, "fastest"); + + /* list all supported implementations */ + for (uint32_t i = 0; i < generic_supp_impls_cnt; ++i) { + fmt = IMPL_FMT(impl, i); + (void) sbuf_printf(s, fmt, generic_supp_impls[i]->name); + } + + err = sbuf_finish(s); + sbuf_delete(s); + + return (err); + } + + char buf[16]; + + err = sysctl_handle_string(oidp, buf, sizeof (buf), req); + if (err) { + return (err); + } + + return (-generic_impl_setname(buf)); +} +#endif + +#undef IMPL_FMT + +ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs, zfs_, sha256_impl, + sha256_param_set, sha256_param_get, ZMOD_RW, \ + "Select SHA256 implementation."); +#endif + +#undef TF diff --git 
a/sys/contrib/openzfs/module/icp/algs/sha2/sha2_generic.c b/sys/contrib/openzfs/module/icp/algs/sha2/sha2_generic.c new file mode 100644 index 000000000000..60d7ad9a1dfa --- /dev/null +++ b/sys/contrib/openzfs/module/icp/algs/sha2/sha2_generic.c @@ -0,0 +1,562 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Based on public domain code in cppcrypto 0.10. 
+ * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de> + */ + +#include <sys/zfs_context.h> +#include <sys/zfs_impl.h> +#include <sys/sha2.h> + +#include <sha2/sha2_impl.h> + +/* + * On i386, gcc brings this for sha512_generic(): + * error: the frame size of 1040 bytes is larger than 1024 + */ +#if defined(__GNUC__) && defined(_ILP32) +#pragma GCC diagnostic ignored "-Wframe-larger-than=" +#endif + +/* SHA256 */ +static const uint32_t SHA256_K[64] = { + 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5, + 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5, + 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3, + 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174, + 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc, + 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da, + 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7, + 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967, + 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13, + 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85, + 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3, + 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070, + 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5, + 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3, + 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208, + 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2 +}; + +#define Ch(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) +#define Maj(x, y, z) (((y) & (z)) | (((y) | (z)) & (x))) + +#define rotr32(x, n) (((x) >> n) | ((x) << (32 - n))) +#define sum0(x) (rotr32((x), 2) ^ rotr32((x), 13) ^ rotr32((x), 22)) +#define sum1(x) (rotr32((x), 6) ^ rotr32((x), 11) ^ rotr32((x), 25)) +#define sigma0(x) (rotr32((x), 7) ^ rotr32((x), 18) ^ ((x) >> 3)) +#define sigma1(x) (rotr32((x), 17) ^ rotr32((x), 19) ^ ((x) >> 10)) + +#define WU(j) (W[j & 15] += sigma1(W[(j + 14) & 15]) \ + + W[(j + 9) & 15] + sigma0(W[(j + 1) & 15])) + +#define COMPRESS(i, j, K) \ + T1 = h + sum1(e) + Ch(e, f, g) + K[i + j] + (i? 
WU(j): W[j]); \ + T2 = sum0(a) + Maj(a, b, c); \ + h = g, g = f, f = e, e = d + T1; \ + d = c, c = b, b = a, a = T1 + T2; + +static void sha256_generic(uint32_t state[8], const void *data, size_t num_blks) +{ + uint64_t blk; + + for (blk = 0; blk < num_blks; blk++) { + uint32_t W[16]; + uint32_t a, b, c, d, e, f, g, h; + uint32_t T1, T2; + int i; + + for (i = 0; i < 16; i++) { + W[i] = BE_32( \ + (((const uint32_t *)(data))[blk * 16 + i])); + } + + a = state[0]; + b = state[1]; + c = state[2]; + d = state[3]; + e = state[4]; + f = state[5]; + g = state[6]; + h = state[7]; + + for (i = 0; i <= 63; i += 16) { + COMPRESS(i, 0, SHA256_K); + COMPRESS(i, 1, SHA256_K); + COMPRESS(i, 2, SHA256_K); + COMPRESS(i, 3, SHA256_K); + COMPRESS(i, 4, SHA256_K); + COMPRESS(i, 5, SHA256_K); + COMPRESS(i, 6, SHA256_K); + COMPRESS(i, 7, SHA256_K); + COMPRESS(i, 8, SHA256_K); + COMPRESS(i, 9, SHA256_K); + COMPRESS(i, 10, SHA256_K); + COMPRESS(i, 11, SHA256_K); + COMPRESS(i, 12, SHA256_K); + COMPRESS(i, 13, SHA256_K); + COMPRESS(i, 14, SHA256_K); + COMPRESS(i, 15, SHA256_K); + } + + state[0] += a; + state[1] += b; + state[2] += c; + state[3] += d; + state[4] += e; + state[5] += f; + state[6] += g; + state[7] += h; + } +} + +#undef sum0 +#undef sum1 +#undef sigma0 +#undef sigma1 + +#define rotr64(x, n) (((x) >> n) | ((x) << (64 - n))) +#define sum0(x) (rotr64((x), 28) ^ rotr64((x), 34) ^ rotr64((x), 39)) +#define sum1(x) (rotr64((x), 14) ^ rotr64((x), 18) ^ rotr64((x), 41)) +#define sigma0(x) (rotr64((x), 1) ^ rotr64((x), 8) ^ ((x) >> 7)) +#define sigma1(x) (rotr64((x), 19) ^ rotr64((x), 61) ^ ((x) >> 6)) + +/* SHA512 */ +static const uint64_t SHA512_K[80] = { + 0x428a2f98d728ae22, 0x7137449123ef65cd, 0xb5c0fbcfec4d3b2f, + 0xe9b5dba58189dbbc, 0x3956c25bf348b538, 0x59f111f1b605d019, + 0x923f82a4af194f9b, 0xab1c5ed5da6d8118, 0xd807aa98a3030242, + 0x12835b0145706fbe, 0x243185be4ee4b28c, 0x550c7dc3d5ffb4e2, + 0x72be5d74f27b896f, 0x80deb1fe3b1696b1, 0x9bdc06a725c71235, + 0xc19bf174cf692694, 
0xe49b69c19ef14ad2, 0xefbe4786384f25e3, + 0x0fc19dc68b8cd5b5, 0x240ca1cc77ac9c65, 0x2de92c6f592b0275, + 0x4a7484aa6ea6e483, 0x5cb0a9dcbd41fbd4, 0x76f988da831153b5, + 0x983e5152ee66dfab, 0xa831c66d2db43210, 0xb00327c898fb213f, + 0xbf597fc7beef0ee4, 0xc6e00bf33da88fc2, 0xd5a79147930aa725, + 0x06ca6351e003826f, 0x142929670a0e6e70, 0x27b70a8546d22ffc, + 0x2e1b21385c26c926, 0x4d2c6dfc5ac42aed, 0x53380d139d95b3df, + 0x650a73548baf63de, 0x766a0abb3c77b2a8, 0x81c2c92e47edaee6, + 0x92722c851482353b, 0xa2bfe8a14cf10364, 0xa81a664bbc423001, + 0xc24b8b70d0f89791, 0xc76c51a30654be30, 0xd192e819d6ef5218, + 0xd69906245565a910, 0xf40e35855771202a, 0x106aa07032bbd1b8, + 0x19a4c116b8d2d0c8, 0x1e376c085141ab53, 0x2748774cdf8eeb99, + 0x34b0bcb5e19b48a8, 0x391c0cb3c5c95a63, 0x4ed8aa4ae3418acb, + 0x5b9cca4f7763e373, 0x682e6ff3d6b2b8a3, 0x748f82ee5defb2fc, + 0x78a5636f43172f60, 0x84c87814a1f0ab72, 0x8cc702081a6439ec, + 0x90befffa23631e28, 0xa4506cebde82bde9, 0xbef9a3f7b2c67915, + 0xc67178f2e372532b, 0xca273eceea26619c, 0xd186b8c721c0c207, + 0xeada7dd6cde0eb1e, 0xf57d4f7fee6ed178, 0x06f067aa72176fba, + 0x0a637dc5a2c898a6, 0x113f9804bef90dae, 0x1b710b35131c471b, + 0x28db77f523047d84, 0x32caab7b40c72493, 0x3c9ebe0a15c9bebc, + 0x431d67c49c100d4c, 0x4cc5d4becb3e42b6, 0x597f299cfc657e2a, + 0x5fcb6fab3ad6faec, 0x6c44198c4a475817 +}; + +static void sha512_generic(uint64_t state[8], const void *data, size_t num_blks) +{ + uint64_t blk; + + for (blk = 0; blk < num_blks; blk++) { + uint64_t W[16]; + uint64_t a, b, c, d, e, f, g, h; + uint64_t T1, T2; + int i; + + for (i = 0; i < 16; i++) { + W[i] = BE_64( \ + (((const uint64_t *)(data))[blk * 16 + i])); + } + + a = state[0]; + b = state[1]; + c = state[2]; + d = state[3]; + e = state[4]; + f = state[5]; + g = state[6]; + h = state[7]; + + for (i = 0; i <= 79; i += 16) { + COMPRESS(i, 0, SHA512_K); + COMPRESS(i, 1, SHA512_K); + COMPRESS(i, 2, SHA512_K); + COMPRESS(i, 3, SHA512_K); + COMPRESS(i, 4, SHA512_K); + COMPRESS(i, 5, SHA512_K); + COMPRESS(i, 
6, SHA512_K);
+			COMPRESS(i, 7, SHA512_K);
+			COMPRESS(i, 8, SHA512_K);
+			COMPRESS(i, 9, SHA512_K);
+			COMPRESS(i, 10, SHA512_K);
+			COMPRESS(i, 11, SHA512_K);
+			COMPRESS(i, 12, SHA512_K);
+			COMPRESS(i, 13, SHA512_K);
+			COMPRESS(i, 14, SHA512_K);
+			COMPRESS(i, 15, SHA512_K);
+		}
+		state[0] += a;
+		state[1] += b;
+		state[2] += c;
+		state[3] += d;
+		state[4] += e;
+		state[5] += f;
+		state[6] += g;
+		state[7] += h;
+	}
+}
+
+/*
+ * sha256_update()
+ *
+ * purpose: absorb len bytes of message data into a SHA-256 context
+ *   input: sha256_ctx * : the context to update
+ *	    uint8_t *	 : the message data
+ *	    size_t	 : the length of the message data, in bytes
+ *  output: void
+ *
+ * ctx->count[0] is the number of bytes currently buffered in ctx->wbuf,
+ * ctx->count[1] is the running message length in bits.  Whole 64-byte
+ * blocks go to ops->transform(); a tail shorter than one block is kept in
+ * wbuf for the next call.
+ */
+static void sha256_update(sha256_ctx *ctx, const uint8_t *data, size_t len)
+{
+	uint64_t pos = ctx->count[0];
+	uint64_t total = ctx->count[1];
+	uint8_t *m = ctx->wbuf;
+	const sha256_ops_t *ops = ctx->ops;
+
+	/* top up a partially filled buffer and compress it once it is full */
+	if (pos && pos + len >= 64) {
+		memcpy(m + pos, data, 64 - pos);
+		ops->transform(ctx->state, m, 1);
+		len -= 64 - pos;
+		total += (64 - pos) * 8;
+		data += 64 - pos;
+		pos = 0;
+	}
+
+	/* compress all remaining whole blocks directly from the input */
+	if (len >= 64) {
+		/*
+		 * Both values must be 64 bits wide.  With 32-bit variables
+		 * "bytes * 8" wraps for a single update of 512 MiB or more
+		 * (wrong bit count, hence wrong digest), and "blocks * 64"
+		 * wraps at 4 GiB, which leaves len >= 64 and lets the
+		 * memcpy() below run past the end of wbuf.
+		 */
+		uint64_t blocks = len / 64;
+		uint64_t bytes = blocks * 64;
+		ops->transform(ctx->state, data, blocks);
+		len -= bytes;
+		total += (bytes) * 8;
+		data += bytes;
+	}
+	/* keep the tail (now shorter than one block) for the next call */
+	memcpy(m + pos, data, len);
+
+	pos += len;
+	total += len * 8;
+	ctx->count[0] = pos;
+	ctx->count[1] = total;
+}
+
+/*
+ * sha512_update()
+ *
+ * purpose: absorb len bytes of message data into a SHA-512 context
+ *   input: sha512_ctx * : the context to update
+ *	    uint8_t *	 : the message data
+ *	    size_t	 : the length of the message data, in bytes
+ *  output: void
+ *
+ * Same scheme as sha256_update(), with 128-byte blocks: ctx->count[0] is
+ * the number of bytes buffered in ctx->wbuf, ctx->count[1] the running
+ * message length in bits.
+ */
+static void sha512_update(sha512_ctx *ctx, const uint8_t *data, size_t len)
+{
+	uint64_t pos = ctx->count[0];
+	uint64_t total = ctx->count[1];
+	uint8_t *m = ctx->wbuf;
+	const sha512_ops_t *ops = ctx->ops;
+
+	/* top up a partially filled buffer and compress it once it is full */
+	if (pos && pos + len >= 128) {
+		memcpy(m + pos, data, 128 - pos);
+		ops->transform(ctx->state, m, 1);
+		len -= 128 - pos;
+		total += (128 - pos) * 8;
+		data += 128 - pos;
+		pos = 0;
+	}
+
+	/* compress all remaining whole blocks directly from the input */
+	if (len >= 128) {
+		uint64_t blocks = len / 128;
+		uint64_t bytes = blocks * 128;
+		ops->transform(ctx->state, data, blocks);
+		len -= bytes;
+		total += (bytes) * 8;
+		data += bytes;
+	}
+	/* keep the tail (now shorter than one block) for the next call */
+	memcpy(m + pos, data, len);
+
+	pos += len;
+	total += len * 8;
+	ctx->count[0] = pos;
+	ctx->count[1] = total;
+}
+
+static void sha256_final(sha256_ctx *ctx, uint8_t *result, int bits)
+{
+	uint64_t mlen, pos = ctx->count[0];
+	uint8_t *m = ctx->wbuf;
+	uint32_t *R = (uint32_t
*)result; + const sha256_ops_t *ops = ctx->ops; + + m[pos++] = 0x80; + if (pos > 56) { + memset(m + pos, 0, 64 - pos); + ops->transform(ctx->state, m, 1); + pos = 0; + } + + memset(m + pos, 0, 64 - pos); + mlen = BE_64(ctx->count[1]); + memcpy(m + (64 - 8), &mlen, 64 / 8); + ops->transform(ctx->state, m, 1); + + switch (bits) { + case 224: /* 28 - unused currently /TR */ + R[0] = BE_32(ctx->state[0]); + R[1] = BE_32(ctx->state[1]); + R[2] = BE_32(ctx->state[2]); + R[3] = BE_32(ctx->state[3]); + R[4] = BE_32(ctx->state[4]); + R[5] = BE_32(ctx->state[5]); + R[6] = BE_32(ctx->state[6]); + break; + case 256: /* 32 */ + R[0] = BE_32(ctx->state[0]); + R[1] = BE_32(ctx->state[1]); + R[2] = BE_32(ctx->state[2]); + R[3] = BE_32(ctx->state[3]); + R[4] = BE_32(ctx->state[4]); + R[5] = BE_32(ctx->state[5]); + R[6] = BE_32(ctx->state[6]); + R[7] = BE_32(ctx->state[7]); + break; + } + + memset(ctx, 0, sizeof (*ctx)); +} + +static void sha512_final(sha512_ctx *ctx, uint8_t *result, int bits) +{ + uint64_t mlen, pos = ctx->count[0]; + uint8_t *m = ctx->wbuf, *r; + uint64_t *R = (uint64_t *)result; + const sha512_ops_t *ops = ctx->ops; + + m[pos++] = 0x80; + if (pos > 112) { + memset(m + pos, 0, 128 - pos); + ops->transform(ctx->state, m, 1); + pos = 0; + } + + memset(m + pos, 0, 128 - pos); + mlen = BE_64(ctx->count[1]); + memcpy(m + (128 - 8), &mlen, 64 / 8); + ops->transform(ctx->state, m, 1); + + switch (bits) { + case 224: /* 28 => 3,5 x 8 */ + r = result + 24; + R[0] = BE_64(ctx->state[0]); + R[1] = BE_64(ctx->state[1]); + R[2] = BE_64(ctx->state[2]); + /* last 4 bytes are special here */ + *r++ = (uint8_t)(ctx->state[3] >> 56); + *r++ = (uint8_t)(ctx->state[3] >> 48); + *r++ = (uint8_t)(ctx->state[3] >> 40); + *r++ = (uint8_t)(ctx->state[3] >> 32); + break; + case 256: /* 32 */ + R[0] = BE_64(ctx->state[0]); + R[1] = BE_64(ctx->state[1]); + R[2] = BE_64(ctx->state[2]); + R[3] = BE_64(ctx->state[3]); + break; + case 384: /* 48 */ + R[0] = BE_64(ctx->state[0]); + R[1] = 
BE_64(ctx->state[1]); + R[2] = BE_64(ctx->state[2]); + R[3] = BE_64(ctx->state[3]); + R[4] = BE_64(ctx->state[4]); + R[5] = BE_64(ctx->state[5]); + break; + case 512: /* 64 */ + R[0] = BE_64(ctx->state[0]); + R[1] = BE_64(ctx->state[1]); + R[2] = BE_64(ctx->state[2]); + R[3] = BE_64(ctx->state[3]); + R[4] = BE_64(ctx->state[4]); + R[5] = BE_64(ctx->state[5]); + R[6] = BE_64(ctx->state[6]); + R[7] = BE_64(ctx->state[7]); + break; + } + + memset(ctx, 0, sizeof (*ctx)); +} + +/* SHA2 Init function */ +void +SHA2Init(int algotype, SHA2_CTX *ctx) +{ + sha256_ctx *ctx256 = &ctx->sha256; + sha512_ctx *ctx512 = &ctx->sha512; + + ASSERT3S(algotype, >=, SHA256_MECH_INFO_TYPE); + ASSERT3S(algotype, <=, SHA512_256_MECH_INFO_TYPE); + + memset(ctx, 0, sizeof (*ctx)); + ctx->algotype = algotype; + switch (ctx->algotype) { + case SHA256_MECH_INFO_TYPE: + case SHA256_HMAC_MECH_INFO_TYPE: + case SHA256_HMAC_GEN_MECH_INFO_TYPE: + ctx256->state[0] = 0x6a09e667; + ctx256->state[1] = 0xbb67ae85; + ctx256->state[2] = 0x3c6ef372; + ctx256->state[3] = 0xa54ff53a; + ctx256->state[4] = 0x510e527f; + ctx256->state[5] = 0x9b05688c; + ctx256->state[6] = 0x1f83d9ab; + ctx256->state[7] = 0x5be0cd19; + ctx256->count[0] = 0; + ctx256->ops = sha256_get_ops(); + break; + case SHA384_MECH_INFO_TYPE: + case SHA384_HMAC_MECH_INFO_TYPE: + case SHA384_HMAC_GEN_MECH_INFO_TYPE: + ctx512->state[0] = 0xcbbb9d5dc1059ed8ULL; + ctx512->state[1] = 0x629a292a367cd507ULL; + ctx512->state[2] = 0x9159015a3070dd17ULL; + ctx512->state[3] = 0x152fecd8f70e5939ULL; + ctx512->state[4] = 0x67332667ffc00b31ULL; + ctx512->state[5] = 0x8eb44a8768581511ULL; + ctx512->state[6] = 0xdb0c2e0d64f98fa7ULL; + ctx512->state[7] = 0x47b5481dbefa4fa4ULL; + ctx512->count[0] = 0; + ctx512->count[1] = 0; + ctx512->ops = sha512_get_ops(); + break; + case SHA512_MECH_INFO_TYPE: + case SHA512_HMAC_MECH_INFO_TYPE: + case SHA512_HMAC_GEN_MECH_INFO_TYPE: + ctx512->state[0] = 0x6a09e667f3bcc908ULL; + ctx512->state[1] = 0xbb67ae8584caa73bULL; + 
ctx512->state[2] = 0x3c6ef372fe94f82bULL; + ctx512->state[3] = 0xa54ff53a5f1d36f1ULL; + ctx512->state[4] = 0x510e527fade682d1ULL; + ctx512->state[5] = 0x9b05688c2b3e6c1fULL; + ctx512->state[6] = 0x1f83d9abfb41bd6bULL; + ctx512->state[7] = 0x5be0cd19137e2179ULL; + ctx512->count[0] = 0; + ctx512->count[1] = 0; + ctx512->ops = sha512_get_ops(); + break; + case SHA512_224_MECH_INFO_TYPE: + ctx512->state[0] = 0x8c3d37c819544da2ULL; + ctx512->state[1] = 0x73e1996689dcd4d6ULL; + ctx512->state[2] = 0x1dfab7ae32ff9c82ULL; + ctx512->state[3] = 0x679dd514582f9fcfULL; + ctx512->state[4] = 0x0f6d2b697bd44da8ULL; + ctx512->state[5] = 0x77e36f7304c48942ULL; + ctx512->state[6] = 0x3f9d85a86a1d36c8ULL; + ctx512->state[7] = 0x1112e6ad91d692a1ULL; + ctx512->count[0] = 0; + ctx512->count[1] = 0; + ctx512->ops = sha512_get_ops(); + break; + case SHA512_256_MECH_INFO_TYPE: + ctx512->state[0] = 0x22312194fc2bf72cULL; + ctx512->state[1] = 0x9f555fa3c84c64c2ULL; + ctx512->state[2] = 0x2393b86b6f53b151ULL; + ctx512->state[3] = 0x963877195940eabdULL; + ctx512->state[4] = 0x96283ee2a88effe3ULL; + ctx512->state[5] = 0xbe5e1e2553863992ULL; + ctx512->state[6] = 0x2b0199fc2c85b8aaULL; + ctx512->state[7] = 0x0eb72ddc81c52ca2ULL; + ctx512->count[0] = 0; + ctx512->count[1] = 0; + ctx512->ops = sha512_get_ops(); + break; + } +} + +/* SHA2 Update function */ +void +SHA2Update(SHA2_CTX *ctx, const void *data, size_t len) +{ + /* check for zero input length */ + if (len == 0) + return; + + ASSERT3P(data, !=, NULL); + + switch (ctx->algotype) { + case SHA256_MECH_INFO_TYPE: + case SHA256_HMAC_MECH_INFO_TYPE: + case SHA256_HMAC_GEN_MECH_INFO_TYPE: + sha256_update(&ctx->sha256, data, len); + break; + case SHA384_MECH_INFO_TYPE: + case SHA384_HMAC_MECH_INFO_TYPE: + case SHA384_HMAC_GEN_MECH_INFO_TYPE: + sha512_update(&ctx->sha512, data, len); + break; + case SHA512_MECH_INFO_TYPE: + case SHA512_HMAC_MECH_INFO_TYPE: + case SHA512_HMAC_GEN_MECH_INFO_TYPE: + sha512_update(&ctx->sha512, data, len); + break; + 
case SHA512_224_MECH_INFO_TYPE: + sha512_update(&ctx->sha512, data, len); + break; + case SHA512_256_MECH_INFO_TYPE: + sha512_update(&ctx->sha512, data, len); + break; + } +} + +/* SHA2Final function */ +void +SHA2Final(void *digest, SHA2_CTX *ctx) +{ + switch (ctx->algotype) { + case SHA256_MECH_INFO_TYPE: + case SHA256_HMAC_MECH_INFO_TYPE: + case SHA256_HMAC_GEN_MECH_INFO_TYPE: + sha256_final(&ctx->sha256, digest, 256); + break; + case SHA384_MECH_INFO_TYPE: + case SHA384_HMAC_MECH_INFO_TYPE: + case SHA384_HMAC_GEN_MECH_INFO_TYPE: + sha512_final(&ctx->sha512, digest, 384); + break; + case SHA512_MECH_INFO_TYPE: + case SHA512_HMAC_MECH_INFO_TYPE: + case SHA512_HMAC_GEN_MECH_INFO_TYPE: + sha512_final(&ctx->sha512, digest, 512); + break; + case SHA512_224_MECH_INFO_TYPE: + sha512_final(&ctx->sha512, digest, 224); + break; + case SHA512_256_MECH_INFO_TYPE: + sha512_final(&ctx->sha512, digest, 256); + break; + } +} + +/* the generic implementation is always okay */ +static boolean_t sha2_is_supported(void) +{ + return (B_TRUE); +} + +const sha256_ops_t sha256_generic_impl = { + .name = "generic", + .transform = sha256_generic, + .is_supported = sha2_is_supported +}; + +const sha512_ops_t sha512_generic_impl = { + .name = "generic", + .transform = sha512_generic, + .is_supported = sha2_is_supported +}; diff --git a/sys/contrib/openzfs/module/icp/algs/sha2/sha512_impl.c b/sys/contrib/openzfs/module/icp/algs/sha2/sha512_impl.c new file mode 100644 index 000000000000..6291fbd77e36 --- /dev/null +++ b/sys/contrib/openzfs/module/icp/algs/sha2/sha512_impl.c @@ -0,0 +1,282 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. 
+ * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de> + */ + +#include <sys/simd.h> +#include <sys/zfs_context.h> +#include <sys/zfs_impl.h> +#include <sys/sha2.h> + +#include <sha2/sha2_impl.h> +#include <sys/asm_linkage.h> + +#define TF(E, N) \ + extern void ASMABI E(uint64_t s[8], const void *, size_t); \ + static inline void N(uint64_t s[8], const void *d, size_t b) { \ + kfpu_begin(); E(s, d, b); kfpu_end(); \ +} + +/* some implementation is always okay */ +static inline boolean_t sha2_is_supported(void) +{ + return (B_TRUE); +} + +#if defined(__x86_64) + +/* Users of ASMABI requires all calls to be from wrappers */ +extern void ASMABI +zfs_sha512_transform_x64(uint64_t s[8], const void *, size_t); + +static inline void +tf_sha512_transform_x64(uint64_t s[8], const void *d, size_t b) +{ + zfs_sha512_transform_x64(s, d, b); +} +const sha512_ops_t sha512_x64_impl = { + .is_supported = sha2_is_supported, + .transform = tf_sha512_transform_x64, + .name = "x64" +}; + +#if defined(HAVE_AVX) +static boolean_t sha2_have_avx(void) +{ + return (kfpu_allowed() && zfs_avx_available()); +} + +TF(zfs_sha512_transform_avx, tf_sha512_avx); +const sha512_ops_t sha512_avx_impl = { + .is_supported = sha2_have_avx, + .transform = tf_sha512_avx, + .name = "avx" +}; +#endif + +#if defined(HAVE_AVX2) +static boolean_t sha2_have_avx2(void) +{ + return (kfpu_allowed() && zfs_avx2_available()); +} + +TF(zfs_sha512_transform_avx2, tf_sha512_avx2); +const sha512_ops_t sha512_avx2_impl = { + 
.is_supported = sha2_have_avx2, + .transform = tf_sha512_avx2, + .name = "avx2" +}; +#endif + +#elif defined(__aarch64__) || defined(__arm__) +extern void zfs_sha512_block_armv7(uint64_t s[8], const void *, size_t); +const sha512_ops_t sha512_armv7_impl = { + .is_supported = sha2_is_supported, + .transform = zfs_sha512_block_armv7, + .name = "armv7" +}; + +#if defined(__aarch64__) +static boolean_t sha512_have_armv8ce(void) +{ + return (kfpu_allowed() && zfs_sha512_available()); +} + +TF(zfs_sha512_block_armv8, tf_sha512_armv8ce); +const sha512_ops_t sha512_armv8_impl = { + .is_supported = sha512_have_armv8ce, + .transform = tf_sha512_armv8ce, + .name = "armv8-ce" +}; +#endif + +#if defined(__arm__) && __ARM_ARCH > 6 +static boolean_t sha512_have_neon(void) +{ + return (kfpu_allowed() && zfs_neon_available()); +} + +TF(zfs_sha512_block_neon, tf_sha512_neon); +const sha512_ops_t sha512_neon_impl = { + .is_supported = sha512_have_neon, + .transform = tf_sha512_neon, + .name = "neon" +}; +#endif + +#elif defined(__PPC64__) +TF(zfs_sha512_ppc, tf_sha512_ppc); +const sha512_ops_t sha512_ppc_impl = { + .is_supported = sha2_is_supported, + .transform = tf_sha512_ppc, + .name = "ppc" +}; + +static boolean_t sha512_have_isa207(void) +{ + return (kfpu_allowed() && zfs_isa207_available()); +} + +TF(zfs_sha512_power8, tf_sha512_power8); +const sha512_ops_t sha512_power8_impl = { + .is_supported = sha512_have_isa207, + .transform = tf_sha512_power8, + .name = "power8" +}; +#endif /* __PPC64__ */ + +/* the two generic ones */ +extern const sha512_ops_t sha512_generic_impl; + +/* array with all sha512 implementations */ +static const sha512_ops_t *const sha512_impls[] = { + &sha512_generic_impl, +#if defined(__x86_64) + &sha512_x64_impl, +#endif +#if defined(__x86_64) && defined(HAVE_AVX) + &sha512_avx_impl, +#endif +#if defined(__x86_64) && defined(HAVE_AVX2) + &sha512_avx2_impl, +#endif +#if defined(__aarch64__) || defined(__arm__) + &sha512_armv7_impl, +#if 
defined(__aarch64__) + &sha512_armv8_impl, +#endif +#if defined(__arm__) && __ARM_ARCH > 6 + &sha512_neon_impl, +#endif +#endif +#if defined(__PPC64__) + &sha512_ppc_impl, + &sha512_power8_impl, +#endif /* __PPC64__ */ +}; + +/* use the generic implementation functions */ +#define IMPL_NAME "sha512" +#define IMPL_OPS_T sha512_ops_t +#define IMPL_ARRAY sha512_impls +#define IMPL_GET_OPS sha512_get_ops +#define ZFS_IMPL_OPS zfs_sha512_ops +#include <generic_impl.c> + +#ifdef _KERNEL + +#define IMPL_FMT(impl, i) (((impl) == (i)) ? "[%s] " : "%s ") + +#if defined(__linux__) + +static int +sha512_param_get(char *buffer, zfs_kernel_param_t *unused) +{ + const uint32_t impl = IMPL_READ(generic_impl_chosen); + char *fmt; + int cnt = 0; + + /* cycling */ + fmt = IMPL_FMT(impl, IMPL_CYCLE); + cnt += sprintf(buffer + cnt, fmt, "cycle"); + + /* list fastest */ + fmt = IMPL_FMT(impl, IMPL_FASTEST); + cnt += sprintf(buffer + cnt, fmt, "fastest"); + + /* list all supported implementations */ + generic_impl_init(); + for (uint32_t i = 0; i < generic_supp_impls_cnt; ++i) { + fmt = IMPL_FMT(impl, i); + cnt += sprintf(buffer + cnt, fmt, + generic_supp_impls[i]->name); + } + + return (cnt); +} + +static int +sha512_param_set(const char *val, zfs_kernel_param_t *unused) +{ + (void) unused; + return (generic_impl_setname(val)); +} + +#elif defined(__FreeBSD__) + +#include <sys/sbuf.h> + +static int +sha512_param(ZFS_MODULE_PARAM_ARGS) +{ + int err; + + generic_impl_init(); + if (req->newptr == NULL) { + const uint32_t impl = IMPL_READ(generic_impl_chosen); + const int init_buflen = 64; + const char *fmt; + struct sbuf *s; + + s = sbuf_new_for_sysctl(NULL, NULL, init_buflen, req); + + /* cycling */ + fmt = IMPL_FMT(impl, IMPL_CYCLE); + (void) sbuf_printf(s, fmt, "cycle"); + + /* list fastest */ + fmt = IMPL_FMT(impl, IMPL_FASTEST); + (void) sbuf_printf(s, fmt, "fastest"); + + /* list all supported implementations */ + for (uint32_t i = 0; i < generic_supp_impls_cnt; ++i) { + fmt = 
IMPL_FMT(impl, i); + (void) sbuf_printf(s, fmt, generic_supp_impls[i]->name); + } + + err = sbuf_finish(s); + sbuf_delete(s); + + return (err); + } + + /* we got module parameter */ + char buf[16]; + + err = sysctl_handle_string(oidp, buf, sizeof (buf), req); + if (err) { + return (err); + } + + return (-generic_impl_setname(buf)); +} +#endif + +#undef IMPL_FMT + +ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs, zfs_, sha512_impl, + sha512_param_set, sha512_param_get, ZMOD_RW, \ + "Select SHA512 implementation."); +#endif + +#undef TF diff --git a/sys/contrib/openzfs/module/icp/algs/skein/skein.c b/sys/contrib/openzfs/module/icp/algs/skein/skein.c index 83fe84260307..41ed2dd44e9e 100644 --- a/sys/contrib/openzfs/module/icp/algs/skein/skein.c +++ b/sys/contrib/openzfs/module/icp/algs/skein/skein.c @@ -26,16 +26,16 @@ Skein_256_Init(Skein_256_Ctxt_t *ctx, size_t hashBitLen) switch (hashBitLen) { /* use pre-computed values, where available */ #ifndef SKEIN_NO_PRECOMP case 256: - bcopy(SKEIN_256_IV_256, ctx->X, sizeof (ctx->X)); + memcpy(ctx->X, SKEIN_256_IV_256, sizeof (ctx->X)); break; case 224: - bcopy(SKEIN_256_IV_224, ctx->X, sizeof (ctx->X)); + memcpy(ctx->X, SKEIN_256_IV_224, sizeof (ctx->X)); break; case 160: - bcopy(SKEIN_256_IV_160, ctx->X, sizeof (ctx->X)); + memcpy(ctx->X, SKEIN_256_IV_160, sizeof (ctx->X)); break; case 128: - bcopy(SKEIN_256_IV_128, ctx->X, sizeof (ctx->X)); + memcpy(ctx->X, SKEIN_256_IV_128, sizeof (ctx->X)); break; #endif default: @@ -53,11 +53,11 @@ Skein_256_Init(Skein_256_Ctxt_t *ctx, size_t hashBitLen) cfg.w[1] = Skein_Swap64(hashBitLen); cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL); /* zero pad config block */ - bzero(&cfg.w[3], sizeof (cfg) - 3 * sizeof (cfg.w[0])); + memset(&cfg.w[3], 0, sizeof (cfg) - 3 * sizeof (cfg.w[0])); /* compute the initial chaining values from config block */ /* zero the chaining variables */ - bzero(ctx->X, sizeof (ctx->X)); + memset(ctx->X, 0, sizeof (ctx->X)); Skein_256_Process_Block(ctx, cfg.b, 1, 
SKEIN_CFG_STR_LEN); break; } @@ -91,7 +91,7 @@ Skein_256_InitExt(Skein_256_Ctxt_t *ctx, size_t hashBitLen, uint64_t treeInfo, /* compute the initial chaining values ctx->X[], based on key */ if (keyBytes == 0) { /* is there a key? */ /* no key: use all zeroes as key for config block */ - bzero(ctx->X, sizeof (ctx->X)); + memset(ctx->X, 0, sizeof (ctx->X)); } else { /* here to pre-process a key */ Skein_assert(sizeof (cfg.b) >= sizeof (ctx->X)); @@ -101,13 +101,13 @@ Skein_256_InitExt(Skein_256_Ctxt_t *ctx, size_t hashBitLen, uint64_t treeInfo, /* set tweaks: T0 = 0; T1 = KEY type */ Skein_Start_New_Type(ctx, KEY); /* zero the initial chaining variables */ - bzero(ctx->X, sizeof (ctx->X)); + memset(ctx->X, 0, sizeof (ctx->X)); /* hash the key */ (void) Skein_256_Update(ctx, key, keyBytes); /* put result into cfg.b[] */ (void) Skein_256_Final_Pad(ctx, cfg.b); /* copy over into ctx->X[] */ - bcopy(cfg.b, ctx->X, sizeof (cfg.b)); + memcpy(ctx->X, cfg.b, sizeof (cfg.b)); #if SKEIN_NEED_SWAP { uint_t i; @@ -124,7 +124,7 @@ Skein_256_InitExt(Skein_256_Ctxt_t *ctx, size_t hashBitLen, uint64_t treeInfo, ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ Skein_Start_New_Type(ctx, CFG_FINAL); - bzero(&cfg.w, sizeof (cfg.w)); /* pre-pad cfg.w[] with zeroes */ + memset(&cfg.w, 0, sizeof (cfg.w)); /* pre-pad cfg.w[] with zeroes */ cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */ /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */ @@ -161,7 +161,7 @@ Skein_256_Update(Skein_256_Ctxt_t *ctx, const uint8_t *msg, size_t msgByteCnt) if (n) { /* check on our logic here */ Skein_assert(n < msgByteCnt); - bcopy(msg, &ctx->b[ctx->h.bCnt], n); + memcpy(&ctx->b[ctx->h.bCnt], msg, n); msgByteCnt -= n; msg += n; ctx->h.bCnt += n; @@ -189,7 +189,7 @@ Skein_256_Update(Skein_256_Ctxt_t *ctx, const uint8_t *msg, size_t msgByteCnt) /* copy any remaining source message data bytes into b[] */ if (msgByteCnt) { 
Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_256_BLOCK_BYTES); - bcopy(msg, &ctx->b[ctx->h.bCnt], msgByteCnt); + memcpy(&ctx->b[ctx->h.bCnt], msg, msgByteCnt); ctx->h.bCnt += msgByteCnt; } @@ -209,7 +209,7 @@ Skein_256_Final(Skein_256_Ctxt_t *ctx, uint8_t *hashVal) ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ /* zero pad b[] if necessary */ if (ctx->h.bCnt < SKEIN_256_BLOCK_BYTES) - bzero(&ctx->b[ctx->h.bCnt], + memset(&ctx->b[ctx->h.bCnt], 0, SKEIN_256_BLOCK_BYTES - ctx->h.bCnt); /* process the final block */ @@ -221,13 +221,12 @@ Skein_256_Final(Skein_256_Ctxt_t *ctx, uint8_t *hashVal) /* run Threefish in "counter mode" to generate output */ /* zero out b[], so it can hold the counter */ - bzero(ctx->b, sizeof (ctx->b)); + memset(ctx->b, 0, sizeof (ctx->b)); /* keep a local copy of counter mode "key" */ - bcopy(ctx->X, X, sizeof (X)); + memcpy(X, ctx->X, sizeof (X)); for (i = 0; i * SKEIN_256_BLOCK_BYTES < byteCnt; i++) { /* build the counter block */ - uint64_t tmp = Skein_Swap64((uint64_t)i); - bcopy(&tmp, ctx->b, sizeof (tmp)); + *(uint64_t *)ctx->b = Skein_Swap64((uint64_t)i); Skein_Start_New_Type(ctx, OUT_FINAL); /* run "counter mode" */ Skein_256_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t)); @@ -240,7 +239,7 @@ Skein_256_Final(Skein_256_Ctxt_t *ctx, uint8_t *hashVal) Skein_Show_Final(256, &ctx->h, n, hashVal + i * SKEIN_256_BLOCK_BYTES); /* restore the counter mode key for next time */ - bcopy(X, ctx->X, sizeof (X)); + memcpy(ctx->X, X, sizeof (X)); } return (SKEIN_SUCCESS); } @@ -262,16 +261,16 @@ Skein_512_Init(Skein_512_Ctxt_t *ctx, size_t hashBitLen) switch (hashBitLen) { /* use pre-computed values, where available */ #ifndef SKEIN_NO_PRECOMP case 512: - bcopy(SKEIN_512_IV_512, ctx->X, sizeof (ctx->X)); + memcpy(ctx->X, SKEIN_512_IV_512, sizeof (ctx->X)); break; case 384: - bcopy(SKEIN_512_IV_384, ctx->X, sizeof (ctx->X)); + memcpy(ctx->X, SKEIN_512_IV_384, sizeof (ctx->X)); break; case 256: - bcopy(SKEIN_512_IV_256, ctx->X, 
sizeof (ctx->X)); + memcpy(ctx->X, SKEIN_512_IV_256, sizeof (ctx->X)); break; case 224: - bcopy(SKEIN_512_IV_224, ctx->X, sizeof (ctx->X)); + memcpy(ctx->X, SKEIN_512_IV_224, sizeof (ctx->X)); break; #endif default: @@ -289,11 +288,11 @@ Skein_512_Init(Skein_512_Ctxt_t *ctx, size_t hashBitLen) cfg.w[1] = Skein_Swap64(hashBitLen); cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL); /* zero pad config block */ - bzero(&cfg.w[3], sizeof (cfg) - 3 * sizeof (cfg.w[0])); + memset(&cfg.w[3], 0, sizeof (cfg) - 3 * sizeof (cfg.w[0])); /* compute the initial chaining values from config block */ /* zero the chaining variables */ - bzero(ctx->X, sizeof (ctx->X)); + memset(ctx->X, 0, sizeof (ctx->X)); Skein_512_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN); break; } @@ -328,7 +327,7 @@ Skein_512_InitExt(Skein_512_Ctxt_t *ctx, size_t hashBitLen, uint64_t treeInfo, /* compute the initial chaining values ctx->X[], based on key */ if (keyBytes == 0) { /* is there a key? */ /* no key: use all zeroes as key for config block */ - bzero(ctx->X, sizeof (ctx->X)); + memset(ctx->X, 0, sizeof (ctx->X)); } else { /* here to pre-process a key */ Skein_assert(sizeof (cfg.b) >= sizeof (ctx->X)); @@ -338,12 +337,12 @@ Skein_512_InitExt(Skein_512_Ctxt_t *ctx, size_t hashBitLen, uint64_t treeInfo, /* set tweaks: T0 = 0; T1 = KEY type */ Skein_Start_New_Type(ctx, KEY); /* zero the initial chaining variables */ - bzero(ctx->X, sizeof (ctx->X)); + memset(ctx->X, 0, sizeof (ctx->X)); (void) Skein_512_Update(ctx, key, keyBytes); /* hash the key */ /* put result into cfg.b[] */ (void) Skein_512_Final_Pad(ctx, cfg.b); /* copy over into ctx->X[] */ - bcopy(cfg.b, ctx->X, sizeof (cfg.b)); + memcpy(ctx->X, cfg.b, sizeof (cfg.b)); #if SKEIN_NEED_SWAP { uint_t i; @@ -360,7 +359,7 @@ Skein_512_InitExt(Skein_512_Ctxt_t *ctx, size_t hashBitLen, uint64_t treeInfo, ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ Skein_Start_New_Type(ctx, CFG_FINAL); - bzero(&cfg.w, sizeof (cfg.w)); /* pre-pad 
cfg.w[] with zeroes */ + memset(&cfg.w, 0, sizeof (cfg.w)); /* pre-pad cfg.w[] with zeroes */ cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); cfg.w[1] = Skein_Swap64(hashBitLen); /* hash result length in bits */ /* tree hash config info (or SKEIN_CFG_TREE_INFO_SEQUENTIAL) */ @@ -397,7 +396,7 @@ Skein_512_Update(Skein_512_Ctxt_t *ctx, const uint8_t *msg, size_t msgByteCnt) if (n) { /* check on our logic here */ Skein_assert(n < msgByteCnt); - bcopy(msg, &ctx->b[ctx->h.bCnt], n); + memcpy(&ctx->b[ctx->h.bCnt], msg, n); msgByteCnt -= n; msg += n; ctx->h.bCnt += n; @@ -425,7 +424,7 @@ Skein_512_Update(Skein_512_Ctxt_t *ctx, const uint8_t *msg, size_t msgByteCnt) /* copy any remaining source message data bytes into b[] */ if (msgByteCnt) { Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN_512_BLOCK_BYTES); - bcopy(msg, &ctx->b[ctx->h.bCnt], msgByteCnt); + memcpy(&ctx->b[ctx->h.bCnt], msg, msgByteCnt); ctx->h.bCnt += msgByteCnt; } @@ -445,7 +444,7 @@ Skein_512_Final(Skein_512_Ctxt_t *ctx, uint8_t *hashVal) ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ /* zero pad b[] if necessary */ if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES) - bzero(&ctx->b[ctx->h.bCnt], + memset(&ctx->b[ctx->h.bCnt], 0, SKEIN_512_BLOCK_BYTES - ctx->h.bCnt); /* process the final block */ @@ -457,13 +456,12 @@ Skein_512_Final(Skein_512_Ctxt_t *ctx, uint8_t *hashVal) /* run Threefish in "counter mode" to generate output */ /* zero out b[], so it can hold the counter */ - bzero(ctx->b, sizeof (ctx->b)); + memset(ctx->b, 0, sizeof (ctx->b)); /* keep a local copy of counter mode "key" */ - bcopy(ctx->X, X, sizeof (X)); + memcpy(X, ctx->X, sizeof (X)); for (i = 0; i * SKEIN_512_BLOCK_BYTES < byteCnt; i++) { /* build the counter block */ - uint64_t tmp = Skein_Swap64((uint64_t)i); - bcopy(&tmp, ctx->b, sizeof (tmp)); + *(uint64_t *)ctx->b = Skein_Swap64((uint64_t)i); Skein_Start_New_Type(ctx, OUT_FINAL); /* run "counter mode" */ Skein_512_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t)); @@ -476,7 
+474,7 @@ Skein_512_Final(Skein_512_Ctxt_t *ctx, uint8_t *hashVal) Skein_Show_Final(512, &ctx->h, n, hashVal + i * SKEIN_512_BLOCK_BYTES); /* restore the counter mode key for next time */ - bcopy(X, ctx->X, sizeof (X)); + memcpy(ctx->X, X, sizeof (X)); } return (SKEIN_SUCCESS); } @@ -498,13 +496,13 @@ Skein1024_Init(Skein1024_Ctxt_t *ctx, size_t hashBitLen) switch (hashBitLen) { /* use pre-computed values, where available */ #ifndef SKEIN_NO_PRECOMP case 512: - bcopy(SKEIN1024_IV_512, ctx->X, sizeof (ctx->X)); + memcpy(ctx->X, SKEIN1024_IV_512, sizeof (ctx->X)); break; case 384: - bcopy(SKEIN1024_IV_384, ctx->X, sizeof (ctx->X)); + memcpy(ctx->X, SKEIN1024_IV_384, sizeof (ctx->X)); break; case 1024: - bcopy(SKEIN1024_IV_1024, ctx->X, sizeof (ctx->X)); + memcpy(ctx->X, SKEIN1024_IV_1024, sizeof (ctx->X)); break; #endif default: @@ -522,11 +520,11 @@ Skein1024_Init(Skein1024_Ctxt_t *ctx, size_t hashBitLen) cfg.w[1] = Skein_Swap64(hashBitLen); cfg.w[2] = Skein_Swap64(SKEIN_CFG_TREE_INFO_SEQUENTIAL); /* zero pad config block */ - bzero(&cfg.w[3], sizeof (cfg) - 3 * sizeof (cfg.w[0])); + memset(&cfg.w[3], 0, sizeof (cfg) - 3 * sizeof (cfg.w[0])); /* compute the initial chaining values from config block */ /* zero the chaining variables */ - bzero(ctx->X, sizeof (ctx->X)); + memset(ctx->X, 0, sizeof (ctx->X)); Skein1024_Process_Block(ctx, cfg.b, 1, SKEIN_CFG_STR_LEN); break; } @@ -561,7 +559,7 @@ Skein1024_InitExt(Skein1024_Ctxt_t *ctx, size_t hashBitLen, uint64_t treeInfo, /* compute the initial chaining values ctx->X[], based on key */ if (keyBytes == 0) { /* is there a key? 
*/ /* no key: use all zeroes as key for config block */ - bzero(ctx->X, sizeof (ctx->X)); + memset(ctx->X, 0, sizeof (ctx->X)); } else { /* here to pre-process a key */ Skein_assert(sizeof (cfg.b) >= sizeof (ctx->X)); /* do a mini-Init right here */ @@ -570,12 +568,12 @@ Skein1024_InitExt(Skein1024_Ctxt_t *ctx, size_t hashBitLen, uint64_t treeInfo, /* set tweaks: T0 = 0; T1 = KEY type */ Skein_Start_New_Type(ctx, KEY); /* zero the initial chaining variables */ - bzero(ctx->X, sizeof (ctx->X)); + memset(ctx->X, 0, sizeof (ctx->X)); (void) Skein1024_Update(ctx, key, keyBytes); /* hash the key */ /* put result into cfg.b[] */ (void) Skein1024_Final_Pad(ctx, cfg.b); /* copy over into ctx->X[] */ - bcopy(cfg.b, ctx->X, sizeof (cfg.b)); + memcpy(ctx->X, cfg.b, sizeof (cfg.b)); #if SKEIN_NEED_SWAP { uint_t i; @@ -592,7 +590,7 @@ Skein1024_InitExt(Skein1024_Ctxt_t *ctx, size_t hashBitLen, uint64_t treeInfo, ctx->h.hashBitLen = hashBitLen; /* output hash bit count */ Skein_Start_New_Type(ctx, CFG_FINAL); - bzero(&cfg.w, sizeof (cfg.w)); /* pre-pad cfg.w[] with zeroes */ + memset(&cfg.w, 0, sizeof (cfg.w)); /* pre-pad cfg.w[] with zeroes */ cfg.w[0] = Skein_Swap64(SKEIN_SCHEMA_VER); /* hash result length in bits */ cfg.w[1] = Skein_Swap64(hashBitLen); @@ -630,7 +628,7 @@ Skein1024_Update(Skein1024_Ctxt_t *ctx, const uint8_t *msg, size_t msgByteCnt) if (n) { /* check on our logic here */ Skein_assert(n < msgByteCnt); - bcopy(msg, &ctx->b[ctx->h.bCnt], n); + memcpy(&ctx->b[ctx->h.bCnt], msg, n); msgByteCnt -= n; msg += n; ctx->h.bCnt += n; @@ -658,7 +656,7 @@ Skein1024_Update(Skein1024_Ctxt_t *ctx, const uint8_t *msg, size_t msgByteCnt) /* copy any remaining source message data bytes into b[] */ if (msgByteCnt) { Skein_assert(msgByteCnt + ctx->h.bCnt <= SKEIN1024_BLOCK_BYTES); - bcopy(msg, &ctx->b[ctx->h.bCnt], msgByteCnt); + memcpy(&ctx->b[ctx->h.bCnt], msg, msgByteCnt); ctx->h.bCnt += msgByteCnt; } @@ -678,7 +676,7 @@ Skein1024_Final(Skein1024_Ctxt_t *ctx, uint8_t *hashVal) 
ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ /* zero pad b[] if necessary */ if (ctx->h.bCnt < SKEIN1024_BLOCK_BYTES) - bzero(&ctx->b[ctx->h.bCnt], + memset(&ctx->b[ctx->h.bCnt], 0, SKEIN1024_BLOCK_BYTES - ctx->h.bCnt); /* process the final block */ @@ -690,13 +688,12 @@ Skein1024_Final(Skein1024_Ctxt_t *ctx, uint8_t *hashVal) /* run Threefish in "counter mode" to generate output */ /* zero out b[], so it can hold the counter */ - bzero(ctx->b, sizeof (ctx->b)); + memset(ctx->b, 0, sizeof (ctx->b)); /* keep a local copy of counter mode "key" */ - bcopy(ctx->X, X, sizeof (X)); + memcpy(X, ctx->X, sizeof (X)); for (i = 0; i * SKEIN1024_BLOCK_BYTES < byteCnt; i++) { /* build the counter block */ - uint64_t tmp = Skein_Swap64((uint64_t)i); - bcopy(&tmp, ctx->b, sizeof (tmp)); + *(uint64_t *)ctx->b = Skein_Swap64((uint64_t)i); Skein_Start_New_Type(ctx, OUT_FINAL); /* run "counter mode" */ Skein1024_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t)); @@ -709,7 +706,7 @@ Skein1024_Final(Skein1024_Ctxt_t *ctx, uint8_t *hashVal) Skein_Show_Final(1024, &ctx->h, n, hashVal + i * SKEIN1024_BLOCK_BYTES); /* restore the counter mode key for next time */ - bcopy(X, ctx->X, sizeof (X)); + memcpy(ctx->X, X, sizeof (X)); } return (SKEIN_SUCCESS); } @@ -727,7 +724,7 @@ Skein_256_Final_Pad(Skein_256_Ctxt_t *ctx, uint8_t *hashVal) ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ /* zero pad b[] if necessary */ if (ctx->h.bCnt < SKEIN_256_BLOCK_BYTES) - bzero(&ctx->b[ctx->h.bCnt], + memset(&ctx->b[ctx->h.bCnt], 0, SKEIN_256_BLOCK_BYTES - ctx->h.bCnt); /* process the final block */ Skein_256_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt); @@ -748,7 +745,7 @@ Skein_512_Final_Pad(Skein_512_Ctxt_t *ctx, uint8_t *hashVal) ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* tag as the final block */ /* zero pad b[] if necessary */ if (ctx->h.bCnt < SKEIN_512_BLOCK_BYTES) - bzero(&ctx->b[ctx->h.bCnt], + memset(&ctx->b[ctx->h.bCnt], 0, SKEIN_512_BLOCK_BYTES - ctx->h.bCnt); /* 
process the final block */ Skein_512_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt); @@ -770,7 +767,7 @@ Skein1024_Final_Pad(Skein1024_Ctxt_t *ctx, uint8_t *hashVal) ctx->h.T[1] |= SKEIN_T1_FLAG_FINAL; /* zero pad b[] if necessary */ if (ctx->h.bCnt < SKEIN1024_BLOCK_BYTES) - bzero(&ctx->b[ctx->h.bCnt], + memset(&ctx->b[ctx->h.bCnt], 0, SKEIN1024_BLOCK_BYTES - ctx->h.bCnt); /* process the final block */ Skein1024_Process_Block(ctx, ctx->b, 1, ctx->h.bCnt); @@ -798,13 +795,12 @@ Skein_256_Output(Skein_256_Ctxt_t *ctx, uint8_t *hashVal) /* run Threefish in "counter mode" to generate output */ /* zero out b[], so it can hold the counter */ - bzero(ctx->b, sizeof (ctx->b)); + memset(ctx->b, 0, sizeof (ctx->b)); /* keep a local copy of counter mode "key" */ - bcopy(ctx->X, X, sizeof (X)); + memcpy(X, ctx->X, sizeof (X)); for (i = 0; i * SKEIN_256_BLOCK_BYTES < byteCnt; i++) { /* build the counter block */ - uint64_t tmp = Skein_Swap64((uint64_t)i); - bcopy(&tmp, ctx->b, sizeof (tmp)); + *(uint64_t *)ctx->b = Skein_Swap64((uint64_t)i); Skein_Start_New_Type(ctx, OUT_FINAL); /* run "counter mode" */ Skein_256_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t)); @@ -817,7 +813,7 @@ Skein_256_Output(Skein_256_Ctxt_t *ctx, uint8_t *hashVal) Skein_Show_Final(256, &ctx->h, n, hashVal + i * SKEIN_256_BLOCK_BYTES); /* restore the counter mode key for next time */ - bcopy(X, ctx->X, sizeof (X)); + memcpy(ctx->X, X, sizeof (X)); } return (SKEIN_SUCCESS); } @@ -838,13 +834,12 @@ Skein_512_Output(Skein_512_Ctxt_t *ctx, uint8_t *hashVal) /* run Threefish in "counter mode" to generate output */ /* zero out b[], so it can hold the counter */ - bzero(ctx->b, sizeof (ctx->b)); + memset(ctx->b, 0, sizeof (ctx->b)); /* keep a local copy of counter mode "key" */ - bcopy(ctx->X, X, sizeof (X)); + memcpy(X, ctx->X, sizeof (X)); for (i = 0; i * SKEIN_512_BLOCK_BYTES < byteCnt; i++) { /* build the counter block */ - uint64_t tmp = Skein_Swap64((uint64_t)i); - bcopy(&tmp, ctx->b, sizeof (tmp)); + 
*(uint64_t *)ctx->b = Skein_Swap64((uint64_t)i); Skein_Start_New_Type(ctx, OUT_FINAL); /* run "counter mode" */ Skein_512_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t)); @@ -857,7 +852,7 @@ Skein_512_Output(Skein_512_Ctxt_t *ctx, uint8_t *hashVal) Skein_Show_Final(256, &ctx->h, n, hashVal + i * SKEIN_512_BLOCK_BYTES); /* restore the counter mode key for next time */ - bcopy(X, ctx->X, sizeof (X)); + memcpy(ctx->X, X, sizeof (X)); } return (SKEIN_SUCCESS); } @@ -878,13 +873,12 @@ Skein1024_Output(Skein1024_Ctxt_t *ctx, uint8_t *hashVal) /* run Threefish in "counter mode" to generate output */ /* zero out b[], so it can hold the counter */ - bzero(ctx->b, sizeof (ctx->b)); + memset(ctx->b, 0, sizeof (ctx->b)); /* keep a local copy of counter mode "key" */ - bcopy(ctx->X, X, sizeof (X)); + memcpy(X, ctx->X, sizeof (X)); for (i = 0; i * SKEIN1024_BLOCK_BYTES < byteCnt; i++) { /* build the counter block */ - uint64_t tmp = Skein_Swap64((uint64_t)i); - bcopy(&tmp, ctx->b, sizeof (tmp)); + *(uint64_t *)ctx->b = Skein_Swap64((uint64_t)i); Skein_Start_New_Type(ctx, OUT_FINAL); /* run "counter mode" */ Skein1024_Process_Block(ctx, ctx->b, 1, sizeof (uint64_t)); @@ -897,7 +891,7 @@ Skein1024_Output(Skein1024_Ctxt_t *ctx, uint8_t *hashVal) Skein_Show_Final(256, &ctx->h, n, hashVal + i * SKEIN1024_BLOCK_BYTES); /* restore the counter mode key for next time */ - bcopy(X, ctx->X, sizeof (X)); + memcpy(ctx->X, X, sizeof (X)); } return (SKEIN_SUCCESS); } diff --git a/sys/contrib/openzfs/module/icp/algs/skein/skein_block.c b/sys/contrib/openzfs/module/icp/algs/skein/skein_block.c index 7ba165a48511..3ad52da5f6a3 100644 --- a/sys/contrib/openzfs/module/icp/algs/skein/skein_block.c +++ b/sys/contrib/openzfs/module/icp/algs/skein/skein_block.c @@ -30,7 +30,9 @@ * the #pragma here to ignore the warning. 
*/ #if defined(_ILP32) || defined(__powerpc) /* Assume small stack */ +#if defined(__GNUC__) && !defined(__clang__) #pragma GCC diagnostic ignored "-Wframe-larger-than=" +#endif /* * We're running on 32-bit, don't unroll loops to save stack frame space * diff --git a/sys/contrib/openzfs/module/icp/algs/skein/skein_impl.h b/sys/contrib/openzfs/module/icp/algs/skein/skein_impl.h index 2f6307fa7b55..eff19ce83f81 100644 --- a/sys/contrib/openzfs/module/icp/algs/skein/skein_impl.h +++ b/sys/contrib/openzfs/module/icp/algs/skein/skein_impl.h @@ -25,7 +25,7 @@ #define _SKEIN_IMPL_H_ #include <sys/skein.h> -#include <sys/strings.h> +#include <sys/string.h> #include "skein_impl.h" #include "skein_port.h" @@ -263,8 +263,6 @@ extern const uint64_t SKEIN_256_IV_128[]; extern const uint64_t SKEIN_256_IV_160[]; extern const uint64_t SKEIN_256_IV_224[]; extern const uint64_t SKEIN_256_IV_256[]; -extern const uint64_t SKEIN_512_IV_128[]; -extern const uint64_t SKEIN_512_IV_160[]; extern const uint64_t SKEIN_512_IV_224[]; extern const uint64_t SKEIN_512_IV_256[]; extern const uint64_t SKEIN_512_IV_384[]; diff --git a/sys/contrib/openzfs/module/icp/algs/skein/skein_iv.c b/sys/contrib/openzfs/module/icp/algs/skein/skein_iv.c index 140d38f76547..84cefe4546ca 100644 --- a/sys/contrib/openzfs/module/icp/algs/skein/skein_iv.c +++ b/sys/contrib/openzfs/module/icp/algs/skein/skein_iv.c @@ -52,30 +52,6 @@ const uint64_t SKEIN_256_IV_256[] = { MK_64(0x6A54E920, 0xFDE8DA69) }; -/* blkSize = 512 bits. hashSize = 128 bits */ -const uint64_t SKEIN_512_IV_128[] = { - MK_64(0xA8BC7BF3, 0x6FBF9F52), - MK_64(0x1E9872CE, 0xBD1AF0AA), - MK_64(0x309B1790, 0xB32190D3), - MK_64(0xBCFBB854, 0x3F94805C), - MK_64(0x0DA61BCD, 0x6E31B11B), - MK_64(0x1A18EBEA, 0xD46A32E3), - MK_64(0xA2CC5B18, 0xCE84AA82), - MK_64(0x6982AB28, 0x9D46982D) -}; - -/* blkSize = 512 bits. 
hashSize = 160 bits */ -const uint64_t SKEIN_512_IV_160[] = { - MK_64(0x28B81A2A, 0xE013BD91), - MK_64(0xC2F11668, 0xB5BDF78F), - MK_64(0x1760D8F3, 0xF6A56F12), - MK_64(0x4FB74758, 0x8239904F), - MK_64(0x21EDE07F, 0x7EAF5056), - MK_64(0xD908922E, 0x63ED70B8), - MK_64(0xB8EC76FF, 0xECCB52FA), - MK_64(0x01A47BB8, 0xA3F27A6E) -}; - /* blkSize = 512 bits. hashSize = 224 bits */ const uint64_t SKEIN_512_IV_224[] = { MK_64(0xCCD06162, 0x48677224), diff --git a/sys/contrib/openzfs/module/icp/algs/skein/skein_port.h b/sys/contrib/openzfs/module/icp/algs/skein/skein_port.h index ce4353082552..96d1266d019e 100644 --- a/sys/contrib/openzfs/module/icp/algs/skein/skein_port.h +++ b/sys/contrib/openzfs/module/icp/algs/skein/skein_port.h @@ -50,9 +50,9 @@ #else /* here for x86 and x86-64 CPUs (and other detected little-endian CPUs) */ #define SKEIN_NEED_SWAP (0) -#define Skein_Put64_LSB_First(dst08, src64, bCnt) bcopy(src64, dst08, bCnt) +#define Skein_Put64_LSB_First(dst08, src64, bCnt) memcpy(dst08, src64, bCnt) #define Skein_Get64_LSB_First(dst64, src08, wCnt) \ - bcopy(src08, dst64, 8 * (wCnt)) + memcpy(dst64, src08, 8 * (wCnt)) #endif #endif /* ifndef SKEIN_NEED_SWAP */ diff --git a/sys/contrib/openzfs/module/icp/api/kcf_cipher.c b/sys/contrib/openzfs/module/icp/api/kcf_cipher.c index d6aa48147edb..4bea46807197 100644 --- a/sys/contrib/openzfs/module/icp/api/kcf_cipher.c +++ b/sys/contrib/openzfs/module/icp/api/kcf_cipher.c @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * @@ -34,253 +34,11 @@ * Encryption and decryption routines. */ -/* - * The following are the possible returned values common to all the routines - * below. 
The applicability of some of these return values depends on the - * presence of the arguments. - * - * CRYPTO_SUCCESS: The operation completed successfully. - * CRYPTO_QUEUED: A request was submitted successfully. The callback - * routine will be called when the operation is done. - * CRYPTO_INVALID_MECH_NUMBER, CRYPTO_INVALID_MECH_PARAM, or - * CRYPTO_INVALID_MECH for problems with the 'mech'. - * CRYPTO_INVALID_DATA for bogus 'data' - * CRYPTO_HOST_MEMORY for failure to allocate memory to handle this work. - * CRYPTO_INVALID_CONTEXT: Not a valid context. - * CRYPTO_BUSY: Cannot process the request now. Schedule a - * crypto_bufcall(), or try later. - * CRYPTO_NOT_SUPPORTED and CRYPTO_MECH_NOT_SUPPORTED: No provider is - * capable of a function or a mechanism. - * CRYPTO_INVALID_KEY: bogus 'key' argument. - * CRYPTO_INVALID_PLAINTEXT: bogus 'plaintext' argument. - * CRYPTO_INVALID_CIPHERTEXT: bogus 'ciphertext' argument. - */ /* - * crypto_cipher_init_prov() + * crypto_encrypt() * * Arguments: - * - * pd: provider descriptor - * sid: session id - * mech: crypto_mechanism_t pointer. - * mech_type is a valid value previously returned by - * crypto_mech2id(); - * When the mech's parameter is not NULL, its definition depends - * on the standard definition of the mechanism. - * key: pointer to a crypto_key_t structure. - * tmpl: a crypto_ctx_template_t, opaque template of a context of an - * encryption or decryption with the 'mech' using 'key'. - * 'tmpl' is created by a previous call to - * crypto_create_ctx_template(). - * ctxp: Pointer to a crypto_context_t. - * func: CRYPTO_FG_ENCRYPT or CRYPTO_FG_DECRYPT. - * cr: crypto_call_req_t calling conditions and call back info. - * - * Description: - * This is a common function invoked internally by both - * crypto_encrypt_init() and crypto_decrypt_init(). - * Asynchronously submits a request for, or synchronously performs the - * initialization of an encryption or a decryption operation. 
- * When possible and applicable, will internally use the pre-expanded key - * schedule from the context template, tmpl. - * When complete and successful, 'ctxp' will contain a crypto_context_t - * valid for later calls to encrypt_update() and encrypt_final(), or - * decrypt_update() and decrypt_final(). - * The caller should hold a reference on the specified provider - * descriptor before calling this function. - * - * Context: - * Process or interrupt, according to the semantics dictated by the 'cr'. - * - * Returns: - * See comment in the beginning of the file. - */ -static int -crypto_cipher_init_prov(crypto_provider_t provider, crypto_session_id_t sid, - crypto_mechanism_t *mech, crypto_key_t *key, - crypto_spi_ctx_template_t tmpl, crypto_context_t *ctxp, - crypto_call_req_t *crq, crypto_func_group_t func) -{ - int error; - crypto_ctx_t *ctx; - kcf_req_params_t params; - kcf_provider_desc_t *pd = provider; - kcf_provider_desc_t *real_provider = pd; - - ASSERT(KCF_PROV_REFHELD(pd)); - - if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) { - if (func == CRYPTO_FG_ENCRYPT) { - error = kcf_get_hardware_provider(mech->cm_type, - CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq), pd, - &real_provider, CRYPTO_FG_ENCRYPT); - } else { - error = kcf_get_hardware_provider(mech->cm_type, - CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq), pd, - &real_provider, CRYPTO_FG_DECRYPT); - } - - if (error != CRYPTO_SUCCESS) - return (error); - } - - /* Allocate and initialize the canonical context */ - if ((ctx = kcf_new_ctx(crq, real_provider, sid)) == NULL) { - if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) - KCF_PROV_REFRELE(real_provider); - return (CRYPTO_HOST_MEMORY); - } - - /* The fast path for SW providers. 
*/ - if (CHECK_FASTPATH(crq, pd)) { - crypto_mechanism_t lmech; - - lmech = *mech; - KCF_SET_PROVIDER_MECHNUM(mech->cm_type, real_provider, &lmech); - - if (func == CRYPTO_FG_ENCRYPT) - error = KCF_PROV_ENCRYPT_INIT(real_provider, ctx, - &lmech, key, tmpl, KCF_SWFP_RHNDL(crq)); - else { - ASSERT(func == CRYPTO_FG_DECRYPT); - - error = KCF_PROV_DECRYPT_INIT(real_provider, ctx, - &lmech, key, tmpl, KCF_SWFP_RHNDL(crq)); - } - KCF_PROV_INCRSTATS(pd, error); - - goto done; - } - - /* Check if context sharing is possible */ - if (pd->pd_prov_type == CRYPTO_HW_PROVIDER && - key->ck_format == CRYPTO_KEY_RAW && - KCF_CAN_SHARE_OPSTATE(pd, mech->cm_type)) { - kcf_context_t *tctxp = (kcf_context_t *)ctx; - kcf_provider_desc_t *tpd = NULL; - crypto_mech_info_t *sinfo; - - if ((kcf_get_sw_prov(mech->cm_type, &tpd, &tctxp->kc_mech, - B_FALSE) == CRYPTO_SUCCESS)) { - int tlen; - - sinfo = &(KCF_TO_PROV_MECHINFO(tpd, mech->cm_type)); - /* - * key->ck_length from the consumer is always in bits. - * We convert it to be in the same unit registered by - * the provider in order to do a comparison. - */ - if (sinfo->cm_mech_flags & CRYPTO_KEYSIZE_UNIT_IN_BYTES) - tlen = key->ck_length >> 3; - else - tlen = key->ck_length; - /* - * Check if the software provider can support context - * sharing and support this key length. 
- */ - if ((sinfo->cm_mech_flags & CRYPTO_CAN_SHARE_OPSTATE) && - (tlen >= sinfo->cm_min_key_length) && - (tlen <= sinfo->cm_max_key_length)) { - ctx->cc_flags = CRYPTO_INIT_OPSTATE; - tctxp->kc_sw_prov_desc = tpd; - } else - KCF_PROV_REFRELE(tpd); - } - } - - if (func == CRYPTO_FG_ENCRYPT) { - KCF_WRAP_ENCRYPT_OPS_PARAMS(&params, KCF_OP_INIT, sid, - mech, key, NULL, NULL, tmpl); - } else { - ASSERT(func == CRYPTO_FG_DECRYPT); - KCF_WRAP_DECRYPT_OPS_PARAMS(&params, KCF_OP_INIT, sid, - mech, key, NULL, NULL, tmpl); - } - - error = kcf_submit_request(real_provider, ctx, crq, &params, - B_FALSE); - - if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) - KCF_PROV_REFRELE(real_provider); - -done: - if ((error == CRYPTO_SUCCESS) || (error == CRYPTO_QUEUED)) - *ctxp = (crypto_context_t)ctx; - else { - /* Release the hold done in kcf_new_ctx(). */ - KCF_CONTEXT_REFRELE((kcf_context_t *)ctx->cc_framework_private); - } - - return (error); -} - -/* - * Same as crypto_cipher_init_prov(), but relies on the scheduler to pick - * an appropriate provider. See crypto_cipher_init_prov() comments for more - * details.
- */ -static int -crypto_cipher_init(crypto_mechanism_t *mech, crypto_key_t *key, - crypto_ctx_template_t tmpl, crypto_context_t *ctxp, - crypto_call_req_t *crq, crypto_func_group_t func) -{ - int error; - kcf_mech_entry_t *me; - kcf_provider_desc_t *pd; - kcf_ctx_template_t *ctx_tmpl; - crypto_spi_ctx_template_t spi_ctx_tmpl = NULL; - kcf_prov_tried_t *list = NULL; - -retry: - /* pd is returned held */ - if ((pd = kcf_get_mech_provider(mech->cm_type, &me, &error, - list, func, CHECK_RESTRICT(crq), 0)) == NULL) { - if (list != NULL) - kcf_free_triedlist(list); - return (error); - } - - /* - * For SW providers, check the validity of the context template - * It is very rare that the generation number mis-matches, so - * is acceptable to fail here, and let the consumer recover by - * freeing this tmpl and create a new one for the key and new SW - * provider - */ - if ((pd->pd_prov_type == CRYPTO_SW_PROVIDER) && - ((ctx_tmpl = (kcf_ctx_template_t *)tmpl) != NULL)) { - if (ctx_tmpl->ct_generation != me->me_gen_swprov) { - if (list != NULL) - kcf_free_triedlist(list); - KCF_PROV_REFRELE(pd); - return (CRYPTO_OLD_CTX_TEMPLATE); - } else { - spi_ctx_tmpl = ctx_tmpl->ct_prov_tmpl; - } - } - - error = crypto_cipher_init_prov(pd, pd->pd_sid, mech, key, - spi_ctx_tmpl, ctxp, crq, func); - if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED && - IS_RECOVERABLE(error)) { - /* Add pd to the linked list of providers tried. */ - if (kcf_insert_triedlist(&list, pd, KCF_KMFLAG(crq)) != NULL) - goto retry; - } - - if (list != NULL) - kcf_free_triedlist(list); - - KCF_PROV_REFRELE(pd); - return (error); -} - -/* - * crypto_encrypt_prov() - * - * Arguments: - * pd: provider descriptor * sid: session id * mech: crypto_mechanism_t pointer. * mech_type is a valid value previously returned by @@ -294,7 +52,6 @@ retry: * tmpl: a crypto_ctx_template_t, opaque template of a context of an * encryption with the 'mech' using 'key'. 
'tmpl' is created by * a previous call to crypto_create_ctx_template(). - * cr: crypto_call_req_t calling conditions and call back info. * * Description: * Asynchronously submits a request for, or synchronously performs a @@ -302,57 +59,17 @@ retry: * the key 'key'. * When complete and successful, 'ciphertext' will contain the encrypted * message. - * - * Context: - * Process or interrupt, according to the semantics dictated by the 'cr'. + * Relies on the KCF scheduler to pick a provider. * * Returns: * See comment in the beginning of the file. */ int -crypto_encrypt_prov(crypto_provider_t provider, crypto_session_id_t sid, - crypto_mechanism_t *mech, crypto_data_t *plaintext, crypto_key_t *key, - crypto_ctx_template_t tmpl, crypto_data_t *ciphertext, - crypto_call_req_t *crq) -{ - kcf_req_params_t params; - kcf_provider_desc_t *pd = provider; - kcf_provider_desc_t *real_provider = pd; - int error; - - ASSERT(KCF_PROV_REFHELD(pd)); - - if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) { - error = kcf_get_hardware_provider(mech->cm_type, - CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq), pd, - &real_provider, CRYPTO_FG_ENCRYPT_ATOMIC); - - if (error != CRYPTO_SUCCESS) - return (error); - } - - KCF_WRAP_ENCRYPT_OPS_PARAMS(&params, KCF_OP_ATOMIC, sid, mech, key, - plaintext, ciphertext, tmpl); - - error = kcf_submit_request(real_provider, NULL, crq, &params, B_FALSE); - if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) - KCF_PROV_REFRELE(real_provider); - - return (error); -} - -/* - * Same as crypto_encrypt_prov(), but relies on the scheduler to pick - * a provider. See crypto_encrypt_prov() for more details.
- */ -int crypto_encrypt(crypto_mechanism_t *mech, crypto_data_t *plaintext, - crypto_key_t *key, crypto_ctx_template_t tmpl, crypto_data_t *ciphertext, - crypto_call_req_t *crq) + crypto_key_t *key, crypto_ctx_template_t tmpl, crypto_data_t *ciphertext) { int error; kcf_mech_entry_t *me; - kcf_req_params_t params; kcf_provider_desc_t *pd; kcf_ctx_template_t *ctx_tmpl; crypto_spi_ctx_template_t spi_ctx_tmpl = NULL; @@ -361,52 +78,23 @@ crypto_encrypt(crypto_mechanism_t *mech, crypto_data_t *plaintext, retry: /* pd is returned held */ if ((pd = kcf_get_mech_provider(mech->cm_type, &me, &error, - list, CRYPTO_FG_ENCRYPT_ATOMIC, CHECK_RESTRICT(crq), - plaintext->cd_length)) == NULL) { + list, CRYPTO_FG_ENCRYPT_ATOMIC)) == NULL) { if (list != NULL) kcf_free_triedlist(list); return (error); } - /* - * For SW providers, check the validity of the context template - * It is very rare that the generation number mis-matches, so - * is acceptable to fail here, and let the consumer recover by - * freeing this tmpl and create a new one for the key and new SW - * provider - */ - if ((pd->pd_prov_type == CRYPTO_SW_PROVIDER) && - ((ctx_tmpl = (kcf_ctx_template_t *)tmpl) != NULL)) { - if (ctx_tmpl->ct_generation != me->me_gen_swprov) { - if (list != NULL) - kcf_free_triedlist(list); - KCF_PROV_REFRELE(pd); - return (CRYPTO_OLD_CTX_TEMPLATE); - } else { - spi_ctx_tmpl = ctx_tmpl->ct_prov_tmpl; - } - } - - /* The fast path for SW providers. 
*/ - if (CHECK_FASTPATH(crq, pd)) { - crypto_mechanism_t lmech; + if (((ctx_tmpl = (kcf_ctx_template_t *)tmpl) != NULL)) + spi_ctx_tmpl = ctx_tmpl->ct_prov_tmpl; - lmech = *mech; - KCF_SET_PROVIDER_MECHNUM(mech->cm_type, pd, &lmech); + crypto_mechanism_t lmech = *mech; + KCF_SET_PROVIDER_MECHNUM(mech->cm_type, pd, &lmech); + error = KCF_PROV_ENCRYPT_ATOMIC(pd, &lmech, key, + plaintext, ciphertext, spi_ctx_tmpl); - error = KCF_PROV_ENCRYPT_ATOMIC(pd, pd->pd_sid, &lmech, key, - plaintext, ciphertext, spi_ctx_tmpl, KCF_SWFP_RHNDL(crq)); - KCF_PROV_INCRSTATS(pd, error); - } else { - KCF_WRAP_ENCRYPT_OPS_PARAMS(&params, KCF_OP_ATOMIC, pd->pd_sid, - mech, key, plaintext, ciphertext, spi_ctx_tmpl); - error = kcf_submit_request(pd, NULL, crq, &params, B_FALSE); - } - - if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED && - IS_RECOVERABLE(error)) { + if (error != CRYPTO_SUCCESS && IS_RECOVERABLE(error)) { /* Add pd to the linked list of providers tried. */ - if (kcf_insert_triedlist(&list, pd, KCF_KMFLAG(crq)) != NULL) + if (kcf_insert_triedlist(&list, pd, KM_SLEEP) != NULL) goto retry; } @@ -418,147 +106,6 @@ retry: } /* - * crypto_encrypt_init_prov() - * - * Calls crypto_cipher_init_prov() to initialize an encryption operation.
- */ -int -crypto_encrypt_init_prov(crypto_provider_t pd, crypto_session_id_t sid, - crypto_mechanism_t *mech, crypto_key_t *key, - crypto_ctx_template_t tmpl, crypto_context_t *ctxp, - crypto_call_req_t *crq) -{ - return (crypto_cipher_init_prov(pd, sid, mech, key, tmpl, ctxp, crq, - CRYPTO_FG_ENCRYPT)); -} - -/* - * crypto_encrypt_init() - * - * Calls crypto_cipher_init() to initialize an encryption operation - */ -int -crypto_encrypt_init(crypto_mechanism_t *mech, crypto_key_t *key, - crypto_ctx_template_t tmpl, crypto_context_t *ctxp, - crypto_call_req_t *crq) -{ - return (crypto_cipher_init(mech, key, tmpl, ctxp, crq, - CRYPTO_FG_ENCRYPT)); -} - -/* - * crypto_encrypt_update() - * - * Arguments: - * context: A crypto_context_t initialized by encrypt_init(). - * plaintext: The message part to be encrypted - * ciphertext: Storage for the encrypted message part. - * cr: crypto_call_req_t calling conditions and call back info. - * - * Description: - * Asynchronously submits a request for, or synchronously performs a - * part of an encryption operation. - * - * Context: - * Process or interrupt, according to the semantics dictated by the 'cr'. - * - * Returns: - * See comment in the beginning of the file. - */ -int -crypto_encrypt_update(crypto_context_t context, crypto_data_t *plaintext, - crypto_data_t *ciphertext, crypto_call_req_t *cr) -{ - crypto_ctx_t *ctx = (crypto_ctx_t *)context; - kcf_context_t *kcf_ctx; - kcf_provider_desc_t *pd; - int error; - kcf_req_params_t params; - - if ((ctx == NULL) || - ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) || - ((pd = kcf_ctx->kc_prov_desc) == NULL)) { - return (CRYPTO_INVALID_CONTEXT); - } - - ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER); - - /* The fast path for SW providers. 
*/ - if (CHECK_FASTPATH(cr, pd)) { - error = KCF_PROV_ENCRYPT_UPDATE(pd, ctx, plaintext, - ciphertext, NULL); - KCF_PROV_INCRSTATS(pd, error); - return (error); - } - - /* Check if we should use a software provider for small jobs */ - if ((ctx->cc_flags & CRYPTO_USE_OPSTATE) && cr == NULL) { - if (plaintext->cd_length < kcf_ctx->kc_mech->me_threshold && - kcf_ctx->kc_sw_prov_desc != NULL && - KCF_IS_PROV_USABLE(kcf_ctx->kc_sw_prov_desc)) { - pd = kcf_ctx->kc_sw_prov_desc; - } - } - - KCF_WRAP_ENCRYPT_OPS_PARAMS(&params, KCF_OP_UPDATE, - ctx->cc_session, NULL, NULL, plaintext, ciphertext, NULL); - error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE); - - return (error); -} - -/* - * crypto_encrypt_final() - * - * Arguments: - * context: A crypto_context_t initialized by encrypt_init(). - * ciphertext: Storage for the last part of encrypted message - * cr: crypto_call_req_t calling conditions and call back info. - * - * Description: - * Asynchronously submits a request for, or synchronously performs the - * final part of an encryption operation. - * - * Context: - * Process or interrupt, according to the semantics dictated by the 'cr'. - * - * Returns: - * See comment in the beginning of the file. - */ -int -crypto_encrypt_final(crypto_context_t context, crypto_data_t *ciphertext, - crypto_call_req_t *cr) -{ - crypto_ctx_t *ctx = (crypto_ctx_t *)context; - kcf_context_t *kcf_ctx; - kcf_provider_desc_t *pd; - int error; - kcf_req_params_t params; - - if ((ctx == NULL) || - ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) || - ((pd = kcf_ctx->kc_prov_desc) == NULL)) { - return (CRYPTO_INVALID_CONTEXT); - } - - ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER); - - /* The fast path for SW providers.
*/ - if (CHECK_FASTPATH(cr, pd)) { - error = KCF_PROV_ENCRYPT_FINAL(pd, ctx, ciphertext, NULL); - KCF_PROV_INCRSTATS(pd, error); - } else { - KCF_WRAP_ENCRYPT_OPS_PARAMS(&params, KCF_OP_FINAL, - ctx->cc_session, NULL, NULL, NULL, ciphertext, NULL); - error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE); - } - - /* Release the hold done in kcf_new_ctx() during init step. */ - KCF_CONTEXT_COND_RELEASE(error, kcf_ctx); - return (error); -} - -/* * crypto_decrypt_prov() * * Arguments: @@ -576,7 +123,6 @@ crypto_encrypt_final(crypto_context_t context, crypto_data_t *ciphertext, * tmpl: a crypto_ctx_template_t, opaque template of a context of an * encryption with the 'mech' using 'key'. 'tmpl' is created by * a previous call to crypto_create_ctx_template(). - * cr: crypto_call_req_t calling conditions and call back info. * * Description: * Asynchronously submits a request for, or synchronously performs a @@ -584,58 +130,17 @@ crypto_encrypt_final(crypto_context_t context, crypto_data_t *ciphertext, * the key 'key'. * When complete and successful, 'plaintext' will contain the decrypted * message. - * - * Context: - * Process or interrupt, according to the semantics dictated by the 'cr'. + * Relies on the KCF scheduler to choose a provider. * * Returns: * See comment in the beginning of the file.
*/ int -crypto_decrypt_prov(crypto_provider_t provider, crypto_session_id_t sid, - crypto_mechanism_t *mech, crypto_data_t *ciphertext, crypto_key_t *key, - crypto_ctx_template_t tmpl, crypto_data_t *plaintext, - crypto_call_req_t *crq) -{ - kcf_req_params_t params; - kcf_provider_desc_t *pd = provider; - kcf_provider_desc_t *real_provider = pd; - int rv; - - ASSERT(KCF_PROV_REFHELD(pd)); - - if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) { - rv = kcf_get_hardware_provider(mech->cm_type, - CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq), pd, - &real_provider, CRYPTO_FG_DECRYPT_ATOMIC); - - if (rv != CRYPTO_SUCCESS) - return (rv); - } - - KCF_WRAP_DECRYPT_OPS_PARAMS(&params, KCF_OP_ATOMIC, sid, mech, key, - ciphertext, plaintext, tmpl); - - rv = kcf_submit_request(real_provider, NULL, crq, &params, B_FALSE); - if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) - KCF_PROV_REFRELE(real_provider); - - return (rv); -} - -/* - * Same as crypto_decrypt_prov(), but relies on the KCF scheduler to - * choose a provider. See crypto_decrypt_prov() comments for more - * information.
- */ -int crypto_decrypt(crypto_mechanism_t *mech, crypto_data_t *ciphertext, - crypto_key_t *key, crypto_ctx_template_t tmpl, crypto_data_t *plaintext, - crypto_call_req_t *crq) + crypto_key_t *key, crypto_ctx_template_t tmpl, crypto_data_t *plaintext) { int error; kcf_mech_entry_t *me; - kcf_req_params_t params; kcf_provider_desc_t *pd; kcf_ctx_template_t *ctx_tmpl; crypto_spi_ctx_template_t spi_ctx_tmpl = NULL; @@ -644,52 +149,24 @@ crypto_decrypt(crypto_mechanism_t *mech, crypto_data_t *ciphertext, retry: /* pd is returned held */ if ((pd = kcf_get_mech_provider(mech->cm_type, &me, &error, - list, CRYPTO_FG_DECRYPT_ATOMIC, CHECK_RESTRICT(crq), - ciphertext->cd_length)) == NULL) { + list, CRYPTO_FG_DECRYPT_ATOMIC)) == NULL) { if (list != NULL) kcf_free_triedlist(list); return (error); } - /* - * For SW providers, check the validity of the context template - * It is very rare that the generation number mis-matches, so - * is acceptable to fail here, and let the consumer recover by - * freeing this tmpl and create a new one for the key and new SW - * provider - */ - if ((pd->pd_prov_type == CRYPTO_SW_PROVIDER) && - ((ctx_tmpl = (kcf_ctx_template_t *)tmpl) != NULL)) { - if (ctx_tmpl->ct_generation != me->me_gen_swprov) { - if (list != NULL) - kcf_free_triedlist(list); - KCF_PROV_REFRELE(pd); - return (CRYPTO_OLD_CTX_TEMPLATE); - } else { - spi_ctx_tmpl = ctx_tmpl->ct_prov_tmpl; - } - } - - /* The fast path for SW providers. 
*/ - if (CHECK_FASTPATH(crq, pd)) { - crypto_mechanism_t lmech; + if (((ctx_tmpl = (kcf_ctx_template_t *)tmpl) != NULL)) + spi_ctx_tmpl = ctx_tmpl->ct_prov_tmpl; - lmech = *mech; - KCF_SET_PROVIDER_MECHNUM(mech->cm_type, pd, &lmech); + crypto_mechanism_t lmech = *mech; + KCF_SET_PROVIDER_MECHNUM(mech->cm_type, pd, &lmech); - error = KCF_PROV_DECRYPT_ATOMIC(pd, pd->pd_sid, &lmech, key, - ciphertext, plaintext, spi_ctx_tmpl, KCF_SWFP_RHNDL(crq)); - KCF_PROV_INCRSTATS(pd, error); - } else { - KCF_WRAP_DECRYPT_OPS_PARAMS(&params, KCF_OP_ATOMIC, pd->pd_sid, - mech, key, ciphertext, plaintext, spi_ctx_tmpl); - error = kcf_submit_request(pd, NULL, crq, &params, B_FALSE); - } + error = KCF_PROV_DECRYPT_ATOMIC(pd, &lmech, key, + ciphertext, plaintext, spi_ctx_tmpl); - if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED && - IS_RECOVERABLE(error)) { + if (error != CRYPTO_SUCCESS && IS_RECOVERABLE(error)) { /* Add pd to the linked list of providers tried. */ - if (kcf_insert_triedlist(&list, pd, KCF_KMFLAG(crq)) != NULL) + if (kcf_insert_triedlist(&list, pd, KM_SLEEP) != NULL) goto retry; } @@ -700,231 +177,7 @@ retry: return (error); } -/* - * crypto_decrypt_init_prov() - * - * Calls crypto_cipher_init_prov() to initialize a decryption operation - */ -int -crypto_decrypt_init_prov(crypto_provider_t pd, crypto_session_id_t sid, - crypto_mechanism_t *mech, crypto_key_t *key, - crypto_ctx_template_t tmpl, crypto_context_t *ctxp, - crypto_call_req_t *crq) -{ - return (crypto_cipher_init_prov(pd, sid, mech, key, tmpl, ctxp, crq, - CRYPTO_FG_DECRYPT)); -} - -/* - * crypto_decrypt_init() - * - * Calls crypto_cipher_init() to initialize a decryption operation - */ -int -crypto_decrypt_init(crypto_mechanism_t *mech, crypto_key_t *key, - crypto_ctx_template_t tmpl, crypto_context_t *ctxp, - crypto_call_req_t *crq) -{ - return (crypto_cipher_init(mech, key, tmpl, ctxp, crq, - CRYPTO_FG_DECRYPT)); -} - -/* - * crypto_decrypt_update() - * - * Arguments: - * context: A crypto_context_t
initialized by decrypt_init(). - * ciphertext: The message part to be decrypted - * plaintext: Storage for the decrypted message part. - * cr: crypto_call_req_t calling conditions and call back info. - * - * Description: - * Asynchronously submits a request for, or synchronously performs a - * part of an decryption operation. - * - * Context: - * Process or interrupt, according to the semantics dictated by the 'cr'. - * - * Returns: - * See comment in the beginning of the file. - */ -int -crypto_decrypt_update(crypto_context_t context, crypto_data_t *ciphertext, - crypto_data_t *plaintext, crypto_call_req_t *cr) -{ - crypto_ctx_t *ctx = (crypto_ctx_t *)context; - kcf_context_t *kcf_ctx; - kcf_provider_desc_t *pd; - int error; - kcf_req_params_t params; - - if ((ctx == NULL) || - ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) || - ((pd = kcf_ctx->kc_prov_desc) == NULL)) { - return (CRYPTO_INVALID_CONTEXT); - } - - ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER); - - /* The fast path for SW providers. */ - if (CHECK_FASTPATH(cr, pd)) { - error = KCF_PROV_DECRYPT_UPDATE(pd, ctx, ciphertext, - plaintext, NULL); - KCF_PROV_INCRSTATS(pd, error); - return (error); - } - - /* Check if we should use a software provider for small jobs */ - if ((ctx->cc_flags & CRYPTO_USE_OPSTATE) && cr == NULL) { - if (ciphertext->cd_length < kcf_ctx->kc_mech->me_threshold && - kcf_ctx->kc_sw_prov_desc != NULL && - KCF_IS_PROV_USABLE(kcf_ctx->kc_sw_prov_desc)) { - pd = kcf_ctx->kc_sw_prov_desc; - } - } - - KCF_WRAP_DECRYPT_OPS_PARAMS(&params, KCF_OP_UPDATE, - ctx->cc_session, NULL, NULL, ciphertext, plaintext, NULL); - error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE); - - return (error); -} - -/* - * crypto_decrypt_final() - * - * Arguments: - * context: A crypto_context_t initialized by decrypt_init(). - * plaintext: Storage for the last part of the decrypted message - * cr: crypto_call_req_t calling conditions and call back info.
- * - * Description: - * Asynchronously submits a request for, or synchronously performs the - * final part of a decryption operation. - * - * Context: - * Process or interrupt, according to the semantics dictated by the 'cr'. - * - * Returns: - * See comment in the beginning of the file. - */ -int -crypto_decrypt_final(crypto_context_t context, crypto_data_t *plaintext, - crypto_call_req_t *cr) -{ - crypto_ctx_t *ctx = (crypto_ctx_t *)context; - kcf_context_t *kcf_ctx; - kcf_provider_desc_t *pd; - int error; - kcf_req_params_t params; - - if ((ctx == NULL) || - ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) || - ((pd = kcf_ctx->kc_prov_desc) == NULL)) { - return (CRYPTO_INVALID_CONTEXT); - } - - ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER); - - /* The fast path for SW providers. */ - if (CHECK_FASTPATH(cr, pd)) { - error = KCF_PROV_DECRYPT_FINAL(pd, ctx, plaintext, - NULL); - KCF_PROV_INCRSTATS(pd, error); - } else { - KCF_WRAP_DECRYPT_OPS_PARAMS(&params, KCF_OP_FINAL, - ctx->cc_session, NULL, NULL, NULL, plaintext, NULL); - error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE); - } - - /* Release the hold done in kcf_new_ctx() during init step. */ - KCF_CONTEXT_COND_RELEASE(error, kcf_ctx); - return (error); -} - -/* - * See comments for crypto_encrypt_update(). - */ -int -crypto_encrypt_single(crypto_context_t context, crypto_data_t *plaintext, - crypto_data_t *ciphertext, crypto_call_req_t *cr) -{ - crypto_ctx_t *ctx = (crypto_ctx_t *)context; - kcf_context_t *kcf_ctx; - kcf_provider_desc_t *pd; - int error; - kcf_req_params_t params; - - if ((ctx == NULL) || - ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) || - ((pd = kcf_ctx->kc_prov_desc) == NULL)) { - return (CRYPTO_INVALID_CONTEXT); - } - - /* The fast path for SW providers.
*/ - if (CHECK_FASTPATH(cr, pd)) { - error = KCF_PROV_ENCRYPT(pd, ctx, plaintext, - ciphertext, NULL); - KCF_PROV_INCRSTATS(pd, error); - } else { - KCF_WRAP_ENCRYPT_OPS_PARAMS(&params, KCF_OP_SINGLE, pd->pd_sid, - NULL, NULL, plaintext, ciphertext, NULL); - error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE); - } - - /* Release the hold done in kcf_new_ctx() during init step. */ - KCF_CONTEXT_COND_RELEASE(error, kcf_ctx); - return (error); -} - -/* - * See comments for crypto_decrypt_update(). - */ -int -crypto_decrypt_single(crypto_context_t context, crypto_data_t *ciphertext, - crypto_data_t *plaintext, crypto_call_req_t *cr) -{ - crypto_ctx_t *ctx = (crypto_ctx_t *)context; - kcf_context_t *kcf_ctx; - kcf_provider_desc_t *pd; - int error; - kcf_req_params_t params; - - if ((ctx == NULL) || - ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) || - ((pd = kcf_ctx->kc_prov_desc) == NULL)) { - return (CRYPTO_INVALID_CONTEXT); - } - - /* The fast path for SW providers. */ - if (CHECK_FASTPATH(cr, pd)) { - error = KCF_PROV_DECRYPT(pd, ctx, ciphertext, - plaintext, NULL); - KCF_PROV_INCRSTATS(pd, error); - } else { - KCF_WRAP_DECRYPT_OPS_PARAMS(&params, KCF_OP_SINGLE, pd->pd_sid, - NULL, NULL, ciphertext, plaintext, NULL); - error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE); - } - - /* Release the hold done in kcf_new_ctx() during init step.
*/ - KCF_CONTEXT_COND_RELEASE(error, kcf_ctx); - return (error); -} - #if defined(_KERNEL) -EXPORT_SYMBOL(crypto_encrypt_prov); EXPORT_SYMBOL(crypto_encrypt); -EXPORT_SYMBOL(crypto_encrypt_init_prov); -EXPORT_SYMBOL(crypto_encrypt_init); -EXPORT_SYMBOL(crypto_encrypt_update); -EXPORT_SYMBOL(crypto_encrypt_final); -EXPORT_SYMBOL(crypto_decrypt_prov); EXPORT_SYMBOL(crypto_decrypt); -EXPORT_SYMBOL(crypto_decrypt_init_prov); -EXPORT_SYMBOL(crypto_decrypt_init); -EXPORT_SYMBOL(crypto_decrypt_update); -EXPORT_SYMBOL(crypto_decrypt_final); -EXPORT_SYMBOL(crypto_encrypt_single); -EXPORT_SYMBOL(crypto_decrypt_single); #endif diff --git a/sys/contrib/openzfs/module/icp/api/kcf_ctxops.c b/sys/contrib/openzfs/module/icp/api/kcf_ctxops.c index 21b0977d3634..b8cd67ea7f67 100644 --- a/sys/contrib/openzfs/module/icp/api/kcf_ctxops.c +++ b/sys/contrib/openzfs/module/icp/api/kcf_ctxops.c @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * @@ -48,7 +48,6 @@ * ptmpl: a storage for the opaque crypto_ctx_template_t, allocated and * initialized by the software provider this routine is * dispatched to. - * kmflag: KM_SLEEP/KM_NOSLEEP mem. alloc. flag. 
* * Description: * Redirects the call to the software provider of the specified @@ -69,7 +68,7 @@ */ int crypto_create_ctx_template(crypto_mechanism_t *mech, crypto_key_t *key, - crypto_ctx_template_t *ptmpl, int kmflag) + crypto_ctx_template_t *ptmpl) { int error; kcf_mech_entry_t *me; @@ -89,8 +88,8 @@ crypto_create_ctx_template(crypto_mechanism_t *mech, crypto_key_t *key, if (error != CRYPTO_SUCCESS) return (error); - if ((ctx_tmpl = (kcf_ctx_template_t *)kmem_alloc( - sizeof (kcf_ctx_template_t), kmflag)) == NULL) { + if ((ctx_tmpl = kmem_alloc( + sizeof (kcf_ctx_template_t), KM_SLEEP)) == NULL) { KCF_PROV_REFRELE(pd); return (CRYPTO_HOST_MEMORY); } @@ -101,10 +100,9 @@ crypto_create_ctx_template(crypto_mechanism_t *mech, crypto_key_t *key, prov_mech.cm_param_len = mech->cm_param_len; error = KCF_PROV_CREATE_CTX_TEMPLATE(pd, &prov_mech, key, - &(ctx_tmpl->ct_prov_tmpl), &(ctx_tmpl->ct_size), KCF_RHNDL(kmflag)); + &(ctx_tmpl->ct_prov_tmpl), &(ctx_tmpl->ct_size)); if (error == CRYPTO_SUCCESS) { - ctx_tmpl->ct_generation = me->me_gen_swprov; *ptmpl = ctx_tmpl; } else { kmem_free(ctx_tmpl, sizeof (kcf_ctx_template_t)); @@ -140,7 +138,7 @@ crypto_destroy_ctx_template(crypto_ctx_template_t tmpl) ASSERT(ctx_tmpl->ct_prov_tmpl != NULL); - bzero(ctx_tmpl->ct_prov_tmpl, ctx_tmpl->ct_size); + memset(ctx_tmpl->ct_prov_tmpl, 0, ctx_tmpl->ct_size); kmem_free(ctx_tmpl->ct_prov_tmpl, ctx_tmpl->ct_size); kmem_free(ctx_tmpl, sizeof (kcf_ctx_template_t)); } diff --git a/sys/contrib/openzfs/module/icp/api/kcf_digest.c b/sys/contrib/openzfs/module/icp/api/kcf_digest.c deleted file mode 100644 index aa68d69bc162..000000000000 --- a/sys/contrib/openzfs/module/icp/api/kcf_digest.c +++ /dev/null @@ -1,491 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. 
- * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2007 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include <sys/zfs_context.h> -#include <sys/crypto/common.h> -#include <sys/crypto/impl.h> -#include <sys/crypto/api.h> -#include <sys/crypto/spi.h> -#include <sys/crypto/sched_impl.h> - -/* - * Message digest routines - */ - -/* - * The following are the possible returned values common to all the routines - * below. The applicability of some of these return values depends on the - * presence of the arguments. - * - * CRYPTO_SUCCESS: The operation completed successfully. - * CRYPTO_QUEUED: A request was submitted successfully. The callback - * routine will be called when the operation is done. - * CRYPTO_MECHANISM_INVALID or CRYPTO_INVALID_MECH_PARAM - * for problems with the 'mech'. - * CRYPTO_INVALID_DATA for bogus 'data' - * CRYPTO_HOST_MEMORY for failure to allocate memory to handle this work. - * CRYPTO_INVALID_CONTEXT: Not a valid context. - * CRYPTO_BUSY: Cannot process the request now. Schedule a - * crypto_bufcall(), or try later. - * CRYPTO_NOT_SUPPORTED and CRYPTO_MECH_NOT_SUPPORTED: - * No provider is capable of a function or a mechanism. - */ - - -/* - * crypto_digest_prov() - * - * Arguments: - * pd: pointer to the descriptor of the provider to use for this - * operation. - * sid: provider session id. - * mech: crypto_mechanism_t pointer. 
- * mech_type is a valid value previously returned by - * crypto_mech2id(); - * When the mech's parameter is not NULL, its definition depends - * on the standard definition of the mechanism. - * data: The message to be digested. - * digest: Storage for the digest. The length needed depends on the - * mechanism. - * cr: crypto_call_req_t calling conditions and call back info. - * - * Description: - * Asynchronously submits a request for, or synchronously performs the - * digesting operation of 'data' on the specified - * provider with the specified session. - * When complete and successful, 'digest' will contain the digest value. - * The caller should hold a reference on the specified provider - * descriptor before calling this function. - * - * Context: - * Process or interrupt, according to the semantics dictated by the 'cr'. - * - * Returns: - * See comment in the beginning of the file. - */ -int -crypto_digest_prov(crypto_provider_t provider, crypto_session_id_t sid, - crypto_mechanism_t *mech, crypto_data_t *data, crypto_data_t *digest, - crypto_call_req_t *crq) -{ - kcf_req_params_t params; - kcf_provider_desc_t *pd = provider; - kcf_provider_desc_t *real_provider = pd; - int rv; - - ASSERT(KCF_PROV_REFHELD(pd)); - - if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) { - rv = kcf_get_hardware_provider(mech->cm_type, - CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq), - pd, &real_provider, CRYPTO_FG_DIGEST_ATOMIC); - - if (rv != CRYPTO_SUCCESS) - return (rv); - } - KCF_WRAP_DIGEST_OPS_PARAMS(¶ms, KCF_OP_ATOMIC, sid, mech, NULL, - data, digest); - - /* no crypto context to carry between multiple parts. */ - rv = kcf_submit_request(real_provider, NULL, crq, ¶ms, B_FALSE); - if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) - KCF_PROV_REFRELE(real_provider); - - return (rv); -} - - -/* - * Same as crypto_digest_prov(), but relies on the KCF scheduler to - * choose a provider. See crypto_digest_prov() comments for more information. 
- */ -int -crypto_digest(crypto_mechanism_t *mech, crypto_data_t *data, - crypto_data_t *digest, crypto_call_req_t *crq) -{ - int error; - kcf_provider_desc_t *pd; - kcf_req_params_t params; - kcf_prov_tried_t *list = NULL; - -retry: - /* The pd is returned held */ - if ((pd = kcf_get_mech_provider(mech->cm_type, NULL, &error, list, - CRYPTO_FG_DIGEST_ATOMIC, CHECK_RESTRICT(crq), - data->cd_length)) == NULL) { - if (list != NULL) - kcf_free_triedlist(list); - return (error); - } - - /* The fast path for SW providers. */ - if (CHECK_FASTPATH(crq, pd)) { - crypto_mechanism_t lmech; - - lmech = *mech; - KCF_SET_PROVIDER_MECHNUM(mech->cm_type, pd, &lmech); - error = KCF_PROV_DIGEST_ATOMIC(pd, pd->pd_sid, &lmech, data, - digest, KCF_SWFP_RHNDL(crq)); - KCF_PROV_INCRSTATS(pd, error); - } else { - if (pd->pd_prov_type == CRYPTO_HW_PROVIDER && - (pd->pd_flags & CRYPTO_HASH_NO_UPDATE) && - (data->cd_length > pd->pd_hash_limit)) { - error = CRYPTO_BUFFER_TOO_BIG; - } else { - KCF_WRAP_DIGEST_OPS_PARAMS(¶ms, KCF_OP_ATOMIC, - pd->pd_sid, mech, NULL, data, digest); - - /* no crypto context to carry between multiple parts. */ - error = kcf_submit_request(pd, NULL, crq, ¶ms, - B_FALSE); - } - } - - if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED && - IS_RECOVERABLE(error)) { - /* Add pd to the linked list of providers tried. */ - if (kcf_insert_triedlist(&list, pd, KCF_KMFLAG(crq)) != NULL) - goto retry; - } - - if (list != NULL) - kcf_free_triedlist(list); - - KCF_PROV_REFRELE(pd); - return (error); -} - -/* - * crypto_digest_init_prov() - * - * pd: pointer to the descriptor of the provider to use for this - * operation. - * sid: provider session id. - * mech: crypto_mechanism_t pointer. - * mech_type is a valid value previously returned by - * crypto_mech2id(); - * When the mech's parameter is not NULL, its definition depends - * on the standard definition of the mechanism. - * ctxp: Pointer to a crypto_context_t. 
- * cr: crypto_call_req_t calling conditions and call back info. - * - * Description: - * Asynchronously submits a request for, or synchronously performs the - * initialization of a message digest operation on the specified - * provider with the specified session. - * When complete and successful, 'ctxp' will contain a crypto_context_t - * valid for later calls to digest_update() and digest_final(). - * The caller should hold a reference on the specified provider - * descriptor before calling this function. - */ -int -crypto_digest_init_prov(crypto_provider_t provider, crypto_session_id_t sid, - crypto_mechanism_t *mech, crypto_context_t *ctxp, crypto_call_req_t *crq) -{ - int error; - crypto_ctx_t *ctx; - kcf_req_params_t params; - kcf_provider_desc_t *pd = provider; - kcf_provider_desc_t *real_provider = pd; - - ASSERT(KCF_PROV_REFHELD(pd)); - - if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) { - error = kcf_get_hardware_provider(mech->cm_type, - CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq), pd, - &real_provider, CRYPTO_FG_DIGEST); - - if (error != CRYPTO_SUCCESS) - return (error); - } - - /* Allocate and initialize the canonical context */ - if ((ctx = kcf_new_ctx(crq, real_provider, sid)) == NULL) { - if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) - KCF_PROV_REFRELE(real_provider); - return (CRYPTO_HOST_MEMORY); - } - - /* The fast path for SW providers. 
*/ - if (CHECK_FASTPATH(crq, pd)) { - crypto_mechanism_t lmech; - - lmech = *mech; - KCF_SET_PROVIDER_MECHNUM(mech->cm_type, real_provider, &lmech); - error = KCF_PROV_DIGEST_INIT(real_provider, ctx, &lmech, - KCF_SWFP_RHNDL(crq)); - KCF_PROV_INCRSTATS(pd, error); - } else { - KCF_WRAP_DIGEST_OPS_PARAMS(¶ms, KCF_OP_INIT, sid, - mech, NULL, NULL, NULL); - error = kcf_submit_request(real_provider, ctx, crq, ¶ms, - B_FALSE); - } - - if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) - KCF_PROV_REFRELE(real_provider); - - if ((error == CRYPTO_SUCCESS) || (error == CRYPTO_QUEUED)) - *ctxp = (crypto_context_t)ctx; - else { - /* Release the hold done in kcf_new_ctx(). */ - KCF_CONTEXT_REFRELE((kcf_context_t *)ctx->cc_framework_private); - } - - return (error); -} - -/* - * Same as crypto_digest_init_prov(), but relies on the KCF scheduler - * to choose a provider. See crypto_digest_init_prov() comments for - * more information. - */ -int -crypto_digest_init(crypto_mechanism_t *mech, crypto_context_t *ctxp, - crypto_call_req_t *crq) -{ - int error; - kcf_provider_desc_t *pd; - kcf_prov_tried_t *list = NULL; - -retry: - /* The pd is returned held */ - if ((pd = kcf_get_mech_provider(mech->cm_type, NULL, &error, - list, CRYPTO_FG_DIGEST, CHECK_RESTRICT(crq), 0)) == NULL) { - if (list != NULL) - kcf_free_triedlist(list); - return (error); - } - - if (pd->pd_prov_type == CRYPTO_HW_PROVIDER && - (pd->pd_flags & CRYPTO_HASH_NO_UPDATE)) { - /* - * The hardware provider has limited digest support. - * So, we fallback early here to using a software provider. - * - * XXX - need to enhance to do the fallback later in - * crypto_digest_update() if the size of accumulated input data - * exceeds the maximum size digestable by hardware provider. 
- */ - error = CRYPTO_BUFFER_TOO_BIG; - } else { - error = crypto_digest_init_prov(pd, pd->pd_sid, - mech, ctxp, crq); - } - - if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED && - IS_RECOVERABLE(error)) { - /* Add pd to the linked list of providers tried. */ - if (kcf_insert_triedlist(&list, pd, KCF_KMFLAG(crq)) != NULL) - goto retry; - } - - if (list != NULL) - kcf_free_triedlist(list); - KCF_PROV_REFRELE(pd); - return (error); -} - -/* - * crypto_digest_update() - * - * Arguments: - * context: A crypto_context_t initialized by digest_init(). - * data: The part of message to be digested. - * cr: crypto_call_req_t calling conditions and call back info. - * - * Description: - * Asynchronously submits a request for, or synchronously performs a - * part of a message digest operation. - * - * Context: - * Process or interrupt, according to the semantics dictated by the 'cr'. - * - * Returns: - * See comment in the beginning of the file. - */ -int -crypto_digest_update(crypto_context_t context, crypto_data_t *data, - crypto_call_req_t *cr) -{ - crypto_ctx_t *ctx = (crypto_ctx_t *)context; - kcf_context_t *kcf_ctx; - kcf_provider_desc_t *pd; - int error; - kcf_req_params_t params; - - if ((ctx == NULL) || - ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) || - ((pd = kcf_ctx->kc_prov_desc) == NULL)) { - return (CRYPTO_INVALID_CONTEXT); - } - - ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER); - - /* The fast path for SW providers. */ - if (CHECK_FASTPATH(cr, pd)) { - error = KCF_PROV_DIGEST_UPDATE(pd, ctx, data, NULL); - KCF_PROV_INCRSTATS(pd, error); - } else { - KCF_WRAP_DIGEST_OPS_PARAMS(¶ms, KCF_OP_UPDATE, - ctx->cc_session, NULL, NULL, data, NULL); - error = kcf_submit_request(pd, ctx, cr, ¶ms, B_FALSE); - } - - return (error); -} - -/* - * crypto_digest_final() - * - * Arguments: - * context: A crypto_context_t initialized by digest_init(). - * digest: The storage for the digest. 
- * cr: crypto_call_req_t calling conditions and call back info. - * - * Description: - * Asynchronously submits a request for, or synchronously performs the - * final part of a message digest operation. - * - * Context: - * Process or interrupt, according to the semantics dictated by the 'cr'. - * - * Returns: - * See comment in the beginning of the file. - */ -int -crypto_digest_final(crypto_context_t context, crypto_data_t *digest, - crypto_call_req_t *cr) -{ - crypto_ctx_t *ctx = (crypto_ctx_t *)context; - kcf_context_t *kcf_ctx; - kcf_provider_desc_t *pd; - int error; - kcf_req_params_t params; - - if ((ctx == NULL) || - ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) || - ((pd = kcf_ctx->kc_prov_desc) == NULL)) { - return (CRYPTO_INVALID_CONTEXT); - } - - ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER); - - /* The fast path for SW providers. */ - if (CHECK_FASTPATH(cr, pd)) { - error = KCF_PROV_DIGEST_FINAL(pd, ctx, digest, NULL); - KCF_PROV_INCRSTATS(pd, error); - } else { - KCF_WRAP_DIGEST_OPS_PARAMS(¶ms, KCF_OP_FINAL, - ctx->cc_session, NULL, NULL, NULL, digest); - error = kcf_submit_request(pd, ctx, cr, ¶ms, B_FALSE); - } - - /* Release the hold done in kcf_new_ctx() during init step. */ - KCF_CONTEXT_COND_RELEASE(error, kcf_ctx); - return (error); -} - -/* - * Performs a digest update on the specified key. Note that there is - * no k-API crypto_digest_key() equivalent of this function. - */ -int -crypto_digest_key_prov(crypto_context_t context, crypto_key_t *key, - crypto_call_req_t *cr) -{ - crypto_ctx_t *ctx = (crypto_ctx_t *)context; - kcf_context_t *kcf_ctx; - kcf_provider_desc_t *pd; - int error; - kcf_req_params_t params; - - if ((ctx == NULL) || - ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) || - ((pd = kcf_ctx->kc_prov_desc) == NULL)) { - return (CRYPTO_INVALID_CONTEXT); - } - - ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER); - - /* The fast path for SW providers. 
*/ - if (CHECK_FASTPATH(cr, pd)) { - error = KCF_PROV_DIGEST_KEY(pd, ctx, key, NULL); - KCF_PROV_INCRSTATS(pd, error); - } else { - KCF_WRAP_DIGEST_OPS_PARAMS(¶ms, KCF_OP_DIGEST_KEY, - ctx->cc_session, NULL, key, NULL, NULL); - error = kcf_submit_request(pd, ctx, cr, ¶ms, B_FALSE); - } - - return (error); -} - -/* - * See comments for crypto_digest_update() and crypto_digest_final(). - */ -int -crypto_digest_single(crypto_context_t context, crypto_data_t *data, - crypto_data_t *digest, crypto_call_req_t *cr) -{ - crypto_ctx_t *ctx = (crypto_ctx_t *)context; - kcf_context_t *kcf_ctx; - kcf_provider_desc_t *pd; - int error; - kcf_req_params_t params; - - if ((ctx == NULL) || - ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) || - ((pd = kcf_ctx->kc_prov_desc) == NULL)) { - return (CRYPTO_INVALID_CONTEXT); - } - - - /* The fast path for SW providers. */ - if (CHECK_FASTPATH(cr, pd)) { - error = KCF_PROV_DIGEST(pd, ctx, data, digest, NULL); - KCF_PROV_INCRSTATS(pd, error); - } else { - KCF_WRAP_DIGEST_OPS_PARAMS(¶ms, KCF_OP_SINGLE, pd->pd_sid, - NULL, NULL, data, digest); - error = kcf_submit_request(pd, ctx, cr, ¶ms, B_FALSE); - } - - /* Release the hold done in kcf_new_ctx() during init step. */ - KCF_CONTEXT_COND_RELEASE(error, kcf_ctx); - return (error); -} - -#if defined(_KERNEL) -EXPORT_SYMBOL(crypto_digest_prov); -EXPORT_SYMBOL(crypto_digest); -EXPORT_SYMBOL(crypto_digest_init_prov); -EXPORT_SYMBOL(crypto_digest_init); -EXPORT_SYMBOL(crypto_digest_update); -EXPORT_SYMBOL(crypto_digest_final); -EXPORT_SYMBOL(crypto_digest_key_prov); -EXPORT_SYMBOL(crypto_digest_single); -#endif diff --git a/sys/contrib/openzfs/module/icp/api/kcf_mac.c b/sys/contrib/openzfs/module/icp/api/kcf_mac.c index a7722d8f914c..287467e68350 100644 --- a/sys/contrib/openzfs/module/icp/api/kcf_mac.c +++ b/sys/contrib/openzfs/module/icp/api/kcf_mac.c @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. 
* * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * @@ -40,15 +40,12 @@ * presence of the arguments. * * CRYPTO_SUCCESS: The operation completed successfully. - * CRYPTO_QUEUED: A request was submitted successfully. The callback - * routine will be called when the operation is done. * CRYPTO_INVALID_MECH_NUMBER, CRYPTO_INVALID_MECH_PARAM, or * CRYPTO_INVALID_MECH for problems with the 'mech'. * CRYPTO_INVALID_DATA for bogus 'data' * CRYPTO_HOST_MEMORY for failure to allocate memory to handle this work. * CRYPTO_INVALID_CONTEXT: Not a valid context. - * CRYPTO_BUSY: Cannot process the request now. Schedule a - * crypto_bufcall(), or try later. + * CRYPTO_BUSY: Cannot process the request now. Try later. * CRYPTO_NOT_SUPPORTED and CRYPTO_MECH_NOT_SUPPORTED: No provider is * capable of a function or a mechanism. * CRYPTO_INVALID_KEY: bogus 'key' argument. @@ -70,7 +67,6 @@ * tmpl: a crypto_ctx_template_t, opaque template of a context of a * MAC with the 'mech' using 'key'. 'tmpl' is created by * a previous call to crypto_create_ctx_template(). - * cr: crypto_call_req_t calling conditions and call back info. * * Description: * Asynchronously submits a request for, or synchronously performs a @@ -79,55 +75,17 @@ * the specified session id. * When complete and successful, 'mac' will contain the message * authentication code. - * - * Context: - * Process or interrupt, according to the semantics dictated by the 'crq'. + * Relies on the KCF scheduler to choose a provider. * * Returns: * See comment in the beginning of the file. 
*/ int -crypto_mac_prov(crypto_provider_t provider, crypto_session_id_t sid, - crypto_mechanism_t *mech, crypto_data_t *data, crypto_key_t *key, - crypto_ctx_template_t tmpl, crypto_data_t *mac, crypto_call_req_t *crq) -{ - kcf_req_params_t params; - kcf_provider_desc_t *pd = provider; - kcf_provider_desc_t *real_provider = pd; - int rv; - - ASSERT(KCF_PROV_REFHELD(pd)); - - if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) { - rv = kcf_get_hardware_provider(mech->cm_type, - CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq), pd, - &real_provider, CRYPTO_FG_MAC_ATOMIC); - - if (rv != CRYPTO_SUCCESS) - return (rv); - } - - KCF_WRAP_MAC_OPS_PARAMS(¶ms, KCF_OP_ATOMIC, sid, mech, key, - data, mac, tmpl); - rv = kcf_submit_request(real_provider, NULL, crq, ¶ms, B_FALSE); - if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) - KCF_PROV_REFRELE(real_provider); - - return (rv); -} - -/* - * Same as crypto_mac_prov(), but relies on the KCF scheduler to choose - * a provider. See crypto_mac() comments for more information. 
- */ -int crypto_mac(crypto_mechanism_t *mech, crypto_data_t *data, - crypto_key_t *key, crypto_ctx_template_t tmpl, crypto_data_t *mac, - crypto_call_req_t *crq) + crypto_key_t *key, crypto_ctx_template_t tmpl, crypto_data_t *mac) { int error; kcf_mech_entry_t *me; - kcf_req_params_t params; kcf_provider_desc_t *pd; kcf_ctx_template_t *ctx_tmpl; crypto_spi_ctx_template_t spi_ctx_tmpl = NULL; @@ -136,187 +94,23 @@ crypto_mac(crypto_mechanism_t *mech, crypto_data_t *data, retry: /* The pd is returned held */ if ((pd = kcf_get_mech_provider(mech->cm_type, &me, &error, - list, CRYPTO_FG_MAC_ATOMIC, CHECK_RESTRICT(crq), - data->cd_length)) == NULL) { + list, CRYPTO_FG_MAC_ATOMIC)) == NULL) { if (list != NULL) kcf_free_triedlist(list); return (error); } - /* - * For SW providers, check the validity of the context template - * It is very rare that the generation number mis-matches, so - * is acceptable to fail here, and let the consumer recover by - * freeing this tmpl and create a new one for the key and new SW - * provider - */ - if ((pd->pd_prov_type == CRYPTO_SW_PROVIDER) && - ((ctx_tmpl = (kcf_ctx_template_t *)tmpl) != NULL)) { - if (ctx_tmpl->ct_generation != me->me_gen_swprov) { - if (list != NULL) - kcf_free_triedlist(list); - KCF_PROV_REFRELE(pd); - return (CRYPTO_OLD_CTX_TEMPLATE); - } else { - spi_ctx_tmpl = ctx_tmpl->ct_prov_tmpl; - } - } - - /* The fast path for SW providers. */ - if (CHECK_FASTPATH(crq, pd)) { - crypto_mechanism_t lmech; - - lmech = *mech; - KCF_SET_PROVIDER_MECHNUM(mech->cm_type, pd, &lmech); - - error = KCF_PROV_MAC_ATOMIC(pd, pd->pd_sid, &lmech, key, data, - mac, spi_ctx_tmpl, KCF_SWFP_RHNDL(crq)); - KCF_PROV_INCRSTATS(pd, error); - } else { - if (pd->pd_prov_type == CRYPTO_HW_PROVIDER && - (pd->pd_flags & CRYPTO_HASH_NO_UPDATE) && - (data->cd_length > pd->pd_hash_limit)) { - /* - * XXX - We need a check to see if this is indeed - * a HMAC. So far, all kernel clients use - * this interface only for HMAC. So, this is fine - * for now. 
- */ - error = CRYPTO_BUFFER_TOO_BIG; - } else { - KCF_WRAP_MAC_OPS_PARAMS(¶ms, KCF_OP_ATOMIC, - pd->pd_sid, mech, key, data, mac, spi_ctx_tmpl); + if (((ctx_tmpl = (kcf_ctx_template_t *)tmpl) != NULL)) + spi_ctx_tmpl = ctx_tmpl->ct_prov_tmpl; - error = kcf_submit_request(pd, NULL, crq, ¶ms, - KCF_ISDUALREQ(crq)); - } - } + crypto_mechanism_t lmech = *mech; + KCF_SET_PROVIDER_MECHNUM(mech->cm_type, pd, &lmech); + error = KCF_PROV_MAC_ATOMIC(pd, &lmech, key, data, + mac, spi_ctx_tmpl); - if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED && - IS_RECOVERABLE(error)) { + if (error != CRYPTO_SUCCESS && IS_RECOVERABLE(error)) { /* Add pd to the linked list of providers tried. */ - if (kcf_insert_triedlist(&list, pd, KCF_KMFLAG(crq)) != NULL) - goto retry; - } - - if (list != NULL) - kcf_free_triedlist(list); - - KCF_PROV_REFRELE(pd); - return (error); -} - -/* - * Single part operation to compute the MAC corresponding to the specified - * 'data' and to verify that it matches the MAC specified by 'mac'. - * The other arguments are the same as the function crypto_mac_prov(). 
- */ -int -crypto_mac_verify_prov(crypto_provider_t provider, crypto_session_id_t sid, - crypto_mechanism_t *mech, crypto_data_t *data, crypto_key_t *key, - crypto_ctx_template_t tmpl, crypto_data_t *mac, crypto_call_req_t *crq) -{ - kcf_req_params_t params; - kcf_provider_desc_t *pd = provider; - kcf_provider_desc_t *real_provider = pd; - int rv; - - ASSERT(KCF_PROV_REFHELD(pd)); - - if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) { - rv = kcf_get_hardware_provider(mech->cm_type, - CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq), pd, - &real_provider, CRYPTO_FG_MAC_ATOMIC); - - if (rv != CRYPTO_SUCCESS) - return (rv); - } - - KCF_WRAP_MAC_OPS_PARAMS(¶ms, KCF_OP_MAC_VERIFY_ATOMIC, sid, mech, - key, data, mac, tmpl); - rv = kcf_submit_request(real_provider, NULL, crq, ¶ms, B_FALSE); - if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) - KCF_PROV_REFRELE(real_provider); - - return (rv); -} - -/* - * Same as crypto_mac_verify_prov(), but relies on the KCF scheduler to choose - * a provider. See crypto_mac_verify_prov() comments for more information. 
- */ -int -crypto_mac_verify(crypto_mechanism_t *mech, crypto_data_t *data, - crypto_key_t *key, crypto_ctx_template_t tmpl, crypto_data_t *mac, - crypto_call_req_t *crq) -{ - int error; - kcf_mech_entry_t *me; - kcf_req_params_t params; - kcf_provider_desc_t *pd; - kcf_ctx_template_t *ctx_tmpl; - crypto_spi_ctx_template_t spi_ctx_tmpl = NULL; - kcf_prov_tried_t *list = NULL; - -retry: - /* The pd is returned held */ - if ((pd = kcf_get_mech_provider(mech->cm_type, &me, &error, - list, CRYPTO_FG_MAC_ATOMIC, CHECK_RESTRICT(crq), - data->cd_length)) == NULL) { - if (list != NULL) - kcf_free_triedlist(list); - return (error); - } - - /* - * For SW providers, check the validity of the context template - * It is very rare that the generation number mis-matches, so - * is acceptable to fail here, and let the consumer recover by - * freeing this tmpl and create a new one for the key and new SW - * provider - */ - if ((pd->pd_prov_type == CRYPTO_SW_PROVIDER) && - ((ctx_tmpl = (kcf_ctx_template_t *)tmpl) != NULL)) { - if (ctx_tmpl->ct_generation != me->me_gen_swprov) { - if (list != NULL) - kcf_free_triedlist(list); - KCF_PROV_REFRELE(pd); - return (CRYPTO_OLD_CTX_TEMPLATE); - } else { - spi_ctx_tmpl = ctx_tmpl->ct_prov_tmpl; - } - } - - /* The fast path for SW providers. 
*/ - if (CHECK_FASTPATH(crq, pd)) { - crypto_mechanism_t lmech; - - lmech = *mech; - KCF_SET_PROVIDER_MECHNUM(mech->cm_type, pd, &lmech); - - error = KCF_PROV_MAC_VERIFY_ATOMIC(pd, pd->pd_sid, &lmech, key, - data, mac, spi_ctx_tmpl, KCF_SWFP_RHNDL(crq)); - KCF_PROV_INCRSTATS(pd, error); - } else { - if (pd->pd_prov_type == CRYPTO_HW_PROVIDER && - (pd->pd_flags & CRYPTO_HASH_NO_UPDATE) && - (data->cd_length > pd->pd_hash_limit)) { - /* see comments in crypto_mac() */ - error = CRYPTO_BUFFER_TOO_BIG; - } else { - KCF_WRAP_MAC_OPS_PARAMS(¶ms, - KCF_OP_MAC_VERIFY_ATOMIC, pd->pd_sid, mech, - key, data, mac, spi_ctx_tmpl); - - error = kcf_submit_request(pd, NULL, crq, ¶ms, - KCF_ISDUALREQ(crq)); - } - } - - if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED && - IS_RECOVERABLE(error)) { - /* Add pd to the linked list of providers tried. */ - if (kcf_insert_triedlist(&list, pd, KCF_KMFLAG(crq)) != NULL) + if (kcf_insert_triedlist(&list, pd, KM_SLEEP) != NULL) goto retry; } @@ -333,7 +127,6 @@ retry: * Arguments: * pd: pointer to the descriptor of the provider to use for this * operation. - * sid: provider session id. * mech: crypto_mechanism_t pointer. * mech_type is a valid value previously returned by * crypto_mech2id(); @@ -344,7 +137,6 @@ retry: * MAC with the 'mech' using 'key'. 'tmpl' is created by * a previous call to crypto_create_ctx_template(). * ctxp: Pointer to a crypto_context_t. - * cr: crypto_call_req_t calling conditions and call back info. * * Description: * Asynchronously submits a request for, or synchronously performs the @@ -357,61 +149,29 @@ retry: * The caller should hold a reference on the specified provider * descriptor before calling this function. * - * Context: - * Process or interrupt, according to the semantics dictated by the 'cr'. - * * Returns: * See comment in the beginning of the file. 
*/ -int -crypto_mac_init_prov(crypto_provider_t provider, crypto_session_id_t sid, +static int +crypto_mac_init_prov(kcf_provider_desc_t *pd, crypto_mechanism_t *mech, crypto_key_t *key, crypto_spi_ctx_template_t tmpl, - crypto_context_t *ctxp, crypto_call_req_t *crq) + crypto_context_t *ctxp) { int rv; crypto_ctx_t *ctx; - kcf_req_params_t params; - kcf_provider_desc_t *pd = provider; kcf_provider_desc_t *real_provider = pd; ASSERT(KCF_PROV_REFHELD(pd)); - if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) { - rv = kcf_get_hardware_provider(mech->cm_type, - CRYPTO_MECH_INVALID, CHECK_RESTRICT(crq), pd, - &real_provider, CRYPTO_FG_MAC); - - if (rv != CRYPTO_SUCCESS) - return (rv); - } - /* Allocate and initialize the canonical context */ - if ((ctx = kcf_new_ctx(crq, real_provider, sid)) == NULL) { - if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) - KCF_PROV_REFRELE(real_provider); + if ((ctx = kcf_new_ctx(real_provider)) == NULL) return (CRYPTO_HOST_MEMORY); - } - /* The fast path for SW providers. */ - if (CHECK_FASTPATH(crq, pd)) { - crypto_mechanism_t lmech; + crypto_mechanism_t lmech = *mech; + KCF_SET_PROVIDER_MECHNUM(mech->cm_type, real_provider, &lmech); + rv = KCF_PROV_MAC_INIT(real_provider, ctx, &lmech, key, tmpl); - lmech = *mech; - KCF_SET_PROVIDER_MECHNUM(mech->cm_type, real_provider, &lmech); - rv = KCF_PROV_MAC_INIT(real_provider, ctx, &lmech, key, tmpl, - KCF_SWFP_RHNDL(crq)); - KCF_PROV_INCRSTATS(pd, rv); - } else { - KCF_WRAP_MAC_OPS_PARAMS(¶ms, KCF_OP_INIT, sid, mech, key, - NULL, NULL, tmpl); - rv = kcf_submit_request(real_provider, ctx, crq, ¶ms, - B_FALSE); - } - - if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) - KCF_PROV_REFRELE(real_provider); - - if ((rv == CRYPTO_SUCCESS) || (rv == CRYPTO_QUEUED)) + if (rv == CRYPTO_SUCCESS) *ctxp = (crypto_context_t)ctx; else { /* Release the hold done in kcf_new_ctx(). 
*/ @@ -428,8 +188,7 @@ crypto_mac_init_prov(crypto_provider_t provider, crypto_session_id_t sid, */ int crypto_mac_init(crypto_mechanism_t *mech, crypto_key_t *key, - crypto_ctx_template_t tmpl, crypto_context_t *ctxp, - crypto_call_req_t *crq) + crypto_ctx_template_t tmpl, crypto_context_t *ctxp) { int error; kcf_mech_entry_t *me; @@ -441,51 +200,27 @@ crypto_mac_init(crypto_mechanism_t *mech, crypto_key_t *key, retry: /* The pd is returned held */ if ((pd = kcf_get_mech_provider(mech->cm_type, &me, &error, - list, CRYPTO_FG_MAC, CHECK_RESTRICT(crq), 0)) == NULL) { + list, CRYPTO_FG_MAC)) == NULL) { if (list != NULL) kcf_free_triedlist(list); return (error); } /* - * For SW providers, check the validity of the context template + * Check the validity of the context template * It is very rare that the generation number mis-matches, so * is acceptable to fail here, and let the consumer recover by - * freeing this tmpl and create a new one for the key and new SW - * provider + * freeing this tmpl and create a new one for the key and new provider */ - if ((pd->pd_prov_type == CRYPTO_SW_PROVIDER) && - ((ctx_tmpl = (kcf_ctx_template_t *)tmpl) != NULL)) { - if (ctx_tmpl->ct_generation != me->me_gen_swprov) { - if (list != NULL) - kcf_free_triedlist(list); - KCF_PROV_REFRELE(pd); - return (CRYPTO_OLD_CTX_TEMPLATE); - } else { - spi_ctx_tmpl = ctx_tmpl->ct_prov_tmpl; - } - } + if (((ctx_tmpl = (kcf_ctx_template_t *)tmpl) != NULL)) + spi_ctx_tmpl = ctx_tmpl->ct_prov_tmpl; - if (pd->pd_prov_type == CRYPTO_HW_PROVIDER && - (pd->pd_flags & CRYPTO_HASH_NO_UPDATE)) { - /* - * The hardware provider has limited HMAC support. - * So, we fallback early here to using a software provider. - * - * XXX - need to enhance to do the fallback later in - * crypto_mac_update() if the size of accumulated input data - * exceeds the maximum size digestable by hardware provider. 
- */ - error = CRYPTO_BUFFER_TOO_BIG; - } else { - error = crypto_mac_init_prov(pd, pd->pd_sid, mech, key, - spi_ctx_tmpl, ctxp, crq); - } - if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED && - IS_RECOVERABLE(error)) { + error = crypto_mac_init_prov(pd, mech, key, + spi_ctx_tmpl, ctxp); + if (error != CRYPTO_SUCCESS && IS_RECOVERABLE(error)) { /* Add pd to the linked list of providers tried. */ - if (kcf_insert_triedlist(&list, pd, KCF_KMFLAG(crq)) != NULL) + if (kcf_insert_triedlist(&list, pd, KM_SLEEP) != NULL) goto retry; } @@ -502,27 +237,19 @@ retry: * Arguments: * context: A crypto_context_t initialized by mac_init(). * data: The message part to be MAC'ed - * cr: crypto_call_req_t calling conditions and call back info. * * Description: - * Asynchronously submits a request for, or synchronously performs a - * part of a MAC operation. - * - * Context: - * Process or interrupt, according to the semantics dictated by the 'cr'. + * Synchronously performs a part of a MAC operation. * * Returns: * See comment in the beginning of the file. */ int -crypto_mac_update(crypto_context_t context, crypto_data_t *data, - crypto_call_req_t *cr) +crypto_mac_update(crypto_context_t context, crypto_data_t *data) { crypto_ctx_t *ctx = (crypto_ctx_t *)context; kcf_context_t *kcf_ctx; kcf_provider_desc_t *pd; - kcf_req_params_t params; - int rv; if ((ctx == NULL) || ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) || @@ -530,19 +257,7 @@ crypto_mac_update(crypto_context_t context, crypto_data_t *data, return (CRYPTO_INVALID_CONTEXT); } - ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER); - - /* The fast path for SW providers. 
*/ - if (CHECK_FASTPATH(cr, pd)) { - rv = KCF_PROV_MAC_UPDATE(pd, ctx, data, NULL); - KCF_PROV_INCRSTATS(pd, rv); - } else { - KCF_WRAP_MAC_OPS_PARAMS(¶ms, KCF_OP_UPDATE, - ctx->cc_session, NULL, NULL, data, NULL, NULL); - rv = kcf_submit_request(pd, ctx, cr, ¶ms, B_FALSE); - } - - return (rv); + return (KCF_PROV_MAC_UPDATE(pd, ctx, data)); } /* @@ -551,27 +266,19 @@ crypto_mac_update(crypto_context_t context, crypto_data_t *data, * Arguments: * context: A crypto_context_t initialized by mac_init(). * mac: Storage for the message authentication code. - * cr: crypto_call_req_t calling conditions and call back info. * * Description: - * Asynchronously submits a request for, or synchronously performs a - * part of a message authentication operation. - * - * Context: - * Process or interrupt, according to the semantics dictated by the 'cr'. + * Synchronously performs a part of a message authentication operation. * * Returns: * See comment in the beginning of the file. */ int -crypto_mac_final(crypto_context_t context, crypto_data_t *mac, - crypto_call_req_t *cr) +crypto_mac_final(crypto_context_t context, crypto_data_t *mac) { crypto_ctx_t *ctx = (crypto_ctx_t *)context; kcf_context_t *kcf_ctx; kcf_provider_desc_t *pd; - kcf_req_params_t params; - int rv; if ((ctx == NULL) || ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) || @@ -579,67 +286,16 @@ crypto_mac_final(crypto_context_t context, crypto_data_t *mac, return (CRYPTO_INVALID_CONTEXT); } - ASSERT(pd->pd_prov_type != CRYPTO_LOGICAL_PROVIDER); - - /* The fast path for SW providers. */ - if (CHECK_FASTPATH(cr, pd)) { - rv = KCF_PROV_MAC_FINAL(pd, ctx, mac, NULL); - KCF_PROV_INCRSTATS(pd, rv); - } else { - KCF_WRAP_MAC_OPS_PARAMS(¶ms, KCF_OP_FINAL, - ctx->cc_session, NULL, NULL, NULL, mac, NULL); - rv = kcf_submit_request(pd, ctx, cr, ¶ms, B_FALSE); - } + int rv = KCF_PROV_MAC_FINAL(pd, ctx, mac); /* Release the hold done in kcf_new_ctx() during init step. 
*/ KCF_CONTEXT_COND_RELEASE(rv, kcf_ctx); return (rv); } -/* - * See comments for crypto_mac_update() and crypto_mac_final(). - */ -int -crypto_mac_single(crypto_context_t context, crypto_data_t *data, - crypto_data_t *mac, crypto_call_req_t *cr) -{ - crypto_ctx_t *ctx = (crypto_ctx_t *)context; - kcf_context_t *kcf_ctx; - kcf_provider_desc_t *pd; - int error; - kcf_req_params_t params; - - - if ((ctx == NULL) || - ((kcf_ctx = (kcf_context_t *)ctx->cc_framework_private) == NULL) || - ((pd = kcf_ctx->kc_prov_desc) == NULL)) { - return (CRYPTO_INVALID_CONTEXT); - } - - - /* The fast path for SW providers. */ - if (CHECK_FASTPATH(cr, pd)) { - error = KCF_PROV_MAC(pd, ctx, data, mac, NULL); - KCF_PROV_INCRSTATS(pd, error); - } else { - KCF_WRAP_MAC_OPS_PARAMS(&params, KCF_OP_SINGLE, pd->pd_sid, - NULL, NULL, data, mac, NULL); - error = kcf_submit_request(pd, ctx, cr, &params, B_FALSE); - } - - /* Release the hold done in kcf_new_ctx() during init step. */ - KCF_CONTEXT_COND_RELEASE(error, kcf_ctx); - return (error); -} - #if defined(_KERNEL) -EXPORT_SYMBOL(crypto_mac_prov); EXPORT_SYMBOL(crypto_mac); -EXPORT_SYMBOL(crypto_mac_verify_prov); -EXPORT_SYMBOL(crypto_mac_verify); -EXPORT_SYMBOL(crypto_mac_init_prov); EXPORT_SYMBOL(crypto_mac_init); EXPORT_SYMBOL(crypto_mac_update); EXPORT_SYMBOL(crypto_mac_final); -EXPORT_SYMBOL(crypto_mac_single); #endif diff --git a/sys/contrib/openzfs/module/icp/api/kcf_miscapi.c b/sys/contrib/openzfs/module/icp/api/kcf_miscapi.c deleted file mode 100644 index c0f415b264a7..000000000000 --- a/sys/contrib/openzfs/module/icp/api/kcf_miscapi.c +++ /dev/null @@ -1,127 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. 
- * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include <sys/zfs_context.h> -#include <sys/crypto/common.h> -#include <sys/crypto/api.h> -#include <sys/crypto/impl.h> -#include <sys/crypto/sched_impl.h> - -/* - * All event subscribers are put on a list. kcf_notify_list_lock - * protects changes to this list. - * - * The following locking order is maintained in the code - The - * global kcf_notify_list_lock followed by the individual lock - * in a kcf_ntfy_elem structure (kn_lock). - */ -kmutex_t ntfy_list_lock; -kcondvar_t ntfy_list_cv; /* cv the service thread waits on */ -static kcf_ntfy_elem_t *ntfy_list_head; - -/* - * crypto_mech2id() - * - * Arguments: - * . mechname: A null-terminated string identifying the mechanism name. - * - * Description: - * Walks the mechanisms tables, looking for an entry that matches the - * mechname. Once it find it, it builds the 64-bit mech_type and returns - * it. If there are no hardware or software providers for the mechanism, - * but there is an unloaded software provider, this routine will attempt - * to load it. - * - * Context: - * Process and interruption. - * - * Returns: - * The unique mechanism identified by 'mechname', if found. - * CRYPTO_MECH_INVALID otherwise. - */ -crypto_mech_type_t -crypto_mech2id(char *mechname) -{ - return (crypto_mech2id_common(mechname, B_TRUE)); -} - -/* - * We walk the notification list and do the callbacks. 
- */ -void -kcf_walk_ntfylist(uint32_t event, void *event_arg) -{ - kcf_ntfy_elem_t *nep; - int nelem = 0; - - mutex_enter(&ntfy_list_lock); - - /* - * Count how many clients are on the notification list. We need - * this count to ensure that clients which joined the list after we - * have started this walk, are not wrongly notified. - */ - for (nep = ntfy_list_head; nep != NULL; nep = nep->kn_next) - nelem++; - - for (nep = ntfy_list_head; (nep != NULL && nelem); nep = nep->kn_next) { - nelem--; - - /* - * Check if this client is interested in the - * event. - */ - if (!(nep->kn_event_mask & event)) - continue; - - mutex_enter(&nep->kn_lock); - nep->kn_state = NTFY_RUNNING; - mutex_exit(&nep->kn_lock); - mutex_exit(&ntfy_list_lock); - - /* - * We invoke the callback routine with no locks held. Another - * client could have joined the list meanwhile. This is fine - * as we maintain nelem as stated above. The NULL check in the - * for loop guards against shrinkage. Also, any callers of - * crypto_unnotify_events() at this point cv_wait till kn_state - * changes to NTFY_WAITING. Hence, nep is assured to be valid. - */ - (*nep->kn_func)(event, event_arg); - - mutex_enter(&nep->kn_lock); - nep->kn_state = NTFY_WAITING; - cv_broadcast(&nep->kn_cv); - mutex_exit(&nep->kn_lock); - - mutex_enter(&ntfy_list_lock); - } - - mutex_exit(&ntfy_list_lock); -} - -#if defined(_KERNEL) -EXPORT_SYMBOL(crypto_mech2id); -#endif diff --git a/sys/contrib/openzfs/module/icp/asm-aarch64/blake3/b3_aarch64_sse2.S b/sys/contrib/openzfs/module/icp/asm-aarch64/blake3/b3_aarch64_sse2.S new file mode 100644 index 000000000000..b0af629066ea --- /dev/null +++ b/sys/contrib/openzfs/module/icp/asm-aarch64/blake3/b3_aarch64_sse2.S @@ -0,0 +1,2069 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. 
+ * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3 + * Copyright (c) 2019-2022 Samuel Neves and Matthew Krupcale + * Copyright (c) 2022-2023 Tino Reichardt <milky-zfs@mcmilk.de> + * + * This is converted assembly: SSE2 -> ARMv8-A + * Used tools: SIMDe https://github.com/simd-everywhere/simde + * + * Should work on FreeBSD, Linux and macOS + * see: https://github.com/mcmilk/BLAKE3-tests/blob/master/contrib/simde.sh + */ + +#if defined(__aarch64__) + +/* make gcc <= 9 happy */ +#if !defined(LD_VERSION) || LD_VERSION >= 233010000 +#define CFI_NEGATE_RA_STATE .cfi_negate_ra_state +#else +#define CFI_NEGATE_RA_STATE +#endif + + .text + .section .note.gnu.property,"a",@note + .p2align 3 + .word 4 + .word 16 + .word 5 + .asciz "GNU" + .word 3221225472 + .word 4 + .word 3 + .word 0 +.Lsec_end0: + .text + .globl zfs_blake3_compress_in_place_sse2 + .p2align 2 + .type zfs_blake3_compress_in_place_sse2,@function +zfs_blake3_compress_in_place_sse2: + .cfi_startproc + hint #25 + CFI_NEGATE_RA_STATE + sub sp, sp, #96 + stp x29, x30, [sp, #64] + add x29, sp, #64 + str x19, [sp, #80] + .cfi_def_cfa w29, 32 + .cfi_offset w19, -16 + .cfi_offset w30, -24 + .cfi_offset w29, -32 + mov x19, x0 + mov w5, w4 + mov x4, x3 + mov w3, w2 + mov x2, x1 + mov x0, sp + mov x1, x19 + bl compress_pre + ldp q0, q1, [sp] + ldp q2, q3, [sp, #32] + eor v0.16b, v2.16b, v0.16b + 
eor v1.16b, v3.16b, v1.16b + ldp x29, x30, [sp, #64] + stp q0, q1, [x19] + ldr x19, [sp, #80] + add sp, sp, #96 + hint #29 + ret +.Lfunc_end0: + .size zfs_blake3_compress_in_place_sse2, .Lfunc_end0-zfs_blake3_compress_in_place_sse2 + .cfi_endproc + + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4 +.LCPI1_0: + .xword -4942790177982912921 + .xword -6534734903820487822 + .text + .p2align 2 + .type compress_pre,@function +compress_pre: + .cfi_startproc + hint #34 + fmov s1, w3 + movi d0, #0x0000ff000000ff + ldr q2, [x1] + fmov d3, x4 + adrp x8, .LCPI1_0 + mov v1.s[1], w5 + str q2, [x0] + ldr q4, [x8, :lo12:.LCPI1_0] + add x8, x2, #32 + ldr q5, [x1, #16] + and v0.8b, v1.8b, v0.8b + stp q5, q4, [x0, #16] + mov v3.d[1], v0.d[0] + str q3, [x0, #48] + ldp q0, q6, [x2] + uzp1 v1.4s, v0.4s, v6.4s + uzp2 v0.4s, v0.4s, v6.4s + add v2.4s, v2.4s, v1.4s + uzp1 v18.4s, v1.4s, v1.4s + add v2.4s, v2.4s, v5.4s + eor v3.16b, v2.16b, v3.16b + add v2.4s, v2.4s, v0.4s + rev32 v3.8h, v3.8h + add v4.4s, v3.4s, v4.4s + eor v5.16b, v4.16b, v5.16b + ushr v6.4s, v5.4s, #12 + shl v5.4s, v5.4s, #20 + orr v5.16b, v5.16b, v6.16b + add v2.4s, v2.4s, v5.4s + eor v3.16b, v2.16b, v3.16b + ushr v6.4s, v3.4s, #8 + shl v3.4s, v3.4s, #24 + orr v3.16b, v3.16b, v6.16b + ld2 { v6.4s, v7.4s }, [x8] + add v4.4s, v3.4s, v4.4s + ext v3.16b, v3.16b, v3.16b, #8 + add v2.4s, v2.4s, v6.4s + eor v5.16b, v4.16b, v5.16b + ext v4.16b, v4.16b, v4.16b, #4 + ext v6.16b, v6.16b, v6.16b, #12 + ext v2.16b, v2.16b, v2.16b, #12 + ushr v16.4s, v5.4s, #7 + shl v5.4s, v5.4s, #25 + orr v5.16b, v5.16b, v16.16b + ext v16.16b, v7.16b, v7.16b, #12 + add v2.4s, v2.4s, v5.4s + mov v7.16b, v16.16b + eor v3.16b, v3.16b, v2.16b + add v2.4s, v2.4s, v16.4s + mov v7.s[1], v6.s[2] + rev32 v3.8h, v3.8h + add v4.4s, v4.4s, v3.4s + eor v5.16b, v4.16b, v5.16b + ushr v17.4s, v5.4s, #12 + shl v5.4s, v5.4s, #20 + orr v5.16b, v5.16b, v17.16b + add v2.4s, v2.4s, v5.4s + eor v3.16b, v2.16b, v3.16b + ushr v17.4s, v3.4s, #8 + shl v3.4s, v3.4s, #24 + 
orr v3.16b, v3.16b, v17.16b + ext v17.16b, v18.16b, v1.16b, #8 + add v4.4s, v3.4s, v4.4s + uzp2 v17.4s, v17.4s, v0.4s + ext v3.16b, v3.16b, v3.16b, #8 + eor v5.16b, v4.16b, v5.16b + add v2.4s, v2.4s, v17.4s + ext v4.16b, v4.16b, v4.16b, #12 + ushr v18.4s, v5.4s, #7 + shl v5.4s, v5.4s, #25 + ext v2.16b, v2.16b, v2.16b, #4 + orr v5.16b, v5.16b, v18.16b + ext v18.16b, v1.16b, v1.16b, #12 + add v2.4s, v2.4s, v5.4s + ext v1.16b, v1.16b, v18.16b, #12 + zip1 v18.2d, v16.2d, v0.2d + zip2 v0.4s, v0.4s, v16.4s + eor v3.16b, v3.16b, v2.16b + rev64 v1.4s, v1.4s + mov v18.s[3], v6.s[3] + zip1 v16.4s, v0.4s, v6.4s + rev32 v3.8h, v3.8h + trn2 v1.4s, v1.4s, v7.4s + zip1 v0.4s, v6.4s, v0.4s + add v4.4s, v4.4s, v3.4s + add v2.4s, v2.4s, v1.4s + ext v6.16b, v0.16b, v16.16b, #8 + eor v5.16b, v4.16b, v5.16b + ushr v7.4s, v5.4s, #12 + shl v5.4s, v5.4s, #20 + orr v5.16b, v5.16b, v7.16b + add v7.4s, v2.4s, v5.4s + eor v2.16b, v7.16b, v3.16b + ext v7.16b, v7.16b, v7.16b, #12 + ushr v3.4s, v2.4s, #8 + shl v2.4s, v2.4s, #24 + orr v3.16b, v2.16b, v3.16b + ext v2.16b, v18.16b, v18.16b, #12 + add v4.4s, v3.4s, v4.4s + uzp1 v2.4s, v18.4s, v2.4s + ext v3.16b, v3.16b, v3.16b, #8 + eor v5.16b, v4.16b, v5.16b + add v7.4s, v7.4s, v2.4s + ext v4.16b, v4.16b, v4.16b, #4 + ushr v18.4s, v5.4s, #7 + shl v5.4s, v5.4s, #25 + orr v5.16b, v5.16b, v18.16b + add v7.4s, v7.4s, v5.4s + eor v3.16b, v3.16b, v7.16b + add v7.4s, v7.4s, v6.4s + rev32 v3.8h, v3.8h + add v4.4s, v4.4s, v3.4s + eor v5.16b, v4.16b, v5.16b + ushr v0.4s, v5.4s, #12 + shl v5.4s, v5.4s, #20 + orr v0.16b, v5.16b, v0.16b + add v5.4s, v7.4s, v0.4s + ext v7.16b, v17.16b, v17.16b, #4 + eor v3.16b, v5.16b, v3.16b + uzp1 v17.4s, v7.4s, v7.4s + ushr v16.4s, v3.4s, #8 + shl v3.4s, v3.4s, #24 + orr v3.16b, v3.16b, v16.16b + ext v16.16b, v17.16b, v7.16b, #8 + add v4.4s, v3.4s, v4.4s + uzp2 v16.4s, v16.4s, v1.4s + ext v3.16b, v3.16b, v3.16b, #8 + eor v0.16b, v4.16b, v0.16b + add v5.4s, v5.4s, v16.4s + ext v4.16b, v4.16b, v4.16b, #12 + ushr v17.4s, v0.4s, 
#7 + shl v0.4s, v0.4s, #25 + ext v5.16b, v5.16b, v5.16b, #4 + orr v0.16b, v0.16b, v17.16b + ext v17.16b, v7.16b, v7.16b, #12 + add v5.4s, v5.4s, v0.4s + ext v7.16b, v7.16b, v17.16b, #12 + mov v17.16b, v6.16b + eor v3.16b, v3.16b, v5.16b + rev64 v7.4s, v7.4s + mov v17.s[1], v2.s[2] + rev32 v3.8h, v3.8h + add v4.4s, v4.4s, v3.4s + eor v18.16b, v4.16b, v0.16b + trn2 v0.4s, v7.4s, v17.4s + ushr v7.4s, v18.4s, #12 + shl v17.4s, v18.4s, #20 + add v5.4s, v5.4s, v0.4s + zip1 v18.2d, v6.2d, v1.2d + zip2 v1.4s, v1.4s, v6.4s + orr v7.16b, v17.16b, v7.16b + mov v18.s[3], v2.s[3] + zip1 v6.4s, v1.4s, v2.4s + add v5.4s, v5.4s, v7.4s + zip1 v1.4s, v2.4s, v1.4s + eor v3.16b, v5.16b, v3.16b + ext v5.16b, v5.16b, v5.16b, #12 + ext v6.16b, v1.16b, v6.16b, #8 + ushr v17.4s, v3.4s, #8 + shl v3.4s, v3.4s, #24 + orr v17.16b, v3.16b, v17.16b + ext v3.16b, v18.16b, v18.16b, #12 + add v4.4s, v17.4s, v4.4s + uzp1 v3.4s, v18.4s, v3.4s + ext v17.16b, v17.16b, v17.16b, #8 + eor v7.16b, v4.16b, v7.16b + add v5.4s, v5.4s, v3.4s + ext v4.16b, v4.16b, v4.16b, #4 + ushr v18.4s, v7.4s, #7 + shl v7.4s, v7.4s, #25 + orr v7.16b, v7.16b, v18.16b + add v5.4s, v5.4s, v7.4s + eor v17.16b, v17.16b, v5.16b + add v5.4s, v5.4s, v6.4s + rev32 v17.8h, v17.8h + add v4.4s, v4.4s, v17.4s + eor v2.16b, v4.16b, v7.16b + ext v7.16b, v16.16b, v16.16b, #4 + ushr v1.4s, v2.4s, #12 + shl v2.4s, v2.4s, #20 + orr v1.16b, v2.16b, v1.16b + add v2.4s, v5.4s, v1.4s + eor v5.16b, v2.16b, v17.16b + uzp1 v17.4s, v7.4s, v7.4s + ushr v16.4s, v5.4s, #8 + shl v5.4s, v5.4s, #24 + orr v5.16b, v5.16b, v16.16b + ext v16.16b, v17.16b, v7.16b, #8 + add v4.4s, v5.4s, v4.4s + uzp2 v16.4s, v16.4s, v0.4s + ext v5.16b, v5.16b, v5.16b, #8 + eor v1.16b, v4.16b, v1.16b + add v2.4s, v2.4s, v16.4s + ext v4.16b, v4.16b, v4.16b, #12 + ushr v17.4s, v1.4s, #7 + shl v1.4s, v1.4s, #25 + ext v2.16b, v2.16b, v2.16b, #4 + orr v1.16b, v1.16b, v17.16b + ext v17.16b, v7.16b, v7.16b, #12 + add v2.4s, v2.4s, v1.4s + ext v7.16b, v7.16b, v17.16b, #12 + mov v17.16b, 
v6.16b + eor v5.16b, v5.16b, v2.16b + rev64 v7.4s, v7.4s + mov v17.s[1], v3.s[2] + rev32 v5.8h, v5.8h + add v4.4s, v4.4s, v5.4s + eor v18.16b, v4.16b, v1.16b + trn2 v1.4s, v7.4s, v17.4s + ushr v7.4s, v18.4s, #12 + shl v17.4s, v18.4s, #20 + add v2.4s, v2.4s, v1.4s + zip1 v18.2d, v6.2d, v0.2d + zip2 v0.4s, v0.4s, v6.4s + orr v7.16b, v17.16b, v7.16b + mov v18.s[3], v3.s[3] + add v2.4s, v2.4s, v7.4s + eor v5.16b, v2.16b, v5.16b + ext v2.16b, v2.16b, v2.16b, #12 + ushr v17.4s, v5.4s, #8 + shl v5.4s, v5.4s, #24 + orr v5.16b, v5.16b, v17.16b + add v17.4s, v5.4s, v4.4s + ext v4.16b, v18.16b, v18.16b, #12 + ext v5.16b, v5.16b, v5.16b, #8 + eor v7.16b, v17.16b, v7.16b + uzp1 v4.4s, v18.4s, v4.4s + ext v17.16b, v17.16b, v17.16b, #4 + ushr v18.4s, v7.4s, #7 + shl v7.4s, v7.4s, #25 + add v2.4s, v2.4s, v4.4s + orr v7.16b, v7.16b, v18.16b + add v2.4s, v2.4s, v7.4s + eor v5.16b, v5.16b, v2.16b + rev32 v5.8h, v5.8h + add v6.4s, v17.4s, v5.4s + zip1 v17.4s, v0.4s, v3.4s + zip1 v0.4s, v3.4s, v0.4s + eor v3.16b, v6.16b, v7.16b + ext v0.16b, v0.16b, v17.16b, #8 + ushr v7.4s, v3.4s, #12 + shl v3.4s, v3.4s, #20 + add v2.4s, v2.4s, v0.4s + orr v3.16b, v3.16b, v7.16b + ext v7.16b, v16.16b, v16.16b, #4 + add v2.4s, v2.4s, v3.4s + uzp1 v17.4s, v7.4s, v7.4s + eor v5.16b, v2.16b, v5.16b + ushr v16.4s, v5.4s, #8 + shl v5.4s, v5.4s, #24 + orr v5.16b, v5.16b, v16.16b + ext v16.16b, v17.16b, v7.16b, #8 + add v6.4s, v5.4s, v6.4s + uzp2 v16.4s, v16.4s, v1.4s + ext v5.16b, v5.16b, v5.16b, #8 + eor v3.16b, v6.16b, v3.16b + add v2.4s, v2.4s, v16.4s + ext v6.16b, v6.16b, v6.16b, #12 + ushr v17.4s, v3.4s, #7 + shl v3.4s, v3.4s, #25 + ext v2.16b, v2.16b, v2.16b, #4 + orr v3.16b, v3.16b, v17.16b + add v17.4s, v2.4s, v3.4s + eor v2.16b, v5.16b, v17.16b + ext v5.16b, v7.16b, v7.16b, #12 + rev32 v18.8h, v2.8h + ext v2.16b, v7.16b, v5.16b, #12 + mov v5.16b, v0.16b + add v6.4s, v6.4s, v18.4s + rev64 v2.4s, v2.4s + mov v5.s[1], v4.s[2] + eor v3.16b, v6.16b, v3.16b + trn2 v2.4s, v2.4s, v5.4s + ushr v5.4s, v3.4s, 
#12 + shl v3.4s, v3.4s, #20 + add v7.4s, v17.4s, v2.4s + orr v3.16b, v3.16b, v5.16b + add v5.4s, v7.4s, v3.4s + eor v7.16b, v5.16b, v18.16b + zip1 v18.2d, v0.2d, v1.2d + ext v5.16b, v5.16b, v5.16b, #12 + zip2 v0.4s, v1.4s, v0.4s + ushr v17.4s, v7.4s, #8 + shl v7.4s, v7.4s, #24 + mov v18.s[3], v4.s[3] + orr v7.16b, v7.16b, v17.16b + ext v17.16b, v18.16b, v18.16b, #12 + add v6.4s, v7.4s, v6.4s + ext v7.16b, v7.16b, v7.16b, #8 + eor v19.16b, v6.16b, v3.16b + uzp1 v3.4s, v18.4s, v17.4s + ext v6.16b, v6.16b, v6.16b, #4 + ushr v17.4s, v19.4s, #7 + shl v18.4s, v19.4s, #25 + add v5.4s, v5.4s, v3.4s + orr v17.16b, v18.16b, v17.16b + add v5.4s, v5.4s, v17.4s + eor v7.16b, v7.16b, v5.16b + rev32 v7.8h, v7.8h + add v1.4s, v6.4s, v7.4s + zip1 v6.4s, v0.4s, v4.4s + zip1 v0.4s, v4.4s, v0.4s + eor v4.16b, v1.16b, v17.16b + ext v6.16b, v0.16b, v6.16b, #8 + ushr v0.4s, v4.4s, #12 + shl v4.4s, v4.4s, #20 + add v5.4s, v5.4s, v6.4s + zip1 v20.2d, v6.2d, v2.2d + orr v0.16b, v4.16b, v0.16b + mov v20.s[3], v3.s[3] + add v4.4s, v5.4s, v0.4s + eor v5.16b, v4.16b, v7.16b + ext v7.16b, v16.16b, v16.16b, #4 + ushr v16.4s, v5.4s, #8 + shl v5.4s, v5.4s, #24 + uzp1 v17.4s, v7.4s, v7.4s + orr v5.16b, v5.16b, v16.16b + ext v16.16b, v17.16b, v7.16b, #8 + add v1.4s, v5.4s, v1.4s + uzp2 v16.4s, v16.4s, v2.4s + zip2 v2.4s, v2.4s, v6.4s + eor v0.16b, v1.16b, v0.16b + add v4.4s, v4.4s, v16.4s + ext v1.16b, v1.16b, v1.16b, #12 + ext v16.16b, v16.16b, v16.16b, #4 + ushr v17.4s, v0.4s, #7 + shl v0.4s, v0.4s, #25 + ext v4.16b, v4.16b, v4.16b, #4 + orr v17.16b, v0.16b, v17.16b + ext v0.16b, v5.16b, v5.16b, #8 + ext v5.16b, v7.16b, v7.16b, #12 + add v4.4s, v4.4s, v17.4s + eor v0.16b, v0.16b, v4.16b + rev32 v18.8h, v0.8h + ext v0.16b, v7.16b, v5.16b, #12 + mov v5.16b, v6.16b + add v7.4s, v1.4s, v18.4s + rev64 v1.4s, v0.4s + mov v5.s[1], v3.s[2] + eor v17.16b, v7.16b, v17.16b + trn2 v1.4s, v1.4s, v5.4s + ushr v19.4s, v17.4s, #12 + shl v17.4s, v17.4s, #20 + add v4.4s, v4.4s, v1.4s + orr v17.16b, v17.16b, v19.16b 
+ add v19.4s, v4.4s, v17.4s + eor v4.16b, v19.16b, v18.16b + ext v19.16b, v19.16b, v19.16b, #12 + ushr v18.4s, v4.4s, #8 + shl v4.4s, v4.4s, #24 + orr v18.16b, v4.16b, v18.16b + ext v4.16b, v20.16b, v20.16b, #12 + add v7.4s, v18.4s, v7.4s + uzp1 v4.4s, v20.4s, v4.4s + ext v18.16b, v18.16b, v18.16b, #8 + eor v17.16b, v7.16b, v17.16b + add v19.4s, v19.4s, v4.4s + ext v7.16b, v7.16b, v7.16b, #4 + ushr v20.4s, v17.4s, #7 + shl v17.4s, v17.4s, #25 + orr v17.16b, v17.16b, v20.16b + add v19.4s, v19.4s, v17.4s + eor v18.16b, v18.16b, v19.16b + rev32 v18.8h, v18.8h + add v6.4s, v7.4s, v18.4s + zip1 v7.4s, v2.4s, v3.4s + zip1 v2.4s, v3.4s, v2.4s + eor v3.16b, v6.16b, v17.16b + ext v2.16b, v2.16b, v7.16b, #8 + ushr v7.4s, v3.4s, #12 + shl v3.4s, v3.4s, #20 + add v17.4s, v19.4s, v2.4s + zip1 v1.2d, v2.2d, v1.2d + zip2 v0.4s, v0.4s, v2.4s + orr v3.16b, v3.16b, v7.16b + mov v1.s[3], v4.s[3] + add v7.4s, v17.4s, v3.4s + eor v17.16b, v7.16b, v18.16b + ext v7.16b, v7.16b, v7.16b, #4 + ushr v18.4s, v17.4s, #8 + shl v17.4s, v17.4s, #24 + orr v17.16b, v17.16b, v18.16b + ext v18.16b, v16.16b, v16.16b, #8 + add v6.4s, v17.4s, v6.4s + uzp2 v5.4s, v18.4s, v5.4s + eor v3.16b, v6.16b, v3.16b + ext v5.16b, v5.16b, v18.16b, #4 + ext v6.16b, v6.16b, v6.16b, #12 + ushr v18.4s, v3.4s, #7 + shl v3.4s, v3.4s, #25 + add v5.4s, v7.4s, v5.4s + ext v7.16b, v17.16b, v17.16b, #8 + ext v17.16b, v16.16b, v16.16b, #12 + orr v3.16b, v3.16b, v18.16b + ext v16.16b, v16.16b, v17.16b, #12 + add v5.4s, v3.4s, v5.4s + mov v17.16b, v2.16b + rev64 v16.4s, v16.4s + eor v7.16b, v7.16b, v5.16b + mov v17.s[1], v4.s[2] + rev32 v7.8h, v7.8h + trn2 v16.4s, v16.4s, v17.4s + add v6.4s, v6.4s, v7.4s + add v5.4s, v5.4s, v16.4s + eor v3.16b, v6.16b, v3.16b + ushr v17.4s, v3.4s, #12 + shl v3.4s, v3.4s, #20 + orr v3.16b, v3.16b, v17.16b + add v5.4s, v5.4s, v3.4s + eor v7.16b, v5.16b, v7.16b + ext v5.16b, v5.16b, v5.16b, #12 + ushr v16.4s, v7.4s, #8 + shl v7.4s, v7.4s, #24 + orr v7.16b, v7.16b, v16.16b + ext v16.16b, v1.16b, 
v1.16b, #12 + add v6.4s, v7.4s, v6.4s + uzp1 v1.4s, v1.4s, v16.4s + eor v3.16b, v6.16b, v3.16b + add v1.4s, v5.4s, v1.4s + ext v5.16b, v7.16b, v7.16b, #8 + ext v6.16b, v6.16b, v6.16b, #4 + ushr v16.4s, v3.4s, #7 + shl v3.4s, v3.4s, #25 + orr v3.16b, v3.16b, v16.16b + add v1.4s, v1.4s, v3.4s + eor v5.16b, v5.16b, v1.16b + rev32 v5.8h, v5.8h + add v2.4s, v6.4s, v5.4s + zip1 v6.4s, v0.4s, v4.4s + zip1 v0.4s, v4.4s, v0.4s + eor v3.16b, v2.16b, v3.16b + ext v0.16b, v0.16b, v6.16b, #8 + ushr v4.4s, v3.4s, #12 + shl v3.4s, v3.4s, #20 + add v0.4s, v1.4s, v0.4s + orr v1.16b, v3.16b, v4.16b + add v0.4s, v0.4s, v1.4s + eor v3.16b, v0.16b, v5.16b + ext v0.16b, v0.16b, v0.16b, #4 + ushr v4.4s, v3.4s, #8 + shl v3.4s, v3.4s, #24 + orr v3.16b, v3.16b, v4.16b + add v2.4s, v3.4s, v2.4s + ext v3.16b, v3.16b, v3.16b, #8 + eor v1.16b, v2.16b, v1.16b + ext v2.16b, v2.16b, v2.16b, #12 + ushr v4.4s, v1.4s, #7 + shl v1.4s, v1.4s, #25 + stp q2, q3, [x0, #32] + orr v1.16b, v1.16b, v4.16b + stp q0, q1, [x0] + ret +.Lfunc_end1: + .size compress_pre, .Lfunc_end1-compress_pre + .cfi_endproc + + .globl zfs_blake3_compress_xof_sse2 + .p2align 2 + .type zfs_blake3_compress_xof_sse2,@function +zfs_blake3_compress_xof_sse2: + .cfi_startproc + hint #25 + CFI_NEGATE_RA_STATE + sub sp, sp, #96 + stp x29, x30, [sp, #64] + add x29, sp, #64 + stp x20, x19, [sp, #80] + .cfi_def_cfa w29, 32 + .cfi_offset w19, -8 + .cfi_offset w20, -16 + .cfi_offset w30, -24 + .cfi_offset w29, -32 + mov x20, x0 + mov x19, x5 + mov w5, w4 + mov x4, x3 + mov w3, w2 + mov x2, x1 + mov x0, sp + mov x1, x20 + bl compress_pre + ldp q0, q1, [sp] + ldp q2, q3, [sp, #32] + eor v0.16b, v2.16b, v0.16b + eor v1.16b, v3.16b, v1.16b + ldp x29, x30, [sp, #64] + stp q0, q1, [x19] + ldr q0, [x20] + eor v0.16b, v0.16b, v2.16b + str q0, [x19, #32] + ldr q0, [x20, #16] + eor v0.16b, v0.16b, v3.16b + str q0, [x19, #48] + ldp x20, x19, [sp, #80] + add sp, sp, #96 + hint #29 + ret +.Lfunc_end2: + .size zfs_blake3_compress_xof_sse2, 
.Lfunc_end2-zfs_blake3_compress_xof_sse2 + .cfi_endproc + + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4 +.LCPI3_0: + .word 0 + .word 1 + .word 2 + .word 3 + .text + .globl zfs_blake3_hash_many_sse2 + .p2align 2 + .type zfs_blake3_hash_many_sse2,@function +zfs_blake3_hash_many_sse2: + .cfi_startproc + hint #25 + CFI_NEGATE_RA_STATE + stp d15, d14, [sp, #-160]! + stp d13, d12, [sp, #16] + stp d11, d10, [sp, #32] + stp d9, d8, [sp, #48] + stp x29, x30, [sp, #64] + add x29, sp, #64 + stp x28, x27, [sp, #80] + stp x26, x25, [sp, #96] + stp x24, x23, [sp, #112] + stp x22, x21, [sp, #128] + stp x20, x19, [sp, #144] + sub sp, sp, #464 + .cfi_def_cfa w29, 96 + .cfi_offset w19, -8 + .cfi_offset w20, -16 + .cfi_offset w21, -24 + .cfi_offset w22, -32 + .cfi_offset w23, -40 + .cfi_offset w24, -48 + .cfi_offset w25, -56 + .cfi_offset w26, -64 + .cfi_offset w27, -72 + .cfi_offset w28, -80 + .cfi_offset w30, -88 + .cfi_offset w29, -96 + .cfi_offset b8, -104 + .cfi_offset b9, -112 + .cfi_offset b10, -120 + .cfi_offset b11, -128 + .cfi_offset b12, -136 + .cfi_offset b13, -144 + .cfi_offset b14, -152 + .cfi_offset b15, -160 + mov w19, w6 + mov x20, x4 + mov x24, x1 + ldr x26, [x29, #104] + ldrb w27, [x29, #96] + cmp x1, #4 + str x3, [sp, #40] + b.lo .LBB3_6 + adrp x8, .LCPI3_0 + sbfx w9, w5, #0, #1 + mov w10, #44677 + mov w11, #62322 + movk w10, #47975, lsl #16 + movk w11, #15470, lsl #16 + ldr q0, [x8, :lo12:.LCPI3_0] + dup v1.4s, w9 + mov w9, #58983 + orr w8, w7, w19 + movk w9, #27145, lsl #16 + and v0.16b, v1.16b, v0.16b + dup v1.4s, w11 + movi v24.4s, #64 + dup v2.4s, w9 + mov w9, #62778 + movk w9, #42319, lsl #16 + str q0, [sp, #16] + orr v0.4s, #128, lsl #24 + stp q2, q1, [sp, #48] + str q0, [sp] + dup v0.4s, w10 + str q0, [sp, #80] + b .LBB3_3 +.LBB3_2: + zip1 v0.4s, v12.4s, v31.4s + add x10, x20, #4 + zip1 v1.4s, v29.4s, v30.4s + tst w5, #0x1 + zip1 v2.4s, v28.4s, v23.4s + csel x20, x10, x20, ne + zip1 v3.4s, v13.4s, v25.4s + add x0, x0, #32 + zip2 v6.4s, v12.4s, 
v31.4s + sub x24, x24, #4 + zip1 v4.2d, v0.2d, v1.2d + cmp x24, #3 + zip2 v7.4s, v29.4s, v30.4s + zip1 v5.2d, v2.2d, v3.2d + zip2 v0.2d, v0.2d, v1.2d + zip2 v1.2d, v2.2d, v3.2d + zip2 v2.4s, v28.4s, v23.4s + zip2 v3.4s, v13.4s, v25.4s + stp q4, q5, [x26] + zip2 v4.2d, v6.2d, v7.2d + stp q0, q1, [x26, #32] + zip1 v0.2d, v6.2d, v7.2d + zip1 v1.2d, v2.2d, v3.2d + zip2 v2.2d, v2.2d, v3.2d + stp q0, q1, [x26, #64] + stp q4, q2, [x26, #96] + add x26, x26, #128 + b.ls .LBB3_6 +.LBB3_3: + ldr x14, [sp, #40] + mov x10, x14 + add x11, x14, #8 + add x12, x14, #12 + add x13, x14, #16 + ld1r { v12.4s }, [x10], #4 + ld1r { v29.4s }, [x11] + add x11, x14, #20 + ld1r { v30.4s }, [x12] + add x12, x14, #24 + ld1r { v28.4s }, [x13] + ld1r { v23.4s }, [x11] + add x11, x14, #28 + ld1r { v13.4s }, [x12] + ld1r { v31.4s }, [x10] + ld1r { v25.4s }, [x11] + cbz x2, .LBB3_2 + ldr q1, [sp, #16] + dup v0.4s, w20 + lsr x12, x20, #32 + mov x10, xzr + ldp x13, x14, [x0, #16] + add v1.4s, v0.4s, v1.4s + mov x15, x2 + movi v0.4s, #128, lsl #24 + mov w4, w8 + str q1, [sp, #112] + eor v0.16b, v1.16b, v0.16b + ldr q1, [sp] + cmgt v0.4s, v1.4s, v0.4s + dup v1.4s, w12 + ldp x11, x12, [x0] + sub v0.4s, v1.4s, v0.4s + str q0, [sp, #96] +.LBB3_5: + add x17, x11, x10 + add x21, x12, x10 + add x16, x13, x10 + add x6, x14, x10 + subs x15, x15, #1 + add x10, x10, #64 + ldp q0, q1, [x17] + csel w3, w27, wzr, eq + orr w3, w3, w4 + mov w4, w19 + and w3, w3, #0xff + ldp q3, q6, [x21] + dup v2.4s, w3 + zip1 v21.4s, v0.4s, v3.4s + zip2 v19.4s, v0.4s, v3.4s + ldp q5, q7, [x16] + zip1 v17.4s, v1.4s, v6.4s + zip2 v22.4s, v1.4s, v6.4s + ldp q16, q18, [x6] + zip1 v4.4s, v5.4s, v16.4s + zip2 v0.4s, v5.4s, v16.4s + ldp q26, q27, [x17, #32] + zip1 v1.4s, v7.4s, v18.4s + zip2 v3.4s, v7.4s, v18.4s + zip2 v20.2d, v19.2d, v0.2d + mov v19.d[1], v0.d[0] + dup v18.4s, w9 + ldp q8, q9, [x21, #32] + stur q19, [x29, #-208] + zip2 v7.4s, v26.4s, v8.4s + zip1 v10.4s, v26.4s, v8.4s + ldp q11, q5, [x16, #32] + zip2 v26.2d, v17.2d, v1.2d 
+ stp q7, q26, [sp, #192] + mov v17.d[1], v1.d[0] + add v1.4s, v23.4s, v31.4s + ldp q16, q6, [x6, #32] + stur q17, [x29, #-256] + add v1.4s, v1.4s, v19.4s + zip1 v8.4s, v11.4s, v16.4s + zip2 v7.4s, v11.4s, v16.4s + zip1 v11.4s, v27.4s, v9.4s + zip2 v9.4s, v27.4s, v9.4s + zip2 v27.2d, v21.2d, v4.2d + mov v21.d[1], v4.d[0] + str q7, [sp, #224] + add v4.4s, v28.4s, v12.4s + zip1 v15.4s, v5.4s, v6.4s + zip2 v14.4s, v5.4s, v6.4s + stur q27, [x29, #-192] + zip2 v16.2d, v22.2d, v3.2d + stp q20, q21, [x29, #-240] + add v0.4s, v4.4s, v21.4s + ldp q6, q4, [sp, #96] + mov v22.d[1], v3.d[0] + add v5.4s, v25.4s, v30.4s + add v3.4s, v13.4s, v29.4s + eor v6.16b, v1.16b, v6.16b + add v1.4s, v1.4s, v20.4s + str q22, [sp, #256] + eor v4.16b, v0.16b, v4.16b + add v5.4s, v5.4s, v22.4s + add v3.4s, v3.4s, v17.4s + ldr q17, [sp, #48] + rev32 v6.8h, v6.8h + rev32 v4.8h, v4.8h + eor v2.16b, v5.16b, v2.16b + eor v7.16b, v3.16b, v24.16b + add v0.4s, v0.4s, v27.4s + add v21.4s, v4.4s, v17.4s + rev32 v31.8h, v2.8h + ldr q2, [sp, #80] + rev32 v7.8h, v7.8h + mov v27.16b, v16.16b + eor v17.16b, v21.16b, v28.16b + add v29.4s, v6.4s, v2.4s + ldr q2, [sp, #64] + add v24.4s, v31.4s, v18.4s + str q27, [sp, #176] + ushr v19.4s, v17.4s, #12 + shl v17.4s, v17.4s, #20 + add v30.4s, v7.4s, v2.4s + eor v18.16b, v29.16b, v23.16b + orr v12.16b, v17.16b, v19.16b + eor v17.16b, v30.16b, v13.16b + eor v19.16b, v24.16b, v25.16b + ushr v23.4s, v18.4s, #12 + shl v18.4s, v18.4s, #20 + ushr v25.4s, v17.4s, #12 + shl v17.4s, v17.4s, #20 + ushr v28.4s, v19.4s, #12 + shl v19.4s, v19.4s, #20 + orr v13.16b, v18.16b, v23.16b + orr v25.16b, v17.16b, v25.16b + orr v2.16b, v19.16b, v28.16b + add v28.4s, v0.4s, v12.4s + add v0.4s, v3.4s, v26.4s + add v18.4s, v1.4s, v13.4s + add v3.4s, v5.4s, v16.4s + eor v1.16b, v28.16b, v4.16b + add v17.4s, v0.4s, v25.4s + eor v0.16b, v18.16b, v6.16b + add v19.4s, v3.4s, v2.4s + ushr v16.4s, v1.4s, #8 + shl v3.4s, v1.4s, #24 + eor v4.16b, v17.16b, v7.16b + ushr v6.4s, v0.4s, #8 + shl v1.4s, 
v0.4s, #24 + eor v5.16b, v19.16b, v31.16b + ushr v23.4s, v4.4s, #8 + shl v4.4s, v4.4s, #24 + orr v7.16b, v3.16b, v16.16b + orr v6.16b, v1.16b, v6.16b + ushr v31.4s, v5.4s, #8 + shl v0.4s, v5.4s, #24 + orr v5.16b, v4.16b, v23.16b + add v4.4s, v7.4s, v21.4s + ldr q21, [sp, #192] + add v3.4s, v6.4s, v29.4s + orr v31.16b, v0.16b, v31.16b + add v23.4s, v5.4s, v30.4s + eor v0.16b, v4.16b, v12.16b + eor v1.16b, v3.16b, v13.16b + add v16.4s, v31.4s, v24.4s + eor v20.16b, v23.16b, v25.16b + ushr v24.4s, v0.4s, #7 + shl v0.4s, v0.4s, #25 + ushr v29.4s, v1.4s, #7 + shl v1.4s, v1.4s, #25 + ushr v30.4s, v20.4s, #7 + shl v20.4s, v20.4s, #25 + orr v25.16b, v0.16b, v24.16b + orr v0.16b, v1.16b, v29.16b + mov v29.16b, v10.16b + orr v1.16b, v20.16b, v30.16b + mov v20.16b, v10.16b + mov v24.16b, v21.16b + ldr q20, [sp, #224] + mov v29.d[1], v8.d[0] + mov v13.16b, v9.16b + zip2 v30.2d, v10.2d, v8.2d + zip2 v8.2d, v21.2d, v20.2d + mov v26.16b, v11.16b + mov v24.d[1], v20.d[0] + add v20.4s, v28.4s, v29.4s + mov v13.d[1], v14.d[0] + str q8, [sp, #128] + eor v2.16b, v16.16b, v2.16b + mov v26.d[1], v15.d[0] + str q24, [sp, #192] + add v20.4s, v20.4s, v0.4s + add v19.4s, v19.4s, v13.4s + ushr v12.4s, v2.4s, #7 + shl v2.4s, v2.4s, #25 + zip2 v10.2d, v9.2d, v14.2d + add v18.4s, v18.4s, v24.4s + add v17.4s, v17.4s, v26.4s + mov v14.16b, v26.16b + eor v26.16b, v20.16b, v31.16b + stp q10, q30, [sp, #224] + add v19.4s, v19.4s, v25.4s + orr v2.16b, v2.16b, v12.16b + add v18.4s, v18.4s, v1.4s + rev32 v26.8h, v26.8h + eor v5.16b, v19.16b, v5.16b + add v17.4s, v17.4s, v2.4s + eor v7.16b, v18.16b, v7.16b + add v23.4s, v23.4s, v26.4s + rev32 v5.8h, v5.8h + eor v6.16b, v17.16b, v6.16b + rev32 v7.8h, v7.8h + eor v0.16b, v23.16b, v0.16b + add v3.4s, v3.4s, v5.4s + rev32 v6.8h, v6.8h + add v16.4s, v16.4s, v7.4s + ushr v31.4s, v0.4s, #12 + shl v0.4s, v0.4s, #20 + eor v25.16b, v3.16b, v25.16b + add v4.4s, v4.4s, v6.4s + eor v1.16b, v16.16b, v1.16b + orr v0.16b, v0.16b, v31.16b + ushr v31.4s, v25.4s, #12 + 
shl v25.4s, v25.4s, #20 + add v20.4s, v20.4s, v30.4s + zip2 v21.2d, v11.2d, v15.2d + ushr v11.4s, v1.4s, #12 + shl v1.4s, v1.4s, #20 + eor v2.16b, v4.16b, v2.16b + orr v25.16b, v25.16b, v31.16b + add v19.4s, v19.4s, v10.4s + add v20.4s, v20.4s, v0.4s + orr v1.16b, v1.16b, v11.16b + ushr v11.4s, v2.4s, #12 + shl v2.4s, v2.4s, #20 + add v18.4s, v18.4s, v8.4s + add v19.4s, v19.4s, v25.4s + eor v26.16b, v20.16b, v26.16b + orr v2.16b, v2.16b, v11.16b + add v17.4s, v17.4s, v21.4s + add v18.4s, v18.4s, v1.4s + eor v5.16b, v19.16b, v5.16b + ushr v31.4s, v26.4s, #8 + shl v26.4s, v26.4s, #24 + add v17.4s, v17.4s, v2.4s + ushr v11.4s, v5.4s, #8 + shl v5.4s, v5.4s, #24 + eor v7.16b, v18.16b, v7.16b + orr v26.16b, v26.16b, v31.16b + eor v6.16b, v17.16b, v6.16b + orr v5.16b, v5.16b, v11.16b + ushr v31.4s, v7.4s, #8 + shl v7.4s, v7.4s, #24 + add v23.4s, v26.4s, v23.4s + ushr v11.4s, v6.4s, #8 + shl v6.4s, v6.4s, #24 + orr v7.16b, v7.16b, v31.16b + add v3.4s, v5.4s, v3.4s + eor v0.16b, v23.16b, v0.16b + ldp q28, q12, [x29, #-256] + orr v6.16b, v6.16b, v11.16b + add v16.4s, v7.4s, v16.4s + eor v25.16b, v3.16b, v25.16b + ushr v31.4s, v0.4s, #7 + shl v0.4s, v0.4s, #25 + add v4.4s, v6.4s, v4.4s + ushr v11.4s, v25.4s, #7 + shl v25.4s, v25.4s, #25 + eor v1.16b, v16.16b, v1.16b + orr v0.16b, v0.16b, v31.16b + add v18.4s, v18.4s, v12.4s + mov v15.16b, v29.16b + ldur q29, [x29, #-208] + eor v2.16b, v4.16b, v2.16b + orr v25.16b, v25.16b, v11.16b + ushr v31.4s, v1.4s, #7 + shl v1.4s, v1.4s, #25 + str q15, [sp, #160] + add v20.4s, v20.4s, v29.4s + add v18.4s, v18.4s, v0.4s + ushr v11.4s, v2.4s, #7 + shl v2.4s, v2.4s, #25 + orr v1.16b, v1.16b, v31.16b + add v20.4s, v20.4s, v25.4s + add v17.4s, v17.4s, v27.4s + eor v6.16b, v6.16b, v18.16b + orr v2.16b, v2.16b, v11.16b + add v19.4s, v19.4s, v28.4s + eor v7.16b, v7.16b, v20.16b + add v17.4s, v17.4s, v1.4s + rev32 v6.8h, v6.8h + add v19.4s, v19.4s, v2.4s + rev32 v7.8h, v7.8h + eor v5.16b, v17.16b, v5.16b + add v3.4s, v3.4s, v6.4s + eor v26.16b, 
v19.16b, v26.16b + add v4.4s, v4.4s, v7.4s + rev32 v5.8h, v5.8h + eor v0.16b, v3.16b, v0.16b + rev32 v26.8h, v26.8h + eor v25.16b, v4.16b, v25.16b + add v23.4s, v23.4s, v5.4s + ushr v11.4s, v0.4s, #12 + shl v0.4s, v0.4s, #20 + add v16.4s, v16.4s, v26.4s + ushr v31.4s, v25.4s, #12 + shl v25.4s, v25.4s, #20 + eor v1.16b, v23.16b, v1.16b + orr v0.16b, v0.16b, v11.16b + add v18.4s, v18.4s, v24.4s + orr v25.16b, v25.16b, v31.16b + eor v2.16b, v16.16b, v2.16b + ushr v31.4s, v1.4s, #12 + shl v1.4s, v1.4s, #20 + add v20.4s, v20.4s, v22.4s + add v18.4s, v18.4s, v0.4s + mov v9.16b, v30.16b + mov v30.16b, v21.16b + ldur q21, [x29, #-224] + ushr v11.4s, v2.4s, #12 + shl v2.4s, v2.4s, #20 + orr v1.16b, v1.16b, v31.16b + add v20.4s, v20.4s, v25.4s + str q30, [sp, #144] + add v17.4s, v17.4s, v21.4s + ldur q21, [x29, #-192] + eor v6.16b, v18.16b, v6.16b + orr v2.16b, v2.16b, v11.16b + add v19.4s, v19.4s, v30.4s + eor v7.16b, v20.16b, v7.16b + add v17.4s, v17.4s, v1.4s + ushr v11.4s, v6.4s, #8 + shl v6.4s, v6.4s, #24 + add v19.4s, v19.4s, v2.4s + ushr v31.4s, v7.4s, #8 + shl v7.4s, v7.4s, #24 + eor v5.16b, v17.16b, v5.16b + orr v6.16b, v6.16b, v11.16b + eor v26.16b, v19.16b, v26.16b + orr v7.16b, v7.16b, v31.16b + ushr v31.4s, v5.4s, #8 + shl v5.4s, v5.4s, #24 + add v3.4s, v6.4s, v3.4s + ushr v11.4s, v26.4s, #8 + shl v26.4s, v26.4s, #24 + add v4.4s, v7.4s, v4.4s + orr v5.16b, v5.16b, v31.16b + eor v0.16b, v3.16b, v0.16b + orr v26.16b, v26.16b, v11.16b + eor v25.16b, v4.16b, v25.16b + add v23.4s, v5.4s, v23.4s + ushr v11.4s, v0.4s, #7 + shl v0.4s, v0.4s, #25 + add v16.4s, v26.4s, v16.4s + ushr v31.4s, v25.4s, #7 + shl v25.4s, v25.4s, #25 + eor v1.16b, v23.16b, v1.16b + orr v0.16b, v0.16b, v11.16b + add v20.4s, v20.4s, v21.4s + orr v25.16b, v25.16b, v31.16b + eor v2.16b, v16.16b, v2.16b + ushr v31.4s, v1.4s, #7 + shl v1.4s, v1.4s, #25 + add v20.4s, v20.4s, v0.4s + add v19.4s, v19.4s, v10.4s + ushr v11.4s, v2.4s, #7 + shl v2.4s, v2.4s, #25 + orr v1.16b, v1.16b, v31.16b + add v18.4s, 
v18.4s, v14.4s + eor v26.16b, v20.16b, v26.16b + add v19.4s, v19.4s, v25.4s + orr v2.16b, v2.16b, v11.16b + add v17.4s, v17.4s, v9.4s + ldr q9, [sp, #208] + add v18.4s, v18.4s, v1.4s + rev32 v26.8h, v26.8h + eor v5.16b, v19.16b, v5.16b + add v17.4s, v17.4s, v2.4s + eor v7.16b, v18.16b, v7.16b + add v23.4s, v23.4s, v26.4s + rev32 v5.8h, v5.8h + eor v6.16b, v17.16b, v6.16b + rev32 v7.8h, v7.8h + eor v0.16b, v23.16b, v0.16b + add v3.4s, v3.4s, v5.4s + rev32 v6.8h, v6.8h + add v16.4s, v16.4s, v7.4s + ushr v31.4s, v0.4s, #12 + shl v0.4s, v0.4s, #20 + eor v25.16b, v3.16b, v25.16b + add v4.4s, v4.4s, v6.4s + eor v1.16b, v16.16b, v1.16b + orr v0.16b, v0.16b, v31.16b + ushr v31.4s, v25.4s, #12 + shl v25.4s, v25.4s, #20 + add v20.4s, v20.4s, v8.4s + ushr v11.4s, v1.4s, #12 + shl v1.4s, v1.4s, #20 + eor v2.16b, v4.16b, v2.16b + orr v25.16b, v25.16b, v31.16b + add v19.4s, v19.4s, v15.4s + add v20.4s, v20.4s, v0.4s + orr v1.16b, v1.16b, v11.16b + ushr v11.4s, v2.4s, #12 + shl v2.4s, v2.4s, #20 + add v18.4s, v18.4s, v9.4s + add v19.4s, v19.4s, v25.4s + eor v26.16b, v20.16b, v26.16b + orr v2.16b, v2.16b, v11.16b + add v17.4s, v17.4s, v13.4s + add v18.4s, v18.4s, v1.4s + eor v5.16b, v19.16b, v5.16b + ushr v31.4s, v26.4s, #8 + shl v26.4s, v26.4s, #24 + add v17.4s, v17.4s, v2.4s + ushr v11.4s, v5.4s, #8 + shl v5.4s, v5.4s, #24 + eor v7.16b, v18.16b, v7.16b + orr v26.16b, v26.16b, v31.16b + eor v6.16b, v17.16b, v6.16b + orr v5.16b, v5.16b, v11.16b + ushr v31.4s, v7.4s, #8 + shl v7.4s, v7.4s, #24 + add v23.4s, v26.4s, v23.4s + ushr v11.4s, v6.4s, #8 + shl v6.4s, v6.4s, #24 + orr v7.16b, v7.16b, v31.16b + add v3.4s, v5.4s, v3.4s + eor v0.16b, v23.16b, v0.16b + orr v6.16b, v6.16b, v11.16b + add v16.4s, v7.4s, v16.4s + eor v25.16b, v3.16b, v25.16b + ushr v31.4s, v0.4s, #7 + shl v0.4s, v0.4s, #25 + add v4.4s, v6.4s, v4.4s + ushr v11.4s, v25.4s, #7 + shl v25.4s, v25.4s, #25 + eor v1.16b, v16.16b, v1.16b + orr v0.16b, v0.16b, v31.16b + add v18.4s, v18.4s, v24.4s + eor v2.16b, v4.16b, v2.16b 
+ orr v25.16b, v25.16b, v11.16b + ushr v31.4s, v1.4s, #7 + shl v1.4s, v1.4s, #25 + add v20.4s, v20.4s, v12.4s + add v18.4s, v18.4s, v0.4s + ushr v11.4s, v2.4s, #7 + shl v2.4s, v2.4s, #25 + orr v1.16b, v1.16b, v31.16b + add v20.4s, v20.4s, v25.4s + add v17.4s, v17.4s, v30.4s + eor v6.16b, v6.16b, v18.16b + orr v2.16b, v2.16b, v11.16b + add v19.4s, v19.4s, v27.4s + eor v7.16b, v7.16b, v20.16b + add v17.4s, v17.4s, v1.4s + rev32 v6.8h, v6.8h + add v19.4s, v19.4s, v2.4s + rev32 v7.8h, v7.8h + eor v5.16b, v17.16b, v5.16b + add v3.4s, v3.4s, v6.4s + eor v26.16b, v19.16b, v26.16b + add v4.4s, v4.4s, v7.4s + rev32 v5.8h, v5.8h + eor v0.16b, v3.16b, v0.16b + rev32 v26.8h, v26.8h + eor v25.16b, v4.16b, v25.16b + add v23.4s, v23.4s, v5.4s + ushr v11.4s, v0.4s, #12 + shl v0.4s, v0.4s, #20 + add v16.4s, v16.4s, v26.4s + ushr v31.4s, v25.4s, #12 + shl v25.4s, v25.4s, #20 + eor v1.16b, v23.16b, v1.16b + orr v0.16b, v0.16b, v11.16b + add v18.4s, v18.4s, v14.4s + orr v25.16b, v25.16b, v31.16b + eor v2.16b, v16.16b, v2.16b + ushr v31.4s, v1.4s, #12 + shl v1.4s, v1.4s, #20 + add v20.4s, v20.4s, v28.4s + add v18.4s, v18.4s, v0.4s + mov v10.16b, v13.16b + ushr v11.4s, v2.4s, #12 + shl v2.4s, v2.4s, #20 + orr v1.16b, v1.16b, v31.16b + add v20.4s, v20.4s, v25.4s + add v17.4s, v17.4s, v29.4s + eor v6.16b, v18.16b, v6.16b + orr v2.16b, v2.16b, v11.16b + add v19.4s, v19.4s, v10.4s + eor v7.16b, v20.16b, v7.16b + add v17.4s, v17.4s, v1.4s + ushr v11.4s, v6.4s, #8 + shl v6.4s, v6.4s, #24 + add v19.4s, v19.4s, v2.4s + ushr v31.4s, v7.4s, #8 + shl v7.4s, v7.4s, #24 + eor v5.16b, v17.16b, v5.16b + orr v6.16b, v6.16b, v11.16b + eor v26.16b, v19.16b, v26.16b + orr v7.16b, v7.16b, v31.16b + ushr v31.4s, v5.4s, #8 + shl v5.4s, v5.4s, #24 + add v3.4s, v6.4s, v3.4s + ushr v11.4s, v26.4s, #8 + shl v26.4s, v26.4s, #24 + add v4.4s, v7.4s, v4.4s + orr v5.16b, v5.16b, v31.16b + eor v0.16b, v3.16b, v0.16b + mov v22.16b, v8.16b + ldp q8, q28, [sp, #240] + orr v26.16b, v26.16b, v11.16b + eor v25.16b, v4.16b, 
v25.16b + add v23.4s, v5.4s, v23.4s + ushr v11.4s, v0.4s, #7 + shl v0.4s, v0.4s, #25 + add v16.4s, v26.4s, v16.4s + ushr v31.4s, v25.4s, #7 + shl v25.4s, v25.4s, #25 + eor v1.16b, v23.16b, v1.16b + orr v0.16b, v0.16b, v11.16b + add v20.4s, v20.4s, v28.4s + orr v25.16b, v25.16b, v31.16b + eor v2.16b, v16.16b, v2.16b + ushr v31.4s, v1.4s, #7 + shl v1.4s, v1.4s, #25 + add v20.4s, v20.4s, v0.4s + add v19.4s, v19.4s, v15.4s + ushr v11.4s, v2.4s, #7 + shl v2.4s, v2.4s, #25 + orr v1.16b, v1.16b, v31.16b + add v18.4s, v18.4s, v8.4s + eor v26.16b, v20.16b, v26.16b + add v19.4s, v19.4s, v25.4s + orr v2.16b, v2.16b, v11.16b + add v17.4s, v17.4s, v22.4s + ldur q22, [x29, #-256] + add v18.4s, v18.4s, v1.4s + rev32 v26.8h, v26.8h + eor v5.16b, v19.16b, v5.16b + add v17.4s, v17.4s, v2.4s + eor v7.16b, v18.16b, v7.16b + add v23.4s, v23.4s, v26.4s + rev32 v5.8h, v5.8h + eor v6.16b, v17.16b, v6.16b + rev32 v7.8h, v7.8h + eor v0.16b, v23.16b, v0.16b + add v3.4s, v3.4s, v5.4s + rev32 v6.8h, v6.8h + add v16.4s, v16.4s, v7.4s + ushr v31.4s, v0.4s, #12 + shl v0.4s, v0.4s, #20 + eor v25.16b, v3.16b, v25.16b + add v4.4s, v4.4s, v6.4s + eor v1.16b, v16.16b, v1.16b + orr v0.16b, v0.16b, v31.16b + ushr v31.4s, v25.4s, #12 + shl v25.4s, v25.4s, #20 + add v20.4s, v20.4s, v9.4s + mov v13.16b, v12.16b + mov v12.16b, v27.16b + mov v27.16b, v9.16b + ldur q9, [x29, #-192] + mov v21.16b, v15.16b + ldr q15, [sp, #224] + ushr v11.4s, v1.4s, #12 + ldur q21, [x29, #-224] + shl v1.4s, v1.4s, #20 + eor v2.16b, v4.16b, v2.16b + orr v25.16b, v25.16b, v31.16b + add v19.4s, v19.4s, v9.4s + add v20.4s, v20.4s, v0.4s + orr v1.16b, v1.16b, v11.16b + ushr v11.4s, v2.4s, #12 + shl v2.4s, v2.4s, #20 + add v18.4s, v18.4s, v21.4s + add v19.4s, v19.4s, v25.4s + eor v26.16b, v20.16b, v26.16b + orr v2.16b, v2.16b, v11.16b + add v17.4s, v17.4s, v15.4s + add v18.4s, v18.4s, v1.4s + eor v5.16b, v19.16b, v5.16b + ushr v31.4s, v26.4s, #8 + shl v26.4s, v26.4s, #24 + add v17.4s, v17.4s, v2.4s + ushr v11.4s, v5.4s, #8 + shl 
v5.4s, v5.4s, #24 + eor v7.16b, v18.16b, v7.16b + orr v26.16b, v26.16b, v31.16b + eor v6.16b, v17.16b, v6.16b + orr v5.16b, v5.16b, v11.16b + ushr v31.4s, v7.4s, #8 + shl v7.4s, v7.4s, #24 + add v23.4s, v26.4s, v23.4s + ushr v11.4s, v6.4s, #8 + shl v6.4s, v6.4s, #24 + orr v7.16b, v7.16b, v31.16b + add v3.4s, v5.4s, v3.4s + eor v0.16b, v23.16b, v0.16b + orr v6.16b, v6.16b, v11.16b + add v16.4s, v7.4s, v16.4s + eor v25.16b, v3.16b, v25.16b + ushr v31.4s, v0.4s, #7 + shl v0.4s, v0.4s, #25 + add v4.4s, v6.4s, v4.4s + ushr v11.4s, v25.4s, #7 + shl v25.4s, v25.4s, #25 + eor v1.16b, v16.16b, v1.16b + orr v0.16b, v0.16b, v31.16b + add v18.4s, v18.4s, v14.4s + eor v2.16b, v4.16b, v2.16b + orr v25.16b, v25.16b, v11.16b + ushr v31.4s, v1.4s, #7 + shl v1.4s, v1.4s, #25 + add v20.4s, v20.4s, v24.4s + add v18.4s, v18.4s, v0.4s + ushr v11.4s, v2.4s, #7 + shl v2.4s, v2.4s, #25 + orr v1.16b, v1.16b, v31.16b + add v20.4s, v20.4s, v25.4s + add v17.4s, v17.4s, v10.4s + eor v6.16b, v6.16b, v18.16b + orr v2.16b, v2.16b, v11.16b + add v19.4s, v19.4s, v30.4s + eor v7.16b, v7.16b, v20.16b + add v17.4s, v17.4s, v1.4s + rev32 v6.8h, v6.8h + add v19.4s, v19.4s, v2.4s + rev32 v7.8h, v7.8h + eor v5.16b, v17.16b, v5.16b + add v3.4s, v3.4s, v6.4s + eor v26.16b, v19.16b, v26.16b + add v4.4s, v4.4s, v7.4s + rev32 v5.8h, v5.8h + eor v0.16b, v3.16b, v0.16b + rev32 v26.8h, v26.8h + eor v25.16b, v4.16b, v25.16b + add v23.4s, v23.4s, v5.4s + ushr v11.4s, v0.4s, #12 + shl v0.4s, v0.4s, #20 + add v16.4s, v16.4s, v26.4s + ushr v31.4s, v25.4s, #12 + shl v25.4s, v25.4s, #20 + eor v1.16b, v23.16b, v1.16b + orr v0.16b, v0.16b, v11.16b + add v18.4s, v18.4s, v8.4s + orr v25.16b, v25.16b, v31.16b + eor v2.16b, v16.16b, v2.16b + ushr v31.4s, v1.4s, #12 + shl v1.4s, v1.4s, #20 + add v20.4s, v20.4s, v12.4s + add v18.4s, v18.4s, v0.4s + ushr v11.4s, v2.4s, #12 + shl v2.4s, v2.4s, #20 + orr v1.16b, v1.16b, v31.16b + add v20.4s, v20.4s, v25.4s + add v17.4s, v17.4s, v13.4s + ldr q13, [sp, #160] + eor v6.16b, v18.16b, 
v6.16b + orr v2.16b, v2.16b, v11.16b + add v19.4s, v19.4s, v15.4s + eor v7.16b, v20.16b, v7.16b + add v17.4s, v17.4s, v1.4s + ushr v11.4s, v6.4s, #8 + shl v6.4s, v6.4s, #24 + add v19.4s, v19.4s, v2.4s + ushr v31.4s, v7.4s, #8 + shl v7.4s, v7.4s, #24 + eor v5.16b, v17.16b, v5.16b + orr v6.16b, v6.16b, v11.16b + eor v26.16b, v19.16b, v26.16b + orr v7.16b, v7.16b, v31.16b + ushr v31.4s, v5.4s, #8 + shl v5.4s, v5.4s, #24 + add v3.4s, v6.4s, v3.4s + ushr v11.4s, v26.4s, #8 + shl v26.4s, v26.4s, #24 + add v4.4s, v7.4s, v4.4s + orr v5.16b, v5.16b, v31.16b + eor v0.16b, v3.16b, v0.16b + orr v26.16b, v26.16b, v11.16b + eor v25.16b, v4.16b, v25.16b + add v23.4s, v5.4s, v23.4s + ushr v11.4s, v0.4s, #7 + shl v0.4s, v0.4s, #25 + add v16.4s, v26.4s, v16.4s + ushr v31.4s, v25.4s, #7 + shl v25.4s, v25.4s, #25 + eor v1.16b, v23.16b, v1.16b + orr v0.16b, v0.16b, v11.16b + add v20.4s, v20.4s, v22.4s + orr v25.16b, v25.16b, v31.16b + eor v2.16b, v16.16b, v2.16b + ushr v31.4s, v1.4s, #7 + shl v1.4s, v1.4s, #25 + add v20.4s, v20.4s, v0.4s + add v19.4s, v19.4s, v9.4s + mov v29.16b, v14.16b + ldr q14, [sp, #128] + ushr v11.4s, v2.4s, #7 + shl v2.4s, v2.4s, #25 + orr v1.16b, v1.16b, v31.16b + add v18.4s, v18.4s, v14.4s + eor v26.16b, v20.16b, v26.16b + add v19.4s, v19.4s, v25.4s + orr v2.16b, v2.16b, v11.16b + add v17.4s, v17.4s, v27.4s + add v18.4s, v18.4s, v1.4s + rev32 v26.8h, v26.8h + eor v5.16b, v19.16b, v5.16b + add v17.4s, v17.4s, v2.4s + eor v7.16b, v18.16b, v7.16b + add v23.4s, v23.4s, v26.4s + rev32 v5.8h, v5.8h + eor v6.16b, v17.16b, v6.16b + rev32 v7.8h, v7.8h + eor v0.16b, v23.16b, v0.16b + add v3.4s, v3.4s, v5.4s + rev32 v6.8h, v6.8h + add v16.4s, v16.4s, v7.4s + ushr v31.4s, v0.4s, #12 + shl v0.4s, v0.4s, #20 + eor v25.16b, v3.16b, v25.16b + add v4.4s, v4.4s, v6.4s + eor v1.16b, v16.16b, v1.16b + orr v0.16b, v0.16b, v31.16b + ushr v31.4s, v25.4s, #12 + shl v25.4s, v25.4s, #20 + add v20.4s, v20.4s, v21.4s + ushr v11.4s, v1.4s, #12 + shl v1.4s, v1.4s, #20 + eor v2.16b, v4.16b, 
v2.16b + orr v25.16b, v25.16b, v31.16b + add v19.4s, v19.4s, v28.4s + add v20.4s, v20.4s, v0.4s + mov v12.16b, v27.16b + ldur q27, [x29, #-208] + orr v1.16b, v1.16b, v11.16b + ushr v11.4s, v2.4s, #12 + shl v2.4s, v2.4s, #20 + add v18.4s, v18.4s, v27.4s + add v19.4s, v19.4s, v25.4s + eor v26.16b, v20.16b, v26.16b + orr v2.16b, v2.16b, v11.16b + add v17.4s, v17.4s, v13.4s + add v18.4s, v18.4s, v1.4s + eor v5.16b, v19.16b, v5.16b + ushr v31.4s, v26.4s, #8 + shl v26.4s, v26.4s, #24 + add v17.4s, v17.4s, v2.4s + ushr v11.4s, v5.4s, #8 + shl v5.4s, v5.4s, #24 + eor v7.16b, v18.16b, v7.16b + orr v26.16b, v26.16b, v31.16b + eor v6.16b, v17.16b, v6.16b + orr v5.16b, v5.16b, v11.16b + ushr v31.4s, v7.4s, #8 + shl v7.4s, v7.4s, #24 + add v23.4s, v26.4s, v23.4s + ushr v11.4s, v6.4s, #8 + shl v6.4s, v6.4s, #24 + orr v7.16b, v7.16b, v31.16b + add v3.4s, v5.4s, v3.4s + eor v0.16b, v23.16b, v0.16b + orr v6.16b, v6.16b, v11.16b + add v16.4s, v7.4s, v16.4s + eor v25.16b, v3.16b, v25.16b + ushr v31.4s, v0.4s, #7 + shl v0.4s, v0.4s, #25 + add v4.4s, v6.4s, v4.4s + ushr v11.4s, v25.4s, #7 + shl v25.4s, v25.4s, #25 + eor v1.16b, v16.16b, v1.16b + orr v0.16b, v0.16b, v31.16b + add v18.4s, v18.4s, v8.4s + eor v2.16b, v4.16b, v2.16b + orr v25.16b, v25.16b, v11.16b + ushr v31.4s, v1.4s, #7 + shl v1.4s, v1.4s, #25 + add v20.4s, v20.4s, v29.4s + add v18.4s, v18.4s, v0.4s + ushr v11.4s, v2.4s, #7 + shl v2.4s, v2.4s, #25 + orr v1.16b, v1.16b, v31.16b + add v20.4s, v20.4s, v25.4s + add v17.4s, v17.4s, v15.4s + eor v6.16b, v6.16b, v18.16b + orr v2.16b, v2.16b, v11.16b + add v19.4s, v19.4s, v10.4s + eor v7.16b, v7.16b, v20.16b + add v17.4s, v17.4s, v1.4s + rev32 v6.8h, v6.8h + add v19.4s, v19.4s, v2.4s + rev32 v7.8h, v7.8h + eor v5.16b, v17.16b, v5.16b + add v3.4s, v3.4s, v6.4s + eor v26.16b, v19.16b, v26.16b + add v4.4s, v4.4s, v7.4s + rev32 v5.8h, v5.8h + eor v0.16b, v3.16b, v0.16b + rev32 v26.8h, v26.8h + eor v25.16b, v4.16b, v25.16b + add v23.4s, v23.4s, v5.4s + ushr v11.4s, v0.4s, #12 + shl 
v0.4s, v0.4s, #20 + add v16.4s, v16.4s, v26.4s + ushr v31.4s, v25.4s, #12 + shl v25.4s, v25.4s, #20 + eor v1.16b, v23.16b, v1.16b + orr v0.16b, v0.16b, v11.16b + add v18.4s, v18.4s, v14.4s + mov v30.16b, v29.16b + mov v29.16b, v15.16b + ldr q15, [sp, #144] + orr v25.16b, v25.16b, v31.16b + eor v2.16b, v16.16b, v2.16b + ushr v31.4s, v1.4s, #12 + shl v1.4s, v1.4s, #20 + add v20.4s, v20.4s, v15.4s + add v18.4s, v18.4s, v0.4s + ushr v11.4s, v2.4s, #12 + shl v2.4s, v2.4s, #20 + orr v1.16b, v1.16b, v31.16b + add v20.4s, v20.4s, v25.4s + add v17.4s, v17.4s, v24.4s + eor v6.16b, v18.16b, v6.16b + orr v2.16b, v2.16b, v11.16b + add v19.4s, v19.4s, v13.4s + eor v7.16b, v20.16b, v7.16b + add v17.4s, v17.4s, v1.4s + ushr v11.4s, v6.4s, #8 + shl v6.4s, v6.4s, #24 + add v19.4s, v19.4s, v2.4s + ushr v31.4s, v7.4s, #8 + shl v7.4s, v7.4s, #24 + eor v5.16b, v17.16b, v5.16b + orr v6.16b, v6.16b, v11.16b + eor v26.16b, v19.16b, v26.16b + orr v7.16b, v7.16b, v31.16b + ushr v31.4s, v5.4s, #8 + shl v5.4s, v5.4s, #24 + add v3.4s, v6.4s, v3.4s + ushr v11.4s, v26.4s, #8 + shl v26.4s, v26.4s, #24 + add v4.4s, v7.4s, v4.4s + orr v5.16b, v5.16b, v31.16b + eor v0.16b, v3.16b, v0.16b + orr v26.16b, v26.16b, v11.16b + eor v25.16b, v4.16b, v25.16b + add v23.4s, v5.4s, v23.4s + ushr v11.4s, v0.4s, #7 + shl v0.4s, v0.4s, #25 + mov v9.16b, v28.16b + mov v28.16b, v10.16b + ldr q10, [sp, #176] + add v16.4s, v26.4s, v16.4s + ushr v31.4s, v25.4s, #7 + shl v25.4s, v25.4s, #25 + eor v1.16b, v23.16b, v1.16b + orr v0.16b, v0.16b, v11.16b + add v20.4s, v20.4s, v10.4s + orr v25.16b, v25.16b, v31.16b + eor v2.16b, v16.16b, v2.16b + ushr v31.4s, v1.4s, #7 + shl v1.4s, v1.4s, #25 + add v20.4s, v20.4s, v0.4s + add v19.4s, v19.4s, v9.4s + ushr v11.4s, v2.4s, #7 + shl v2.4s, v2.4s, #25 + orr v1.16b, v1.16b, v31.16b + add v18.4s, v18.4s, v12.4s + eor v26.16b, v20.16b, v26.16b + add v19.4s, v19.4s, v25.4s + orr v2.16b, v2.16b, v11.16b + add v17.4s, v17.4s, v21.4s + add v18.4s, v18.4s, v1.4s + rev32 v26.8h, v26.8h + eor 
v5.16b, v19.16b, v5.16b + add v17.4s, v17.4s, v2.4s + eor v7.16b, v18.16b, v7.16b + add v23.4s, v23.4s, v26.4s + rev32 v5.8h, v5.8h + eor v6.16b, v17.16b, v6.16b + rev32 v7.8h, v7.8h + eor v0.16b, v23.16b, v0.16b + add v3.4s, v3.4s, v5.4s + rev32 v6.8h, v6.8h + add v16.4s, v16.4s, v7.4s + ushr v31.4s, v0.4s, #12 + shl v0.4s, v0.4s, #20 + eor v25.16b, v3.16b, v25.16b + add v4.4s, v4.4s, v6.4s + eor v1.16b, v16.16b, v1.16b + orr v0.16b, v0.16b, v31.16b + ushr v31.4s, v25.4s, #12 + shl v25.4s, v25.4s, #20 + ushr v11.4s, v1.4s, #12 + shl v1.4s, v1.4s, #20 + eor v2.16b, v4.16b, v2.16b + add v20.4s, v20.4s, v27.4s + orr v25.16b, v25.16b, v31.16b + add v19.4s, v19.4s, v22.4s + mov v9.16b, v22.16b + ldur q22, [x29, #-240] + orr v1.16b, v1.16b, v11.16b + ushr v11.4s, v2.4s, #12 + shl v2.4s, v2.4s, #20 + add v20.4s, v20.4s, v0.4s + add v18.4s, v18.4s, v22.4s + add v19.4s, v19.4s, v25.4s + mov v24.16b, v21.16b + ldur q21, [x29, #-192] + orr v2.16b, v2.16b, v11.16b + eor v26.16b, v20.16b, v26.16b + add v17.4s, v17.4s, v21.4s + add v18.4s, v18.4s, v1.4s + eor v5.16b, v19.16b, v5.16b + ushr v31.4s, v26.4s, #8 + add v17.4s, v17.4s, v2.4s + shl v26.4s, v26.4s, #24 + ushr v11.4s, v5.4s, #8 + shl v5.4s, v5.4s, #24 + eor v7.16b, v18.16b, v7.16b + orr v26.16b, v26.16b, v31.16b + eor v6.16b, v17.16b, v6.16b + orr v5.16b, v5.16b, v11.16b + ushr v31.4s, v7.4s, #8 + shl v7.4s, v7.4s, #24 + ushr v11.4s, v6.4s, #8 + shl v6.4s, v6.4s, #24 + add v23.4s, v26.4s, v23.4s + orr v7.16b, v7.16b, v31.16b + add v3.4s, v5.4s, v3.4s + orr v6.16b, v6.16b, v11.16b + eor v0.16b, v23.16b, v0.16b + add v16.4s, v7.4s, v16.4s + eor v25.16b, v3.16b, v25.16b + add v4.4s, v6.4s, v4.4s + ushr v31.4s, v0.4s, #7 + shl v0.4s, v0.4s, #25 + ushr v11.4s, v25.4s, #7 + shl v25.4s, v25.4s, #25 + eor v1.16b, v16.16b, v1.16b + orr v0.16b, v0.16b, v31.16b + eor v2.16b, v4.16b, v2.16b + orr v25.16b, v25.16b, v11.16b + ushr v31.4s, v1.4s, #7 + shl v1.4s, v1.4s, #25 + add v20.4s, v20.4s, v8.4s + add v18.4s, v18.4s, v14.4s + 
ushr v11.4s, v2.4s, #7 + shl v2.4s, v2.4s, #25 + orr v1.16b, v1.16b, v31.16b + add v20.4s, v20.4s, v25.4s + add v17.4s, v17.4s, v13.4s + add v18.4s, v18.4s, v0.4s + orr v2.16b, v2.16b, v11.16b + add v19.4s, v19.4s, v29.4s + eor v7.16b, v7.16b, v20.16b + add v17.4s, v17.4s, v1.4s + eor v6.16b, v6.16b, v18.16b + add v19.4s, v19.4s, v2.4s + rev32 v7.8h, v7.8h + eor v5.16b, v17.16b, v5.16b + rev32 v6.8h, v6.8h + eor v26.16b, v19.16b, v26.16b + add v4.4s, v4.4s, v7.4s + rev32 v5.8h, v5.8h + add v3.4s, v3.4s, v6.4s + rev32 v26.8h, v26.8h + eor v25.16b, v4.16b, v25.16b + add v23.4s, v23.4s, v5.4s + eor v0.16b, v3.16b, v0.16b + add v16.4s, v16.4s, v26.4s + ushr v31.4s, v25.4s, #12 + shl v25.4s, v25.4s, #20 + ushr v11.4s, v0.4s, #12 + shl v0.4s, v0.4s, #20 + eor v1.16b, v23.16b, v1.16b + orr v25.16b, v25.16b, v31.16b + eor v2.16b, v16.16b, v2.16b + orr v0.16b, v0.16b, v11.16b + ushr v31.4s, v1.4s, #12 + shl v1.4s, v1.4s, #20 + add v20.4s, v20.4s, v28.4s + add v18.4s, v18.4s, v12.4s + ushr v11.4s, v2.4s, #12 + shl v2.4s, v2.4s, #20 + orr v1.16b, v1.16b, v31.16b + add v20.4s, v20.4s, v25.4s + add v17.4s, v17.4s, v30.4s + add v18.4s, v18.4s, v0.4s + orr v2.16b, v2.16b, v11.16b + add v19.4s, v19.4s, v21.4s + eor v7.16b, v20.16b, v7.16b + add v17.4s, v17.4s, v1.4s + eor v6.16b, v18.16b, v6.16b + add v19.4s, v19.4s, v2.4s + ushr v31.4s, v7.4s, #8 + shl v7.4s, v7.4s, #24 + ushr v11.4s, v6.4s, #8 + shl v6.4s, v6.4s, #24 + eor v5.16b, v17.16b, v5.16b + orr v7.16b, v7.16b, v31.16b + eor v26.16b, v19.16b, v26.16b + orr v6.16b, v6.16b, v11.16b + ushr v31.4s, v5.4s, #8 + shl v5.4s, v5.4s, #24 + ushr v11.4s, v26.4s, #8 + shl v26.4s, v26.4s, #24 + add v4.4s, v7.4s, v4.4s + orr v5.16b, v5.16b, v31.16b + add v3.4s, v6.4s, v3.4s + orr v26.16b, v26.16b, v11.16b + eor v25.16b, v4.16b, v25.16b + add v23.4s, v5.4s, v23.4s + eor v0.16b, v3.16b, v0.16b + add v16.4s, v26.4s, v16.4s + ushr v31.4s, v25.4s, #7 + shl v25.4s, v25.4s, #25 + ushr v11.4s, v0.4s, #7 + shl v0.4s, v0.4s, #25 + eor v1.16b, 
v23.16b, v1.16b + orr v25.16b, v25.16b, v31.16b + eor v2.16b, v16.16b, v2.16b + orr v0.16b, v0.16b, v11.16b + ushr v31.4s, v1.4s, #7 + shl v1.4s, v1.4s, #25 + add v20.4s, v20.4s, v15.4s + ushr v11.4s, v2.4s, #7 + shl v2.4s, v2.4s, #25 + orr v1.16b, v1.16b, v31.16b + add v18.4s, v18.4s, v24.4s + add v20.4s, v20.4s, v0.4s + add v19.4s, v19.4s, v9.4s + mov v8.16b, v13.16b + ldur q13, [x29, #-208] + orr v2.16b, v2.16b, v11.16b + add v18.4s, v18.4s, v1.4s + add v17.4s, v17.4s, v13.4s + eor v26.16b, v20.16b, v26.16b + add v19.4s, v19.4s, v25.4s + eor v7.16b, v18.16b, v7.16b + add v17.4s, v17.4s, v2.4s + rev32 v26.8h, v26.8h + eor v5.16b, v19.16b, v5.16b + rev32 v7.8h, v7.8h + eor v6.16b, v17.16b, v6.16b + add v23.4s, v23.4s, v26.4s + rev32 v5.8h, v5.8h + add v16.4s, v16.4s, v7.4s + rev32 v6.8h, v6.8h + eor v0.16b, v23.16b, v0.16b + add v3.4s, v3.4s, v5.4s + eor v1.16b, v16.16b, v1.16b + add v4.4s, v4.4s, v6.4s + ushr v31.4s, v0.4s, #12 + shl v0.4s, v0.4s, #20 + eor v25.16b, v3.16b, v25.16b + ushr v11.4s, v1.4s, #12 + shl v1.4s, v1.4s, #20 + orr v0.16b, v0.16b, v31.16b + eor v2.16b, v4.16b, v2.16b + ushr v31.4s, v25.4s, #12 + shl v25.4s, v25.4s, #20 + orr v1.16b, v1.16b, v11.16b + ushr v11.4s, v2.4s, #12 + shl v2.4s, v2.4s, #20 + add v20.4s, v20.4s, v22.4s + orr v25.16b, v25.16b, v31.16b + add v19.4s, v19.4s, v10.4s + mov v27.16b, v12.16b + mov v12.16b, v30.16b + mov v29.16b, v21.16b + mov v21.16b, v24.16b + ldr q24, [sp, #192] + mov v30.16b, v22.16b + ldr q22, [sp, #256] + orr v2.16b, v2.16b, v11.16b + add v20.4s, v20.4s, v0.4s + add v18.4s, v18.4s, v24.4s + add v19.4s, v19.4s, v25.4s + add v17.4s, v17.4s, v22.4s + eor v26.16b, v20.16b, v26.16b + add v18.4s, v18.4s, v1.4s + eor v5.16b, v19.16b, v5.16b + add v17.4s, v17.4s, v2.4s + ushr v31.4s, v26.4s, #8 + shl v26.4s, v26.4s, #24 + ushr v11.4s, v5.4s, #8 + shl v5.4s, v5.4s, #24 + eor v7.16b, v18.16b, v7.16b + eor v6.16b, v17.16b, v6.16b + orr v26.16b, v26.16b, v31.16b + orr v5.16b, v5.16b, v11.16b + ushr v31.4s, v7.4s, 
#8 + shl v7.4s, v7.4s, #24 + ushr v11.4s, v6.4s, #8 + shl v6.4s, v6.4s, #24 + add v23.4s, v26.4s, v23.4s + orr v7.16b, v7.16b, v31.16b + add v3.4s, v5.4s, v3.4s + orr v6.16b, v6.16b, v11.16b + eor v0.16b, v23.16b, v0.16b + add v16.4s, v7.4s, v16.4s + eor v25.16b, v3.16b, v25.16b + add v4.4s, v6.4s, v4.4s + ushr v31.4s, v0.4s, #7 + shl v0.4s, v0.4s, #25 + ushr v11.4s, v25.4s, #7 + shl v25.4s, v25.4s, #25 + eor v1.16b, v16.16b, v1.16b + eor v2.16b, v4.16b, v2.16b + orr v0.16b, v0.16b, v31.16b + orr v25.16b, v25.16b, v11.16b + ushr v31.4s, v1.4s, #7 + shl v1.4s, v1.4s, #25 + ushr v11.4s, v2.4s, #7 + shl v2.4s, v2.4s, #25 + add v20.4s, v20.4s, v14.4s + add v18.4s, v18.4s, v27.4s + ldr q27, [sp, #224] + orr v1.16b, v1.16b, v31.16b + orr v2.16b, v2.16b, v11.16b + add v20.4s, v20.4s, v25.4s + add v17.4s, v17.4s, v29.4s + add v18.4s, v18.4s, v0.4s + add v19.4s, v19.4s, v8.4s + eor v7.16b, v7.16b, v20.16b + add v17.4s, v17.4s, v1.4s + eor v6.16b, v6.16b, v18.16b + add v19.4s, v19.4s, v2.4s + rev32 v7.8h, v7.8h + eor v5.16b, v17.16b, v5.16b + rev32 v6.8h, v6.8h + eor v26.16b, v19.16b, v26.16b + add v4.4s, v4.4s, v7.4s + rev32 v5.8h, v5.8h + add v3.4s, v3.4s, v6.4s + rev32 v26.8h, v26.8h + eor v25.16b, v4.16b, v25.16b + add v23.4s, v23.4s, v5.4s + eor v0.16b, v3.16b, v0.16b + add v16.4s, v16.4s, v26.4s + ushr v29.4s, v25.4s, #12 + shl v25.4s, v25.4s, #20 + ushr v31.4s, v0.4s, #12 + shl v0.4s, v0.4s, #20 + eor v1.16b, v23.16b, v1.16b + eor v2.16b, v16.16b, v2.16b + orr v25.16b, v25.16b, v29.16b + orr v0.16b, v0.16b, v31.16b + ushr v29.4s, v1.4s, #12 + shl v1.4s, v1.4s, #20 + ushr v31.4s, v2.4s, #12 + shl v2.4s, v2.4s, #20 + add v18.4s, v18.4s, v21.4s + ldr q21, [sp, #240] + add v20.4s, v20.4s, v27.4s + prfm pldl1keep, [x17, #256] + orr v1.16b, v1.16b, v29.16b + prfm pldl1keep, [x21, #256] + orr v2.16b, v2.16b, v31.16b + prfm pldl1keep, [x16, #256] + add v18.4s, v18.4s, v0.4s + prfm pldl1keep, [x6, #256] + add v17.4s, v17.4s, v21.4s + add v19.4s, v19.4s, v22.4s + add v20.4s, 
v20.4s, v25.4s + eor v6.16b, v18.16b, v6.16b + add v17.4s, v17.4s, v1.4s + add v19.4s, v19.4s, v2.4s + eor v7.16b, v20.16b, v7.16b + ushr v22.4s, v6.4s, #8 + shl v6.4s, v6.4s, #24 + eor v5.16b, v17.16b, v5.16b + eor v26.16b, v19.16b, v26.16b + ushr v21.4s, v7.4s, #8 + shl v7.4s, v7.4s, #24 + orr v6.16b, v6.16b, v22.16b + ushr v22.4s, v5.4s, #8 + shl v5.4s, v5.4s, #24 + ushr v29.4s, v26.4s, #8 + shl v26.4s, v26.4s, #24 + orr v7.16b, v7.16b, v21.16b + orr v5.16b, v5.16b, v22.16b + add v3.4s, v6.4s, v3.4s + orr v21.16b, v26.16b, v29.16b + add v4.4s, v7.4s, v4.4s + add v22.4s, v5.4s, v23.4s + eor v0.16b, v3.16b, v0.16b + add v16.4s, v21.4s, v16.4s + eor v23.16b, v4.16b, v25.16b + eor v1.16b, v22.16b, v1.16b + ushr v25.4s, v0.4s, #7 + shl v0.4s, v0.4s, #25 + eor v2.16b, v16.16b, v2.16b + ushr v26.4s, v23.4s, #7 + shl v23.4s, v23.4s, #25 + orr v0.16b, v0.16b, v25.16b + ushr v25.4s, v1.4s, #7 + shl v1.4s, v1.4s, #25 + ushr v29.4s, v2.4s, #7 + shl v2.4s, v2.4s, #25 + add v20.4s, v20.4s, v28.4s + orr v23.16b, v23.16b, v26.16b + orr v1.16b, v1.16b, v25.16b + orr v2.16b, v2.16b, v29.16b + add v20.4s, v20.4s, v0.4s + add v18.4s, v18.4s, v13.4s + add v17.4s, v17.4s, v30.4s + add v19.4s, v19.4s, v10.4s + eor v21.16b, v20.16b, v21.16b + add v18.4s, v18.4s, v1.4s + add v17.4s, v17.4s, v2.4s + add v19.4s, v19.4s, v23.4s + rev32 v21.8h, v21.8h + eor v7.16b, v18.16b, v7.16b + eor v6.16b, v17.16b, v6.16b + eor v5.16b, v19.16b, v5.16b + add v22.4s, v22.4s, v21.4s + rev32 v7.8h, v7.8h + rev32 v6.8h, v6.8h + rev32 v5.8h, v5.8h + eor v0.16b, v22.16b, v0.16b + add v16.4s, v16.4s, v7.4s + add v4.4s, v4.4s, v6.4s + add v3.4s, v3.4s, v5.4s + ushr v25.4s, v0.4s, #12 + shl v0.4s, v0.4s, #20 + eor v1.16b, v16.16b, v1.16b + eor v2.16b, v4.16b, v2.16b + eor v23.16b, v3.16b, v23.16b + orr v0.16b, v0.16b, v25.16b + ushr v25.4s, v1.4s, #12 + shl v1.4s, v1.4s, #20 + ushr v26.4s, v2.4s, #12 + shl v2.4s, v2.4s, #20 + ushr v27.4s, v23.4s, #12 + shl v23.4s, v23.4s, #20 + orr v1.16b, v1.16b, v25.16b + add 
v20.4s, v20.4s, v24.4s + orr v2.16b, v2.16b, v26.16b + orr v23.16b, v23.16b, v27.16b + add v18.4s, v18.4s, v12.4s + add v17.4s, v17.4s, v9.4s + add v19.4s, v19.4s, v15.4s + add v20.4s, v20.4s, v0.4s + add v18.4s, v18.4s, v1.4s + add v17.4s, v17.4s, v2.4s + add v19.4s, v19.4s, v23.4s + eor v21.16b, v20.16b, v21.16b + eor v7.16b, v18.16b, v7.16b + eor v6.16b, v17.16b, v6.16b + eor v5.16b, v19.16b, v5.16b + ushr v24.4s, v21.4s, #8 + shl v21.4s, v21.4s, #24 + ushr v25.4s, v7.4s, #8 + shl v7.4s, v7.4s, #24 + ushr v26.4s, v6.4s, #8 + shl v6.4s, v6.4s, #24 + ushr v27.4s, v5.4s, #8 + shl v5.4s, v5.4s, #24 + orr v21.16b, v21.16b, v24.16b + orr v7.16b, v7.16b, v25.16b + orr v6.16b, v6.16b, v26.16b + orr v5.16b, v5.16b, v27.16b + add v22.4s, v21.4s, v22.4s + add v16.4s, v7.4s, v16.4s + add v4.4s, v6.4s, v4.4s + add v3.4s, v5.4s, v3.4s + eor v0.16b, v22.16b, v0.16b + eor v1.16b, v16.16b, v1.16b + eor v2.16b, v4.16b, v2.16b + eor v23.16b, v3.16b, v23.16b + ushr v24.4s, v0.4s, #7 + shl v0.4s, v0.4s, #25 + ushr v25.4s, v1.4s, #7 + shl v1.4s, v1.4s, #25 + ushr v26.4s, v2.4s, #7 + shl v2.4s, v2.4s, #25 + ushr v27.4s, v23.4s, #7 + shl v23.4s, v23.4s, #25 + orr v0.16b, v0.16b, v24.16b + orr v1.16b, v1.16b, v25.16b + orr v2.16b, v2.16b, v26.16b + orr v23.16b, v23.16b, v27.16b + movi v24.4s, #64 + eor v12.16b, v4.16b, v20.16b + eor v31.16b, v18.16b, v3.16b + eor v29.16b, v17.16b, v22.16b + eor v30.16b, v16.16b, v19.16b + eor v28.16b, v7.16b, v23.16b + eor v23.16b, v6.16b, v0.16b + eor v13.16b, v1.16b, v5.16b + eor v25.16b, v2.16b, v21.16b + cbnz x15, .LBB3_5 + b .LBB3_2 +.LBB3_6: + cbz x24, .LBB3_14 + orr w8, w7, w19 + and x22, x5, #0x1 + stur w8, [x29, #-192] +.LBB3_8: + ldr x8, [sp, #40] + mov x28, x0 + ldr x25, [x0] + mov x23, x2 + ldur w5, [x29, #-192] + ldp q0, q1, [x8] + mov x8, x2 + b .LBB3_11 +.LBB3_9: + orr w5, w5, w27 +.LBB3_10: + sub x0, x29, #144 + sub x1, x29, #176 + mov x2, x25 + mov w3, #64 + mov x4, x20 + bl compress_pre + ldp q0, q1, [x29, #-144] + add x25, x25, #64 + 
mov x8, x21 + mov w5, w19 + ldp q2, q3, [x29, #-112] + eor v0.16b, v2.16b, v0.16b + eor v1.16b, v3.16b, v1.16b +.LBB3_11: + subs x21, x8, #1 + stp q0, q1, [x29, #-176] + b.eq .LBB3_9 + cbnz x8, .LBB3_10 + ldp q1, q0, [x29, #-176] + mov x0, x28 + add x20, x20, x22 + add x0, x28, #8 + subs x24, x24, #1 + mov x2, x23 + stp q1, q0, [x26], #32 + b.ne .LBB3_8 +.LBB3_14: + add sp, sp, #464 + ldp x20, x19, [sp, #144] + ldp x22, x21, [sp, #128] + ldp x24, x23, [sp, #112] + ldp x26, x25, [sp, #96] + ldp x28, x27, [sp, #80] + ldp x29, x30, [sp, #64] + ldp d9, d8, [sp, #48] + ldp d11, d10, [sp, #32] + ldp d13, d12, [sp, #16] + ldp d15, d14, [sp], #160 + hint #29 + ret +.Lfunc_end3: + .size zfs_blake3_hash_many_sse2, .Lfunc_end3-zfs_blake3_hash_many_sse2 + .cfi_endproc + .section ".note.GNU-stack","",@progbits +#endif diff --git a/sys/contrib/openzfs/module/icp/asm-aarch64/blake3/b3_aarch64_sse41.S b/sys/contrib/openzfs/module/icp/asm-aarch64/blake3/b3_aarch64_sse41.S new file mode 100644 index 000000000000..0b719761dd4c --- /dev/null +++ b/sys/contrib/openzfs/module/icp/asm-aarch64/blake3/b3_aarch64_sse41.S @@ -0,0 +1,2406 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3 + * Copyright (c) 2019-2022 Samuel Neves + * Copyright (c) 2022-2023 Tino Reichardt <milky-zfs@mcmilk.de> + * + * This is converted assembly: SSE4.1 -> ARMv8-A + * Used tools: SIMDe https://github.com/simd-everywhere/simde + * + * Should work on FreeBSD, Linux and macOS + * see: https://github.com/mcmilk/BLAKE3-tests/blob/master/contrib/simde.sh + */ + +#if defined(__aarch64__) + +/* make gcc <= 9 happy */ +#if !defined(LD_VERSION) || LD_VERSION >= 233010000 +#define CFI_NEGATE_RA_STATE .cfi_negate_ra_state +#else +#define CFI_NEGATE_RA_STATE +#endif + + .text + .section .note.gnu.property,"a",@note + .p2align 3 + .word 4 + .word 16 + .word 5 + .asciz "GNU" + .word 3221225472 + .word 4 + .word 3 + .word 0 +.Lsec_end0: + .text + .globl zfs_blake3_compress_in_place_sse41 + .p2align 2 + .type zfs_blake3_compress_in_place_sse41,@function +zfs_blake3_compress_in_place_sse41: + .cfi_startproc + hint #25 + CFI_NEGATE_RA_STATE + sub sp, sp, #96 + stp x29, x30, [sp, #64] + add x29, sp, #64 + str x19, [sp, #80] + .cfi_def_cfa w29, 32 + .cfi_offset w19, -16 + .cfi_offset w30, -24 + .cfi_offset w29, -32 + mov x19, x0 + mov w5, w4 + mov x4, x3 + mov w3, w2 + mov x2, x1 + mov x0, sp + mov x1, x19 + bl compress_pre + ldp q0, q1, [sp] + ldp q2, q3, [sp, #32] + eor v0.16b, v2.16b, v0.16b + eor v1.16b, v3.16b, v1.16b + ldp x29, x30, [sp, #64] + stp q0, q1, [x19] + ldr x19, [sp, #80] + add sp, sp, #96 + hint #29 + ret +.Lfunc_end0: + .size zfs_blake3_compress_in_place_sse41, .Lfunc_end0-zfs_blake3_compress_in_place_sse41 + .cfi_endproc + + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4 +.LCPI1_0: + .xword -4942790177982912921 + .xword -6534734903820487822 +.LCPI1_1: + 
.byte 2 + .byte 3 + .byte 0 + .byte 1 + .byte 6 + .byte 7 + .byte 4 + .byte 5 + .byte 10 + .byte 11 + .byte 8 + .byte 9 + .byte 14 + .byte 15 + .byte 12 + .byte 13 +.LCPI1_2: + .byte 1 + .byte 2 + .byte 3 + .byte 0 + .byte 5 + .byte 6 + .byte 7 + .byte 4 + .byte 9 + .byte 10 + .byte 11 + .byte 8 + .byte 13 + .byte 14 + .byte 15 + .byte 12 + .text + .p2align 2 + .type compress_pre,@function +compress_pre: + .cfi_startproc + hint #34 + fmov s1, w3 + movi d0, #0x0000ff000000ff + ldr q2, [x1] + adrp x8, .LCPI1_0 + mov v1.s[1], w5 + str q2, [x0] + ldr q4, [x8, :lo12:.LCPI1_0] + ldr q5, [x1, #16] + adrp x8, .LCPI1_1 + and v0.8b, v1.8b, v0.8b + fmov d1, x4 + stp q5, q4, [x0, #16] + mov v1.d[1], v0.d[0] + str q1, [x0, #48] + ldp q6, q7, [x2] + uzp1 v3.4s, v6.4s, v7.4s + add v0.4s, v2.4s, v3.4s + uzp2 v2.4s, v6.4s, v7.4s + add v16.4s, v0.4s, v5.4s + ldr q0, [x8, :lo12:.LCPI1_1] + adrp x8, .LCPI1_2 + eor v1.16b, v16.16b, v1.16b + add v7.4s, v16.4s, v2.4s + tbl v1.16b, { v1.16b }, v0.16b + add v4.4s, v1.4s, v4.4s + eor v5.16b, v4.16b, v5.16b + ushr v6.4s, v5.4s, #12 + shl v5.4s, v5.4s, #20 + orr v5.16b, v5.16b, v6.16b + add v6.4s, v7.4s, v5.4s + eor v7.16b, v1.16b, v6.16b + ldr q1, [x8, :lo12:.LCPI1_2] + add x8, x2, #32 + tbl v7.16b, { v7.16b }, v1.16b + ld2 { v16.4s, v17.4s }, [x8] + add v4.4s, v4.4s, v7.4s + ext v7.16b, v7.16b, v7.16b, #8 + add v6.4s, v6.4s, v16.4s + eor v5.16b, v4.16b, v5.16b + ext v4.16b, v4.16b, v4.16b, #4 + ext v16.16b, v16.16b, v16.16b, #12 + ext v6.16b, v6.16b, v6.16b, #12 + ushr v18.4s, v5.4s, #7 + shl v5.4s, v5.4s, #25 + orr v5.16b, v5.16b, v18.16b + ext v18.16b, v17.16b, v17.16b, #12 + add v6.4s, v6.4s, v5.4s + mov v17.16b, v18.16b + eor v7.16b, v7.16b, v6.16b + add v6.4s, v6.4s, v18.4s + mov v17.s[1], v16.s[2] + tbl v7.16b, { v7.16b }, v0.16b + add v4.4s, v4.4s, v7.4s + eor v5.16b, v4.16b, v5.16b + ushr v19.4s, v5.4s, #12 + shl v5.4s, v5.4s, #20 + orr v5.16b, v5.16b, v19.16b + uzp1 v19.4s, v3.4s, v3.4s + add v6.4s, v6.4s, v5.4s + ext v19.16b, 
v19.16b, v3.16b, #8 + eor v7.16b, v7.16b, v6.16b + uzp2 v19.4s, v19.4s, v2.4s + tbl v7.16b, { v7.16b }, v1.16b + add v6.4s, v6.4s, v19.4s + add v4.4s, v4.4s, v7.4s + ext v6.16b, v6.16b, v6.16b, #4 + ext v7.16b, v7.16b, v7.16b, #8 + eor v5.16b, v4.16b, v5.16b + ext v4.16b, v4.16b, v4.16b, #12 + ushr v20.4s, v5.4s, #7 + shl v5.4s, v5.4s, #25 + orr v5.16b, v5.16b, v20.16b + ext v20.16b, v3.16b, v3.16b, #12 + add v6.4s, v6.4s, v5.4s + ext v3.16b, v3.16b, v20.16b, #12 + eor v7.16b, v7.16b, v6.16b + rev64 v3.4s, v3.4s + tbl v7.16b, { v7.16b }, v0.16b + trn2 v3.4s, v3.4s, v17.4s + add v4.4s, v4.4s, v7.4s + add v6.4s, v6.4s, v3.4s + eor v5.16b, v4.16b, v5.16b + ushr v17.4s, v5.4s, #12 + shl v5.4s, v5.4s, #20 + orr v5.16b, v5.16b, v17.16b + zip1 v17.2d, v18.2d, v2.2d + zip2 v2.4s, v2.4s, v18.4s + add v6.4s, v6.4s, v5.4s + mov v17.s[3], v16.s[3] + zip1 v18.4s, v2.4s, v16.4s + zip1 v2.4s, v16.4s, v2.4s + eor v7.16b, v7.16b, v6.16b + ext v6.16b, v6.16b, v6.16b, #12 + ext v16.16b, v2.16b, v18.16b, #8 + tbl v7.16b, { v7.16b }, v1.16b + add v20.4s, v4.4s, v7.4s + ext v4.16b, v17.16b, v17.16b, #12 + ext v7.16b, v7.16b, v7.16b, #8 + eor v5.16b, v20.16b, v5.16b + uzp1 v4.4s, v17.4s, v4.4s + ushr v17.4s, v5.4s, #7 + shl v5.4s, v5.4s, #25 + add v6.4s, v6.4s, v4.4s + orr v5.16b, v5.16b, v17.16b + ext v17.16b, v20.16b, v20.16b, #4 + add v6.4s, v6.4s, v5.4s + eor v7.16b, v7.16b, v6.16b + add v6.4s, v6.4s, v16.4s + tbl v7.16b, { v7.16b }, v0.16b + add v17.4s, v17.4s, v7.4s + eor v5.16b, v17.16b, v5.16b + ushr v2.4s, v5.4s, #12 + shl v5.4s, v5.4s, #20 + orr v2.16b, v5.16b, v2.16b + add v5.4s, v6.4s, v2.4s + ext v6.16b, v19.16b, v19.16b, #4 + eor v7.16b, v7.16b, v5.16b + uzp1 v18.4s, v6.4s, v6.4s + tbl v7.16b, { v7.16b }, v1.16b + ext v18.16b, v18.16b, v6.16b, #8 + add v17.4s, v17.4s, v7.4s + uzp2 v18.4s, v18.4s, v3.4s + ext v7.16b, v7.16b, v7.16b, #8 + eor v2.16b, v17.16b, v2.16b + add v5.4s, v5.4s, v18.4s + ext v17.16b, v17.16b, v17.16b, #12 + ushr v19.4s, v2.4s, #7 + shl v2.4s, v2.4s, 
#25 + ext v5.16b, v5.16b, v5.16b, #4 + orr v2.16b, v2.16b, v19.16b + ext v19.16b, v6.16b, v6.16b, #12 + add v5.4s, v5.4s, v2.4s + ext v6.16b, v6.16b, v19.16b, #12 + mov v19.16b, v16.16b + eor v7.16b, v7.16b, v5.16b + rev64 v6.4s, v6.4s + mov v19.s[1], v4.s[2] + tbl v7.16b, { v7.16b }, v0.16b + add v17.4s, v17.4s, v7.4s + eor v20.16b, v17.16b, v2.16b + trn2 v2.4s, v6.4s, v19.4s + ushr v6.4s, v20.4s, #12 + shl v19.4s, v20.4s, #20 + add v5.4s, v5.4s, v2.4s + orr v6.16b, v19.16b, v6.16b + add v19.4s, v5.4s, v6.4s + eor v5.16b, v7.16b, v19.16b + zip1 v7.2d, v16.2d, v3.2d + zip2 v3.4s, v3.4s, v16.4s + tbl v20.16b, { v5.16b }, v1.16b + mov v7.s[3], v4.s[3] + add v17.4s, v17.4s, v20.4s + ext v5.16b, v7.16b, v7.16b, #12 + eor v6.16b, v17.16b, v6.16b + uzp1 v5.4s, v7.4s, v5.4s + ext v7.16b, v19.16b, v19.16b, #12 + ext v17.16b, v17.16b, v17.16b, #4 + ushr v19.4s, v6.4s, #7 + shl v6.4s, v6.4s, #25 + add v7.4s, v7.4s, v5.4s + orr v6.16b, v6.16b, v19.16b + ext v19.16b, v20.16b, v20.16b, #8 + add v7.4s, v7.4s, v6.4s + eor v19.16b, v19.16b, v7.16b + tbl v19.16b, { v19.16b }, v0.16b + add v16.4s, v17.4s, v19.4s + zip1 v17.4s, v3.4s, v4.4s + zip1 v3.4s, v4.4s, v3.4s + eor v4.16b, v16.16b, v6.16b + ext v17.16b, v3.16b, v17.16b, #8 + ushr v3.4s, v4.4s, #12 + shl v4.4s, v4.4s, #20 + add v6.4s, v7.4s, v17.4s + orr v3.16b, v4.16b, v3.16b + add v4.4s, v6.4s, v3.4s + ext v6.16b, v18.16b, v18.16b, #4 + eor v7.16b, v19.16b, v4.16b + uzp1 v18.4s, v6.4s, v6.4s + tbl v7.16b, { v7.16b }, v1.16b + ext v18.16b, v18.16b, v6.16b, #8 + add v16.4s, v16.4s, v7.4s + uzp2 v18.4s, v18.4s, v2.4s + ext v7.16b, v7.16b, v7.16b, #8 + eor v3.16b, v16.16b, v3.16b + add v4.4s, v4.4s, v18.4s + ext v16.16b, v16.16b, v16.16b, #12 + ushr v19.4s, v3.4s, #7 + shl v3.4s, v3.4s, #25 + ext v4.16b, v4.16b, v4.16b, #4 + orr v3.16b, v3.16b, v19.16b + ext v19.16b, v6.16b, v6.16b, #12 + add v4.4s, v4.4s, v3.4s + ext v6.16b, v6.16b, v19.16b, #12 + mov v19.16b, v17.16b + eor v7.16b, v7.16b, v4.16b + rev64 v6.4s, v6.4s + mov 
v19.s[1], v5.s[2] + tbl v7.16b, { v7.16b }, v0.16b + add v16.4s, v16.4s, v7.4s + eor v20.16b, v16.16b, v3.16b + trn2 v3.4s, v6.4s, v19.4s + ushr v6.4s, v20.4s, #12 + shl v19.4s, v20.4s, #20 + add v4.4s, v4.4s, v3.4s + orr v6.16b, v19.16b, v6.16b + zip1 v19.2d, v17.2d, v2.2d + zip2 v2.4s, v2.4s, v17.4s + add v4.4s, v4.4s, v6.4s + mov v19.s[3], v5.s[3] + zip1 v17.4s, v2.4s, v5.4s + zip1 v2.4s, v5.4s, v2.4s + eor v7.16b, v7.16b, v4.16b + ext v20.16b, v19.16b, v19.16b, #12 + ext v4.16b, v4.16b, v4.16b, #12 + ext v2.16b, v2.16b, v17.16b, #8 + tbl v7.16b, { v7.16b }, v1.16b + add v16.4s, v16.4s, v7.4s + ext v7.16b, v7.16b, v7.16b, #8 + eor v21.16b, v16.16b, v6.16b + uzp1 v6.4s, v19.4s, v20.4s + ext v16.16b, v16.16b, v16.16b, #4 + ushr v19.4s, v21.4s, #7 + shl v20.4s, v21.4s, #25 + add v4.4s, v4.4s, v6.4s + orr v19.16b, v20.16b, v19.16b + add v4.4s, v4.4s, v19.4s + eor v7.16b, v7.16b, v4.16b + add v4.4s, v4.4s, v2.4s + tbl v7.16b, { v7.16b }, v0.16b + add v16.4s, v16.4s, v7.4s + eor v5.16b, v16.16b, v19.16b + ushr v17.4s, v5.4s, #12 + shl v5.4s, v5.4s, #20 + orr v5.16b, v5.16b, v17.16b + ext v17.16b, v18.16b, v18.16b, #4 + add v4.4s, v4.4s, v5.4s + uzp1 v18.4s, v17.4s, v17.4s + eor v7.16b, v7.16b, v4.16b + ext v18.16b, v18.16b, v17.16b, #8 + tbl v7.16b, { v7.16b }, v1.16b + uzp2 v18.4s, v18.4s, v3.4s + add v16.4s, v16.4s, v7.4s + add v4.4s, v4.4s, v18.4s + ext v7.16b, v7.16b, v7.16b, #8 + eor v5.16b, v16.16b, v5.16b + ext v4.16b, v4.16b, v4.16b, #4 + ext v16.16b, v16.16b, v16.16b, #12 + ushr v19.4s, v5.4s, #7 + shl v5.4s, v5.4s, #25 + orr v5.16b, v5.16b, v19.16b + add v19.4s, v4.4s, v5.4s + eor v4.16b, v7.16b, v19.16b + ext v7.16b, v17.16b, v17.16b, #12 + tbl v20.16b, { v4.16b }, v0.16b + ext v4.16b, v17.16b, v7.16b, #12 + mov v7.16b, v2.16b + add v16.4s, v16.4s, v20.4s + rev64 v4.4s, v4.4s + mov v7.s[1], v6.s[2] + eor v5.16b, v16.16b, v5.16b + trn2 v4.4s, v4.4s, v7.4s + ushr v7.4s, v5.4s, #12 + shl v5.4s, v5.4s, #20 + add v17.4s, v19.4s, v4.4s + zip1 v19.2d, v2.2d, v3.2d 
+ zip2 v2.4s, v3.4s, v2.4s + orr v5.16b, v5.16b, v7.16b + mov v19.s[3], v6.s[3] + add v7.4s, v17.4s, v5.4s + eor v17.16b, v20.16b, v7.16b + ext v20.16b, v19.16b, v19.16b, #12 + ext v7.16b, v7.16b, v7.16b, #12 + tbl v17.16b, { v17.16b }, v1.16b + add v16.4s, v16.4s, v17.4s + ext v17.16b, v17.16b, v17.16b, #8 + eor v21.16b, v16.16b, v5.16b + uzp1 v5.4s, v19.4s, v20.4s + ext v16.16b, v16.16b, v16.16b, #4 + ushr v19.4s, v21.4s, #7 + shl v20.4s, v21.4s, #25 + add v7.4s, v7.4s, v5.4s + orr v19.16b, v20.16b, v19.16b + add v7.4s, v7.4s, v19.4s + eor v17.16b, v17.16b, v7.16b + tbl v17.16b, { v17.16b }, v0.16b + add v3.4s, v16.4s, v17.4s + zip1 v16.4s, v2.4s, v6.4s + zip1 v2.4s, v6.4s, v2.4s + eor v6.16b, v3.16b, v19.16b + ext v16.16b, v2.16b, v16.16b, #8 + ushr v2.4s, v6.4s, #12 + shl v6.4s, v6.4s, #20 + add v7.4s, v7.4s, v16.4s + orr v2.16b, v6.16b, v2.16b + add v6.4s, v7.4s, v2.4s + ext v7.16b, v18.16b, v18.16b, #4 + eor v17.16b, v17.16b, v6.16b + uzp1 v18.4s, v7.4s, v7.4s + tbl v17.16b, { v17.16b }, v1.16b + ext v18.16b, v18.16b, v7.16b, #8 + add v3.4s, v3.4s, v17.4s + uzp2 v18.4s, v18.4s, v4.4s + eor v2.16b, v3.16b, v2.16b + add v6.4s, v6.4s, v18.4s + ext v3.16b, v3.16b, v3.16b, #12 + ext v18.16b, v18.16b, v18.16b, #4 + ushr v19.4s, v2.4s, #7 + shl v2.4s, v2.4s, #25 + ext v6.16b, v6.16b, v6.16b, #4 + orr v19.16b, v2.16b, v19.16b + ext v2.16b, v17.16b, v17.16b, #8 + ext v17.16b, v7.16b, v7.16b, #12 + add v6.4s, v6.4s, v19.4s + eor v2.16b, v2.16b, v6.16b + tbl v20.16b, { v2.16b }, v0.16b + ext v2.16b, v7.16b, v17.16b, #12 + mov v7.16b, v16.16b + add v17.4s, v3.4s, v20.4s + rev64 v3.4s, v2.4s + mov v7.s[1], v5.s[2] + eor v19.16b, v17.16b, v19.16b + trn2 v3.4s, v3.4s, v7.4s + ushr v21.4s, v19.4s, #12 + shl v19.4s, v19.4s, #20 + add v6.4s, v6.4s, v3.4s + orr v19.16b, v19.16b, v21.16b + add v21.4s, v6.4s, v19.4s + eor v6.16b, v20.16b, v21.16b + zip1 v20.2d, v16.2d, v4.2d + zip2 v4.4s, v4.4s, v16.4s + tbl v22.16b, { v6.16b }, v1.16b + mov v20.s[3], v5.s[3] + add v17.4s, 
v17.4s, v22.4s + ext v6.16b, v20.16b, v20.16b, #12 + eor v19.16b, v17.16b, v19.16b + uzp1 v6.4s, v20.4s, v6.4s + ext v20.16b, v21.16b, v21.16b, #12 + ext v17.16b, v17.16b, v17.16b, #4 + ushr v21.4s, v19.4s, #7 + shl v19.4s, v19.4s, #25 + add v20.4s, v20.4s, v6.4s + orr v19.16b, v19.16b, v21.16b + ext v21.16b, v22.16b, v22.16b, #8 + add v20.4s, v20.4s, v19.4s + eor v21.16b, v21.16b, v20.16b + tbl v21.16b, { v21.16b }, v0.16b + add v16.4s, v17.4s, v21.4s + zip1 v17.4s, v4.4s, v5.4s + zip1 v4.4s, v5.4s, v4.4s + eor v5.16b, v16.16b, v19.16b + ext v4.16b, v4.16b, v17.16b, #8 + ushr v17.4s, v5.4s, #12 + shl v5.4s, v5.4s, #20 + add v19.4s, v20.4s, v4.4s + ext v20.16b, v18.16b, v18.16b, #8 + zip1 v3.2d, v4.2d, v3.2d + orr v5.16b, v5.16b, v17.16b + zip2 v2.4s, v2.4s, v4.4s + uzp2 v7.4s, v20.4s, v7.4s + mov v3.s[3], v6.s[3] + add v17.4s, v19.4s, v5.4s + ext v7.16b, v7.16b, v20.16b, #4 + eor v19.16b, v21.16b, v17.16b + ext v17.16b, v17.16b, v17.16b, #4 + tbl v19.16b, { v19.16b }, v1.16b + add v7.4s, v17.4s, v7.4s + add v16.4s, v16.4s, v19.4s + ext v17.16b, v19.16b, v19.16b, #8 + ext v19.16b, v18.16b, v18.16b, #12 + eor v5.16b, v16.16b, v5.16b + ext v16.16b, v16.16b, v16.16b, #12 + ext v18.16b, v18.16b, v19.16b, #12 + mov v19.16b, v4.16b + ushr v20.4s, v5.4s, #7 + shl v5.4s, v5.4s, #25 + rev64 v18.4s, v18.4s + mov v19.s[1], v6.s[2] + orr v5.16b, v5.16b, v20.16b + trn2 v18.4s, v18.4s, v19.4s + add v7.4s, v5.4s, v7.4s + eor v17.16b, v17.16b, v7.16b + add v7.4s, v7.4s, v18.4s + ext v18.16b, v3.16b, v3.16b, #12 + tbl v17.16b, { v17.16b }, v0.16b + uzp1 v3.4s, v3.4s, v18.4s + add v16.4s, v16.4s, v17.4s + eor v5.16b, v16.16b, v5.16b + ushr v19.4s, v5.4s, #12 + shl v5.4s, v5.4s, #20 + orr v5.16b, v5.16b, v19.16b + add v7.4s, v7.4s, v5.4s + eor v17.16b, v17.16b, v7.16b + ext v7.16b, v7.16b, v7.16b, #12 + tbl v17.16b, { v17.16b }, v1.16b + add v3.4s, v7.4s, v3.4s + add v16.4s, v16.4s, v17.4s + ext v7.16b, v17.16b, v17.16b, #8 + eor v5.16b, v16.16b, v5.16b + ext v16.16b, v16.16b, 
v16.16b, #4 + ushr v18.4s, v5.4s, #7 + shl v5.4s, v5.4s, #25 + orr v5.16b, v5.16b, v18.16b + add v3.4s, v3.4s, v5.4s + eor v7.16b, v7.16b, v3.16b + tbl v0.16b, { v7.16b }, v0.16b + zip1 v7.4s, v2.4s, v6.4s + zip1 v2.4s, v6.4s, v2.4s + add v4.4s, v16.4s, v0.4s + ext v2.16b, v2.16b, v7.16b, #8 + eor v5.16b, v4.16b, v5.16b + add v2.4s, v3.4s, v2.4s + ushr v6.4s, v5.4s, #12 + shl v5.4s, v5.4s, #20 + orr v3.16b, v5.16b, v6.16b + add v2.4s, v2.4s, v3.4s + eor v0.16b, v0.16b, v2.16b + ext v2.16b, v2.16b, v2.16b, #4 + tbl v0.16b, { v0.16b }, v1.16b + add v1.4s, v4.4s, v0.4s + ext v0.16b, v0.16b, v0.16b, #8 + eor v3.16b, v1.16b, v3.16b + ext v1.16b, v1.16b, v1.16b, #12 + ushr v4.4s, v3.4s, #7 + shl v3.4s, v3.4s, #25 + stp q1, q0, [x0, #32] + orr v3.16b, v3.16b, v4.16b + stp q2, q3, [x0] + ret +.Lfunc_end1: + .size compress_pre, .Lfunc_end1-compress_pre + .cfi_endproc + + .globl zfs_blake3_compress_xof_sse41 + .p2align 2 + .type zfs_blake3_compress_xof_sse41,@function +zfs_blake3_compress_xof_sse41: + .cfi_startproc + hint #25 + CFI_NEGATE_RA_STATE + sub sp, sp, #96 + stp x29, x30, [sp, #64] + add x29, sp, #64 + stp x20, x19, [sp, #80] + .cfi_def_cfa w29, 32 + .cfi_offset w19, -8 + .cfi_offset w20, -16 + .cfi_offset w30, -24 + .cfi_offset w29, -32 + mov x20, x0 + mov x19, x5 + mov w5, w4 + mov x4, x3 + mov w3, w2 + mov x2, x1 + mov x0, sp + mov x1, x20 + bl compress_pre + ldp q0, q1, [sp] + ldp q2, q3, [sp, #32] + eor v0.16b, v2.16b, v0.16b + eor v1.16b, v3.16b, v1.16b + ldp x29, x30, [sp, #64] + stp q0, q1, [x19] + ldr q0, [x20] + eor v0.16b, v0.16b, v2.16b + str q0, [x19, #32] + ldr q0, [x20, #16] + eor v0.16b, v0.16b, v3.16b + str q0, [x19, #48] + ldp x20, x19, [sp, #80] + add sp, sp, #96 + hint #29 + ret +.Lfunc_end2: + .size zfs_blake3_compress_xof_sse41, .Lfunc_end2-zfs_blake3_compress_xof_sse41 + .cfi_endproc + + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4 +.LCPI3_0: + .word 0 + .word 1 + .word 2 + .word 3 +.LCPI3_1: + .byte 2 + .byte 3 + .byte 0 + .byte 1 + 
.byte 6 + .byte 7 + .byte 4 + .byte 5 + .byte 10 + .byte 11 + .byte 8 + .byte 9 + .byte 14 + .byte 15 + .byte 12 + .byte 13 +.LCPI3_2: + .byte 1 + .byte 2 + .byte 3 + .byte 0 + .byte 5 + .byte 6 + .byte 7 + .byte 4 + .byte 9 + .byte 10 + .byte 11 + .byte 8 + .byte 13 + .byte 14 + .byte 15 + .byte 12 +.LCPI3_3: + .word 1779033703 + .word 3144134277 + .word 1013904242 + .word 2773480762 + .text + .globl zfs_blake3_hash_many_sse41 + .p2align 2 + .type zfs_blake3_hash_many_sse41,@function +zfs_blake3_hash_many_sse41: + .cfi_startproc + hint #34 + stp d15, d14, [sp, #-144]! + stp d13, d12, [sp, #16] + stp d11, d10, [sp, #32] + stp d9, d8, [sp, #48] + stp x29, x27, [sp, #64] + stp x26, x25, [sp, #80] + stp x24, x23, [sp, #96] + stp x22, x21, [sp, #112] + stp x20, x19, [sp, #128] + sub sp, sp, #368 + .cfi_def_cfa_offset 512 + .cfi_offset w19, -8 + .cfi_offset w20, -16 + .cfi_offset w21, -24 + .cfi_offset w22, -32 + .cfi_offset w23, -40 + .cfi_offset w24, -48 + .cfi_offset w25, -56 + .cfi_offset w26, -64 + .cfi_offset w27, -72 + .cfi_offset w29, -80 + .cfi_offset b8, -88 + .cfi_offset b9, -96 + .cfi_offset b10, -104 + .cfi_offset b11, -112 + .cfi_offset b12, -120 + .cfi_offset b13, -128 + .cfi_offset b14, -136 + .cfi_offset b15, -144 + ldr x8, [sp, #520] + adrp x11, .LCPI3_1 + ldrb w9, [sp, #512] + adrp x10, .LCPI3_2 + cmp x1, #4 + b.lo .LBB3_6 + adrp x12, .LCPI3_0 + sbfx w13, w5, #0, #1 + mov w15, #58983 + mov w16, #44677 + movk w15, #27145, lsl #16 + movk w16, #47975, lsl #16 + ldr q0, [x12, :lo12:.LCPI3_0] + dup v1.4s, w13 + movi v13.4s, #64 + mov w13, #62322 + mov w14, #62778 + orr w12, w7, w6 + and v0.16b, v1.16b, v0.16b + ldr q1, [x11, :lo12:.LCPI3_1] + movk w13, #15470, lsl #16 + movk w14, #42319, lsl #16 + dup v14.4s, w15 + stp q0, q1, [sp, #16] + orr v0.4s, #128, lsl #24 + str q0, [sp] + dup v0.4s, w16 + stp q0, q14, [sp, #48] + b .LBB3_3 +.LBB3_2: + zip1 v0.4s, v29.4s, v8.4s + add x15, x4, #4 + zip1 v1.4s, v30.4s, v31.4s + tst w5, #0x1 + zip1 v2.4s, v24.4s, 
v18.4s + csel x4, x15, x4, ne + zip1 v3.4s, v25.4s, v26.4s + add x0, x0, #32 + zip2 v6.4s, v29.4s, v8.4s + sub x1, x1, #4 + zip1 v4.2d, v0.2d, v1.2d + cmp x1, #3 + zip2 v7.4s, v30.4s, v31.4s + zip1 v5.2d, v2.2d, v3.2d + zip2 v0.2d, v0.2d, v1.2d + zip2 v1.2d, v2.2d, v3.2d + zip2 v2.4s, v24.4s, v18.4s + zip2 v3.4s, v25.4s, v26.4s + stp q4, q5, [x8] + zip2 v4.2d, v6.2d, v7.2d + stp q0, q1, [x8, #32] + zip1 v0.2d, v6.2d, v7.2d + zip1 v1.2d, v2.2d, v3.2d + zip2 v2.2d, v2.2d, v3.2d + stp q0, q1, [x8, #64] + stp q4, q2, [x8, #96] + add x8, x8, #128 + b.ls .LBB3_6 +.LBB3_3: + mov x15, x3 + add x16, x3, #8 + add x17, x3, #12 + add x19, x3, #16 + add x20, x3, #20 + ld1r { v29.4s }, [x15], #4 + ld1r { v30.4s }, [x16] + add x16, x3, #24 + ld1r { v31.4s }, [x17] + add x17, x3, #28 + ld1r { v24.4s }, [x19] + ld1r { v18.4s }, [x20] + ld1r { v25.4s }, [x16] + ld1r { v8.4s }, [x15] + ld1r { v26.4s }, [x17] + cbz x2, .LBB3_2 + ldr q1, [sp, #16] + dup v0.4s, w4 + lsr x17, x4, #32 + mov x15, xzr + ldp x19, x20, [x0, #16] + add v1.4s, v0.4s, v1.4s + mov x21, x2 + movi v0.4s, #128, lsl #24 + mov w26, w12 + str q1, [sp, #96] + eor v0.16b, v1.16b, v0.16b + ldr q1, [sp] + cmgt v0.4s, v1.4s, v0.4s + dup v1.4s, w17 + ldp x16, x17, [x0] + sub v0.4s, v1.4s, v0.4s + str q0, [sp, #80] +.LBB3_5: + add x23, x16, x15 + add x24, x17, x15 + add x22, x19, x15 + add x25, x20, x15 + subs x21, x21, #1 + add x15, x15, #64 + ldp q1, q2, [x23] + csel w27, w9, wzr, eq + orr w26, w27, w26 + and w26, w26, #0xff + ldp q4, q5, [x24] + dup v0.4s, w26 + mov w26, w6 + zip1 v22.4s, v1.4s, v4.4s + zip2 v20.4s, v1.4s, v4.4s + ldp q6, q7, [x22] + zip1 v17.4s, v2.4s, v5.4s + zip2 v23.4s, v2.4s, v5.4s + ldp q16, q21, [x25] + zip1 v19.4s, v6.4s, v16.4s + zip2 v1.4s, v6.4s, v16.4s + ldp q27, q28, [x23, #32] + zip1 v4.4s, v7.4s, v21.4s + zip2 v5.4s, v7.4s, v21.4s + zip2 v15.2d, v17.2d, v4.2d + ldp q9, q10, [x24, #32] + mov v17.d[1], v4.d[0] + add v4.4s, v30.4s, v25.4s + zip2 v11.2d, v23.2d, v5.2d + zip2 v3.4s, v27.4s, v9.4s 
+ zip1 v7.4s, v27.4s, v9.4s + ldp q12, q6, [x22, #32] + mov v23.d[1], v5.d[0] + stp q11, q3, [sp, #256] + add v5.4s, v31.4s, v26.4s + add v4.4s, v4.4s, v17.4s + str q23, [sp, #352] + ldp q16, q2, [x25, #32] + add v5.4s, v5.4s, v23.4s + zip1 v3.4s, v12.4s, v16.4s + eor v0.16b, v5.16b, v0.16b + zip1 v9.4s, v6.4s, v2.4s + zip2 v2.4s, v6.4s, v2.4s + stp q7, q3, [sp, #208] + zip2 v3.4s, v12.4s, v16.4s + zip1 v12.4s, v28.4s, v10.4s + zip2 v10.4s, v28.4s, v10.4s + stp q17, q2, [sp, #160] + zip2 v28.2d, v22.2d, v19.2d + mov v22.d[1], v19.d[0] + str q3, [sp, #240] + add v2.4s, v8.4s, v18.4s + eor v16.16b, v4.16b, v13.16b + dup v17.4s, w13 + mov v3.16b, v22.16b + stp q22, q28, [sp, #320] + zip2 v22.2d, v20.2d, v1.2d + mov v20.d[1], v1.d[0] + add v1.4s, v29.4s, v24.4s + add v4.4s, v4.4s, v15.4s + add v5.4s, v5.4s, v11.4s + add v2.4s, v2.4s, v20.4s + stp q15, q20, [sp, #288] + add v1.4s, v1.4s, v3.4s + ldr q3, [sp, #96] + dup v20.4s, w14 + mov v23.16b, v22.16b + mov v15.16b, v10.16b + eor v6.16b, v1.16b, v3.16b + ldr q3, [sp, #80] + add v1.4s, v1.4s, v28.4s + ldr q28, [sp, #272] + str q23, [sp, #128] + eor v7.16b, v2.16b, v3.16b + ldp q27, q3, [sp, #32] + add v2.4s, v2.4s, v22.4s + tbl v6.16b, { v6.16b }, v27.16b + tbl v7.16b, { v7.16b }, v27.16b + tbl v16.16b, { v16.16b }, v27.16b + tbl v0.16b, { v0.16b }, v27.16b + add v19.4s, v6.4s, v14.4s + add v21.4s, v7.4s, v3.4s + add v30.4s, v16.4s, v17.4s + add v31.4s, v0.4s, v20.4s + eor v24.16b, v19.16b, v24.16b + eor v17.16b, v21.16b, v18.16b + ushr v18.4s, v24.4s, #12 + shl v20.4s, v24.4s, #20 + eor v24.16b, v30.16b, v25.16b + eor v25.16b, v31.16b, v26.16b + ushr v26.4s, v17.4s, #12 + shl v17.4s, v17.4s, #20 + ushr v29.4s, v24.4s, #12 + shl v24.4s, v24.4s, #20 + ushr v8.4s, v25.4s, #12 + shl v25.4s, v25.4s, #20 + orr v3.16b, v20.16b, v18.16b + ldr q18, [x10, :lo12:.LCPI3_2] + orr v13.16b, v17.16b, v26.16b + orr v24.16b, v24.16b, v29.16b + orr v14.16b, v25.16b, v8.16b + add v8.4s, v1.4s, v3.4s + add v29.4s, v2.4s, v13.4s + add 
v17.4s, v4.4s, v24.4s + add v20.4s, v5.4s, v14.4s + eor v1.16b, v6.16b, v8.16b + eor v2.16b, v7.16b, v29.16b + eor v4.16b, v16.16b, v17.16b + eor v0.16b, v0.16b, v20.16b + tbl v25.16b, { v1.16b }, v18.16b + tbl v16.16b, { v2.16b }, v18.16b + tbl v6.16b, { v4.16b }, v18.16b + tbl v4.16b, { v0.16b }, v18.16b + add v19.4s, v19.4s, v25.4s + add v21.4s, v21.4s, v16.4s + add v26.4s, v30.4s, v6.4s + add v7.4s, v31.4s, v4.4s + eor v0.16b, v19.16b, v3.16b + eor v1.16b, v21.16b, v13.16b + eor v2.16b, v26.16b, v24.16b + eor v3.16b, v7.16b, v14.16b + ushr v5.4s, v0.4s, #7 + shl v0.4s, v0.4s, #25 + ushr v24.4s, v1.4s, #7 + shl v1.4s, v1.4s, #25 + ushr v30.4s, v2.4s, #7 + shl v2.4s, v2.4s, #25 + orr v5.16b, v0.16b, v5.16b + orr v0.16b, v1.16b, v24.16b + ushr v31.4s, v3.4s, #7 + orr v2.16b, v2.16b, v30.16b + ldp q24, q30, [sp, #208] + shl v3.4s, v3.4s, #25 + zip2 v14.2d, v12.2d, v9.2d + mov v22.16b, v24.16b + orr v1.16b, v3.16b, v31.16b + zip2 v3.2d, v24.2d, v30.2d + mov v24.16b, v28.16b + mov v22.d[1], v30.d[0] + ldr q30, [sp, #240] + mov v31.16b, v12.16b + stp q22, q14, [sp, #224] + mov v24.d[1], v30.d[0] + add v12.4s, v8.4s, v22.4s + mov v31.d[1], v9.d[0] + add v22.4s, v29.4s, v24.4s + ldr q29, [sp, #176] + zip2 v28.2d, v28.2d, v30.2d + mov v9.16b, v24.16b + mov v15.d[1], v29.d[0] + zip2 v8.2d, v10.2d, v29.2d + add v10.4s, v12.4s, v0.4s + add v22.4s, v22.4s, v2.4s + str q9, [sp, #144] + add v20.4s, v20.4s, v15.4s + add v17.4s, v17.4s, v31.4s + stp q3, q8, [sp, #192] + eor v4.16b, v4.16b, v10.16b + eor v25.16b, v25.16b, v22.16b + add v20.4s, v20.4s, v5.4s + add v17.4s, v17.4s, v1.4s + tbl v4.16b, { v4.16b }, v27.16b + tbl v25.16b, { v25.16b }, v27.16b + eor v6.16b, v6.16b, v20.16b + eor v16.16b, v16.16b, v17.16b + add v26.4s, v26.4s, v4.4s + add v7.4s, v7.4s, v25.4s + tbl v6.16b, { v6.16b }, v27.16b + tbl v16.16b, { v16.16b }, v27.16b + eor v0.16b, v26.16b, v0.16b + eor v2.16b, v7.16b, v2.16b + add v21.4s, v21.4s, v6.4s + add v19.4s, v19.4s, v16.4s + ushr v12.4s, v0.4s, #12 + 
shl v0.4s, v0.4s, #20 + ushr v13.4s, v2.4s, #12 + shl v2.4s, v2.4s, #20 + eor v5.16b, v21.16b, v5.16b + eor v1.16b, v19.16b, v1.16b + orr v0.16b, v0.16b, v12.16b + add v10.4s, v10.4s, v3.4s + orr v2.16b, v2.16b, v13.16b + ushr v13.4s, v5.4s, #12 + shl v5.4s, v5.4s, #20 + add v22.4s, v22.4s, v28.4s + ushr v12.4s, v1.4s, #12 + shl v1.4s, v1.4s, #20 + add v10.4s, v10.4s, v0.4s + orr v5.16b, v5.16b, v13.16b + add v22.4s, v22.4s, v2.4s + add v20.4s, v20.4s, v8.4s + orr v1.16b, v1.16b, v12.16b + add v17.4s, v17.4s, v14.4s + eor v4.16b, v4.16b, v10.16b + eor v25.16b, v25.16b, v22.16b + add v20.4s, v20.4s, v5.4s + add v17.4s, v17.4s, v1.4s + tbl v4.16b, { v4.16b }, v18.16b + tbl v25.16b, { v25.16b }, v18.16b + eor v6.16b, v6.16b, v20.16b + eor v16.16b, v16.16b, v17.16b + add v26.4s, v26.4s, v4.4s + add v7.4s, v7.4s, v25.4s + tbl v6.16b, { v6.16b }, v18.16b + tbl v16.16b, { v16.16b }, v18.16b + eor v0.16b, v26.16b, v0.16b + eor v2.16b, v7.16b, v2.16b + add v21.4s, v21.4s, v6.4s + add v19.4s, v19.4s, v16.4s + ushr v12.4s, v0.4s, #7 + shl v0.4s, v0.4s, #25 + ushr v13.4s, v2.4s, #7 + shl v2.4s, v2.4s, #25 + eor v5.16b, v21.16b, v5.16b + eor v1.16b, v19.16b, v1.16b + orr v0.16b, v0.16b, v12.16b + add v22.4s, v22.4s, v23.4s + orr v2.16b, v2.16b, v13.16b + ushr v13.4s, v5.4s, #7 + shl v5.4s, v5.4s, #25 + add v17.4s, v17.4s, v11.4s + mov v30.16b, v28.16b + mov v28.16b, v23.16b + ldr q23, [sp, #304] + ushr v12.4s, v1.4s, #7 + shl v1.4s, v1.4s, #25 + add v22.4s, v22.4s, v0.4s + mov v29.16b, v31.16b + ldr q31, [sp, #160] + orr v5.16b, v5.16b, v13.16b + add v17.4s, v17.4s, v2.4s + add v10.4s, v10.4s, v23.4s + orr v1.16b, v1.16b, v12.16b + str q29, [sp, #272] + eor v16.16b, v16.16b, v22.16b + add v20.4s, v20.4s, v31.4s + eor v6.16b, v6.16b, v17.16b + add v10.4s, v10.4s, v5.4s + tbl v16.16b, { v16.16b }, v27.16b + add v20.4s, v20.4s, v1.4s + tbl v6.16b, { v6.16b }, v27.16b + eor v25.16b, v25.16b, v10.16b + add v21.4s, v21.4s, v16.4s + eor v4.16b, v4.16b, v20.16b + add v26.4s, v26.4s, 
v6.4s + tbl v25.16b, { v25.16b }, v27.16b + eor v0.16b, v21.16b, v0.16b + tbl v4.16b, { v4.16b }, v27.16b + eor v2.16b, v26.16b, v2.16b + add v19.4s, v19.4s, v25.4s + ushr v12.4s, v0.4s, #12 + shl v0.4s, v0.4s, #20 + add v7.4s, v7.4s, v4.4s + ushr v13.4s, v2.4s, #12 + shl v2.4s, v2.4s, #20 + eor v5.16b, v5.16b, v19.16b + add v22.4s, v22.4s, v24.4s + ldr q24, [sp, #320] + orr v0.16b, v0.16b, v12.16b + eor v1.16b, v7.16b, v1.16b + orr v2.16b, v2.16b, v13.16b + ushr v12.4s, v5.4s, #12 + shl v5.4s, v5.4s, #20 + add v17.4s, v17.4s, v24.4s + ldr q24, [sp, #352] + ushr v13.4s, v1.4s, #12 + shl v1.4s, v1.4s, #20 + add v22.4s, v22.4s, v0.4s + orr v5.16b, v5.16b, v12.16b + add v17.4s, v17.4s, v2.4s + add v10.4s, v10.4s, v24.4s + ldr q24, [sp, #336] + orr v1.16b, v1.16b, v13.16b + eor v16.16b, v16.16b, v22.16b + add v20.4s, v20.4s, v14.4s + eor v6.16b, v6.16b, v17.16b + add v10.4s, v10.4s, v5.4s + tbl v16.16b, { v16.16b }, v18.16b + add v20.4s, v20.4s, v1.4s + tbl v6.16b, { v6.16b }, v18.16b + eor v25.16b, v25.16b, v10.16b + add v21.4s, v21.4s, v16.4s + eor v4.16b, v4.16b, v20.16b + add v26.4s, v26.4s, v6.4s + tbl v25.16b, { v25.16b }, v18.16b + eor v0.16b, v21.16b, v0.16b + tbl v4.16b, { v4.16b }, v18.16b + eor v2.16b, v26.16b, v2.16b + add v19.4s, v19.4s, v25.4s + ushr v12.4s, v0.4s, #7 + shl v0.4s, v0.4s, #25 + add v7.4s, v7.4s, v4.4s + ushr v13.4s, v2.4s, #7 + shl v2.4s, v2.4s, #25 + eor v5.16b, v19.16b, v5.16b + orr v0.16b, v0.16b, v12.16b + eor v1.16b, v7.16b, v1.16b + add v10.4s, v10.4s, v24.4s + orr v2.16b, v2.16b, v13.16b + ushr v12.4s, v5.4s, #7 + shl v5.4s, v5.4s, #25 + add v22.4s, v22.4s, v29.4s + ushr v13.4s, v1.4s, #7 + shl v1.4s, v1.4s, #25 + add v10.4s, v10.4s, v0.4s + orr v5.16b, v5.16b, v12.16b + add v22.4s, v22.4s, v2.4s + add v20.4s, v20.4s, v8.4s + ldr q8, [sp, #288] + orr v1.16b, v1.16b, v13.16b + add v17.4s, v17.4s, v3.4s + ldr q3, [sp, #352] + eor v4.16b, v4.16b, v10.16b + eor v25.16b, v25.16b, v22.16b + add v20.4s, v20.4s, v5.4s + add v17.4s, v17.4s, 
v1.4s + tbl v4.16b, { v4.16b }, v27.16b + tbl v25.16b, { v25.16b }, v27.16b + eor v6.16b, v6.16b, v20.16b + eor v16.16b, v16.16b, v17.16b + add v26.4s, v26.4s, v4.4s + add v7.4s, v7.4s, v25.4s + tbl v6.16b, { v6.16b }, v27.16b + tbl v16.16b, { v16.16b }, v27.16b + eor v0.16b, v26.16b, v0.16b + eor v2.16b, v7.16b, v2.16b + add v21.4s, v21.4s, v6.4s + add v19.4s, v19.4s, v16.4s + ushr v12.4s, v0.4s, #12 + shl v0.4s, v0.4s, #20 + ushr v13.4s, v2.4s, #12 + shl v2.4s, v2.4s, #20 + eor v5.16b, v21.16b, v5.16b + eor v1.16b, v19.16b, v1.16b + orr v0.16b, v0.16b, v12.16b + add v10.4s, v10.4s, v30.4s + orr v2.16b, v2.16b, v13.16b + ushr v13.4s, v5.4s, #12 + shl v5.4s, v5.4s, #20 + add v22.4s, v22.4s, v8.4s + mov v24.16b, v30.16b + mov v30.16b, v15.16b + add v17.4s, v17.4s, v15.4s + ldr q15, [sp, #224] + ushr v12.4s, v1.4s, #12 + shl v1.4s, v1.4s, #20 + add v10.4s, v10.4s, v0.4s + str q30, [sp, #176] + orr v5.16b, v5.16b, v13.16b + add v22.4s, v22.4s, v2.4s + add v20.4s, v20.4s, v15.4s + orr v1.16b, v1.16b, v12.16b + eor v4.16b, v4.16b, v10.16b + eor v25.16b, v25.16b, v22.16b + add v20.4s, v20.4s, v5.4s + add v17.4s, v17.4s, v1.4s + tbl v4.16b, { v4.16b }, v18.16b + tbl v25.16b, { v25.16b }, v18.16b + eor v6.16b, v6.16b, v20.16b + eor v16.16b, v16.16b, v17.16b + add v26.4s, v26.4s, v4.4s + add v7.4s, v7.4s, v25.4s + tbl v6.16b, { v6.16b }, v18.16b + tbl v16.16b, { v16.16b }, v18.16b + eor v0.16b, v26.16b, v0.16b + eor v2.16b, v7.16b, v2.16b + add v21.4s, v21.4s, v6.4s + add v19.4s, v19.4s, v16.4s + ushr v12.4s, v0.4s, #7 + shl v0.4s, v0.4s, #25 + ushr v13.4s, v2.4s, #7 + shl v2.4s, v2.4s, #25 + eor v5.16b, v21.16b, v5.16b + eor v1.16b, v19.16b, v1.16b + orr v0.16b, v0.16b, v12.16b + add v22.4s, v22.4s, v9.4s + orr v2.16b, v2.16b, v13.16b + ushr v13.4s, v5.4s, #7 + shl v5.4s, v5.4s, #25 + add v17.4s, v17.4s, v14.4s + ushr v12.4s, v1.4s, #7 + shl v1.4s, v1.4s, #25 + add v22.4s, v22.4s, v0.4s + orr v5.16b, v5.16b, v13.16b + add v17.4s, v17.4s, v2.4s + add v10.4s, v10.4s, v28.4s 
+ orr v1.16b, v1.16b, v12.16b + eor v16.16b, v16.16b, v22.16b + add v20.4s, v20.4s, v11.4s + eor v6.16b, v6.16b, v17.16b + add v10.4s, v10.4s, v5.4s + tbl v16.16b, { v16.16b }, v27.16b + add v20.4s, v20.4s, v1.4s + tbl v6.16b, { v6.16b }, v27.16b + eor v25.16b, v25.16b, v10.16b + add v21.4s, v21.4s, v16.4s + eor v4.16b, v4.16b, v20.16b + add v26.4s, v26.4s, v6.4s + tbl v25.16b, { v25.16b }, v27.16b + eor v0.16b, v21.16b, v0.16b + tbl v4.16b, { v4.16b }, v27.16b + eor v2.16b, v26.16b, v2.16b + add v19.4s, v19.4s, v25.4s + ushr v12.4s, v0.4s, #12 + shl v0.4s, v0.4s, #20 + add v7.4s, v7.4s, v4.4s + ushr v13.4s, v2.4s, #12 + shl v2.4s, v2.4s, #20 + eor v5.16b, v5.16b, v19.16b + orr v0.16b, v0.16b, v12.16b + eor v1.16b, v7.16b, v1.16b + add v22.4s, v22.4s, v29.4s + orr v2.16b, v2.16b, v13.16b + ushr v12.4s, v5.4s, #12 + shl v5.4s, v5.4s, #20 + add v17.4s, v17.4s, v23.4s + ushr v13.4s, v1.4s, #12 + shl v1.4s, v1.4s, #20 + add v22.4s, v22.4s, v0.4s + orr v5.16b, v5.16b, v12.16b + add v17.4s, v17.4s, v2.4s + add v10.4s, v10.4s, v31.4s + orr v1.16b, v1.16b, v13.16b + eor v16.16b, v16.16b, v22.16b + add v20.4s, v20.4s, v30.4s + eor v6.16b, v6.16b, v17.16b + add v10.4s, v10.4s, v5.4s + tbl v16.16b, { v16.16b }, v18.16b + add v20.4s, v20.4s, v1.4s + tbl v6.16b, { v6.16b }, v18.16b + eor v25.16b, v25.16b, v10.16b + add v21.4s, v21.4s, v16.4s + eor v4.16b, v4.16b, v20.16b + add v26.4s, v26.4s, v6.4s + tbl v25.16b, { v25.16b }, v18.16b + eor v0.16b, v21.16b, v0.16b + tbl v4.16b, { v4.16b }, v18.16b + eor v2.16b, v26.16b, v2.16b + add v19.4s, v19.4s, v25.4s + ushr v12.4s, v0.4s, #7 + shl v0.4s, v0.4s, #25 + add v7.4s, v7.4s, v4.4s + ushr v13.4s, v2.4s, #7 + shl v2.4s, v2.4s, #25 + eor v5.16b, v19.16b, v5.16b + add v10.4s, v10.4s, v3.4s + ldr q3, [sp, #192] + orr v0.16b, v0.16b, v12.16b + eor v1.16b, v7.16b, v1.16b + orr v2.16b, v2.16b, v13.16b + ushr v12.4s, v5.4s, #7 + shl v5.4s, v5.4s, #25 + add v22.4s, v22.4s, v3.4s + ushr v13.4s, v1.4s, #7 + shl v1.4s, v1.4s, #25 + add v10.4s, 
v10.4s, v0.4s + orr v5.16b, v5.16b, v12.16b + add v22.4s, v22.4s, v2.4s + add v20.4s, v20.4s, v15.4s + ldr q15, [sp, #128] + orr v1.16b, v1.16b, v13.16b + add v17.4s, v17.4s, v24.4s + eor v4.16b, v4.16b, v10.16b + eor v25.16b, v25.16b, v22.16b + add v20.4s, v20.4s, v5.4s + add v17.4s, v17.4s, v1.4s + tbl v4.16b, { v4.16b }, v27.16b + tbl v25.16b, { v25.16b }, v27.16b + eor v6.16b, v6.16b, v20.16b + eor v16.16b, v16.16b, v17.16b + add v26.4s, v26.4s, v4.4s + add v7.4s, v7.4s, v25.4s + tbl v6.16b, { v6.16b }, v27.16b + tbl v16.16b, { v16.16b }, v27.16b + eor v0.16b, v26.16b, v0.16b + eor v2.16b, v7.16b, v2.16b + add v21.4s, v21.4s, v6.4s + add v19.4s, v19.4s, v16.4s + ushr v12.4s, v0.4s, #12 + shl v0.4s, v0.4s, #20 + ushr v13.4s, v2.4s, #12 + shl v2.4s, v2.4s, #20 + eor v5.16b, v21.16b, v5.16b + ldp q23, q11, [sp, #320] + eor v1.16b, v19.16b, v1.16b + orr v0.16b, v0.16b, v12.16b + add v10.4s, v10.4s, v8.4s + orr v2.16b, v2.16b, v13.16b + ushr v13.4s, v5.4s, #12 + shl v5.4s, v5.4s, #20 + add v22.4s, v22.4s, v23.4s + ushr v12.4s, v1.4s, #12 + shl v1.4s, v1.4s, #20 + add v10.4s, v10.4s, v0.4s + mov v28.16b, v31.16b + mov v31.16b, v8.16b + ldr q8, [sp, #208] + orr v5.16b, v5.16b, v13.16b + add v22.4s, v22.4s, v2.4s + add v20.4s, v20.4s, v11.4s + orr v1.16b, v1.16b, v12.16b + add v17.4s, v17.4s, v8.4s + eor v4.16b, v4.16b, v10.16b + eor v25.16b, v25.16b, v22.16b + add v20.4s, v20.4s, v5.4s + add v17.4s, v17.4s, v1.4s + tbl v4.16b, { v4.16b }, v18.16b + tbl v25.16b, { v25.16b }, v18.16b + eor v6.16b, v6.16b, v20.16b + eor v16.16b, v16.16b, v17.16b + add v26.4s, v26.4s, v4.4s + add v7.4s, v7.4s, v25.4s + tbl v6.16b, { v6.16b }, v18.16b + tbl v16.16b, { v16.16b }, v18.16b + eor v0.16b, v26.16b, v0.16b + eor v2.16b, v7.16b, v2.16b + add v21.4s, v21.4s, v6.4s + add v19.4s, v19.4s, v16.4s + ushr v12.4s, v0.4s, #7 + shl v0.4s, v0.4s, #25 + ushr v13.4s, v2.4s, #7 + shl v2.4s, v2.4s, #25 + eor v5.16b, v21.16b, v5.16b + eor v1.16b, v19.16b, v1.16b + orr v0.16b, v0.16b, v12.16b + 
add v22.4s, v22.4s, v29.4s + orr v2.16b, v2.16b, v13.16b + ushr v13.4s, v5.4s, #7 + shl v5.4s, v5.4s, #25 + add v17.4s, v17.4s, v30.4s + ushr v12.4s, v1.4s, #7 + shl v1.4s, v1.4s, #25 + add v22.4s, v22.4s, v0.4s + orr v5.16b, v5.16b, v13.16b + add v17.4s, v17.4s, v2.4s + add v10.4s, v10.4s, v9.4s + orr v1.16b, v1.16b, v12.16b + eor v16.16b, v16.16b, v22.16b + add v20.4s, v20.4s, v14.4s + ldr q14, [sp, #256] + eor v6.16b, v6.16b, v17.16b + add v10.4s, v10.4s, v5.4s + tbl v16.16b, { v16.16b }, v27.16b + add v20.4s, v20.4s, v1.4s + tbl v6.16b, { v6.16b }, v27.16b + eor v25.16b, v25.16b, v10.16b + add v21.4s, v21.4s, v16.4s + eor v4.16b, v4.16b, v20.16b + add v26.4s, v26.4s, v6.4s + tbl v25.16b, { v25.16b }, v27.16b + eor v0.16b, v21.16b, v0.16b + tbl v4.16b, { v4.16b }, v27.16b + eor v2.16b, v26.16b, v2.16b + add v19.4s, v19.4s, v25.4s + ushr v12.4s, v0.4s, #12 + shl v0.4s, v0.4s, #20 + add v7.4s, v7.4s, v4.4s + ushr v13.4s, v2.4s, #12 + shl v2.4s, v2.4s, #20 + eor v5.16b, v5.16b, v19.16b + orr v0.16b, v0.16b, v12.16b + eor v1.16b, v7.16b, v1.16b + add v22.4s, v22.4s, v3.4s + orr v2.16b, v2.16b, v13.16b + ushr v12.4s, v5.4s, #12 + shl v5.4s, v5.4s, #20 + add v17.4s, v17.4s, v15.4s + ushr v13.4s, v1.4s, #12 + shl v1.4s, v1.4s, #20 + add v22.4s, v22.4s, v0.4s + orr v5.16b, v5.16b, v12.16b + add v17.4s, v17.4s, v2.4s + add v10.4s, v10.4s, v14.4s + orr v1.16b, v1.16b, v13.16b + eor v16.16b, v16.16b, v22.16b + add v20.4s, v20.4s, v8.4s + eor v6.16b, v6.16b, v17.16b + add v10.4s, v10.4s, v5.4s + tbl v16.16b, { v16.16b }, v18.16b + add v20.4s, v20.4s, v1.4s + tbl v6.16b, { v6.16b }, v18.16b + eor v25.16b, v25.16b, v10.16b + add v21.4s, v21.4s, v16.4s + eor v4.16b, v4.16b, v20.16b + add v26.4s, v26.4s, v6.4s + tbl v25.16b, { v25.16b }, v18.16b + eor v0.16b, v21.16b, v0.16b + tbl v4.16b, { v4.16b }, v18.16b + eor v2.16b, v26.16b, v2.16b + add v19.4s, v19.4s, v25.4s + ushr v12.4s, v0.4s, #7 + shl v0.4s, v0.4s, #25 + add v7.4s, v7.4s, v4.4s + ushr v13.4s, v2.4s, #7 + shl v2.4s, 
v2.4s, #25 + eor v5.16b, v19.16b, v5.16b + orr v0.16b, v0.16b, v12.16b + eor v1.16b, v7.16b, v1.16b + add v10.4s, v10.4s, v28.4s + orr v2.16b, v2.16b, v13.16b + ushr v12.4s, v5.4s, #7 + shl v5.4s, v5.4s, #25 + add v22.4s, v22.4s, v24.4s + ushr v13.4s, v1.4s, #7 + shl v1.4s, v1.4s, #25 + add v10.4s, v10.4s, v0.4s + orr v5.16b, v5.16b, v12.16b + add v22.4s, v22.4s, v2.4s + add v20.4s, v20.4s, v11.4s + ldr q11, [sp, #304] + orr v1.16b, v1.16b, v13.16b + add v17.4s, v17.4s, v31.4s + ldr q31, [sp, #224] + eor v4.16b, v4.16b, v10.16b + eor v25.16b, v25.16b, v22.16b + add v20.4s, v20.4s, v5.4s + add v17.4s, v17.4s, v1.4s + tbl v4.16b, { v4.16b }, v27.16b + tbl v25.16b, { v25.16b }, v27.16b + eor v6.16b, v6.16b, v20.16b + eor v16.16b, v16.16b, v17.16b + add v26.4s, v26.4s, v4.4s + add v7.4s, v7.4s, v25.4s + tbl v6.16b, { v6.16b }, v27.16b + tbl v16.16b, { v16.16b }, v27.16b + eor v0.16b, v26.16b, v0.16b + eor v2.16b, v7.16b, v2.16b + add v21.4s, v21.4s, v6.4s + add v19.4s, v19.4s, v16.4s + ushr v12.4s, v0.4s, #12 + shl v0.4s, v0.4s, #20 + ushr v13.4s, v2.4s, #12 + shl v2.4s, v2.4s, #20 + eor v5.16b, v21.16b, v5.16b + eor v1.16b, v19.16b, v1.16b + orr v0.16b, v0.16b, v12.16b + add v10.4s, v10.4s, v23.4s + ldr q23, [sp, #240] + orr v2.16b, v2.16b, v13.16b + ushr v13.4s, v5.4s, #12 + shl v5.4s, v5.4s, #20 + add v22.4s, v22.4s, v11.4s + mov v30.16b, v8.16b + mov v8.16b, v24.16b + ldr q24, [sp, #352] + ushr v12.4s, v1.4s, #12 + shl v1.4s, v1.4s, #20 + add v10.4s, v10.4s, v0.4s + orr v5.16b, v5.16b, v13.16b + str q8, [sp, #112] + add v22.4s, v22.4s, v2.4s + add v20.4s, v20.4s, v24.4s + orr v1.16b, v1.16b, v12.16b + add v17.4s, v17.4s, v31.4s + eor v4.16b, v4.16b, v10.16b + eor v25.16b, v25.16b, v22.16b + add v20.4s, v20.4s, v5.4s + add v17.4s, v17.4s, v1.4s + tbl v4.16b, { v4.16b }, v18.16b + tbl v25.16b, { v25.16b }, v18.16b + eor v6.16b, v6.16b, v20.16b + eor v16.16b, v16.16b, v17.16b + add v26.4s, v26.4s, v4.4s + add v7.4s, v7.4s, v25.4s + tbl v6.16b, { v6.16b }, v18.16b + 
tbl v16.16b, { v16.16b }, v18.16b + eor v0.16b, v26.16b, v0.16b + eor v2.16b, v7.16b, v2.16b + add v21.4s, v21.4s, v6.4s + mov v29.16b, v3.16b + add v19.4s, v19.4s, v16.4s + ushr v12.4s, v0.4s, #7 + shl v0.4s, v0.4s, #25 + ushr v13.4s, v2.4s, #7 + shl v2.4s, v2.4s, #25 + eor v5.16b, v21.16b, v5.16b + eor v1.16b, v19.16b, v1.16b + orr v0.16b, v0.16b, v12.16b + add v22.4s, v22.4s, v29.4s + orr v2.16b, v2.16b, v13.16b + ushr v13.4s, v5.4s, #7 + shl v5.4s, v5.4s, #25 + add v17.4s, v17.4s, v30.4s + ldr q30, [sp, #272] + ushr v12.4s, v1.4s, #7 + shl v1.4s, v1.4s, #25 + add v22.4s, v22.4s, v0.4s + mov v3.16b, v28.16b + ldr q28, [sp, #176] + orr v5.16b, v5.16b, v13.16b + add v17.4s, v17.4s, v2.4s + add v10.4s, v10.4s, v30.4s + orr v1.16b, v1.16b, v12.16b + eor v16.16b, v16.16b, v22.16b + add v20.4s, v20.4s, v28.4s + eor v6.16b, v6.16b, v17.16b + add v10.4s, v10.4s, v5.4s + tbl v16.16b, { v16.16b }, v27.16b + add v20.4s, v20.4s, v1.4s + tbl v6.16b, { v6.16b }, v27.16b + eor v25.16b, v25.16b, v10.16b + add v21.4s, v21.4s, v16.4s + eor v4.16b, v4.16b, v20.16b + add v26.4s, v26.4s, v6.4s + tbl v25.16b, { v25.16b }, v27.16b + eor v0.16b, v21.16b, v0.16b + tbl v4.16b, { v4.16b }, v27.16b + eor v2.16b, v26.16b, v2.16b + add v19.4s, v19.4s, v25.4s + ushr v12.4s, v0.4s, #12 + shl v0.4s, v0.4s, #20 + add v7.4s, v7.4s, v4.4s + ushr v13.4s, v2.4s, #12 + shl v2.4s, v2.4s, #20 + eor v5.16b, v5.16b, v19.16b + orr v0.16b, v0.16b, v12.16b + eor v1.16b, v7.16b, v1.16b + add v22.4s, v22.4s, v8.4s + orr v2.16b, v2.16b, v13.16b + ushr v12.4s, v5.4s, #12 + shl v5.4s, v5.4s, #20 + add v17.4s, v17.4s, v9.4s + ldr q9, [sp, #320] + ushr v13.4s, v1.4s, #12 + shl v1.4s, v1.4s, #20 + add v22.4s, v22.4s, v0.4s + orr v5.16b, v5.16b, v12.16b + add v17.4s, v17.4s, v2.4s + add v10.4s, v10.4s, v23.4s + orr v1.16b, v1.16b, v13.16b + eor v16.16b, v16.16b, v22.16b + add v20.4s, v20.4s, v31.4s + eor v6.16b, v6.16b, v17.16b + add v10.4s, v10.4s, v5.4s + tbl v16.16b, { v16.16b }, v18.16b + add v20.4s, v20.4s, 
v1.4s + tbl v6.16b, { v6.16b }, v18.16b + eor v25.16b, v25.16b, v10.16b + add v21.4s, v21.4s, v16.4s + eor v4.16b, v4.16b, v20.16b + add v26.4s, v26.4s, v6.4s + tbl v25.16b, { v25.16b }, v18.16b + eor v0.16b, v21.16b, v0.16b + tbl v4.16b, { v4.16b }, v18.16b + eor v2.16b, v26.16b, v2.16b + add v19.4s, v19.4s, v25.4s + ushr v12.4s, v0.4s, #7 + shl v0.4s, v0.4s, #25 + add v7.4s, v7.4s, v4.4s + ushr v13.4s, v2.4s, #7 + shl v2.4s, v2.4s, #25 + eor v5.16b, v19.16b, v5.16b + add v10.4s, v10.4s, v14.4s + ldr q14, [sp, #288] + orr v0.16b, v0.16b, v12.16b + eor v1.16b, v7.16b, v1.16b + orr v2.16b, v2.16b, v13.16b + ushr v12.4s, v5.4s, #7 + shl v5.4s, v5.4s, #25 + add v22.4s, v22.4s, v14.4s + ushr v13.4s, v1.4s, #7 + shl v1.4s, v1.4s, #25 + add v10.4s, v10.4s, v0.4s + orr v5.16b, v5.16b, v12.16b + add v22.4s, v22.4s, v2.4s + add v20.4s, v20.4s, v24.4s + orr v1.16b, v1.16b, v13.16b + eor v4.16b, v4.16b, v10.16b + add v17.4s, v17.4s, v9.4s + eor v25.16b, v25.16b, v22.16b + add v20.4s, v20.4s, v5.4s + tbl v4.16b, { v4.16b }, v27.16b + add v17.4s, v17.4s, v1.4s + tbl v25.16b, { v25.16b }, v27.16b + eor v6.16b, v6.16b, v20.16b + add v26.4s, v26.4s, v4.4s + eor v16.16b, v16.16b, v17.16b + add v7.4s, v7.4s, v25.4s + tbl v6.16b, { v6.16b }, v27.16b + eor v0.16b, v26.16b, v0.16b + tbl v16.16b, { v16.16b }, v27.16b + eor v2.16b, v7.16b, v2.16b + add v21.4s, v21.4s, v6.4s + ushr v12.4s, v0.4s, #12 + shl v0.4s, v0.4s, #20 + add v19.4s, v19.4s, v16.4s + ushr v13.4s, v2.4s, #12 + shl v2.4s, v2.4s, #20 + eor v5.16b, v21.16b, v5.16b + orr v0.16b, v0.16b, v12.16b + eor v1.16b, v19.16b, v1.16b + add v10.4s, v10.4s, v11.4s + orr v2.16b, v2.16b, v13.16b + ushr v13.4s, v5.4s, #12 + shl v5.4s, v5.4s, #20 + ushr v12.4s, v1.4s, #12 + shl v1.4s, v1.4s, #20 + add v10.4s, v10.4s, v0.4s + add v22.4s, v22.4s, v15.4s + orr v5.16b, v5.16b, v13.16b + add v20.4s, v20.4s, v3.4s + mov v24.16b, v3.16b + ldr q3, [sp, #336] + orr v1.16b, v1.16b, v12.16b + eor v4.16b, v4.16b, v10.16b + add v22.4s, v22.4s, v2.4s + 
add v17.4s, v17.4s, v3.4s + add v20.4s, v20.4s, v5.4s + tbl v4.16b, { v4.16b }, v18.16b + eor v25.16b, v25.16b, v22.16b + add v17.4s, v17.4s, v1.4s + eor v6.16b, v6.16b, v20.16b + add v26.4s, v26.4s, v4.4s + tbl v25.16b, { v25.16b }, v18.16b + eor v16.16b, v16.16b, v17.16b + tbl v6.16b, { v6.16b }, v18.16b + eor v0.16b, v26.16b, v0.16b + add v7.4s, v7.4s, v25.4s + tbl v16.16b, { v16.16b }, v18.16b + add v21.4s, v21.4s, v6.4s + ushr v12.4s, v0.4s, #7 + shl v0.4s, v0.4s, #25 + eor v2.16b, v7.16b, v2.16b + add v19.4s, v19.4s, v16.4s + eor v5.16b, v21.16b, v5.16b + orr v0.16b, v0.16b, v12.16b + ushr v12.4s, v2.4s, #7 + shl v2.4s, v2.4s, #25 + eor v1.16b, v19.16b, v1.16b + ushr v13.4s, v5.4s, #7 + shl v5.4s, v5.4s, #25 + add v22.4s, v22.4s, v8.4s + orr v2.16b, v2.16b, v12.16b + ushr v12.4s, v1.4s, #7 + shl v1.4s, v1.4s, #25 + orr v5.16b, v5.16b, v13.16b + add v22.4s, v22.4s, v0.4s + add v10.4s, v10.4s, v29.4s + ldr q29, [sp, #208] + add v17.4s, v17.4s, v31.4s + orr v1.16b, v1.16b, v12.16b + add v20.4s, v20.4s, v29.4s + eor v16.16b, v16.16b, v22.16b + add v10.4s, v10.4s, v5.4s + add v17.4s, v17.4s, v2.4s + add v20.4s, v20.4s, v1.4s + tbl v16.16b, { v16.16b }, v27.16b + eor v25.16b, v25.16b, v10.16b + eor v6.16b, v6.16b, v17.16b + eor v4.16b, v4.16b, v20.16b + add v21.4s, v21.4s, v16.4s + tbl v25.16b, { v25.16b }, v27.16b + tbl v6.16b, { v6.16b }, v27.16b + tbl v4.16b, { v4.16b }, v27.16b + eor v0.16b, v21.16b, v0.16b + add v19.4s, v19.4s, v25.4s + add v26.4s, v26.4s, v6.4s + add v7.4s, v7.4s, v4.4s + ushr v12.4s, v0.4s, #12 + shl v0.4s, v0.4s, #20 + eor v5.16b, v5.16b, v19.16b + eor v2.16b, v26.16b, v2.16b + eor v1.16b, v7.16b, v1.16b + orr v0.16b, v0.16b, v12.16b + ushr v12.4s, v5.4s, #12 + shl v5.4s, v5.4s, #20 + add v22.4s, v22.4s, v14.4s + mov v8.16b, v31.16b + ushr v13.4s, v2.4s, #12 + shl v2.4s, v2.4s, #20 + mov v31.16b, v14.16b + ushr v14.4s, v1.4s, #12 + shl v1.4s, v1.4s, #20 + orr v5.16b, v5.16b, v12.16b + add v22.4s, v22.4s, v0.4s + add v10.4s, v10.4s, v28.4s + 
ldr q28, [sp, #352] + orr v2.16b, v2.16b, v13.16b + orr v1.16b, v1.16b, v14.16b + add v17.4s, v17.4s, v30.4s + add v20.4s, v20.4s, v3.4s + eor v16.16b, v16.16b, v22.16b + add v10.4s, v10.4s, v5.4s + add v17.4s, v17.4s, v2.4s + add v20.4s, v20.4s, v1.4s + tbl v16.16b, { v16.16b }, v18.16b + eor v25.16b, v25.16b, v10.16b + eor v6.16b, v6.16b, v17.16b + eor v4.16b, v4.16b, v20.16b + add v21.4s, v21.4s, v16.4s + tbl v25.16b, { v25.16b }, v18.16b + tbl v6.16b, { v6.16b }, v18.16b + tbl v4.16b, { v4.16b }, v18.16b + eor v0.16b, v21.16b, v0.16b + add v19.4s, v19.4s, v25.4s + add v26.4s, v26.4s, v6.4s + add v7.4s, v7.4s, v4.4s + ushr v12.4s, v0.4s, #7 + shl v0.4s, v0.4s, #25 + eor v5.16b, v19.16b, v5.16b + eor v2.16b, v26.16b, v2.16b + eor v1.16b, v7.16b, v1.16b + orr v0.16b, v0.16b, v12.16b + ushr v12.4s, v5.4s, #7 + shl v5.4s, v5.4s, #25 + add v10.4s, v10.4s, v23.4s + ushr v13.4s, v2.4s, #7 + shl v2.4s, v2.4s, #25 + ushr v14.4s, v1.4s, #7 + shl v1.4s, v1.4s, #25 + orr v5.16b, v5.16b, v12.16b + add v10.4s, v10.4s, v0.4s + add v20.4s, v20.4s, v24.4s + ldr q24, [sp, #144] + orr v2.16b, v2.16b, v13.16b + orr v1.16b, v1.16b, v14.16b + add v22.4s, v22.4s, v9.4s + add v17.4s, v17.4s, v11.4s + eor v4.16b, v4.16b, v10.16b + add v20.4s, v20.4s, v5.4s + add v22.4s, v22.4s, v2.4s + add v17.4s, v17.4s, v1.4s + tbl v4.16b, { v4.16b }, v27.16b + eor v6.16b, v6.16b, v20.16b + eor v25.16b, v25.16b, v22.16b + eor v16.16b, v16.16b, v17.16b + add v26.4s, v26.4s, v4.4s + tbl v6.16b, { v6.16b }, v27.16b + tbl v25.16b, { v25.16b }, v27.16b + tbl v16.16b, { v16.16b }, v27.16b + eor v0.16b, v26.16b, v0.16b + add v21.4s, v21.4s, v6.4s + add v7.4s, v7.4s, v25.4s + add v19.4s, v19.4s, v16.4s + ushr v12.4s, v0.4s, #12 + shl v0.4s, v0.4s, #20 + eor v5.16b, v21.16b, v5.16b + eor v2.16b, v7.16b, v2.16b + eor v1.16b, v19.16b, v1.16b + orr v0.16b, v0.16b, v12.16b + add v10.4s, v10.4s, v15.4s + ushr v14.4s, v5.4s, #12 + shl v5.4s, v5.4s, #20 + mov v30.16b, v3.16b + ldr q3, [sp, #256] + ushr v12.4s, v2.4s, 
#12 + shl v2.4s, v2.4s, #20 + ushr v13.4s, v1.4s, #12 + shl v1.4s, v1.4s, #20 + add v10.4s, v10.4s, v0.4s + orr v5.16b, v5.16b, v14.16b + add v20.4s, v20.4s, v3.4s + orr v2.16b, v2.16b, v12.16b + orr v1.16b, v1.16b, v13.16b + add v22.4s, v22.4s, v24.4s + add v17.4s, v17.4s, v28.4s + eor v4.16b, v4.16b, v10.16b + add v20.4s, v20.4s, v5.4s + add v22.4s, v22.4s, v2.4s + add v17.4s, v17.4s, v1.4s + tbl v4.16b, { v4.16b }, v18.16b + eor v6.16b, v6.16b, v20.16b + eor v25.16b, v25.16b, v22.16b + eor v16.16b, v16.16b, v17.16b + add v26.4s, v26.4s, v4.4s + tbl v6.16b, { v6.16b }, v18.16b + tbl v25.16b, { v25.16b }, v18.16b + tbl v16.16b, { v16.16b }, v18.16b + eor v0.16b, v26.16b, v0.16b + add v21.4s, v21.4s, v6.4s + add v7.4s, v7.4s, v25.4s + add v19.4s, v19.4s, v16.4s + ushr v12.4s, v0.4s, #7 + shl v0.4s, v0.4s, #25 + eor v5.16b, v21.16b, v5.16b + eor v2.16b, v7.16b, v2.16b + eor v1.16b, v19.16b, v1.16b + orr v0.16b, v0.16b, v12.16b + ushr v12.4s, v5.4s, #7 + shl v5.4s, v5.4s, #25 + mov v23.16b, v9.16b + ldr q9, [sp, #112] + ushr v13.4s, v2.4s, #7 + shl v2.4s, v2.4s, #25 + ushr v14.4s, v1.4s, #7 + shl v1.4s, v1.4s, #25 + orr v5.16b, v5.16b, v12.16b + add v9.4s, v10.4s, v9.4s + orr v2.16b, v2.16b, v13.16b + orr v1.16b, v1.16b, v14.16b + ldr q14, [sp, #64] + add v22.4s, v22.4s, v31.4s + add v17.4s, v17.4s, v30.4s + add v20.4s, v20.4s, v8.4s + add v9.4s, v9.4s, v5.4s + add v22.4s, v22.4s, v0.4s + add v17.4s, v17.4s, v2.4s + add v20.4s, v20.4s, v1.4s + eor v25.16b, v25.16b, v9.16b + eor v16.16b, v16.16b, v22.16b + eor v6.16b, v6.16b, v17.16b + eor v4.16b, v4.16b, v20.16b + tbl v25.16b, { v25.16b }, v27.16b + tbl v16.16b, { v16.16b }, v27.16b + tbl v6.16b, { v6.16b }, v27.16b + tbl v4.16b, { v4.16b }, v27.16b + add v19.4s, v19.4s, v25.4s + add v21.4s, v21.4s, v16.4s + add v26.4s, v26.4s, v6.4s + add v7.4s, v7.4s, v4.4s + eor v5.16b, v5.16b, v19.16b + eor v0.16b, v21.16b, v0.16b + eor v2.16b, v26.16b, v2.16b + eor v1.16b, v7.16b, v1.16b + ushr v30.4s, v5.4s, #12 + shl v5.4s, 
v5.4s, #20 + ushr v10.4s, v0.4s, #12 + shl v0.4s, v0.4s, #20 + ushr v12.4s, v2.4s, #12 + shl v2.4s, v2.4s, #20 + ushr v13.4s, v1.4s, #12 + shl v1.4s, v1.4s, #20 + orr v5.16b, v5.16b, v30.16b + add v30.4s, v9.4s, v29.4s + add v22.4s, v22.4s, v23.4s + ldr q23, [sp, #192] + orr v0.16b, v0.16b, v10.16b + orr v2.16b, v2.16b, v12.16b + orr v1.16b, v1.16b, v13.16b + add v17.4s, v17.4s, v23.4s + add v20.4s, v20.4s, v28.4s + add v23.4s, v30.4s, v5.4s + add v22.4s, v22.4s, v0.4s + add v17.4s, v17.4s, v2.4s + add v20.4s, v20.4s, v1.4s + eor v25.16b, v25.16b, v23.16b + eor v16.16b, v16.16b, v22.16b + eor v6.16b, v6.16b, v17.16b + eor v4.16b, v4.16b, v20.16b + tbl v25.16b, { v25.16b }, v18.16b + tbl v16.16b, { v16.16b }, v18.16b + tbl v6.16b, { v6.16b }, v18.16b + tbl v4.16b, { v4.16b }, v18.16b + add v19.4s, v19.4s, v25.4s + add v21.4s, v21.4s, v16.4s + add v26.4s, v26.4s, v6.4s + add v7.4s, v7.4s, v4.4s + eor v5.16b, v19.16b, v5.16b + eor v0.16b, v21.16b, v0.16b + eor v2.16b, v26.16b, v2.16b + eor v1.16b, v7.16b, v1.16b + ushr v28.4s, v5.4s, #7 + shl v5.4s, v5.4s, #25 + ushr v30.4s, v0.4s, #7 + shl v0.4s, v0.4s, #25 + ushr v31.4s, v2.4s, #7 + shl v2.4s, v2.4s, #25 + ushr v8.4s, v1.4s, #7 + shl v1.4s, v1.4s, #25 + orr v5.16b, v5.16b, v28.16b + ldr q28, [sp, #176] + orr v0.16b, v0.16b, v30.16b + orr v2.16b, v2.16b, v31.16b + orr v1.16b, v1.16b, v8.16b + add v23.4s, v23.4s, v28.4s + add v22.4s, v22.4s, v11.4s + add v17.4s, v17.4s, v15.4s + add v20.4s, v20.4s, v3.4s + ldr q3, [sp, #272] + add v23.4s, v23.4s, v0.4s + add v22.4s, v22.4s, v2.4s + add v17.4s, v17.4s, v1.4s + add v20.4s, v20.4s, v5.4s + eor v4.16b, v4.16b, v23.16b + eor v25.16b, v25.16b, v22.16b + eor v16.16b, v16.16b, v17.16b + eor v6.16b, v6.16b, v20.16b + tbl v4.16b, { v4.16b }, v27.16b + tbl v25.16b, { v25.16b }, v27.16b + tbl v16.16b, { v16.16b }, v27.16b + tbl v6.16b, { v6.16b }, v27.16b + add v26.4s, v26.4s, v4.4s + add v7.4s, v7.4s, v25.4s + add v19.4s, v19.4s, v16.4s + add v21.4s, v21.4s, v6.4s + eor v0.16b, 
v26.16b, v0.16b + eor v2.16b, v7.16b, v2.16b + eor v1.16b, v19.16b, v1.16b + eor v5.16b, v21.16b, v5.16b + add v3.4s, v22.4s, v3.4s + ldr q22, [sp, #160] + ushr v28.4s, v0.4s, #12 + shl v0.4s, v0.4s, #20 + ushr v29.4s, v2.4s, #12 + shl v2.4s, v2.4s, #20 + ushr v30.4s, v1.4s, #12 + shl v1.4s, v1.4s, #20 + ushr v31.4s, v5.4s, #12 + shl v5.4s, v5.4s, #20 + add v17.4s, v17.4s, v22.4s + ldr q22, [sp, #240] + orr v0.16b, v0.16b, v28.16b + prfm pldl1keep, [x23, #256] + orr v2.16b, v2.16b, v29.16b + prfm pldl1keep, [x24, #256] + orr v1.16b, v1.16b, v30.16b + prfm pldl1keep, [x22, #256] + orr v5.16b, v5.16b, v31.16b + prfm pldl1keep, [x25, #256] + add v23.4s, v23.4s, v24.4s + add v20.4s, v20.4s, v22.4s + add v3.4s, v3.4s, v2.4s + add v17.4s, v17.4s, v1.4s + add v22.4s, v23.4s, v0.4s + add v20.4s, v20.4s, v5.4s + eor v23.16b, v25.16b, v3.16b + eor v16.16b, v16.16b, v17.16b + eor v4.16b, v4.16b, v22.16b + eor v6.16b, v6.16b, v20.16b + tbl v23.16b, { v23.16b }, v18.16b + tbl v16.16b, { v16.16b }, v18.16b + tbl v4.16b, { v4.16b }, v18.16b + tbl v6.16b, { v6.16b }, v18.16b + add v7.4s, v7.4s, v23.4s + add v19.4s, v19.4s, v16.4s + add v18.4s, v26.4s, v4.4s + add v21.4s, v21.4s, v6.4s + eor v2.16b, v7.16b, v2.16b + eor v1.16b, v19.16b, v1.16b + eor v0.16b, v18.16b, v0.16b + eor v5.16b, v21.16b, v5.16b + ushr v25.4s, v2.4s, #7 + shl v2.4s, v2.4s, #25 + ushr v24.4s, v0.4s, #7 + shl v0.4s, v0.4s, #25 + ushr v26.4s, v1.4s, #7 + shl v1.4s, v1.4s, #25 + ushr v27.4s, v5.4s, #7 + shl v5.4s, v5.4s, #25 + orr v0.16b, v0.16b, v24.16b + orr v2.16b, v2.16b, v25.16b + orr v1.16b, v1.16b, v26.16b + orr v5.16b, v5.16b, v27.16b + movi v13.4s, #64 + eor v29.16b, v19.16b, v22.16b + eor v8.16b, v21.16b, v3.16b + eor v30.16b, v17.16b, v18.16b + eor v31.16b, v20.16b, v7.16b + eor v24.16b, v5.16b, v23.16b + eor v18.16b, v0.16b, v16.16b + eor v25.16b, v2.16b, v6.16b + eor v26.16b, v1.16b, v4.16b + cbnz x21, .LBB3_5 + b .LBB3_2 +.LBB3_6: + cbz x1, .LBB3_14 + adrp x12, .LCPI3_3 + ldr q0, [x11, 
:lo12:.LCPI3_1] + orr w11, w7, w6 + ldr q2, [x10, :lo12:.LCPI3_2] + ldr q1, [x12, :lo12:.LCPI3_3] + and x12, x5, #0x1 +.LBB3_8: + movi v3.4s, #64 + lsr x13, x4, #32 + ldp q5, q4, [x3] + mov x15, x2 + mov w14, w11 + mov v3.s[0], w4 + ldr x10, [x0] + mov v3.s[1], w13 + b .LBB3_11 +.LBB3_9: + orr w14, w14, w9 +.LBB3_10: + ldp q6, q7, [x10] + mov v16.16b, v3.16b + and w14, w14, #0xff + add v5.4s, v5.4s, v4.4s + mov x15, x13 + mov v16.s[3], w14 + add x14, x10, #32 + uzp1 v17.4s, v6.4s, v7.4s + add x10, x10, #64 + add v5.4s, v5.4s, v17.4s + eor v16.16b, v5.16b, v16.16b + tbl v16.16b, { v16.16b }, v0.16b + add v18.4s, v16.4s, v1.4s + eor v19.16b, v18.16b, v4.16b + uzp2 v4.4s, v6.4s, v7.4s + ushr v6.4s, v19.4s, #12 + shl v7.4s, v19.4s, #20 + ld2 { v19.4s, v20.4s }, [x14] + add v5.4s, v5.4s, v4.4s + mov w14, w6 + orr v6.16b, v7.16b, v6.16b + add v5.4s, v5.4s, v6.4s + eor v7.16b, v16.16b, v5.16b + add v5.4s, v5.4s, v19.4s + tbl v7.16b, { v7.16b }, v2.16b + ext v5.16b, v5.16b, v5.16b, #12 + add v16.4s, v18.4s, v7.4s + ext v7.16b, v7.16b, v7.16b, #8 + eor v6.16b, v6.16b, v16.16b + ext v16.16b, v16.16b, v16.16b, #4 + ushr v18.4s, v6.4s, #7 + shl v6.4s, v6.4s, #25 + orr v6.16b, v6.16b, v18.16b + ext v18.16b, v20.16b, v20.16b, #12 + add v5.4s, v5.4s, v6.4s + eor v7.16b, v5.16b, v7.16b + add v5.4s, v5.4s, v18.4s + tbl v7.16b, { v7.16b }, v0.16b + add v16.4s, v16.4s, v7.4s + eor v6.16b, v6.16b, v16.16b + ushr v21.4s, v6.4s, #12 + shl v6.4s, v6.4s, #20 + orr v6.16b, v6.16b, v21.16b + uzp1 v21.4s, v17.4s, v17.4s + add v5.4s, v5.4s, v6.4s + ext v21.16b, v21.16b, v17.16b, #8 + eor v7.16b, v7.16b, v5.16b + uzp2 v21.4s, v21.4s, v4.4s + tbl v7.16b, { v7.16b }, v2.16b + add v5.4s, v5.4s, v21.4s + add v16.4s, v16.4s, v7.4s + ext v5.16b, v5.16b, v5.16b, #4 + ext v7.16b, v7.16b, v7.16b, #8 + eor v6.16b, v6.16b, v16.16b + ushr v22.4s, v6.4s, #7 + shl v6.4s, v6.4s, #25 + orr v6.16b, v6.16b, v22.16b + add v22.4s, v5.4s, v6.4s + eor v5.16b, v22.16b, v7.16b + ext v7.16b, v16.16b, v16.16b, #12 + 
tbl v16.16b, { v5.16b }, v0.16b + ext v5.16b, v17.16b, v17.16b, #12 + add v7.4s, v7.4s, v16.4s + ext v5.16b, v17.16b, v5.16b, #12 + ext v17.16b, v19.16b, v19.16b, #12 + mov v19.16b, v18.16b + eor v6.16b, v6.16b, v7.16b + rev64 v5.4s, v5.4s + mov v19.s[1], v17.s[2] + ushr v20.4s, v6.4s, #12 + shl v6.4s, v6.4s, #20 + trn2 v5.4s, v5.4s, v19.4s + orr v6.16b, v6.16b, v20.16b + zip1 v20.2d, v18.2d, v4.2d + zip2 v4.4s, v4.4s, v18.4s + add v19.4s, v6.4s, v5.4s + mov v20.s[3], v17.s[3] + add v19.4s, v19.4s, v22.4s + ext v22.16b, v20.16b, v20.16b, #12 + eor v16.16b, v16.16b, v19.16b + ext v19.16b, v19.16b, v19.16b, #12 + tbl v16.16b, { v16.16b }, v2.16b + add v7.4s, v7.4s, v16.4s + ext v16.16b, v16.16b, v16.16b, #8 + eor v6.16b, v6.16b, v7.16b + ext v7.16b, v7.16b, v7.16b, #4 + ushr v23.4s, v6.4s, #7 + shl v24.4s, v6.4s, #25 + uzp1 v6.4s, v20.4s, v22.4s + orr v20.16b, v24.16b, v23.16b + add v22.4s, v20.4s, v6.4s + add v19.4s, v22.4s, v19.4s + eor v16.16b, v19.16b, v16.16b + tbl v16.16b, { v16.16b }, v0.16b + add v7.4s, v7.4s, v16.4s + eor v18.16b, v20.16b, v7.16b + zip1 v20.4s, v4.4s, v17.4s + zip1 v4.4s, v17.4s, v4.4s + ushr v17.4s, v18.4s, #12 + shl v18.4s, v18.4s, #20 + ext v20.16b, v4.16b, v20.16b, #8 + orr v4.16b, v18.16b, v17.16b + ext v18.16b, v21.16b, v21.16b, #4 + add v17.4s, v4.4s, v20.4s + add v17.4s, v17.4s, v19.4s + uzp1 v19.4s, v18.4s, v18.4s + eor v16.16b, v16.16b, v17.16b + ext v19.16b, v19.16b, v18.16b, #8 + tbl v16.16b, { v16.16b }, v2.16b + uzp2 v19.4s, v19.4s, v5.4s + add v7.4s, v7.4s, v16.4s + add v17.4s, v17.4s, v19.4s + ext v16.16b, v16.16b, v16.16b, #8 + eor v4.16b, v4.16b, v7.16b + ext v17.16b, v17.16b, v17.16b, #4 + ext v7.16b, v7.16b, v7.16b, #12 + ushr v21.4s, v4.4s, #7 + shl v4.4s, v4.4s, #25 + orr v4.16b, v4.16b, v21.16b + ext v21.16b, v18.16b, v18.16b, #12 + add v17.4s, v17.4s, v4.4s + ext v18.16b, v18.16b, v21.16b, #12 + mov v21.16b, v20.16b + eor v16.16b, v17.16b, v16.16b + rev64 v18.4s, v18.4s + mov v21.s[1], v6.s[2] + tbl v16.16b, { v16.16b 
}, v0.16b + add v7.4s, v7.4s, v16.4s + eor v4.16b, v4.16b, v7.16b + ushr v22.4s, v4.4s, #12 + shl v23.4s, v4.4s, #20 + trn2 v4.4s, v18.4s, v21.4s + orr v18.16b, v23.16b, v22.16b + add v21.4s, v18.4s, v4.4s + add v17.4s, v21.4s, v17.4s + zip1 v21.2d, v20.2d, v5.2d + zip2 v5.4s, v5.4s, v20.4s + eor v16.16b, v16.16b, v17.16b + mov v21.s[3], v6.s[3] + ext v17.16b, v17.16b, v17.16b, #12 + zip1 v20.4s, v5.4s, v6.4s + tbl v16.16b, { v16.16b }, v2.16b + zip1 v5.4s, v6.4s, v5.4s + add v22.4s, v7.4s, v16.4s + ext v16.16b, v16.16b, v16.16b, #8 + ext v20.16b, v5.16b, v20.16b, #8 + eor v7.16b, v18.16b, v22.16b + ext v18.16b, v21.16b, v21.16b, #12 + ushr v23.4s, v7.4s, #7 + shl v24.4s, v7.4s, #25 + uzp1 v7.4s, v21.4s, v18.4s + orr v18.16b, v24.16b, v23.16b + add v21.4s, v18.4s, v7.4s + add v17.4s, v21.4s, v17.4s + ext v21.16b, v22.16b, v22.16b, #4 + eor v16.16b, v17.16b, v16.16b + tbl v16.16b, { v16.16b }, v0.16b + add v21.4s, v21.4s, v16.4s + eor v18.16b, v18.16b, v21.16b + ushr v6.4s, v18.4s, #12 + shl v18.4s, v18.4s, #20 + orr v5.16b, v18.16b, v6.16b + add v6.4s, v5.4s, v20.4s + add v6.4s, v6.4s, v17.4s + ext v17.16b, v19.16b, v19.16b, #4 + eor v16.16b, v16.16b, v6.16b + uzp1 v18.4s, v17.4s, v17.4s + tbl v16.16b, { v16.16b }, v2.16b + ext v18.16b, v18.16b, v17.16b, #8 + add v19.4s, v21.4s, v16.4s + uzp2 v18.4s, v18.4s, v4.4s + ext v16.16b, v16.16b, v16.16b, #8 + eor v5.16b, v5.16b, v19.16b + add v6.4s, v6.4s, v18.4s + ext v19.16b, v19.16b, v19.16b, #12 + ushr v21.4s, v5.4s, #7 + shl v5.4s, v5.4s, #25 + ext v6.16b, v6.16b, v6.16b, #4 + orr v5.16b, v5.16b, v21.16b + ext v21.16b, v17.16b, v17.16b, #12 + add v6.4s, v6.4s, v5.4s + ext v17.16b, v17.16b, v21.16b, #12 + mov v21.16b, v20.16b + eor v16.16b, v6.16b, v16.16b + rev64 v17.4s, v17.4s + mov v21.s[1], v7.s[2] + tbl v16.16b, { v16.16b }, v0.16b + add v19.4s, v19.4s, v16.4s + eor v5.16b, v5.16b, v19.16b + ushr v22.4s, v5.4s, #12 + shl v23.4s, v5.4s, #20 + trn2 v5.4s, v17.4s, v21.4s + orr v17.16b, v23.16b, v22.16b + add v21.4s, 
v17.4s, v5.4s + add v6.4s, v21.4s, v6.4s + eor v16.16b, v16.16b, v6.16b + ext v6.16b, v6.16b, v6.16b, #12 + tbl v21.16b, { v16.16b }, v2.16b + zip1 v16.2d, v20.2d, v4.2d + zip2 v4.4s, v4.4s, v20.4s + add v19.4s, v19.4s, v21.4s + mov v16.s[3], v7.s[3] + ext v21.16b, v21.16b, v21.16b, #8 + zip1 v20.4s, v4.4s, v7.4s + eor v17.16b, v17.16b, v19.16b + ext v22.16b, v16.16b, v16.16b, #12 + ext v19.16b, v19.16b, v19.16b, #4 + zip1 v4.4s, v7.4s, v4.4s + ushr v23.4s, v17.4s, #7 + shl v17.4s, v17.4s, #25 + uzp1 v16.4s, v16.4s, v22.4s + ext v4.16b, v4.16b, v20.16b, #8 + orr v17.16b, v17.16b, v23.16b + add v22.4s, v17.4s, v16.4s + add v6.4s, v22.4s, v6.4s + eor v21.16b, v6.16b, v21.16b + tbl v21.16b, { v21.16b }, v0.16b + add v19.4s, v19.4s, v21.4s + eor v17.16b, v17.16b, v19.16b + ushr v7.4s, v17.4s, #12 + shl v17.4s, v17.4s, #20 + orr v7.16b, v17.16b, v7.16b + add v17.4s, v7.4s, v4.4s + add v6.4s, v17.4s, v6.4s + ext v17.16b, v18.16b, v18.16b, #4 + eor v18.16b, v21.16b, v6.16b + uzp1 v20.4s, v17.4s, v17.4s + tbl v18.16b, { v18.16b }, v2.16b + ext v20.16b, v20.16b, v17.16b, #8 + add v19.4s, v19.4s, v18.4s + uzp2 v20.4s, v20.4s, v5.4s + ext v18.16b, v18.16b, v18.16b, #8 + eor v7.16b, v7.16b, v19.16b + add v6.4s, v6.4s, v20.4s + ushr v21.4s, v7.4s, #7 + shl v7.4s, v7.4s, #25 + ext v6.16b, v6.16b, v6.16b, #4 + orr v7.16b, v7.16b, v21.16b + add v21.4s, v6.4s, v7.4s + eor v6.16b, v21.16b, v18.16b + ext v18.16b, v19.16b, v19.16b, #12 + tbl v19.16b, { v6.16b }, v0.16b + ext v6.16b, v17.16b, v17.16b, #12 + add v18.4s, v18.4s, v19.4s + ext v6.16b, v17.16b, v6.16b, #12 + mov v17.16b, v4.16b + eor v7.16b, v7.16b, v18.16b + rev64 v6.4s, v6.4s + mov v17.s[1], v16.s[2] + ushr v22.4s, v7.4s, #12 + shl v7.4s, v7.4s, #20 + trn2 v6.4s, v6.4s, v17.4s + orr v7.16b, v7.16b, v22.16b + add v17.4s, v7.4s, v6.4s + add v17.4s, v17.4s, v21.4s + zip1 v21.2d, v4.2d, v5.2d + zip2 v4.4s, v5.4s, v4.4s + eor v19.16b, v19.16b, v17.16b + mov v21.s[3], v16.s[3] + ext v17.16b, v17.16b, v17.16b, #12 + tbl v19.16b, 
{ v19.16b }, v2.16b + ext v22.16b, v21.16b, v21.16b, #12 + add v18.4s, v18.4s, v19.4s + ext v19.16b, v19.16b, v19.16b, #8 + eor v7.16b, v7.16b, v18.16b + ext v18.16b, v18.16b, v18.16b, #4 + ushr v23.4s, v7.4s, #7 + shl v24.4s, v7.4s, #25 + uzp1 v7.4s, v21.4s, v22.4s + orr v21.16b, v24.16b, v23.16b + add v22.4s, v21.4s, v7.4s + add v17.4s, v22.4s, v17.4s + eor v19.16b, v17.16b, v19.16b + tbl v19.16b, { v19.16b }, v0.16b + add v18.4s, v18.4s, v19.4s + eor v5.16b, v21.16b, v18.16b + zip1 v21.4s, v4.4s, v16.4s + zip1 v4.4s, v16.4s, v4.4s + ushr v16.4s, v5.4s, #12 + shl v5.4s, v5.4s, #20 + ext v21.16b, v4.16b, v21.16b, #8 + orr v4.16b, v5.16b, v16.16b + ext v16.16b, v20.16b, v20.16b, #4 + mov v23.16b, v21.16b + add v5.4s, v4.4s, v21.4s + mov v23.s[1], v7.s[2] + add v5.4s, v5.4s, v17.4s + eor v17.16b, v19.16b, v5.16b + uzp1 v19.4s, v16.4s, v16.4s + tbl v17.16b, { v17.16b }, v2.16b + ext v19.16b, v19.16b, v16.16b, #8 + add v18.4s, v18.4s, v17.4s + uzp2 v19.4s, v19.4s, v6.4s + eor v4.16b, v4.16b, v18.16b + add v5.4s, v5.4s, v19.4s + ext v19.16b, v19.16b, v19.16b, #4 + ushr v20.4s, v4.4s, #7 + shl v4.4s, v4.4s, #25 + ext v5.16b, v5.16b, v5.16b, #4 + orr v20.16b, v4.16b, v20.16b + ext v4.16b, v17.16b, v17.16b, #8 + add v17.4s, v5.4s, v20.4s + ext v5.16b, v18.16b, v18.16b, #12 + eor v4.16b, v17.16b, v4.16b + tbl v18.16b, { v4.16b }, v0.16b + ext v4.16b, v16.16b, v16.16b, #12 + add v22.4s, v5.4s, v18.4s + ext v4.16b, v16.16b, v4.16b, #12 + eor v5.16b, v20.16b, v22.16b + rev64 v16.4s, v4.4s + ushr v20.4s, v5.4s, #12 + shl v24.4s, v5.4s, #20 + trn2 v5.4s, v16.4s, v23.4s + orr v16.16b, v24.16b, v20.16b + add v20.4s, v16.4s, v5.4s + add v17.4s, v20.4s, v17.4s + zip1 v20.2d, v21.2d, v6.2d + zip2 v6.4s, v6.4s, v21.4s + eor v18.16b, v18.16b, v17.16b + mov v20.s[3], v7.s[3] + ext v17.16b, v17.16b, v17.16b, #12 + zip1 v21.4s, v6.4s, v7.4s + tbl v18.16b, { v18.16b }, v2.16b + ext v24.16b, v20.16b, v20.16b, #12 + zip1 v6.4s, v7.4s, v6.4s + add v22.4s, v22.4s, v18.4s + ext v18.16b, 
v18.16b, v18.16b, #8 + ext v6.16b, v6.16b, v21.16b, #8 + eor v16.16b, v16.16b, v22.16b + ext v22.16b, v22.16b, v22.16b, #4 + zip1 v5.2d, v6.2d, v5.2d + zip2 v4.4s, v4.4s, v6.4s + ushr v25.4s, v16.4s, #7 + shl v26.4s, v16.4s, #25 + uzp1 v16.4s, v20.4s, v24.4s + orr v20.16b, v26.16b, v25.16b + mov v5.s[3], v16.s[3] + add v24.4s, v20.4s, v16.4s + add v17.4s, v24.4s, v17.4s + eor v18.16b, v17.16b, v18.16b + tbl v18.16b, { v18.16b }, v0.16b + add v22.4s, v22.4s, v18.4s + eor v20.16b, v20.16b, v22.16b + ushr v7.4s, v20.4s, #12 + shl v20.4s, v20.4s, #20 + orr v7.16b, v20.16b, v7.16b + add v20.4s, v7.4s, v6.4s + add v17.4s, v20.4s, v17.4s + ext v20.16b, v19.16b, v19.16b, #8 + eor v18.16b, v18.16b, v17.16b + ext v17.16b, v17.16b, v17.16b, #4 + tbl v18.16b, { v18.16b }, v2.16b + add v21.4s, v22.4s, v18.4s + uzp2 v22.4s, v20.4s, v23.4s + ext v18.16b, v18.16b, v18.16b, #8 + eor v7.16b, v7.16b, v21.16b + ext v20.16b, v22.16b, v20.16b, #4 + ushr v22.4s, v7.4s, #7 + shl v7.4s, v7.4s, #25 + add v17.4s, v17.4s, v20.4s + ext v20.16b, v21.16b, v21.16b, #12 + ext v21.16b, v19.16b, v19.16b, #12 + orr v7.16b, v7.16b, v22.16b + ext v19.16b, v19.16b, v21.16b, #12 + add v17.4s, v17.4s, v7.4s + mov v21.16b, v6.16b + rev64 v19.4s, v19.4s + eor v18.16b, v17.16b, v18.16b + mov v21.s[1], v16.s[2] + tbl v18.16b, { v18.16b }, v0.16b + trn2 v19.4s, v19.4s, v21.4s + add v20.4s, v20.4s, v18.4s + eor v7.16b, v7.16b, v20.16b + ushr v22.4s, v7.4s, #12 + shl v7.4s, v7.4s, #20 + orr v7.16b, v7.16b, v22.16b + add v19.4s, v7.4s, v19.4s + add v17.4s, v19.4s, v17.4s + eor v18.16b, v18.16b, v17.16b + ext v17.16b, v17.16b, v17.16b, #12 + tbl v18.16b, { v18.16b }, v2.16b + add v19.4s, v20.4s, v18.4s + ext v20.16b, v5.16b, v5.16b, #12 + ext v18.16b, v18.16b, v18.16b, #8 + eor v7.16b, v7.16b, v19.16b + uzp1 v5.4s, v5.4s, v20.4s + ushr v21.4s, v7.4s, #7 + shl v7.4s, v7.4s, #25 + orr v7.16b, v7.16b, v21.16b + add v5.4s, v7.4s, v5.4s + add v5.4s, v5.4s, v17.4s + eor v17.16b, v5.16b, v18.16b + ext v18.16b, v19.16b, 
v19.16b, #4 + tbl v17.16b, { v17.16b }, v0.16b + add v18.4s, v18.4s, v17.4s + eor v6.16b, v7.16b, v18.16b + zip1 v7.4s, v4.4s, v16.4s + zip1 v4.4s, v16.4s, v4.4s + ushr v16.4s, v6.4s, #12 + shl v6.4s, v6.4s, #20 + ext v4.16b, v4.16b, v7.16b, #8 + orr v6.16b, v6.16b, v16.16b + add v4.4s, v6.4s, v4.4s + add v4.4s, v4.4s, v5.4s + eor v5.16b, v17.16b, v4.16b + ext v4.16b, v4.16b, v4.16b, #4 + tbl v5.16b, { v5.16b }, v2.16b + add v7.4s, v18.4s, v5.4s + eor v6.16b, v6.16b, v7.16b + ext v7.16b, v7.16b, v7.16b, #12 + ushr v16.4s, v6.4s, #7 + shl v6.4s, v6.4s, #25 + orr v6.16b, v6.16b, v16.16b + ext v16.16b, v5.16b, v5.16b, #8 + eor v5.16b, v4.16b, v7.16b + eor v4.16b, v6.16b, v16.16b +.LBB3_11: + subs x13, x15, #1 + b.eq .LBB3_9 + cbnz x15, .LBB3_10 + add x4, x4, x12 + add x0, x0, #8 + subs x1, x1, #1 + stp q5, q4, [x8], #32 + b.ne .LBB3_8 +.LBB3_14: + add sp, sp, #368 + ldp x20, x19, [sp, #128] + ldp x22, x21, [sp, #112] + ldp x24, x23, [sp, #96] + ldp x26, x25, [sp, #80] + ldp x29, x27, [sp, #64] + ldp d9, d8, [sp, #48] + ldp d11, d10, [sp, #32] + ldp d13, d12, [sp, #16] + ldp d15, d14, [sp], #144 + ret +.Lfunc_end3: + .size zfs_blake3_hash_many_sse41, .Lfunc_end3-zfs_blake3_hash_many_sse41 + .cfi_endproc + .section ".note.GNU-stack","",@progbits +#endif diff --git a/sys/contrib/openzfs/module/icp/asm-aarch64/sha2/sha256-armv8.S b/sys/contrib/openzfs/module/icp/asm-aarch64/sha2/sha256-armv8.S new file mode 100644 index 000000000000..4dcdd3b65d0b --- /dev/null +++ b/sys/contrib/openzfs/module/icp/asm-aarch64/sha2/sha256-armv8.S @@ -0,0 +1,2012 @@ +/* + * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de> + * - modified assembly to fit into OpenZFS + */ + +#if defined(__aarch64__) + + .section .note.gnu.property,"a",@note + .p2align 3 + .word 4 + .word 16 + .word 5 + .asciz "GNU" + .word 3221225472 + .word 4 + .word 3 + .word 0 +.text + /* .LK256: the 64 SHA-256 round constants K[0..63] as 32-bit words, followed by one zero terminator word. zfs_sha256_block_armv7 points x30 at this table and reads it sequentially with post-incremented loads. */ +.align 6 + .type .LK256,%object +.LK256: + .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 + .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 + .long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 + .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 + .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc + .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da + .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 + .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 + .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 + .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 + .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 + .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 + .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 + .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 + .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 + .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + .long 0 //terminator +.size .LK256,.-.LK256 + +.globl zfs_sha256_block_armv7 +.type zfs_sha256_block_armv7,%function +.align 6 +zfs_sha256_block_armv7: + hint #34 // bti c + stp x29,x30,[sp,#-128]! 
+ add x29,sp,#0 + + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#4*4 + + ldp w20,w21,[x0] // load context + ldp w22,w23,[x0,#2*4] + ldp w24,w25,[x0,#4*4] + add x2,x1,x2,lsl#6 // end of input + ldp w26,w27,[x0,#6*4] + adr x30,.LK256 + stp x0,x2,[x29,#96] + +.Loop: + ldp w3,w4,[x1],#2*4 + ldr w19,[x30],#4 // *K++ + eor w28,w21,w22 // magic seed + str x1,[x29,#112] +#ifndef __AARCH64EB__ + rev w3,w3 // 0 +#endif + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + eor w6,w24,w24,ror#14 + and w17,w25,w24 + bic w19,w26,w24 + add w27,w27,w3 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w6,ror#11 // Sigma1(e) + ror w6,w20,#2 + add w27,w27,w17 // h+=Ch(e,f,g) + eor w17,w20,w20,ror#9 + add w27,w27,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w23,w23,w27 // d+=h + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w6,w17,ror#13 // Sigma0(a) + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w27,w27,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w4,w4 // 1 +#endif + ldp w5,w6,[x1],#2*4 + add w27,w27,w17 // h+=Sigma0(a) + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + eor w7,w23,w23,ror#14 + and w17,w24,w23 + bic w28,w25,w23 + add w26,w26,w4 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w7,ror#11 // Sigma1(e) + ror w7,w27,#2 + add w26,w26,w17 // h+=Ch(e,f,g) + eor w17,w27,w27,ror#9 + add w26,w26,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w22,w22,w26 // d+=h + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w7,w17,ror#13 // Sigma0(a) + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w26,w26,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w5,w5 // 2 +#endif + add w26,w26,w17 // h+=Sigma0(a) + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + eor w8,w22,w22,ror#14 + and w17,w23,w22 + bic w19,w24,w22 + add 
w25,w25,w5 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w8,ror#11 // Sigma1(e) + ror w8,w26,#2 + add w25,w25,w17 // h+=Ch(e,f,g) + eor w17,w26,w26,ror#9 + add w25,w25,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w21,w21,w25 // d+=h + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w8,w17,ror#13 // Sigma0(a) + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w25,w25,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w6,w6 // 3 +#endif + ldp w7,w8,[x1],#2*4 + add w25,w25,w17 // h+=Sigma0(a) + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + eor w9,w21,w21,ror#14 + and w17,w22,w21 + bic w28,w23,w21 + add w24,w24,w6 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w9,ror#11 // Sigma1(e) + ror w9,w25,#2 + add w24,w24,w17 // h+=Ch(e,f,g) + eor w17,w25,w25,ror#9 + add w24,w24,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w20,w20,w24 // d+=h + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w9,w17,ror#13 // Sigma0(a) + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w24,w24,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w7,w7 // 4 +#endif + add w24,w24,w17 // h+=Sigma0(a) + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + eor w10,w20,w20,ror#14 + and w17,w21,w20 + bic w19,w22,w20 + add w23,w23,w7 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w10,ror#11 // Sigma1(e) + ror w10,w24,#2 + add w23,w23,w17 // h+=Ch(e,f,g) + eor w17,w24,w24,ror#9 + add w23,w23,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w27,w27,w23 // d+=h + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w10,w17,ror#13 // Sigma0(a) + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w23,w23,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w8,w8 // 5 +#endif + ldp w9,w10,[x1],#2*4 + add w23,w23,w17 // h+=Sigma0(a) + ror w16,w27,#6 
+ add w22,w22,w28 // h+=K[i] + eor w11,w27,w27,ror#14 + and w17,w20,w27 + bic w28,w21,w27 + add w22,w22,w8 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w11,ror#11 // Sigma1(e) + ror w11,w23,#2 + add w22,w22,w17 // h+=Ch(e,f,g) + eor w17,w23,w23,ror#9 + add w22,w22,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w26,w26,w22 // d+=h + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w11,w17,ror#13 // Sigma0(a) + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w22,w22,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w9,w9 // 6 +#endif + add w22,w22,w17 // h+=Sigma0(a) + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + eor w12,w26,w26,ror#14 + and w17,w27,w26 + bic w19,w20,w26 + add w21,w21,w9 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w12,ror#11 // Sigma1(e) + ror w12,w22,#2 + add w21,w21,w17 // h+=Ch(e,f,g) + eor w17,w22,w22,ror#9 + add w21,w21,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w25,w25,w21 // d+=h + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w12,w17,ror#13 // Sigma0(a) + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w21,w21,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w10,w10 // 7 +#endif + ldp w11,w12,[x1],#2*4 + add w21,w21,w17 // h+=Sigma0(a) + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + eor w13,w25,w25,ror#14 + and w17,w26,w25 + bic w28,w27,w25 + add w20,w20,w10 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w13,ror#11 // Sigma1(e) + ror w13,w21,#2 + add w20,w20,w17 // h+=Ch(e,f,g) + eor w17,w21,w21,ror#9 + add w20,w20,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w24,w24,w20 // d+=h + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w13,w17,ror#13 // Sigma0(a) + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w20,w20,w17 // h+=Sigma0(a) +#ifndef 
__AARCH64EB__ + rev w11,w11 // 8 +#endif + add w20,w20,w17 // h+=Sigma0(a) + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + eor w14,w24,w24,ror#14 + and w17,w25,w24 + bic w19,w26,w24 + add w27,w27,w11 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w14,ror#11 // Sigma1(e) + ror w14,w20,#2 + add w27,w27,w17 // h+=Ch(e,f,g) + eor w17,w20,w20,ror#9 + add w27,w27,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w23,w23,w27 // d+=h + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w14,w17,ror#13 // Sigma0(a) + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w27,w27,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w12,w12 // 9 +#endif + ldp w13,w14,[x1],#2*4 + add w27,w27,w17 // h+=Sigma0(a) + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + eor w15,w23,w23,ror#14 + and w17,w24,w23 + bic w28,w25,w23 + add w26,w26,w12 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w15,ror#11 // Sigma1(e) + ror w15,w27,#2 + add w26,w26,w17 // h+=Ch(e,f,g) + eor w17,w27,w27,ror#9 + add w26,w26,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w22,w22,w26 // d+=h + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w15,w17,ror#13 // Sigma0(a) + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w26,w26,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w13,w13 // 10 +#endif + add w26,w26,w17 // h+=Sigma0(a) + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + eor w0,w22,w22,ror#14 + and w17,w23,w22 + bic w19,w24,w22 + add w25,w25,w13 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w0,ror#11 // Sigma1(e) + ror w0,w26,#2 + add w25,w25,w17 // h+=Ch(e,f,g) + eor w17,w26,w26,ror#9 + add w25,w25,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w21,w21,w25 // d+=h + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w0,w17,ror#13 // Sigma0(a) + add w25,w25,w28 // h+=Maj(a,b,c) + 
ldr w28,[x30],#4 // *K++, w19 in next round + //add w25,w25,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w14,w14 // 11 +#endif + ldp w15,w0,[x1],#2*4 + add w25,w25,w17 // h+=Sigma0(a) + str w6,[sp,#12] + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + eor w6,w21,w21,ror#14 + and w17,w22,w21 + bic w28,w23,w21 + add w24,w24,w14 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w6,ror#11 // Sigma1(e) + ror w6,w25,#2 + add w24,w24,w17 // h+=Ch(e,f,g) + eor w17,w25,w25,ror#9 + add w24,w24,w16 // h+=Sigma1(e) + and w19,w19,w28 // (b^c)&=(a^b) + add w20,w20,w24 // d+=h + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w6,w17,ror#13 // Sigma0(a) + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w24,w24,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w15,w15 // 12 +#endif + add w24,w24,w17 // h+=Sigma0(a) + str w7,[sp,#0] + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + eor w7,w20,w20,ror#14 + and w17,w21,w20 + bic w19,w22,w20 + add w23,w23,w15 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w7,ror#11 // Sigma1(e) + ror w7,w24,#2 + add w23,w23,w17 // h+=Ch(e,f,g) + eor w17,w24,w24,ror#9 + add w23,w23,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w27,w27,w23 // d+=h + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w7,w17,ror#13 // Sigma0(a) + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w23,w23,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w0,w0 // 13 +#endif + ldp w1,w2,[x1] + add w23,w23,w17 // h+=Sigma0(a) + str w8,[sp,#4] + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + eor w8,w27,w27,ror#14 + and w17,w20,w27 + bic w28,w21,w27 + add w22,w22,w0 // h+=X[i] + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w8,ror#11 // Sigma1(e) + ror w8,w23,#2 + add w22,w22,w17 // h+=Ch(e,f,g) + eor w17,w23,w23,ror#9 + add w22,w22,w16 // h+=Sigma1(e) + and w19,w19,w28 
// (b^c)&=(a^b) + add w26,w26,w22 // d+=h + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w8,w17,ror#13 // Sigma0(a) + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + //add w22,w22,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w1,w1 // 14 +#endif + ldr w6,[sp,#12] + add w22,w22,w17 // h+=Sigma0(a) + str w9,[sp,#8] + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + eor w9,w26,w26,ror#14 + and w17,w27,w26 + bic w19,w20,w26 + add w21,w21,w1 // h+=X[i] + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w9,ror#11 // Sigma1(e) + ror w9,w22,#2 + add w21,w21,w17 // h+=Ch(e,f,g) + eor w17,w22,w22,ror#9 + add w21,w21,w16 // h+=Sigma1(e) + and w28,w28,w19 // (b^c)&=(a^b) + add w25,w25,w21 // d+=h + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w9,w17,ror#13 // Sigma0(a) + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + //add w21,w21,w17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev w2,w2 // 15 +#endif + ldr w7,[sp,#0] + add w21,w21,w17 // h+=Sigma0(a) + str w10,[sp,#12] + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + ror w9,w4,#7 + and w17,w26,w25 + ror w8,w1,#17 + bic w28,w27,w25 + ror w10,w21,#2 + add w20,w20,w2 // h+=X[i] + eor w16,w16,w25,ror#11 + eor w9,w9,w4,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w25,ror#25 // Sigma1(e) + eor w10,w10,w21,ror#13 + add w20,w20,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w8,w8,w1,ror#19 + eor w9,w9,w4,lsr#3 // sigma0(X[i+1]) + add w20,w20,w16 // h+=Sigma1(e) + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w10,w21,ror#22 // Sigma0(a) + eor w8,w8,w1,lsr#10 // sigma1(X[i+14]) + add w3,w3,w12 + add w24,w24,w20 // d+=h + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w3,w3,w9 + add w20,w20,w17 // h+=Sigma0(a) + add w3,w3,w8 +.Loop_16_xx: + ldr w8,[sp,#4] + str w11,[sp,#0] + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + ror w10,w5,#7 + and w17,w25,w24 + 
ror w9,w2,#17 + bic w19,w26,w24 + ror w11,w20,#2 + add w27,w27,w3 // h+=X[i] + eor w16,w16,w24,ror#11 + eor w10,w10,w5,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w24,ror#25 // Sigma1(e) + eor w11,w11,w20,ror#13 + add w27,w27,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w9,w9,w2,ror#19 + eor w10,w10,w5,lsr#3 // sigma0(X[i+1]) + add w27,w27,w16 // h+=Sigma1(e) + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w11,w20,ror#22 // Sigma0(a) + eor w9,w9,w2,lsr#10 // sigma1(X[i+14]) + add w4,w4,w13 + add w23,w23,w27 // d+=h + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w4,w4,w10 + add w27,w27,w17 // h+=Sigma0(a) + add w4,w4,w9 + ldr w9,[sp,#8] + str w12,[sp,#4] + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + ror w11,w6,#7 + and w17,w24,w23 + ror w10,w3,#17 + bic w28,w25,w23 + ror w12,w27,#2 + add w26,w26,w4 // h+=X[i] + eor w16,w16,w23,ror#11 + eor w11,w11,w6,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w23,ror#25 // Sigma1(e) + eor w12,w12,w27,ror#13 + add w26,w26,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w10,w10,w3,ror#19 + eor w11,w11,w6,lsr#3 // sigma0(X[i+1]) + add w26,w26,w16 // h+=Sigma1(e) + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w12,w27,ror#22 // Sigma0(a) + eor w10,w10,w3,lsr#10 // sigma1(X[i+14]) + add w5,w5,w14 + add w22,w22,w26 // d+=h + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w5,w5,w11 + add w26,w26,w17 // h+=Sigma0(a) + add w5,w5,w10 + ldr w10,[sp,#12] + str w13,[sp,#8] + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + ror w12,w7,#7 + and w17,w23,w22 + ror w11,w4,#17 + bic w19,w24,w22 + ror w13,w26,#2 + add w25,w25,w5 // h+=X[i] + eor w16,w16,w22,ror#11 + eor w12,w12,w7,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w22,ror#25 // Sigma1(e) + eor w13,w13,w26,ror#13 + add w25,w25,w17 // h+=Ch(e,f,g) + 
and w28,w28,w19 // (b^c)&=(a^b) + eor w11,w11,w4,ror#19 + eor w12,w12,w7,lsr#3 // sigma0(X[i+1]) + add w25,w25,w16 // h+=Sigma1(e) + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w13,w26,ror#22 // Sigma0(a) + eor w11,w11,w4,lsr#10 // sigma1(X[i+14]) + add w6,w6,w15 + add w21,w21,w25 // d+=h + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w6,w6,w12 + add w25,w25,w17 // h+=Sigma0(a) + add w6,w6,w11 + ldr w11,[sp,#0] + str w14,[sp,#12] + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + ror w13,w8,#7 + and w17,w22,w21 + ror w12,w5,#17 + bic w28,w23,w21 + ror w14,w25,#2 + add w24,w24,w6 // h+=X[i] + eor w16,w16,w21,ror#11 + eor w13,w13,w8,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w21,ror#25 // Sigma1(e) + eor w14,w14,w25,ror#13 + add w24,w24,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w12,w12,w5,ror#19 + eor w13,w13,w8,lsr#3 // sigma0(X[i+1]) + add w24,w24,w16 // h+=Sigma1(e) + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w14,w25,ror#22 // Sigma0(a) + eor w12,w12,w5,lsr#10 // sigma1(X[i+14]) + add w7,w7,w0 + add w20,w20,w24 // d+=h + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w7,w7,w13 + add w24,w24,w17 // h+=Sigma0(a) + add w7,w7,w12 + ldr w12,[sp,#4] + str w15,[sp,#0] + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + ror w14,w9,#7 + and w17,w21,w20 + ror w13,w6,#17 + bic w19,w22,w20 + ror w15,w24,#2 + add w23,w23,w7 // h+=X[i] + eor w16,w16,w20,ror#11 + eor w14,w14,w9,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w20,ror#25 // Sigma1(e) + eor w15,w15,w24,ror#13 + add w23,w23,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w13,w13,w6,ror#19 + eor w14,w14,w9,lsr#3 // sigma0(X[i+1]) + add w23,w23,w16 // h+=Sigma1(e) + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w15,w24,ror#22 // Sigma0(a) + eor w13,w13,w6,lsr#10 // sigma1(X[i+14]) + add w8,w8,w1 + add w27,w27,w23 // d+=h + add 
w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w8,w8,w14 + add w23,w23,w17 // h+=Sigma0(a) + add w8,w8,w13 + ldr w13,[sp,#8] + str w0,[sp,#4] + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + ror w15,w10,#7 + and w17,w20,w27 + ror w14,w7,#17 + bic w28,w21,w27 + ror w0,w23,#2 + add w22,w22,w8 // h+=X[i] + eor w16,w16,w27,ror#11 + eor w15,w15,w10,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w27,ror#25 // Sigma1(e) + eor w0,w0,w23,ror#13 + add w22,w22,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w14,w14,w7,ror#19 + eor w15,w15,w10,lsr#3 // sigma0(X[i+1]) + add w22,w22,w16 // h+=Sigma1(e) + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w0,w23,ror#22 // Sigma0(a) + eor w14,w14,w7,lsr#10 // sigma1(X[i+14]) + add w9,w9,w2 + add w26,w26,w22 // d+=h + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w9,w9,w15 + add w22,w22,w17 // h+=Sigma0(a) + add w9,w9,w14 + ldr w14,[sp,#12] + str w1,[sp,#8] + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + ror w0,w11,#7 + and w17,w27,w26 + ror w15,w8,#17 + bic w19,w20,w26 + ror w1,w22,#2 + add w21,w21,w9 // h+=X[i] + eor w16,w16,w26,ror#11 + eor w0,w0,w11,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w26,ror#25 // Sigma1(e) + eor w1,w1,w22,ror#13 + add w21,w21,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w15,w15,w8,ror#19 + eor w0,w0,w11,lsr#3 // sigma0(X[i+1]) + add w21,w21,w16 // h+=Sigma1(e) + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w1,w22,ror#22 // Sigma0(a) + eor w15,w15,w8,lsr#10 // sigma1(X[i+14]) + add w10,w10,w3 + add w25,w25,w21 // d+=h + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w10,w10,w0 + add w21,w21,w17 // h+=Sigma0(a) + add w10,w10,w15 + ldr w15,[sp,#0] + str w2,[sp,#12] + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + ror w1,w12,#7 + and w17,w26,w25 + ror w0,w9,#17 + bic w28,w27,w25 + ror 
w2,w21,#2 + add w20,w20,w10 // h+=X[i] + eor w16,w16,w25,ror#11 + eor w1,w1,w12,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w25,ror#25 // Sigma1(e) + eor w2,w2,w21,ror#13 + add w20,w20,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w0,w0,w9,ror#19 + eor w1,w1,w12,lsr#3 // sigma0(X[i+1]) + add w20,w20,w16 // h+=Sigma1(e) + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w2,w21,ror#22 // Sigma0(a) + eor w0,w0,w9,lsr#10 // sigma1(X[i+14]) + add w11,w11,w4 + add w24,w24,w20 // d+=h + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w11,w11,w1 + add w20,w20,w17 // h+=Sigma0(a) + add w11,w11,w0 + ldr w0,[sp,#4] + str w3,[sp,#0] + ror w16,w24,#6 + add w27,w27,w19 // h+=K[i] + ror w2,w13,#7 + and w17,w25,w24 + ror w1,w10,#17 + bic w19,w26,w24 + ror w3,w20,#2 + add w27,w27,w11 // h+=X[i] + eor w16,w16,w24,ror#11 + eor w2,w2,w13,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w20,w21 // a^b, b^c in next round + eor w16,w16,w24,ror#25 // Sigma1(e) + eor w3,w3,w20,ror#13 + add w27,w27,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w1,w1,w10,ror#19 + eor w2,w2,w13,lsr#3 // sigma0(X[i+1]) + add w27,w27,w16 // h+=Sigma1(e) + eor w28,w28,w21 // Maj(a,b,c) + eor w17,w3,w20,ror#22 // Sigma0(a) + eor w1,w1,w10,lsr#10 // sigma1(X[i+14]) + add w12,w12,w5 + add w23,w23,w27 // d+=h + add w27,w27,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w12,w12,w2 + add w27,w27,w17 // h+=Sigma0(a) + add w12,w12,w1 + ldr w1,[sp,#8] + str w4,[sp,#4] + ror w16,w23,#6 + add w26,w26,w28 // h+=K[i] + ror w3,w14,#7 + and w17,w24,w23 + ror w2,w11,#17 + bic w28,w25,w23 + ror w4,w27,#2 + add w26,w26,w12 // h+=X[i] + eor w16,w16,w23,ror#11 + eor w3,w3,w14,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w27,w20 // a^b, b^c in next round + eor w16,w16,w23,ror#25 // Sigma1(e) + eor w4,w4,w27,ror#13 + add w26,w26,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor 
w2,w2,w11,ror#19 + eor w3,w3,w14,lsr#3 // sigma0(X[i+1]) + add w26,w26,w16 // h+=Sigma1(e) + eor w19,w19,w20 // Maj(a,b,c) + eor w17,w4,w27,ror#22 // Sigma0(a) + eor w2,w2,w11,lsr#10 // sigma1(X[i+14]) + add w13,w13,w6 + add w22,w22,w26 // d+=h + add w26,w26,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w13,w13,w3 + add w26,w26,w17 // h+=Sigma0(a) + add w13,w13,w2 + ldr w2,[sp,#12] + str w5,[sp,#8] + ror w16,w22,#6 + add w25,w25,w19 // h+=K[i] + ror w4,w15,#7 + and w17,w23,w22 + ror w3,w12,#17 + bic w19,w24,w22 + ror w5,w26,#2 + add w25,w25,w13 // h+=X[i] + eor w16,w16,w22,ror#11 + eor w4,w4,w15,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w26,w27 // a^b, b^c in next round + eor w16,w16,w22,ror#25 // Sigma1(e) + eor w5,w5,w26,ror#13 + add w25,w25,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w3,w3,w12,ror#19 + eor w4,w4,w15,lsr#3 // sigma0(X[i+1]) + add w25,w25,w16 // h+=Sigma1(e) + eor w28,w28,w27 // Maj(a,b,c) + eor w17,w5,w26,ror#22 // Sigma0(a) + eor w3,w3,w12,lsr#10 // sigma1(X[i+14]) + add w14,w14,w7 + add w21,w21,w25 // d+=h + add w25,w25,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w14,w14,w4 + add w25,w25,w17 // h+=Sigma0(a) + add w14,w14,w3 + ldr w3,[sp,#0] + str w6,[sp,#12] + ror w16,w21,#6 + add w24,w24,w28 // h+=K[i] + ror w5,w0,#7 + and w17,w22,w21 + ror w4,w13,#17 + bic w28,w23,w21 + ror w6,w25,#2 + add w24,w24,w14 // h+=X[i] + eor w16,w16,w21,ror#11 + eor w5,w5,w0,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w25,w26 // a^b, b^c in next round + eor w16,w16,w21,ror#25 // Sigma1(e) + eor w6,w6,w25,ror#13 + add w24,w24,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w4,w4,w13,ror#19 + eor w5,w5,w0,lsr#3 // sigma0(X[i+1]) + add w24,w24,w16 // h+=Sigma1(e) + eor w19,w19,w26 // Maj(a,b,c) + eor w17,w6,w25,ror#22 // Sigma0(a) + eor w4,w4,w13,lsr#10 // sigma1(X[i+14]) + add w15,w15,w8 + add w20,w20,w24 // d+=h + add w24,w24,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, 
w28 in next round + add w15,w15,w5 + add w24,w24,w17 // h+=Sigma0(a) + add w15,w15,w4 + ldr w4,[sp,#4] + str w7,[sp,#0] + ror w16,w20,#6 + add w23,w23,w19 // h+=K[i] + ror w6,w1,#7 + and w17,w21,w20 + ror w5,w14,#17 + bic w19,w22,w20 + ror w7,w24,#2 + add w23,w23,w15 // h+=X[i] + eor w16,w16,w20,ror#11 + eor w6,w6,w1,ror#18 + orr w17,w17,w19 // Ch(e,f,g) + eor w19,w24,w25 // a^b, b^c in next round + eor w16,w16,w20,ror#25 // Sigma1(e) + eor w7,w7,w24,ror#13 + add w23,w23,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w5,w5,w14,ror#19 + eor w6,w6,w1,lsr#3 // sigma0(X[i+1]) + add w23,w23,w16 // h+=Sigma1(e) + eor w28,w28,w25 // Maj(a,b,c) + eor w17,w7,w24,ror#22 // Sigma0(a) + eor w5,w5,w14,lsr#10 // sigma1(X[i+14]) + add w0,w0,w9 + add w27,w27,w23 // d+=h + add w23,w23,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w0,w0,w6 + add w23,w23,w17 // h+=Sigma0(a) + add w0,w0,w5 + ldr w5,[sp,#8] + str w8,[sp,#4] + ror w16,w27,#6 + add w22,w22,w28 // h+=K[i] + ror w7,w2,#7 + and w17,w20,w27 + ror w6,w15,#17 + bic w28,w21,w27 + ror w8,w23,#2 + add w22,w22,w0 // h+=X[i] + eor w16,w16,w27,ror#11 + eor w7,w7,w2,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w23,w24 // a^b, b^c in next round + eor w16,w16,w27,ror#25 // Sigma1(e) + eor w8,w8,w23,ror#13 + add w22,w22,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w6,w6,w15,ror#19 + eor w7,w7,w2,lsr#3 // sigma0(X[i+1]) + add w22,w22,w16 // h+=Sigma1(e) + eor w19,w19,w24 // Maj(a,b,c) + eor w17,w8,w23,ror#22 // Sigma0(a) + eor w6,w6,w15,lsr#10 // sigma1(X[i+14]) + add w1,w1,w10 + add w26,w26,w22 // d+=h + add w22,w22,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w1,w1,w7 + add w22,w22,w17 // h+=Sigma0(a) + add w1,w1,w6 + ldr w6,[sp,#12] + str w9,[sp,#8] + ror w16,w26,#6 + add w21,w21,w19 // h+=K[i] + ror w8,w3,#7 + and w17,w27,w26 + ror w7,w0,#17 + bic w19,w20,w26 + ror w9,w22,#2 + add w21,w21,w1 // h+=X[i] + eor w16,w16,w26,ror#11 + eor w8,w8,w3,ror#18 
+ orr w17,w17,w19 // Ch(e,f,g) + eor w19,w22,w23 // a^b, b^c in next round + eor w16,w16,w26,ror#25 // Sigma1(e) + eor w9,w9,w22,ror#13 + add w21,w21,w17 // h+=Ch(e,f,g) + and w28,w28,w19 // (b^c)&=(a^b) + eor w7,w7,w0,ror#19 + eor w8,w8,w3,lsr#3 // sigma0(X[i+1]) + add w21,w21,w16 // h+=Sigma1(e) + eor w28,w28,w23 // Maj(a,b,c) + eor w17,w9,w22,ror#22 // Sigma0(a) + eor w7,w7,w0,lsr#10 // sigma1(X[i+14]) + add w2,w2,w11 + add w25,w25,w21 // d+=h + add w21,w21,w28 // h+=Maj(a,b,c) + ldr w28,[x30],#4 // *K++, w19 in next round + add w2,w2,w8 + add w21,w21,w17 // h+=Sigma0(a) + add w2,w2,w7 + ldr w7,[sp,#0] + str w10,[sp,#12] + ror w16,w25,#6 + add w20,w20,w28 // h+=K[i] + ror w9,w4,#7 + and w17,w26,w25 + ror w8,w1,#17 + bic w28,w27,w25 + ror w10,w21,#2 + add w20,w20,w2 // h+=X[i] + eor w16,w16,w25,ror#11 + eor w9,w9,w4,ror#18 + orr w17,w17,w28 // Ch(e,f,g) + eor w28,w21,w22 // a^b, b^c in next round + eor w16,w16,w25,ror#25 // Sigma1(e) + eor w10,w10,w21,ror#13 + add w20,w20,w17 // h+=Ch(e,f,g) + and w19,w19,w28 // (b^c)&=(a^b) + eor w8,w8,w1,ror#19 + eor w9,w9,w4,lsr#3 // sigma0(X[i+1]) + add w20,w20,w16 // h+=Sigma1(e) + eor w19,w19,w22 // Maj(a,b,c) + eor w17,w10,w21,ror#22 // Sigma0(a) + eor w8,w8,w1,lsr#10 // sigma1(X[i+14]) + add w3,w3,w12 + add w24,w24,w20 // d+=h + add w20,w20,w19 // h+=Maj(a,b,c) + ldr w19,[x30],#4 // *K++, w28 in next round + add w3,w3,w9 + add w20,w20,w17 // h+=Sigma0(a) + add w3,w3,w8 + cbnz w19,.Loop_16_xx + + ldp x0,x2,[x29,#96] + ldr x1,[x29,#112] + sub x30,x30,#260 // rewind + + ldp w3,w4,[x0] + ldp w5,w6,[x0,#2*4] + add x1,x1,#14*4 // advance input pointer + ldp w7,w8,[x0,#4*4] + add w20,w20,w3 + ldp w9,w10,[x0,#6*4] + add w21,w21,w4 + add w22,w22,w5 + add w23,w23,w6 + stp w20,w21,[x0] + add w24,w24,w7 + add w25,w25,w8 + stp w22,w23,[x0,#2*4] + add w26,w26,w9 + add w27,w27,w10 + cmp x1,x2 + stp w24,w25,[x0,#4*4] + stp w26,w27,[x0,#6*4] + b.ne .Loop + + ldp x19,x20,[x29,#16] + add sp,sp,#4*4 + ldp x21,x22,[x29,#32] + ldp 
x23,x24,[x29,#48]
+	ldp x25,x26,[x29,#64]
+	ldp x27,x28,[x29,#80]
+	ldp x29,x30,[sp],#128
+	ret
+.size zfs_sha256_block_armv7,.-zfs_sha256_block_armv7
+
+// zfs_sha256_block_armv8: SHA-256 block routine built on the sha256h,
+// sha256h2, sha256su0 and sha256su1 instructions, which appear below as
+// hand-encoded .inst words with the intended mnemonic in the trailing comment.
+// NOTE(review): presumably raw .inst words are used so that assemblers
+// lacking these mnemonics can still build the file -- confirm.
+//
+// Register usage, as read from the code:
+//   x0  address of eight 32-bit state words; loaded into v0/v1 on entry and
+//       stored back once, after the last block
+//   x1  input pointer; advanced by 64 bytes per loop iteration
+//   x2  number of 64-byte blocks still to process
+//   x3  cursor into .LK256 (table defined outside this routine)
+//
+// NOTE(review): x2 is decremented before it is tested at the bottom of the
+// loop, so a zero count on entry would wrap instead of returning at once;
+// callers presumably always pass at least one block -- confirm.
+.globl zfs_sha256_block_armv8
+.type zfs_sha256_block_armv8,%function
+.align 6
+zfs_sha256_block_armv8:
+	hint #34 // bti c
+.Lv8_entry:
+// Push x29/x30 and make x29 point at the saved pair.
+	stp x29,x30,[sp,#-16]!
+	add x29,sp,#0
+
+// v0/v1 = the eight state words; x3 = start of the .LK256 table.
+	ld1 {v0.4s,v1.4s},[x0]
+	adr x3,.LK256
+
+// One iteration per 64-byte block. v4-v7 receive the block; v16 and v17
+// alternate as the destination of each 16-byte fetch through x3.
+.Loop_hw:
+	ld1 {v4.16b-v7.16b},[x1],#64
+	sub x2,x2,#1
+	ld1 {v16.4s},[x3],#16
+// Reverse the byte order inside every 32-bit word of the block.
+	rev32 v4.16b,v4.16b
+	rev32 v5.16b,v5.16b
+	rev32 v6.16b,v6.16b
+	rev32 v7.16b,v7.16b
+// v18/v19 keep the state as it was before this block; it is added back
+// after the last group of the iteration.
+	orr v18.16b,v0.16b,v0.16b // offload
+	orr v19.16b,v1.16b,v1.16b
+	ld1 {v17.4s},[x3],#16
+// Sixteen groups follow. Each adds four table words to one of v4-v7, copies
+// v0 into v2, then issues sha256h and sha256h2; sha256h2 is given v2, i.e.
+// the value v0 held before sha256h ran. The first twelve groups also issue
+// sha256su0/sha256su1 on v4-v7; the last four groups do not.
+	add v16.4s,v16.4s,v4.4s
+	.inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b
+	orr v2.16b,v0.16b,v0.16b
+	.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
+	.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
+	.inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b
+	ld1 {v16.4s},[x3],#16
+	add v17.4s,v17.4s,v5.4s
+	.inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b
+	orr v2.16b,v0.16b,v0.16b
+	.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
+	.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
+	.inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b
+	ld1 {v17.4s},[x3],#16
+	add v16.4s,v16.4s,v6.4s
+	.inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b
+	orr v2.16b,v0.16b,v0.16b
+	.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
+	.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
+	.inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b
+	ld1 {v16.4s},[x3],#16
+	add v17.4s,v17.4s,v7.4s
+	.inst 0x5e282887 //sha256su0 v7.16b,v4.16b
+	orr v2.16b,v0.16b,v0.16b
+	.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
+	.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
+	.inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b
+	ld1 {v17.4s},[x3],#16
+	add v16.4s,v16.4s,v4.4s
+	.inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b
+	orr v2.16b,v0.16b,v0.16b
+	.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
+	.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
+	.inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b
+	ld1 {v16.4s},[x3],#16
+	add v17.4s,v17.4s,v5.4s
+	.inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b
+	orr v2.16b,v0.16b,v0.16b
+	.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
+	.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
+	.inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b
+	ld1 {v17.4s},[x3],#16
+	add v16.4s,v16.4s,v6.4s
+	.inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b
+	orr v2.16b,v0.16b,v0.16b
+	.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
+	.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
+	.inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b
+	ld1 {v16.4s},[x3],#16
+	add v17.4s,v17.4s,v7.4s
+	.inst 0x5e282887 //sha256su0 v7.16b,v4.16b
+	orr v2.16b,v0.16b,v0.16b
+	.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
+	.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
+	.inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b
+	ld1 {v17.4s},[x3],#16
+	add v16.4s,v16.4s,v4.4s
+	.inst 0x5e2828a4 //sha256su0 v4.16b,v5.16b
+	orr v2.16b,v0.16b,v0.16b
+	.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
+	.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
+	.inst 0x5e0760c4 //sha256su1 v4.16b,v6.16b,v7.16b
+	ld1 {v16.4s},[x3],#16
+	add v17.4s,v17.4s,v5.4s
+	.inst 0x5e2828c5 //sha256su0 v5.16b,v6.16b
+	orr v2.16b,v0.16b,v0.16b
+	.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
+	.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
+	.inst 0x5e0460e5 //sha256su1 v5.16b,v7.16b,v4.16b
+	ld1 {v17.4s},[x3],#16
+	add v16.4s,v16.4s,v6.4s
+	.inst 0x5e2828e6 //sha256su0 v6.16b,v7.16b
+	orr v2.16b,v0.16b,v0.16b
+	.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
+	.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
+	.inst 0x5e056086 //sha256su1 v6.16b,v4.16b,v5.16b
+	ld1 {v16.4s},[x3],#16
+	add v17.4s,v17.4s,v7.4s
+	.inst 0x5e282887 //sha256su0 v7.16b,v4.16b
+	orr v2.16b,v0.16b,v0.16b
+	.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
+	.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
+	.inst 0x5e0660a7 //sha256su1 v7.16b,v5.16b,v6.16b
+// Last four groups: sha256h/sha256h2 only.
+	ld1 {v17.4s},[x3],#16
+	add v16.4s,v16.4s,v4.4s
+	orr v2.16b,v0.16b,v0.16b
+	.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
+	.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
+
+	ld1 {v16.4s},[x3],#16
+	add v17.4s,v17.4s,v5.4s
+	orr v2.16b,v0.16b,v0.16b
+	.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
+	.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
+
+// The final fetch through x3 has no post-increment. The sub below undoes the
+// fifteen earlier 16-byte post-increments (64*4-16 bytes), so x3 is back at
+// the value set by the adr above before the next iteration.
+	ld1 {v17.4s},[x3]
+	add v16.4s,v16.4s,v6.4s
+	sub x3,x3,#64*4-16 // rewind
+	orr v2.16b,v0.16b,v0.16b
+	.inst 0x5e104020 //sha256h v0.16b,v1.16b,v16.4s
+	.inst 0x5e105041 //sha256h2 v1.16b,v2.16b,v16.4s
+
+	add v17.4s,v17.4s,v7.4s
+	orr v2.16b,v0.16b,v0.16b
+	.inst 0x5e114020 //sha256h v0.16b,v1.16b,v17.4s
+	.inst 0x5e115041 //sha256h2 v1.16b,v2.16b,v17.4s
+
+// Add the state saved in v18/v19 to the newly computed v0/v1.
+	add v0.4s,v0.4s,v18.4s
+	add v1.4s,v1.4s,v19.4s
+
+// Go round again while x2 is non-zero.
+	cbnz x2,.Loop_hw
+
+// Write the final state back through x0.
+	st1 {v0.4s,v1.4s},[x0]
+
+// Pop the 16-byte frame. Only x29 is reloaded; x30 is never written in this
+// routine, so it still holds the return address used by ret.
+	ldr x29,[sp],#16
+	ret
+.size zfs_sha256_block_armv8,.-zfs_sha256_block_armv8
+
+.globl zfs_sha256_block_neon
+.type zfs_sha256_block_neon,%function
+.align 4
+zfs_sha256_block_neon:
+	hint #34 // bti c
+.Lneon_entry:
+	stp x29, x30, [sp, #-16]!
+	mov x29, sp
+	sub sp,sp,#16*4
+
+	adr x16,.LK256
+	add x2,x1,x2,lsl#6 // len to point at the end of inp
+
+	ld1 {v0.16b},[x1], #16
+	ld1 {v1.16b},[x1], #16
+	ld1 {v2.16b},[x1], #16
+	ld1 {v3.16b},[x1], #16
+	ld1 {v4.4s},[x16], #16
+	ld1 {v5.4s},[x16], #16
+	ld1 {v6.4s},[x16], #16
+	ld1 {v7.4s},[x16], #16
+	rev32 v0.16b,v0.16b // yes, even on
+	rev32 v1.16b,v1.16b // big-endian
+	rev32 v2.16b,v2.16b
+	rev32 v3.16b,v3.16b
+	mov x17,sp
+	add v4.4s,v4.4s,v0.4s
+	add v5.4s,v5.4s,v1.4s
+	add v6.4s,v6.4s,v2.4s
+	st1 {v4.4s-v5.4s},[x17], #32
+	add v7.4s,v7.4s,v3.4s
+	st1 {v6.4s-v7.4s},[x17]
+	sub x17,x17,#32
+
+	ldp w3,w4,[x0]
+	ldp w5,w6,[x0,#8]
+	ldp w7,w8,[x0,#16]
+	ldp w9,w10,[x0,#24]
+	ldr w12,[sp,#0]
+	mov w13,wzr
+	eor w14,w4,w5
+	mov w15,wzr
+	b .L_00_48
+
+.align 4
+.L_00_48:
+	ext v4.16b,v0.16b,v1.16b,#4
+	add w10,w10,w12
+	add w3,w3,w15
+	and w12,w8,w7
+	bic w15,w9,w7
+	ext v7.16b,v2.16b,v3.16b,#4
+	eor w11,w7,w7,ror#5
+	add w3,w3,w13
+	mov d19,v3.d[1]
+	orr w12,w12,w15
+	eor
w11,w11,w7,ror#19 + ushr v6.4s,v4.4s,#7 + eor w15,w3,w3,ror#11 + ushr v5.4s,v4.4s,#3 + add w10,w10,w12 + add v0.4s,v0.4s,v7.4s + ror w11,w11,#6 + sli v6.4s,v4.4s,#25 + eor w13,w3,w4 + eor w15,w15,w3,ror#20 + ushr v7.4s,v4.4s,#18 + add w10,w10,w11 + ldr w12,[sp,#4] + and w14,w14,w13 + eor v5.16b,v5.16b,v6.16b + ror w15,w15,#2 + add w6,w6,w10 + sli v7.4s,v4.4s,#14 + eor w14,w14,w4 + ushr v16.4s,v19.4s,#17 + add w9,w9,w12 + add w10,w10,w15 + and w12,w7,w6 + eor v5.16b,v5.16b,v7.16b + bic w15,w8,w6 + eor w11,w6,w6,ror#5 + sli v16.4s,v19.4s,#15 + add w10,w10,w14 + orr w12,w12,w15 + ushr v17.4s,v19.4s,#10 + eor w11,w11,w6,ror#19 + eor w15,w10,w10,ror#11 + ushr v7.4s,v19.4s,#19 + add w9,w9,w12 + ror w11,w11,#6 + add v0.4s,v0.4s,v5.4s + eor w14,w10,w3 + eor w15,w15,w10,ror#20 + sli v7.4s,v19.4s,#13 + add w9,w9,w11 + ldr w12,[sp,#8] + and w13,w13,w14 + eor v17.16b,v17.16b,v16.16b + ror w15,w15,#2 + add w5,w5,w9 + eor w13,w13,w3 + eor v17.16b,v17.16b,v7.16b + add w8,w8,w12 + add w9,w9,w15 + and w12,w6,w5 + add v0.4s,v0.4s,v17.4s + bic w15,w7,w5 + eor w11,w5,w5,ror#5 + add w9,w9,w13 + ushr v18.4s,v0.4s,#17 + orr w12,w12,w15 + ushr v19.4s,v0.4s,#10 + eor w11,w11,w5,ror#19 + eor w15,w9,w9,ror#11 + sli v18.4s,v0.4s,#15 + add w8,w8,w12 + ushr v17.4s,v0.4s,#19 + ror w11,w11,#6 + eor w13,w9,w10 + eor v19.16b,v19.16b,v18.16b + eor w15,w15,w9,ror#20 + add w8,w8,w11 + sli v17.4s,v0.4s,#13 + ldr w12,[sp,#12] + and w14,w14,w13 + ror w15,w15,#2 + ld1 {v4.4s},[x16], #16 + add w4,w4,w8 + eor v19.16b,v19.16b,v17.16b + eor w14,w14,w10 + eor v17.16b,v17.16b,v17.16b + add w7,w7,w12 + add w8,w8,w15 + and w12,w5,w4 + mov v17.d[1],v19.d[0] + bic w15,w6,w4 + eor w11,w4,w4,ror#5 + add w8,w8,w14 + add v0.4s,v0.4s,v17.4s + orr w12,w12,w15 + eor w11,w11,w4,ror#19 + eor w15,w8,w8,ror#11 + add v4.4s,v4.4s,v0.4s + add w7,w7,w12 + ror w11,w11,#6 + eor w14,w8,w9 + eor w15,w15,w8,ror#20 + add w7,w7,w11 + ldr w12,[sp,#16] + and w13,w13,w14 + ror w15,w15,#2 + add w3,w3,w7 + eor w13,w13,w9 + st1 {v4.4s},[x17], 
#16 + ext v4.16b,v1.16b,v2.16b,#4 + add w6,w6,w12 + add w7,w7,w15 + and w12,w4,w3 + bic w15,w5,w3 + ext v7.16b,v3.16b,v0.16b,#4 + eor w11,w3,w3,ror#5 + add w7,w7,w13 + mov d19,v0.d[1] + orr w12,w12,w15 + eor w11,w11,w3,ror#19 + ushr v6.4s,v4.4s,#7 + eor w15,w7,w7,ror#11 + ushr v5.4s,v4.4s,#3 + add w6,w6,w12 + add v1.4s,v1.4s,v7.4s + ror w11,w11,#6 + sli v6.4s,v4.4s,#25 + eor w13,w7,w8 + eor w15,w15,w7,ror#20 + ushr v7.4s,v4.4s,#18 + add w6,w6,w11 + ldr w12,[sp,#20] + and w14,w14,w13 + eor v5.16b,v5.16b,v6.16b + ror w15,w15,#2 + add w10,w10,w6 + sli v7.4s,v4.4s,#14 + eor w14,w14,w8 + ushr v16.4s,v19.4s,#17 + add w5,w5,w12 + add w6,w6,w15 + and w12,w3,w10 + eor v5.16b,v5.16b,v7.16b + bic w15,w4,w10 + eor w11,w10,w10,ror#5 + sli v16.4s,v19.4s,#15 + add w6,w6,w14 + orr w12,w12,w15 + ushr v17.4s,v19.4s,#10 + eor w11,w11,w10,ror#19 + eor w15,w6,w6,ror#11 + ushr v7.4s,v19.4s,#19 + add w5,w5,w12 + ror w11,w11,#6 + add v1.4s,v1.4s,v5.4s + eor w14,w6,w7 + eor w15,w15,w6,ror#20 + sli v7.4s,v19.4s,#13 + add w5,w5,w11 + ldr w12,[sp,#24] + and w13,w13,w14 + eor v17.16b,v17.16b,v16.16b + ror w15,w15,#2 + add w9,w9,w5 + eor w13,w13,w7 + eor v17.16b,v17.16b,v7.16b + add w4,w4,w12 + add w5,w5,w15 + and w12,w10,w9 + add v1.4s,v1.4s,v17.4s + bic w15,w3,w9 + eor w11,w9,w9,ror#5 + add w5,w5,w13 + ushr v18.4s,v1.4s,#17 + orr w12,w12,w15 + ushr v19.4s,v1.4s,#10 + eor w11,w11,w9,ror#19 + eor w15,w5,w5,ror#11 + sli v18.4s,v1.4s,#15 + add w4,w4,w12 + ushr v17.4s,v1.4s,#19 + ror w11,w11,#6 + eor w13,w5,w6 + eor v19.16b,v19.16b,v18.16b + eor w15,w15,w5,ror#20 + add w4,w4,w11 + sli v17.4s,v1.4s,#13 + ldr w12,[sp,#28] + and w14,w14,w13 + ror w15,w15,#2 + ld1 {v4.4s},[x16], #16 + add w8,w8,w4 + eor v19.16b,v19.16b,v17.16b + eor w14,w14,w6 + eor v17.16b,v17.16b,v17.16b + add w3,w3,w12 + add w4,w4,w15 + and w12,w9,w8 + mov v17.d[1],v19.d[0] + bic w15,w10,w8 + eor w11,w8,w8,ror#5 + add w4,w4,w14 + add v1.4s,v1.4s,v17.4s + orr w12,w12,w15 + eor w11,w11,w8,ror#19 + eor w15,w4,w4,ror#11 + add 
v4.4s,v4.4s,v1.4s + add w3,w3,w12 + ror w11,w11,#6 + eor w14,w4,w5 + eor w15,w15,w4,ror#20 + add w3,w3,w11 + ldr w12,[sp,#32] + and w13,w13,w14 + ror w15,w15,#2 + add w7,w7,w3 + eor w13,w13,w5 + st1 {v4.4s},[x17], #16 + ext v4.16b,v2.16b,v3.16b,#4 + add w10,w10,w12 + add w3,w3,w15 + and w12,w8,w7 + bic w15,w9,w7 + ext v7.16b,v0.16b,v1.16b,#4 + eor w11,w7,w7,ror#5 + add w3,w3,w13 + mov d19,v1.d[1] + orr w12,w12,w15 + eor w11,w11,w7,ror#19 + ushr v6.4s,v4.4s,#7 + eor w15,w3,w3,ror#11 + ushr v5.4s,v4.4s,#3 + add w10,w10,w12 + add v2.4s,v2.4s,v7.4s + ror w11,w11,#6 + sli v6.4s,v4.4s,#25 + eor w13,w3,w4 + eor w15,w15,w3,ror#20 + ushr v7.4s,v4.4s,#18 + add w10,w10,w11 + ldr w12,[sp,#36] + and w14,w14,w13 + eor v5.16b,v5.16b,v6.16b + ror w15,w15,#2 + add w6,w6,w10 + sli v7.4s,v4.4s,#14 + eor w14,w14,w4 + ushr v16.4s,v19.4s,#17 + add w9,w9,w12 + add w10,w10,w15 + and w12,w7,w6 + eor v5.16b,v5.16b,v7.16b + bic w15,w8,w6 + eor w11,w6,w6,ror#5 + sli v16.4s,v19.4s,#15 + add w10,w10,w14 + orr w12,w12,w15 + ushr v17.4s,v19.4s,#10 + eor w11,w11,w6,ror#19 + eor w15,w10,w10,ror#11 + ushr v7.4s,v19.4s,#19 + add w9,w9,w12 + ror w11,w11,#6 + add v2.4s,v2.4s,v5.4s + eor w14,w10,w3 + eor w15,w15,w10,ror#20 + sli v7.4s,v19.4s,#13 + add w9,w9,w11 + ldr w12,[sp,#40] + and w13,w13,w14 + eor v17.16b,v17.16b,v16.16b + ror w15,w15,#2 + add w5,w5,w9 + eor w13,w13,w3 + eor v17.16b,v17.16b,v7.16b + add w8,w8,w12 + add w9,w9,w15 + and w12,w6,w5 + add v2.4s,v2.4s,v17.4s + bic w15,w7,w5 + eor w11,w5,w5,ror#5 + add w9,w9,w13 + ushr v18.4s,v2.4s,#17 + orr w12,w12,w15 + ushr v19.4s,v2.4s,#10 + eor w11,w11,w5,ror#19 + eor w15,w9,w9,ror#11 + sli v18.4s,v2.4s,#15 + add w8,w8,w12 + ushr v17.4s,v2.4s,#19 + ror w11,w11,#6 + eor w13,w9,w10 + eor v19.16b,v19.16b,v18.16b + eor w15,w15,w9,ror#20 + add w8,w8,w11 + sli v17.4s,v2.4s,#13 + ldr w12,[sp,#44] + and w14,w14,w13 + ror w15,w15,#2 + ld1 {v4.4s},[x16], #16 + add w4,w4,w8 + eor v19.16b,v19.16b,v17.16b + eor w14,w14,w10 + eor v17.16b,v17.16b,v17.16b + add 
w7,w7,w12 + add w8,w8,w15 + and w12,w5,w4 + mov v17.d[1],v19.d[0] + bic w15,w6,w4 + eor w11,w4,w4,ror#5 + add w8,w8,w14 + add v2.4s,v2.4s,v17.4s + orr w12,w12,w15 + eor w11,w11,w4,ror#19 + eor w15,w8,w8,ror#11 + add v4.4s,v4.4s,v2.4s + add w7,w7,w12 + ror w11,w11,#6 + eor w14,w8,w9 + eor w15,w15,w8,ror#20 + add w7,w7,w11 + ldr w12,[sp,#48] + and w13,w13,w14 + ror w15,w15,#2 + add w3,w3,w7 + eor w13,w13,w9 + st1 {v4.4s},[x17], #16 + ext v4.16b,v3.16b,v0.16b,#4 + add w6,w6,w12 + add w7,w7,w15 + and w12,w4,w3 + bic w15,w5,w3 + ext v7.16b,v1.16b,v2.16b,#4 + eor w11,w3,w3,ror#5 + add w7,w7,w13 + mov d19,v2.d[1] + orr w12,w12,w15 + eor w11,w11,w3,ror#19 + ushr v6.4s,v4.4s,#7 + eor w15,w7,w7,ror#11 + ushr v5.4s,v4.4s,#3 + add w6,w6,w12 + add v3.4s,v3.4s,v7.4s + ror w11,w11,#6 + sli v6.4s,v4.4s,#25 + eor w13,w7,w8 + eor w15,w15,w7,ror#20 + ushr v7.4s,v4.4s,#18 + add w6,w6,w11 + ldr w12,[sp,#52] + and w14,w14,w13 + eor v5.16b,v5.16b,v6.16b + ror w15,w15,#2 + add w10,w10,w6 + sli v7.4s,v4.4s,#14 + eor w14,w14,w8 + ushr v16.4s,v19.4s,#17 + add w5,w5,w12 + add w6,w6,w15 + and w12,w3,w10 + eor v5.16b,v5.16b,v7.16b + bic w15,w4,w10 + eor w11,w10,w10,ror#5 + sli v16.4s,v19.4s,#15 + add w6,w6,w14 + orr w12,w12,w15 + ushr v17.4s,v19.4s,#10 + eor w11,w11,w10,ror#19 + eor w15,w6,w6,ror#11 + ushr v7.4s,v19.4s,#19 + add w5,w5,w12 + ror w11,w11,#6 + add v3.4s,v3.4s,v5.4s + eor w14,w6,w7 + eor w15,w15,w6,ror#20 + sli v7.4s,v19.4s,#13 + add w5,w5,w11 + ldr w12,[sp,#56] + and w13,w13,w14 + eor v17.16b,v17.16b,v16.16b + ror w15,w15,#2 + add w9,w9,w5 + eor w13,w13,w7 + eor v17.16b,v17.16b,v7.16b + add w4,w4,w12 + add w5,w5,w15 + and w12,w10,w9 + add v3.4s,v3.4s,v17.4s + bic w15,w3,w9 + eor w11,w9,w9,ror#5 + add w5,w5,w13 + ushr v18.4s,v3.4s,#17 + orr w12,w12,w15 + ushr v19.4s,v3.4s,#10 + eor w11,w11,w9,ror#19 + eor w15,w5,w5,ror#11 + sli v18.4s,v3.4s,#15 + add w4,w4,w12 + ushr v17.4s,v3.4s,#19 + ror w11,w11,#6 + eor w13,w5,w6 + eor v19.16b,v19.16b,v18.16b + eor w15,w15,w5,ror#20 + add 
w4,w4,w11 + sli v17.4s,v3.4s,#13 + ldr w12,[sp,#60] + and w14,w14,w13 + ror w15,w15,#2 + ld1 {v4.4s},[x16], #16 + add w8,w8,w4 + eor v19.16b,v19.16b,v17.16b + eor w14,w14,w6 + eor v17.16b,v17.16b,v17.16b + add w3,w3,w12 + add w4,w4,w15 + and w12,w9,w8 + mov v17.d[1],v19.d[0] + bic w15,w10,w8 + eor w11,w8,w8,ror#5 + add w4,w4,w14 + add v3.4s,v3.4s,v17.4s + orr w12,w12,w15 + eor w11,w11,w8,ror#19 + eor w15,w4,w4,ror#11 + add v4.4s,v4.4s,v3.4s + add w3,w3,w12 + ror w11,w11,#6 + eor w14,w4,w5 + eor w15,w15,w4,ror#20 + add w3,w3,w11 + ldr w12,[x16] + and w13,w13,w14 + ror w15,w15,#2 + add w7,w7,w3 + eor w13,w13,w5 + st1 {v4.4s},[x17], #16 + cmp w12,#0 // check for K256 terminator + ldr w12,[sp,#0] + sub x17,x17,#64 + bne .L_00_48 + + sub x16,x16,#256 // rewind x16 + cmp x1,x2 + mov x17, #64 + csel x17, x17, xzr, eq + sub x1,x1,x17 // avoid SEGV + mov x17,sp + add w10,w10,w12 + add w3,w3,w15 + and w12,w8,w7 + ld1 {v0.16b},[x1],#16 + bic w15,w9,w7 + eor w11,w7,w7,ror#5 + ld1 {v4.4s},[x16],#16 + add w3,w3,w13 + orr w12,w12,w15 + eor w11,w11,w7,ror#19 + eor w15,w3,w3,ror#11 + rev32 v0.16b,v0.16b + add w10,w10,w12 + ror w11,w11,#6 + eor w13,w3,w4 + eor w15,w15,w3,ror#20 + add v4.4s,v4.4s,v0.4s + add w10,w10,w11 + ldr w12,[sp,#4] + and w14,w14,w13 + ror w15,w15,#2 + add w6,w6,w10 + eor w14,w14,w4 + add w9,w9,w12 + add w10,w10,w15 + and w12,w7,w6 + bic w15,w8,w6 + eor w11,w6,w6,ror#5 + add w10,w10,w14 + orr w12,w12,w15 + eor w11,w11,w6,ror#19 + eor w15,w10,w10,ror#11 + add w9,w9,w12 + ror w11,w11,#6 + eor w14,w10,w3 + eor w15,w15,w10,ror#20 + add w9,w9,w11 + ldr w12,[sp,#8] + and w13,w13,w14 + ror w15,w15,#2 + add w5,w5,w9 + eor w13,w13,w3 + add w8,w8,w12 + add w9,w9,w15 + and w12,w6,w5 + bic w15,w7,w5 + eor w11,w5,w5,ror#5 + add w9,w9,w13 + orr w12,w12,w15 + eor w11,w11,w5,ror#19 + eor w15,w9,w9,ror#11 + add w8,w8,w12 + ror w11,w11,#6 + eor w13,w9,w10 + eor w15,w15,w9,ror#20 + add w8,w8,w11 + ldr w12,[sp,#12] + and w14,w14,w13 + ror w15,w15,#2 + add w4,w4,w8 + eor w14,w14,w10 
+ add w7,w7,w12 + add w8,w8,w15 + and w12,w5,w4 + bic w15,w6,w4 + eor w11,w4,w4,ror#5 + add w8,w8,w14 + orr w12,w12,w15 + eor w11,w11,w4,ror#19 + eor w15,w8,w8,ror#11 + add w7,w7,w12 + ror w11,w11,#6 + eor w14,w8,w9 + eor w15,w15,w8,ror#20 + add w7,w7,w11 + ldr w12,[sp,#16] + and w13,w13,w14 + ror w15,w15,#2 + add w3,w3,w7 + eor w13,w13,w9 + st1 {v4.4s},[x17], #16 + add w6,w6,w12 + add w7,w7,w15 + and w12,w4,w3 + ld1 {v1.16b},[x1],#16 + bic w15,w5,w3 + eor w11,w3,w3,ror#5 + ld1 {v4.4s},[x16],#16 + add w7,w7,w13 + orr w12,w12,w15 + eor w11,w11,w3,ror#19 + eor w15,w7,w7,ror#11 + rev32 v1.16b,v1.16b + add w6,w6,w12 + ror w11,w11,#6 + eor w13,w7,w8 + eor w15,w15,w7,ror#20 + add v4.4s,v4.4s,v1.4s + add w6,w6,w11 + ldr w12,[sp,#20] + and w14,w14,w13 + ror w15,w15,#2 + add w10,w10,w6 + eor w14,w14,w8 + add w5,w5,w12 + add w6,w6,w15 + and w12,w3,w10 + bic w15,w4,w10 + eor w11,w10,w10,ror#5 + add w6,w6,w14 + orr w12,w12,w15 + eor w11,w11,w10,ror#19 + eor w15,w6,w6,ror#11 + add w5,w5,w12 + ror w11,w11,#6 + eor w14,w6,w7 + eor w15,w15,w6,ror#20 + add w5,w5,w11 + ldr w12,[sp,#24] + and w13,w13,w14 + ror w15,w15,#2 + add w9,w9,w5 + eor w13,w13,w7 + add w4,w4,w12 + add w5,w5,w15 + and w12,w10,w9 + bic w15,w3,w9 + eor w11,w9,w9,ror#5 + add w5,w5,w13 + orr w12,w12,w15 + eor w11,w11,w9,ror#19 + eor w15,w5,w5,ror#11 + add w4,w4,w12 + ror w11,w11,#6 + eor w13,w5,w6 + eor w15,w15,w5,ror#20 + add w4,w4,w11 + ldr w12,[sp,#28] + and w14,w14,w13 + ror w15,w15,#2 + add w8,w8,w4 + eor w14,w14,w6 + add w3,w3,w12 + add w4,w4,w15 + and w12,w9,w8 + bic w15,w10,w8 + eor w11,w8,w8,ror#5 + add w4,w4,w14 + orr w12,w12,w15 + eor w11,w11,w8,ror#19 + eor w15,w4,w4,ror#11 + add w3,w3,w12 + ror w11,w11,#6 + eor w14,w4,w5 + eor w15,w15,w4,ror#20 + add w3,w3,w11 + ldr w12,[sp,#32] + and w13,w13,w14 + ror w15,w15,#2 + add w7,w7,w3 + eor w13,w13,w5 + st1 {v4.4s},[x17], #16 + add w10,w10,w12 + add w3,w3,w15 + and w12,w8,w7 + ld1 {v2.16b},[x1],#16 + bic w15,w9,w7 + eor w11,w7,w7,ror#5 + ld1 {v4.4s},[x16],#16 
+ add w3,w3,w13 + orr w12,w12,w15 + eor w11,w11,w7,ror#19 + eor w15,w3,w3,ror#11 + rev32 v2.16b,v2.16b + add w10,w10,w12 + ror w11,w11,#6 + eor w13,w3,w4 + eor w15,w15,w3,ror#20 + add v4.4s,v4.4s,v2.4s + add w10,w10,w11 + ldr w12,[sp,#36] + and w14,w14,w13 + ror w15,w15,#2 + add w6,w6,w10 + eor w14,w14,w4 + add w9,w9,w12 + add w10,w10,w15 + and w12,w7,w6 + bic w15,w8,w6 + eor w11,w6,w6,ror#5 + add w10,w10,w14 + orr w12,w12,w15 + eor w11,w11,w6,ror#19 + eor w15,w10,w10,ror#11 + add w9,w9,w12 + ror w11,w11,#6 + eor w14,w10,w3 + eor w15,w15,w10,ror#20 + add w9,w9,w11 + ldr w12,[sp,#40] + and w13,w13,w14 + ror w15,w15,#2 + add w5,w5,w9 + eor w13,w13,w3 + add w8,w8,w12 + add w9,w9,w15 + and w12,w6,w5 + bic w15,w7,w5 + eor w11,w5,w5,ror#5 + add w9,w9,w13 + orr w12,w12,w15 + eor w11,w11,w5,ror#19 + eor w15,w9,w9,ror#11 + add w8,w8,w12 + ror w11,w11,#6 + eor w13,w9,w10 + eor w15,w15,w9,ror#20 + add w8,w8,w11 + ldr w12,[sp,#44] + and w14,w14,w13 + ror w15,w15,#2 + add w4,w4,w8 + eor w14,w14,w10 + add w7,w7,w12 + add w8,w8,w15 + and w12,w5,w4 + bic w15,w6,w4 + eor w11,w4,w4,ror#5 + add w8,w8,w14 + orr w12,w12,w15 + eor w11,w11,w4,ror#19 + eor w15,w8,w8,ror#11 + add w7,w7,w12 + ror w11,w11,#6 + eor w14,w8,w9 + eor w15,w15,w8,ror#20 + add w7,w7,w11 + ldr w12,[sp,#48] + and w13,w13,w14 + ror w15,w15,#2 + add w3,w3,w7 + eor w13,w13,w9 + st1 {v4.4s},[x17], #16 + add w6,w6,w12 + add w7,w7,w15 + and w12,w4,w3 + ld1 {v3.16b},[x1],#16 + bic w15,w5,w3 + eor w11,w3,w3,ror#5 + ld1 {v4.4s},[x16],#16 + add w7,w7,w13 + orr w12,w12,w15 + eor w11,w11,w3,ror#19 + eor w15,w7,w7,ror#11 + rev32 v3.16b,v3.16b + add w6,w6,w12 + ror w11,w11,#6 + eor w13,w7,w8 + eor w15,w15,w7,ror#20 + add v4.4s,v4.4s,v3.4s + add w6,w6,w11 + ldr w12,[sp,#52] + and w14,w14,w13 + ror w15,w15,#2 + add w10,w10,w6 + eor w14,w14,w8 + add w5,w5,w12 + add w6,w6,w15 + and w12,w3,w10 + bic w15,w4,w10 + eor w11,w10,w10,ror#5 + add w6,w6,w14 + orr w12,w12,w15 + eor w11,w11,w10,ror#19 + eor w15,w6,w6,ror#11 + add w5,w5,w12 + ror 
w11,w11,#6 + eor w14,w6,w7 + eor w15,w15,w6,ror#20 + add w5,w5,w11 + ldr w12,[sp,#56] + and w13,w13,w14 + ror w15,w15,#2 + add w9,w9,w5 + eor w13,w13,w7 + add w4,w4,w12 + add w5,w5,w15 + and w12,w10,w9 + bic w15,w3,w9 + eor w11,w9,w9,ror#5 + add w5,w5,w13 + orr w12,w12,w15 + eor w11,w11,w9,ror#19 + eor w15,w5,w5,ror#11 + add w4,w4,w12 + ror w11,w11,#6 + eor w13,w5,w6 + eor w15,w15,w5,ror#20 + add w4,w4,w11 + ldr w12,[sp,#60] + and w14,w14,w13 + ror w15,w15,#2 + add w8,w8,w4 + eor w14,w14,w6 + add w3,w3,w12 + add w4,w4,w15 + and w12,w9,w8 + bic w15,w10,w8 + eor w11,w8,w8,ror#5 + add w4,w4,w14 + orr w12,w12,w15 + eor w11,w11,w8,ror#19 + eor w15,w4,w4,ror#11 + add w3,w3,w12 + ror w11,w11,#6 + eor w14,w4,w5 + eor w15,w15,w4,ror#20 + add w3,w3,w11 + and w13,w13,w14 + ror w15,w15,#2 + add w7,w7,w3 + eor w13,w13,w5 + st1 {v4.4s},[x17], #16 + add w3,w3,w15 // h+=Sigma0(a) from the past + ldp w11,w12,[x0,#0] + add w3,w3,w13 // h+=Maj(a,b,c) from the past + ldp w13,w14,[x0,#8] + add w3,w3,w11 // accumulate + add w4,w4,w12 + ldp w11,w12,[x0,#16] + add w5,w5,w13 + add w6,w6,w14 + ldp w13,w14,[x0,#24] + add w7,w7,w11 + add w8,w8,w12 + ldr w12,[sp,#0] + stp w3,w4,[x0,#0] + add w9,w9,w13 + mov w13,wzr + stp w5,w6,[x0,#8] + add w10,w10,w14 + stp w7,w8,[x0,#16] + eor w14,w4,w5 + stp w9,w10,[x0,#24] + mov w15,wzr + mov x17,sp + b.ne .L_00_48 + + ldr x29,[x29] + add sp,sp,#16*4+16 + ret +.size zfs_sha256_block_neon,.-zfs_sha256_block_neon + +#endif diff --git a/sys/contrib/openzfs/module/icp/asm-aarch64/sha2/sha512-armv8.S b/sys/contrib/openzfs/module/icp/asm-aarch64/sha2/sha512-armv8.S new file mode 100644 index 000000000000..f6c8f7742912 --- /dev/null +++ b/sys/contrib/openzfs/module/icp/asm-aarch64/sha2/sha512-armv8.S @@ -0,0 +1,1570 @@ +/* + * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de> + * - modified assembly to fit into OpenZFS + */ + +#if defined(__aarch64__) + /* ELF note emitted into .note.gnu.property with owner string "GNU". NOTE(review): 3221225472 is 0xc0000000 and the value word is 3; presumably this is GNU_PROPERTY_AARCH64_FEATURE_1_AND advertising BTI and PAC (the entry point below begins with "hint #34", annotated "bti c") - confirm against the AArch64 ELF ABI. */ + .section .note.gnu.property,"a",@note + .p2align 3 + .word 4 + .word 16 + .word 5 + .asciz "GNU" + .word 3221225472 + .word 4 + .word 3 + .word 0 +.text + /* .LK512: read-only table of the 80 64-bit K[i] round constants used by zfs_sha512_block_armv7, which points x30 at it with "adr" and walks it with post-incremented 8-byte loads (the "*K++" loads). The table is 64-byte aligned (.align 6), holds two constants per .quad line, and is followed by a single zero quad marked as the terminator. */ +.align 6 +.type .LK512,%object +.LK512: + .quad 0x428a2f98d728ae22,0x7137449123ef65cd + .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc + .quad 0x3956c25bf348b538,0x59f111f1b605d019 + .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 + .quad 0xd807aa98a3030242,0x12835b0145706fbe + .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 + .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 + .quad 0x9bdc06a725c71235,0xc19bf174cf692694 + .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 + .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 + .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 + .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 + .quad 0x983e5152ee66dfab,0xa831c66d2db43210 + .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 + .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 + .quad 0x06ca6351e003826f,0x142929670a0e6e70 + .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 + .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df + .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 + .quad 0x81c2c92e47edaee6,0x92722c851482353b + .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 + .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 + .quad 0xd192e819d6ef5218,0xd69906245565a910 + .quad 0xf40e35855771202a,0x106aa07032bbd1b8 + .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 + .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 + .quad 
0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb + .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 + .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 + .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec + .quad 0x90befffa23631e28,0xa4506cebde82bde9 + .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b + .quad 0xca273eceea26619c,0xd186b8c721c0c207 + .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 + .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 + .quad 0x113f9804bef90dae,0x1b710b35131c471b + .quad 0x28db77f523047d84,0x32caab7b40c72493 + .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c + .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a + .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 + .quad 0 // terminator +.size .LK512,.-.LK512 + +.globl zfs_sha512_block_armv7 +.type zfs_sha512_block_armv7,%function +.align 6 +zfs_sha512_block_armv7: + hint #34 // bti c + stp x29,x30,[sp,#-128]! + add x29,sp,#0 + + stp x19,x20,[sp,#16] + stp x21,x22,[sp,#32] + stp x23,x24,[sp,#48] + stp x25,x26,[sp,#64] + stp x27,x28,[sp,#80] + sub sp,sp,#4*8 + + ldp x20,x21,[x0] // load context + ldp x22,x23,[x0,#2*8] + ldp x24,x25,[x0,#4*8] + add x2,x1,x2,lsl#7 // end of input + ldp x26,x27,[x0,#6*8] + adr x30,.LK512 + stp x0,x2,[x29,#96] + +.Loop: + ldp x3,x4,[x1],#2*8 + ldr x19,[x30],#8 // *K++ + eor x28,x21,x22 // magic seed + str x1,[x29,#112] +#ifndef __AARCH64EB__ + rev x3,x3 // 0 +#endif + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + eor x6,x24,x24,ror#23 + and x17,x25,x24 + bic x19,x26,x24 + add x27,x27,x3 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x6,ror#18 // Sigma1(e) + ror x6,x20,#28 + add x27,x27,x17 // h+=Ch(e,f,g) + eor x17,x20,x20,ror#5 + add x27,x27,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x23,x23,x27 // d+=h + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x6,x17,ror#34 // Sigma0(a) + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x27,x27,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x4,x4 // 1 +#endif + ldp 
x5,x6,[x1],#2*8 + add x27,x27,x17 // h+=Sigma0(a) + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + eor x7,x23,x23,ror#23 + and x17,x24,x23 + bic x28,x25,x23 + add x26,x26,x4 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x7,ror#18 // Sigma1(e) + ror x7,x27,#28 + add x26,x26,x17 // h+=Ch(e,f,g) + eor x17,x27,x27,ror#5 + add x26,x26,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x22,x22,x26 // d+=h + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x7,x17,ror#34 // Sigma0(a) + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x26,x26,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x5,x5 // 2 +#endif + add x26,x26,x17 // h+=Sigma0(a) + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + eor x8,x22,x22,ror#23 + and x17,x23,x22 + bic x19,x24,x22 + add x25,x25,x5 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x8,ror#18 // Sigma1(e) + ror x8,x26,#28 + add x25,x25,x17 // h+=Ch(e,f,g) + eor x17,x26,x26,ror#5 + add x25,x25,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x21,x21,x25 // d+=h + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x8,x17,ror#34 // Sigma0(a) + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x25,x25,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x6,x6 // 3 +#endif + ldp x7,x8,[x1],#2*8 + add x25,x25,x17 // h+=Sigma0(a) + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + eor x9,x21,x21,ror#23 + and x17,x22,x21 + bic x28,x23,x21 + add x24,x24,x6 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x9,ror#18 // Sigma1(e) + ror x9,x25,#28 + add x24,x24,x17 // h+=Ch(e,f,g) + eor x17,x25,x25,ror#5 + add x24,x24,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x20,x20,x24 // d+=h + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x9,x17,ror#34 // Sigma0(a) + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next 
round + //add x24,x24,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x7,x7 // 4 +#endif + add x24,x24,x17 // h+=Sigma0(a) + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + eor x10,x20,x20,ror#23 + and x17,x21,x20 + bic x19,x22,x20 + add x23,x23,x7 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x10,ror#18 // Sigma1(e) + ror x10,x24,#28 + add x23,x23,x17 // h+=Ch(e,f,g) + eor x17,x24,x24,ror#5 + add x23,x23,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x27,x27,x23 // d+=h + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x10,x17,ror#34 // Sigma0(a) + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x23,x23,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x8,x8 // 5 +#endif + ldp x9,x10,[x1],#2*8 + add x23,x23,x17 // h+=Sigma0(a) + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + eor x11,x27,x27,ror#23 + and x17,x20,x27 + bic x28,x21,x27 + add x22,x22,x8 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x11,ror#18 // Sigma1(e) + ror x11,x23,#28 + add x22,x22,x17 // h+=Ch(e,f,g) + eor x17,x23,x23,ror#5 + add x22,x22,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x26,x26,x22 // d+=h + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x11,x17,ror#34 // Sigma0(a) + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x22,x22,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x9,x9 // 6 +#endif + add x22,x22,x17 // h+=Sigma0(a) + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + eor x12,x26,x26,ror#23 + and x17,x27,x26 + bic x19,x20,x26 + add x21,x21,x9 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x12,ror#18 // Sigma1(e) + ror x12,x22,#28 + add x21,x21,x17 // h+=Ch(e,f,g) + eor x17,x22,x22,ror#5 + add x21,x21,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x25,x25,x21 // d+=h + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x12,x17,ror#34 
// Sigma0(a) + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x21,x21,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x10,x10 // 7 +#endif + ldp x11,x12,[x1],#2*8 + add x21,x21,x17 // h+=Sigma0(a) + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + eor x13,x25,x25,ror#23 + and x17,x26,x25 + bic x28,x27,x25 + add x20,x20,x10 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x13,ror#18 // Sigma1(e) + ror x13,x21,#28 + add x20,x20,x17 // h+=Ch(e,f,g) + eor x17,x21,x21,ror#5 + add x20,x20,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x24,x24,x20 // d+=h + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x13,x17,ror#34 // Sigma0(a) + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x20,x20,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x11,x11 // 8 +#endif + add x20,x20,x17 // h+=Sigma0(a) + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + eor x14,x24,x24,ror#23 + and x17,x25,x24 + bic x19,x26,x24 + add x27,x27,x11 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x14,ror#18 // Sigma1(e) + ror x14,x20,#28 + add x27,x27,x17 // h+=Ch(e,f,g) + eor x17,x20,x20,ror#5 + add x27,x27,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x23,x23,x27 // d+=h + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x14,x17,ror#34 // Sigma0(a) + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x27,x27,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x12,x12 // 9 +#endif + ldp x13,x14,[x1],#2*8 + add x27,x27,x17 // h+=Sigma0(a) + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + eor x15,x23,x23,ror#23 + and x17,x24,x23 + bic x28,x25,x23 + add x26,x26,x12 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x15,ror#18 // Sigma1(e) + ror x15,x27,#28 + add x26,x26,x17 // h+=Ch(e,f,g) + eor x17,x27,x27,ror#5 + add x26,x26,x16 // 
h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x22,x22,x26 // d+=h + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x15,x17,ror#34 // Sigma0(a) + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x26,x26,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x13,x13 // 10 +#endif + add x26,x26,x17 // h+=Sigma0(a) + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + eor x0,x22,x22,ror#23 + and x17,x23,x22 + bic x19,x24,x22 + add x25,x25,x13 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x0,ror#18 // Sigma1(e) + ror x0,x26,#28 + add x25,x25,x17 // h+=Ch(e,f,g) + eor x17,x26,x26,ror#5 + add x25,x25,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x21,x21,x25 // d+=h + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x0,x17,ror#34 // Sigma0(a) + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x25,x25,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x14,x14 // 11 +#endif + ldp x15,x0,[x1],#2*8 + add x25,x25,x17 // h+=Sigma0(a) + str x6,[sp,#24] + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + eor x6,x21,x21,ror#23 + and x17,x22,x21 + bic x28,x23,x21 + add x24,x24,x14 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x6,ror#18 // Sigma1(e) + ror x6,x25,#28 + add x24,x24,x17 // h+=Ch(e,f,g) + eor x17,x25,x25,ror#5 + add x24,x24,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x20,x20,x24 // d+=h + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x6,x17,ror#34 // Sigma0(a) + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x24,x24,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x15,x15 // 12 +#endif + add x24,x24,x17 // h+=Sigma0(a) + str x7,[sp,#0] + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + eor x7,x20,x20,ror#23 + and x17,x21,x20 + bic x19,x22,x20 + add x23,x23,x15 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor 
x16,x16,x7,ror#18 // Sigma1(e) + ror x7,x24,#28 + add x23,x23,x17 // h+=Ch(e,f,g) + eor x17,x24,x24,ror#5 + add x23,x23,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x27,x27,x23 // d+=h + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x7,x17,ror#34 // Sigma0(a) + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x23,x23,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x0,x0 // 13 +#endif + ldp x1,x2,[x1] + add x23,x23,x17 // h+=Sigma0(a) + str x8,[sp,#8] + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + eor x8,x27,x27,ror#23 + and x17,x20,x27 + bic x28,x21,x27 + add x22,x22,x0 // h+=X[i] + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x8,ror#18 // Sigma1(e) + ror x8,x23,#28 + add x22,x22,x17 // h+=Ch(e,f,g) + eor x17,x23,x23,ror#5 + add x22,x22,x16 // h+=Sigma1(e) + and x19,x19,x28 // (b^c)&=(a^b) + add x26,x26,x22 // d+=h + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x8,x17,ror#34 // Sigma0(a) + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + //add x22,x22,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x1,x1 // 14 +#endif + ldr x6,[sp,#24] + add x22,x22,x17 // h+=Sigma0(a) + str x9,[sp,#16] + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + eor x9,x26,x26,ror#23 + and x17,x27,x26 + bic x19,x20,x26 + add x21,x21,x1 // h+=X[i] + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x9,ror#18 // Sigma1(e) + ror x9,x22,#28 + add x21,x21,x17 // h+=Ch(e,f,g) + eor x17,x22,x22,ror#5 + add x21,x21,x16 // h+=Sigma1(e) + and x28,x28,x19 // (b^c)&=(a^b) + add x25,x25,x21 // d+=h + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x9,x17,ror#34 // Sigma0(a) + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + //add x21,x21,x17 // h+=Sigma0(a) +#ifndef __AARCH64EB__ + rev x2,x2 // 15 +#endif + ldr x7,[sp,#0] + add x21,x21,x17 // h+=Sigma0(a) + str x10,[sp,#24] + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + ror 
x9,x4,#1 + and x17,x26,x25 + ror x8,x1,#19 + bic x28,x27,x25 + ror x10,x21,#28 + add x20,x20,x2 // h+=X[i] + eor x16,x16,x25,ror#18 + eor x9,x9,x4,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x25,ror#41 // Sigma1(e) + eor x10,x10,x21,ror#34 + add x20,x20,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x8,x8,x1,ror#61 + eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) + add x20,x20,x16 // h+=Sigma1(e) + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x10,x21,ror#39 // Sigma0(a) + eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) + add x3,x3,x12 + add x24,x24,x20 // d+=h + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x3,x3,x9 + add x20,x20,x17 // h+=Sigma0(a) + add x3,x3,x8 +.Loop_16_xx: + ldr x8,[sp,#8] + str x11,[sp,#0] + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + ror x10,x5,#1 + and x17,x25,x24 + ror x9,x2,#19 + bic x19,x26,x24 + ror x11,x20,#28 + add x27,x27,x3 // h+=X[i] + eor x16,x16,x24,ror#18 + eor x10,x10,x5,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x24,ror#41 // Sigma1(e) + eor x11,x11,x20,ror#34 + add x27,x27,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x9,x9,x2,ror#61 + eor x10,x10,x5,lsr#7 // sigma0(X[i+1]) + add x27,x27,x16 // h+=Sigma1(e) + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x11,x20,ror#39 // Sigma0(a) + eor x9,x9,x2,lsr#6 // sigma1(X[i+14]) + add x4,x4,x13 + add x23,x23,x27 // d+=h + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x4,x4,x10 + add x27,x27,x17 // h+=Sigma0(a) + add x4,x4,x9 + ldr x9,[sp,#16] + str x12,[sp,#8] + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + ror x11,x6,#1 + and x17,x24,x23 + ror x10,x3,#19 + bic x28,x25,x23 + ror x12,x27,#28 + add x26,x26,x4 // h+=X[i] + eor x16,x16,x23,ror#18 + eor x11,x11,x6,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x23,ror#41 // Sigma1(e) + eor x12,x12,x27,ror#34 + add 
x26,x26,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x10,x10,x3,ror#61 + eor x11,x11,x6,lsr#7 // sigma0(X[i+1]) + add x26,x26,x16 // h+=Sigma1(e) + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x12,x27,ror#39 // Sigma0(a) + eor x10,x10,x3,lsr#6 // sigma1(X[i+14]) + add x5,x5,x14 + add x22,x22,x26 // d+=h + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x5,x5,x11 + add x26,x26,x17 // h+=Sigma0(a) + add x5,x5,x10 + ldr x10,[sp,#24] + str x13,[sp,#16] + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + ror x12,x7,#1 + and x17,x23,x22 + ror x11,x4,#19 + bic x19,x24,x22 + ror x13,x26,#28 + add x25,x25,x5 // h+=X[i] + eor x16,x16,x22,ror#18 + eor x12,x12,x7,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x22,ror#41 // Sigma1(e) + eor x13,x13,x26,ror#34 + add x25,x25,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x11,x11,x4,ror#61 + eor x12,x12,x7,lsr#7 // sigma0(X[i+1]) + add x25,x25,x16 // h+=Sigma1(e) + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x13,x26,ror#39 // Sigma0(a) + eor x11,x11,x4,lsr#6 // sigma1(X[i+14]) + add x6,x6,x15 + add x21,x21,x25 // d+=h + add x25,x25,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x6,x6,x12 + add x25,x25,x17 // h+=Sigma0(a) + add x6,x6,x11 + ldr x11,[sp,#0] + str x14,[sp,#24] + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + ror x13,x8,#1 + and x17,x22,x21 + ror x12,x5,#19 + bic x28,x23,x21 + ror x14,x25,#28 + add x24,x24,x6 // h+=X[i] + eor x16,x16,x21,ror#18 + eor x13,x13,x8,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x21,ror#41 // Sigma1(e) + eor x14,x14,x25,ror#34 + add x24,x24,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x12,x12,x5,ror#61 + eor x13,x13,x8,lsr#7 // sigma0(X[i+1]) + add x24,x24,x16 // h+=Sigma1(e) + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x14,x25,ror#39 // Sigma0(a) + eor x12,x12,x5,lsr#6 // sigma1(X[i+14]) + add x7,x7,x0 + 
add x20,x20,x24 // d+=h + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x7,x7,x13 + add x24,x24,x17 // h+=Sigma0(a) + add x7,x7,x12 + ldr x12,[sp,#8] + str x15,[sp,#0] + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + ror x14,x9,#1 + and x17,x21,x20 + ror x13,x6,#19 + bic x19,x22,x20 + ror x15,x24,#28 + add x23,x23,x7 // h+=X[i] + eor x16,x16,x20,ror#18 + eor x14,x14,x9,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x20,ror#41 // Sigma1(e) + eor x15,x15,x24,ror#34 + add x23,x23,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x13,x13,x6,ror#61 + eor x14,x14,x9,lsr#7 // sigma0(X[i+1]) + add x23,x23,x16 // h+=Sigma1(e) + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x15,x24,ror#39 // Sigma0(a) + eor x13,x13,x6,lsr#6 // sigma1(X[i+14]) + add x8,x8,x1 + add x27,x27,x23 // d+=h + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x8,x8,x14 + add x23,x23,x17 // h+=Sigma0(a) + add x8,x8,x13 + ldr x13,[sp,#16] + str x0,[sp,#8] + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + ror x15,x10,#1 + and x17,x20,x27 + ror x14,x7,#19 + bic x28,x21,x27 + ror x0,x23,#28 + add x22,x22,x8 // h+=X[i] + eor x16,x16,x27,ror#18 + eor x15,x15,x10,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x27,ror#41 // Sigma1(e) + eor x0,x0,x23,ror#34 + add x22,x22,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x14,x14,x7,ror#61 + eor x15,x15,x10,lsr#7 // sigma0(X[i+1]) + add x22,x22,x16 // h+=Sigma1(e) + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x0,x23,ror#39 // Sigma0(a) + eor x14,x14,x7,lsr#6 // sigma1(X[i+14]) + add x9,x9,x2 + add x26,x26,x22 // d+=h + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x9,x9,x15 + add x22,x22,x17 // h+=Sigma0(a) + add x9,x9,x14 + ldr x14,[sp,#24] + str x1,[sp,#16] + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + ror x0,x11,#1 + and x17,x27,x26 + ror 
x15,x8,#19 + bic x19,x20,x26 + ror x1,x22,#28 + add x21,x21,x9 // h+=X[i] + eor x16,x16,x26,ror#18 + eor x0,x0,x11,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x26,ror#41 // Sigma1(e) + eor x1,x1,x22,ror#34 + add x21,x21,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x15,x15,x8,ror#61 + eor x0,x0,x11,lsr#7 // sigma0(X[i+1]) + add x21,x21,x16 // h+=Sigma1(e) + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x1,x22,ror#39 // Sigma0(a) + eor x15,x15,x8,lsr#6 // sigma1(X[i+14]) + add x10,x10,x3 + add x25,x25,x21 // d+=h + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x10,x10,x0 + add x21,x21,x17 // h+=Sigma0(a) + add x10,x10,x15 + ldr x15,[sp,#0] + str x2,[sp,#24] + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + ror x1,x12,#1 + and x17,x26,x25 + ror x0,x9,#19 + bic x28,x27,x25 + ror x2,x21,#28 + add x20,x20,x10 // h+=X[i] + eor x16,x16,x25,ror#18 + eor x1,x1,x12,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x25,ror#41 // Sigma1(e) + eor x2,x2,x21,ror#34 + add x20,x20,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x0,x0,x9,ror#61 + eor x1,x1,x12,lsr#7 // sigma0(X[i+1]) + add x20,x20,x16 // h+=Sigma1(e) + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x2,x21,ror#39 // Sigma0(a) + eor x0,x0,x9,lsr#6 // sigma1(X[i+14]) + add x11,x11,x4 + add x24,x24,x20 // d+=h + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x11,x11,x1 + add x20,x20,x17 // h+=Sigma0(a) + add x11,x11,x0 + ldr x0,[sp,#8] + str x3,[sp,#0] + ror x16,x24,#14 + add x27,x27,x19 // h+=K[i] + ror x2,x13,#1 + and x17,x25,x24 + ror x1,x10,#19 + bic x19,x26,x24 + ror x3,x20,#28 + add x27,x27,x11 // h+=X[i] + eor x16,x16,x24,ror#18 + eor x2,x2,x13,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x20,x21 // a^b, b^c in next round + eor x16,x16,x24,ror#41 // Sigma1(e) + eor x3,x3,x20,ror#34 + add x27,x27,x17 // h+=Ch(e,f,g) + and 
x28,x28,x19 // (b^c)&=(a^b) + eor x1,x1,x10,ror#61 + eor x2,x2,x13,lsr#7 // sigma0(X[i+1]) + add x27,x27,x16 // h+=Sigma1(e) + eor x28,x28,x21 // Maj(a,b,c) + eor x17,x3,x20,ror#39 // Sigma0(a) + eor x1,x1,x10,lsr#6 // sigma1(X[i+14]) + add x12,x12,x5 + add x23,x23,x27 // d+=h + add x27,x27,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x12,x12,x2 + add x27,x27,x17 // h+=Sigma0(a) + add x12,x12,x1 + ldr x1,[sp,#16] + str x4,[sp,#8] + ror x16,x23,#14 + add x26,x26,x28 // h+=K[i] + ror x3,x14,#1 + and x17,x24,x23 + ror x2,x11,#19 + bic x28,x25,x23 + ror x4,x27,#28 + add x26,x26,x12 // h+=X[i] + eor x16,x16,x23,ror#18 + eor x3,x3,x14,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x27,x20 // a^b, b^c in next round + eor x16,x16,x23,ror#41 // Sigma1(e) + eor x4,x4,x27,ror#34 + add x26,x26,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x2,x2,x11,ror#61 + eor x3,x3,x14,lsr#7 // sigma0(X[i+1]) + add x26,x26,x16 // h+=Sigma1(e) + eor x19,x19,x20 // Maj(a,b,c) + eor x17,x4,x27,ror#39 // Sigma0(a) + eor x2,x2,x11,lsr#6 // sigma1(X[i+14]) + add x13,x13,x6 + add x22,x22,x26 // d+=h + add x26,x26,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x13,x13,x3 + add x26,x26,x17 // h+=Sigma0(a) + add x13,x13,x2 + ldr x2,[sp,#24] + str x5,[sp,#16] + ror x16,x22,#14 + add x25,x25,x19 // h+=K[i] + ror x4,x15,#1 + and x17,x23,x22 + ror x3,x12,#19 + bic x19,x24,x22 + ror x5,x26,#28 + add x25,x25,x13 // h+=X[i] + eor x16,x16,x22,ror#18 + eor x4,x4,x15,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x26,x27 // a^b, b^c in next round + eor x16,x16,x22,ror#41 // Sigma1(e) + eor x5,x5,x26,ror#34 + add x25,x25,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x3,x3,x12,ror#61 + eor x4,x4,x15,lsr#7 // sigma0(X[i+1]) + add x25,x25,x16 // h+=Sigma1(e) + eor x28,x28,x27 // Maj(a,b,c) + eor x17,x5,x26,ror#39 // Sigma0(a) + eor x3,x3,x12,lsr#6 // sigma1(X[i+14]) + add x14,x14,x7 + add x21,x21,x25 // d+=h + add x25,x25,x28 // 
h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x14,x14,x4 + add x25,x25,x17 // h+=Sigma0(a) + add x14,x14,x3 + ldr x3,[sp,#0] + str x6,[sp,#24] + ror x16,x21,#14 + add x24,x24,x28 // h+=K[i] + ror x5,x0,#1 + and x17,x22,x21 + ror x4,x13,#19 + bic x28,x23,x21 + ror x6,x25,#28 + add x24,x24,x14 // h+=X[i] + eor x16,x16,x21,ror#18 + eor x5,x5,x0,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x25,x26 // a^b, b^c in next round + eor x16,x16,x21,ror#41 // Sigma1(e) + eor x6,x6,x25,ror#34 + add x24,x24,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x4,x4,x13,ror#61 + eor x5,x5,x0,lsr#7 // sigma0(X[i+1]) + add x24,x24,x16 // h+=Sigma1(e) + eor x19,x19,x26 // Maj(a,b,c) + eor x17,x6,x25,ror#39 // Sigma0(a) + eor x4,x4,x13,lsr#6 // sigma1(X[i+14]) + add x15,x15,x8 + add x20,x20,x24 // d+=h + add x24,x24,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x15,x15,x5 + add x24,x24,x17 // h+=Sigma0(a) + add x15,x15,x4 + ldr x4,[sp,#8] + str x7,[sp,#0] + ror x16,x20,#14 + add x23,x23,x19 // h+=K[i] + ror x6,x1,#1 + and x17,x21,x20 + ror x5,x14,#19 + bic x19,x22,x20 + ror x7,x24,#28 + add x23,x23,x15 // h+=X[i] + eor x16,x16,x20,ror#18 + eor x6,x6,x1,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x24,x25 // a^b, b^c in next round + eor x16,x16,x20,ror#41 // Sigma1(e) + eor x7,x7,x24,ror#34 + add x23,x23,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x5,x5,x14,ror#61 + eor x6,x6,x1,lsr#7 // sigma0(X[i+1]) + add x23,x23,x16 // h+=Sigma1(e) + eor x28,x28,x25 // Maj(a,b,c) + eor x17,x7,x24,ror#39 // Sigma0(a) + eor x5,x5,x14,lsr#6 // sigma1(X[i+14]) + add x0,x0,x9 + add x27,x27,x23 // d+=h + add x23,x23,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x0,x0,x6 + add x23,x23,x17 // h+=Sigma0(a) + add x0,x0,x5 + ldr x5,[sp,#16] + str x8,[sp,#8] + ror x16,x27,#14 + add x22,x22,x28 // h+=K[i] + ror x7,x2,#1 + and x17,x20,x27 + ror x6,x15,#19 + bic x28,x21,x27 + ror x8,x23,#28 + add x22,x22,x0 // 
h+=X[i] + eor x16,x16,x27,ror#18 + eor x7,x7,x2,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x23,x24 // a^b, b^c in next round + eor x16,x16,x27,ror#41 // Sigma1(e) + eor x8,x8,x23,ror#34 + add x22,x22,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x6,x6,x15,ror#61 + eor x7,x7,x2,lsr#7 // sigma0(X[i+1]) + add x22,x22,x16 // h+=Sigma1(e) + eor x19,x19,x24 // Maj(a,b,c) + eor x17,x8,x23,ror#39 // Sigma0(a) + eor x6,x6,x15,lsr#6 // sigma1(X[i+14]) + add x1,x1,x10 + add x26,x26,x22 // d+=h + add x22,x22,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x1,x1,x7 + add x22,x22,x17 // h+=Sigma0(a) + add x1,x1,x6 + ldr x6,[sp,#24] + str x9,[sp,#16] + ror x16,x26,#14 + add x21,x21,x19 // h+=K[i] + ror x8,x3,#1 + and x17,x27,x26 + ror x7,x0,#19 + bic x19,x20,x26 + ror x9,x22,#28 + add x21,x21,x1 // h+=X[i] + eor x16,x16,x26,ror#18 + eor x8,x8,x3,ror#8 + orr x17,x17,x19 // Ch(e,f,g) + eor x19,x22,x23 // a^b, b^c in next round + eor x16,x16,x26,ror#41 // Sigma1(e) + eor x9,x9,x22,ror#34 + add x21,x21,x17 // h+=Ch(e,f,g) + and x28,x28,x19 // (b^c)&=(a^b) + eor x7,x7,x0,ror#61 + eor x8,x8,x3,lsr#7 // sigma0(X[i+1]) + add x21,x21,x16 // h+=Sigma1(e) + eor x28,x28,x23 // Maj(a,b,c) + eor x17,x9,x22,ror#39 // Sigma0(a) + eor x7,x7,x0,lsr#6 // sigma1(X[i+14]) + add x2,x2,x11 + add x25,x25,x21 // d+=h + add x21,x21,x28 // h+=Maj(a,b,c) + ldr x28,[x30],#8 // *K++, x19 in next round + add x2,x2,x8 + add x21,x21,x17 // h+=Sigma0(a) + add x2,x2,x7 + ldr x7,[sp,#0] + str x10,[sp,#24] + ror x16,x25,#14 + add x20,x20,x28 // h+=K[i] + ror x9,x4,#1 + and x17,x26,x25 + ror x8,x1,#19 + bic x28,x27,x25 + ror x10,x21,#28 + add x20,x20,x2 // h+=X[i] + eor x16,x16,x25,ror#18 + eor x9,x9,x4,ror#8 + orr x17,x17,x28 // Ch(e,f,g) + eor x28,x21,x22 // a^b, b^c in next round + eor x16,x16,x25,ror#41 // Sigma1(e) + eor x10,x10,x21,ror#34 + add x20,x20,x17 // h+=Ch(e,f,g) + and x19,x19,x28 // (b^c)&=(a^b) + eor x8,x8,x1,ror#61 + eor x9,x9,x4,lsr#7 // sigma0(X[i+1]) + 
add x20,x20,x16 // h+=Sigma1(e) + eor x19,x19,x22 // Maj(a,b,c) + eor x17,x10,x21,ror#39 // Sigma0(a) + eor x8,x8,x1,lsr#6 // sigma1(X[i+14]) + add x3,x3,x12 + add x24,x24,x20 // d+=h + add x20,x20,x19 // h+=Maj(a,b,c) + ldr x19,[x30],#8 // *K++, x28 in next round + add x3,x3,x9 + add x20,x20,x17 // h+=Sigma0(a) + add x3,x3,x8 + cbnz x19,.Loop_16_xx + + ldp x0,x2,[x29,#96] + ldr x1,[x29,#112] + sub x30,x30,#648 // rewind + + ldp x3,x4,[x0] + ldp x5,x6,[x0,#2*8] + add x1,x1,#14*8 // advance input pointer + ldp x7,x8,[x0,#4*8] + add x20,x20,x3 + ldp x9,x10,[x0,#6*8] + add x21,x21,x4 + add x22,x22,x5 + add x23,x23,x6 + stp x20,x21,[x0] + add x24,x24,x7 + add x25,x25,x8 + stp x22,x23,[x0,#2*8] + add x26,x26,x9 + add x27,x27,x10 + cmp x1,x2 + stp x24,x25,[x0,#4*8] + stp x26,x27,[x0,#6*8] + b.ne .Loop + + ldp x19,x20,[x29,#16] + add sp,sp,#4*8 + ldp x21,x22,[x29,#32] + ldp x23,x24,[x29,#48] + ldp x25,x26,[x29,#64] + ldp x27,x28,[x29,#80] + ldp x29,x30,[sp],#128 + ret +.size zfs_sha512_block_armv7,.-zfs_sha512_block_armv7 + + +.globl zfs_sha512_block_armv8 +.type zfs_sha512_block_armv8,%function +.align 6 +zfs_sha512_block_armv8: + hint #34 // bti c +.Lv8_entry: + // Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later + stp x29,x30,[sp,#-16]! 
+ add x29,sp,#0 + + ld1 {v16.16b-v19.16b},[x1],#64 // load input + ld1 {v20.16b-v23.16b},[x1],#64 + + ld1 {v0.2d-v3.2d},[x0] // load context + adr x3,.LK512 + + rev64 v16.16b,v16.16b + rev64 v17.16b,v17.16b + rev64 v18.16b,v18.16b + rev64 v19.16b,v19.16b + rev64 v20.16b,v20.16b + rev64 v21.16b,v21.16b + rev64 v22.16b,v22.16b + rev64 v23.16b,v23.16b + b .Loop_hw + +.align 4 +.Loop_hw: + ld1 {v24.2d},[x3],#16 + subs x2,x2,#1 + sub x4,x1,#128 + orr v26.16b,v0.16b,v0.16b // offload + orr v27.16b,v1.16b,v1.16b + orr v28.16b,v2.16b,v2.16b + orr v29.16b,v3.16b,v3.16b + csel x1,x1,x4,ne // conditional rewind + add v24.2d,v24.2d,v16.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" + .inst 0xcec08230 //sha512su0 v16.16b,v17.16b + ext v7.16b,v20.16b,v21.16b,#8 + .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b + .inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" + .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + add v25.2d,v25.2d,v17.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" + .inst 0xcec08251 //sha512su0 v17.16b,v18.16b + ext v7.16b,v21.16b,v22.16b,#8 + .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b + .inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" + .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + add v24.2d,v24.2d,v18.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" + .inst 0xcec08272 //sha512su0 v18.16b,v19.16b + ext v7.16b,v22.16b,v23.16b,#8 + .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b + .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b + add v0.2d,v3.2d,v4.2d // "D + T1" + .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + add 
v25.2d,v25.2d,v19.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" + .inst 0xcec08293 //sha512su0 v19.16b,v20.16b + ext v7.16b,v23.16b,v16.16b,#8 + .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b + .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b + add v3.2d,v2.2d,v1.2d // "D + T1" + .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + add v24.2d,v24.2d,v20.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" + .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b + ext v7.16b,v16.16b,v17.16b,#8 + .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b + .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b + add v2.2d,v4.2d,v0.2d // "D + T1" + .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v25.2d,v25.2d,v21.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" + .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b + ext v7.16b,v17.16b,v18.16b,#8 + .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b + .inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" + .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + add v24.2d,v24.2d,v22.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" + .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b + ext v7.16b,v18.16b,v19.16b,#8 + .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b + .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" + .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + add v25.2d,v25.2d,v23.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add 
v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" + .inst 0xcec08217 //sha512su0 v23.16b,v16.16b + ext v7.16b,v19.16b,v20.16b,#8 + .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b + .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b + add v0.2d,v3.2d,v4.2d // "D + T1" + .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + add v24.2d,v24.2d,v16.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" + .inst 0xcec08230 //sha512su0 v16.16b,v17.16b + ext v7.16b,v20.16b,v21.16b,#8 + .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b + .inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b + add v3.2d,v2.2d,v1.2d // "D + T1" + .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + add v25.2d,v25.2d,v17.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" + .inst 0xcec08251 //sha512su0 v17.16b,v18.16b + ext v7.16b,v21.16b,v22.16b,#8 + .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b + .inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b + add v2.2d,v4.2d,v0.2d // "D + T1" + .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v24.2d,v24.2d,v18.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" + .inst 0xcec08272 //sha512su0 v18.16b,v19.16b + ext v7.16b,v22.16b,v23.16b,#8 + .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b + .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" + .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + add v25.2d,v25.2d,v19.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" + .inst 0xcec08293 //sha512su0 v19.16b,v20.16b + ext v7.16b,v23.16b,v16.16b,#8 + .inst 0xce6680a2 //sha512h 
v2.16b,v5.16b,v6.16b + .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" + .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + add v24.2d,v24.2d,v20.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" + .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b + ext v7.16b,v16.16b,v17.16b,#8 + .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b + .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b + add v0.2d,v3.2d,v4.2d // "D + T1" + .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + add v25.2d,v25.2d,v21.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" + .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b + ext v7.16b,v17.16b,v18.16b,#8 + .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b + .inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b + add v3.2d,v2.2d,v1.2d // "D + T1" + .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + add v24.2d,v24.2d,v22.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" + .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b + ext v7.16b,v18.16b,v19.16b,#8 + .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b + .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b + add v2.2d,v4.2d,v0.2d // "D + T1" + .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v25.2d,v25.2d,v23.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" + .inst 0xcec08217 //sha512su0 v23.16b,v16.16b + ext v7.16b,v19.16b,v20.16b,#8 + .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b + .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" + .inst 0xce608423 //sha512h2 
v3.16b,v1.16b,v0.16b + add v24.2d,v24.2d,v16.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" + .inst 0xcec08230 //sha512su0 v16.16b,v17.16b + ext v7.16b,v20.16b,v21.16b,#8 + .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b + .inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" + .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + add v25.2d,v25.2d,v17.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" + .inst 0xcec08251 //sha512su0 v17.16b,v18.16b + ext v7.16b,v21.16b,v22.16b,#8 + .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b + .inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b + add v0.2d,v3.2d,v4.2d // "D + T1" + .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + add v24.2d,v24.2d,v18.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" + .inst 0xcec08272 //sha512su0 v18.16b,v19.16b + ext v7.16b,v22.16b,v23.16b,#8 + .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b + .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b + add v3.2d,v2.2d,v1.2d // "D + T1" + .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + add v25.2d,v25.2d,v19.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" + .inst 0xcec08293 //sha512su0 v19.16b,v20.16b + ext v7.16b,v23.16b,v16.16b,#8 + .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b + .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b + add v2.2d,v4.2d,v0.2d // "D + T1" + .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v24.2d,v24.2d,v20.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext 
v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" + .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b + ext v7.16b,v16.16b,v17.16b,#8 + .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b + .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" + .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + add v25.2d,v25.2d,v21.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" + .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b + ext v7.16b,v17.16b,v18.16b,#8 + .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b + .inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" + .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + add v24.2d,v24.2d,v22.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" + .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b + ext v7.16b,v18.16b,v19.16b,#8 + .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b + .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b + add v0.2d,v3.2d,v4.2d // "D + T1" + .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + add v25.2d,v25.2d,v23.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" + .inst 0xcec08217 //sha512su0 v23.16b,v16.16b + ext v7.16b,v19.16b,v20.16b,#8 + .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b + .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b + add v3.2d,v2.2d,v1.2d // "D + T1" + .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + add v24.2d,v24.2d,v16.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" + .inst 0xcec08230 //sha512su0 v16.16b,v17.16b + ext v7.16b,v20.16b,v21.16b,#8 + 
.inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b + .inst 0xce678af0 //sha512su1 v16.16b,v23.16b,v7.16b + add v2.2d,v4.2d,v0.2d // "D + T1" + .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v25.2d,v25.2d,v17.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" + .inst 0xcec08251 //sha512su0 v17.16b,v18.16b + ext v7.16b,v21.16b,v22.16b,#8 + .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b + .inst 0xce678a11 //sha512su1 v17.16b,v16.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" + .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + add v24.2d,v24.2d,v18.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" + .inst 0xcec08272 //sha512su0 v18.16b,v19.16b + ext v7.16b,v22.16b,v23.16b,#8 + .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b + .inst 0xce678a32 //sha512su1 v18.16b,v17.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" + .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + add v25.2d,v25.2d,v19.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" + .inst 0xcec08293 //sha512su0 v19.16b,v20.16b + ext v7.16b,v23.16b,v16.16b,#8 + .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b + .inst 0xce678a53 //sha512su1 v19.16b,v18.16b,v7.16b + add v0.2d,v3.2d,v4.2d // "D + T1" + .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + add v24.2d,v24.2d,v20.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" + .inst 0xcec082b4 //sha512su0 v20.16b,v21.16b + ext v7.16b,v16.16b,v17.16b,#8 + .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b + .inst 0xce678a74 //sha512su1 v20.16b,v19.16b,v7.16b + add v3.2d,v2.2d,v1.2d // "D + T1" + .inst 
0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + add v25.2d,v25.2d,v21.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" + .inst 0xcec082d5 //sha512su0 v21.16b,v22.16b + ext v7.16b,v17.16b,v18.16b,#8 + .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b + .inst 0xce678a95 //sha512su1 v21.16b,v20.16b,v7.16b + add v2.2d,v4.2d,v0.2d // "D + T1" + .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v24.2d,v24.2d,v22.2d + ld1 {v25.2d},[x3],#16 + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v24.2d // "T1 + H + K512[i]" + .inst 0xcec082f6 //sha512su0 v22.16b,v23.16b + ext v7.16b,v18.16b,v19.16b,#8 + .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b + .inst 0xce678ab6 //sha512su1 v22.16b,v21.16b,v7.16b + add v4.2d,v1.2d,v3.2d // "D + T1" + .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + add v25.2d,v25.2d,v23.2d + ld1 {v24.2d},[x3],#16 + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v25.2d // "T1 + H + K512[i]" + .inst 0xcec08217 //sha512su0 v23.16b,v16.16b + ext v7.16b,v19.16b,v20.16b,#8 + .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b + .inst 0xce678ad7 //sha512su1 v23.16b,v22.16b,v7.16b + add v1.2d,v0.2d,v2.2d // "D + T1" + .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + ld1 {v25.2d},[x3],#16 + add v24.2d,v24.2d,v16.2d + ld1 {v16.16b},[x1],#16 // load next input + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v24.2d // "T1 + H + K512[i]" + .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b + rev64 v16.16b,v16.16b + add v0.2d,v3.2d,v4.2d // "D + T1" + .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + ld1 {v24.2d},[x3],#16 + add v25.2d,v25.2d,v17.2d + ld1 {v17.16b},[x1],#16 // load next input + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext 
v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v25.2d // "T1 + H + K512[i]" + .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b + rev64 v17.16b,v17.16b + add v3.2d,v2.2d,v1.2d // "D + T1" + .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + ld1 {v25.2d},[x3],#16 + add v24.2d,v24.2d,v18.2d + ld1 {v18.16b},[x1],#16 // load next input + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v24.2d // "T1 + H + K512[i]" + .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b + rev64 v18.16b,v18.16b + add v2.2d,v4.2d,v0.2d // "D + T1" + .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + ld1 {v24.2d},[x3],#16 + add v25.2d,v25.2d,v19.2d + ld1 {v19.16b},[x1],#16 // load next input + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v2.16b,v3.16b,#8 + ext v6.16b,v1.16b,v2.16b,#8 + add v3.2d,v3.2d,v25.2d // "T1 + H + K512[i]" + .inst 0xce6680a3 //sha512h v3.16b,v5.16b,v6.16b + rev64 v19.16b,v19.16b + add v4.2d,v1.2d,v3.2d // "D + T1" + .inst 0xce608423 //sha512h2 v3.16b,v1.16b,v0.16b + ld1 {v25.2d},[x3],#16 + add v24.2d,v24.2d,v20.2d + ld1 {v20.16b},[x1],#16 // load next input + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v4.16b,v2.16b,#8 + ext v6.16b,v0.16b,v4.16b,#8 + add v2.2d,v2.2d,v24.2d // "T1 + H + K512[i]" + .inst 0xce6680a2 //sha512h v2.16b,v5.16b,v6.16b + rev64 v20.16b,v20.16b + add v1.2d,v0.2d,v2.2d // "D + T1" + .inst 0xce638402 //sha512h2 v2.16b,v0.16b,v3.16b + ld1 {v24.2d},[x3],#16 + add v25.2d,v25.2d,v21.2d + ld1 {v21.16b},[x1],#16 // load next input + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v1.16b,v4.16b,#8 + ext v6.16b,v3.16b,v1.16b,#8 + add v4.2d,v4.2d,v25.2d // "T1 + H + K512[i]" + .inst 0xce6680a4 //sha512h v4.16b,v5.16b,v6.16b + rev64 v21.16b,v21.16b + add v0.2d,v3.2d,v4.2d // "D + T1" + .inst 0xce628464 //sha512h2 v4.16b,v3.16b,v2.16b + ld1 {v25.2d},[x3],#16 + add v24.2d,v24.2d,v22.2d + ld1 {v22.16b},[x1],#16 // load next input + ext v24.16b,v24.16b,v24.16b,#8 + ext v5.16b,v0.16b,v1.16b,#8 + ext 
v6.16b,v2.16b,v0.16b,#8 + add v1.2d,v1.2d,v24.2d // "T1 + H + K512[i]" + .inst 0xce6680a1 //sha512h v1.16b,v5.16b,v6.16b + rev64 v22.16b,v22.16b + add v3.2d,v2.2d,v1.2d // "D + T1" + .inst 0xce648441 //sha512h2 v1.16b,v2.16b,v4.16b + sub x3,x3,#80*8 // rewind + add v25.2d,v25.2d,v23.2d + ld1 {v23.16b},[x1],#16 // load next input + ext v25.16b,v25.16b,v25.16b,#8 + ext v5.16b,v3.16b,v0.16b,#8 + ext v6.16b,v4.16b,v3.16b,#8 + add v0.2d,v0.2d,v25.2d // "T1 + H + K512[i]" + .inst 0xce6680a0 //sha512h v0.16b,v5.16b,v6.16b + rev64 v23.16b,v23.16b + add v2.2d,v4.2d,v0.2d // "D + T1" + .inst 0xce618480 //sha512h2 v0.16b,v4.16b,v1.16b + add v0.2d,v0.2d,v26.2d // accumulate + add v1.2d,v1.2d,v27.2d + add v2.2d,v2.2d,v28.2d + add v3.2d,v3.2d,v29.2d + + cbnz x2,.Loop_hw + + st1 {v0.2d-v3.2d},[x0] // store context + + ldr x29,[sp],#16 + ret +.size zfs_sha512_block_armv8,.-zfs_sha512_block_armv8 +#endif diff --git a/sys/contrib/openzfs/module/icp/asm-arm/sha2/sha256-armv7.S b/sys/contrib/openzfs/module/icp/asm-arm/sha2/sha256-armv7.S new file mode 100644 index 000000000000..3ae66626df31 --- /dev/null +++ b/sys/contrib/openzfs/module/icp/asm-arm/sha2/sha256-armv7.S @@ -0,0 +1,2774 @@ +/* + * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de> + * - modified assembly to fit into OpenZFS + */ + +#if defined(__arm__) + +#ifndef __ARM_ARCH +# define __ARM_ARCH__ 7 +#else +# define __ARM_ARCH__ __ARM_ARCH +#endif + +#if defined(__thumb2__) +.syntax unified +.thumb +#else +.code 32 +#endif + +.text + +.type K256,%object +.align 5 +K256: +.word 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.word 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.word 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.word 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.word 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.word 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.word 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.word 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.word 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.word 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.word 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.word 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.word 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.word 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.word 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.size K256,.-K256 +.word 0 @ terminator + +.align 5 +.globl zfs_sha256_block_armv7 +.type zfs_sha256_block_armv7,%function +zfs_sha256_block_armv7: +.Lzfs_sha256_block_armv7: + +#if __ARM_ARCH__<7 && !defined(__thumb2__) + sub r3,pc,#8 @ zfs_sha256_block_armv7 +#else + adr r3,.Lzfs_sha256_block_armv7 +#endif + + add r2,r1,r2,lsl#6 @ len to point at the end of inp + stmdb sp!,{r0,r1,r2,r4-r11,lr} + ldmia r0,{r4,r5,r6,r7,r8,r9,r10,r11} + sub r14,r3,#256+32 @ K256 + sub sp,sp,#16*4 @ alloca(X[16]) +.Loop: +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ magic + eor r12,r12,r12 +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 0 +# if 0==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + eor 
r0,r0,r8,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 0 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 0==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r8,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#0*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 0==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? +#endif +#if 0<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#2*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#15*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 1 +# if 1==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r7,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 1 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 1==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r7,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#1*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ 
h+=Ch(e,f,g) +#if 1==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? +#endif +#if 1<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#3*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#0*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 2 +# if 2==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r6,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 2 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 2==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r6,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#2*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 2==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? 
+#endif +#if 2<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#4*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#1*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 3 +# if 3==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r5,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 3 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 3==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r5,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#3*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 3==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? 
+#endif +#if 3<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#5*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#2*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 4 +# if 4==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r4,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 4 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 4==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r4,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#4*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 4==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? 
+#endif +#if 4<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#6*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#3*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 5 +# if 5==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r11,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 5 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 5==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r11,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#5*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 5==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? 
+#endif +#if 5<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#7*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#4*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 6 +# if 6==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r10,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 6 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 6==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r10,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#6*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 6==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? 
+#endif +#if 6<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#8*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#5*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 7 +# if 7==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r9,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 7 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 7==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r9,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#7*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 7==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? 
+#endif +#if 7<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#9*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#6*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 8 +# if 8==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r8,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 8 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 8==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r8,r8,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r8,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#8*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 8==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? 
+#endif +#if 8<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#10*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#7*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 9 +# if 9==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r7,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 9 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 9==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r7,r7,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r7,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#9*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 9==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? 
+#endif +#if 9<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#11*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#8*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 10 +# if 10==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r6,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 10 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 10==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r6,r6,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r6,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#10*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 10==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? 
+#endif +#if 10<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#12*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#9*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 11 +# if 11==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r5,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 11 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 11==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r5,r5,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r5,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#11*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 11==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? 
+#endif +#if 11<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#13*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#10*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 12 +# if 12==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r4,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 12 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 12==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r4,r4,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r4,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#12*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 12==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? 
+#endif +#if 12<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#14*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#11*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 13 +# if 13==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r11,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 13 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 13==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r11,r11,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r11,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#13*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 13==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? 
+#endif +#if 13<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#15*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#12*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 14 +# if 14==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + eor r0,r0,r10,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 14 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + ldrb r12,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r12,lsl#8 + ldrb r12,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 14==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r10,r10,ror#5 + orr r2,r2,r12,lsl#24 + eor r0,r0,r10,ror#19 @ Sigma1(e) +#endif + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#14*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 14==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? 
+#endif +#if 14<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#0*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#13*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) +#if __ARM_ARCH__>=7 + @ ldr r2,[r1],#4 @ 15 +# if 15==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + eor r0,r0,r9,ror#19 @ Sigma1(e) +# ifndef __ARMEB__ + rev r2,r2 +# endif +#else + @ ldrb r2,[r1,#3] @ 15 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + ldrb r3,[r1,#2] + ldrb r0,[r1,#1] + orr r2,r2,r3,lsl#8 + ldrb r3,[r1],#4 + orr r2,r2,r0,lsl#16 +# if 15==15 + str r1,[sp,#17*4] @ make room for r1 +# endif + eor r0,r9,r9,ror#5 + orr r2,r2,r3,lsl#24 + eor r0,r0,r9,ror#19 @ Sigma1(e) +#endif + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#15*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 15==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? 
+#endif +#if 15<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#1*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#14*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) +.Lrounds_16_xx: + @ ldr r2,[sp,#1*4] @ 16 + @ ldr r1,[sp,#14*4] + mov r0,r2,ror#7 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#0*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#9*4] + + add r12,r12,r0 + eor r0,r8,r8,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r8,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#0*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 16==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? 
+#endif +#if 16<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#2*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#15*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#2*4] @ 17 + @ ldr r1,[sp,#15*4] + mov r0,r2,ror#7 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#1*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#10*4] + + add r3,r3,r0 + eor r0,r7,r7,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r7,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#1*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 17==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? 
+#endif +#if 17<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#3*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#0*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#3*4] @ 18 + @ ldr r1,[sp,#0*4] + mov r0,r2,ror#7 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#2*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#11*4] + + add r12,r12,r0 + eor r0,r6,r6,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r6,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#2*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 18==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? 
+#endif +#if 18<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#4*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#1*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#4*4] @ 19 + @ ldr r1,[sp,#1*4] + mov r0,r2,ror#7 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#3*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#12*4] + + add r3,r3,r0 + eor r0,r5,r5,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r5,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#3*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 19==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? 
+#endif +#if 19<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#5*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#2*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#5*4] @ 20 + @ ldr r1,[sp,#2*4] + mov r0,r2,ror#7 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#4*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#13*4] + + add r12,r12,r0 + eor r0,r4,r4,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r4,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#4*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 20==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? 
+#endif +#if 20<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#6*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#3*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#6*4] @ 21 + @ ldr r1,[sp,#3*4] + mov r0,r2,ror#7 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#5*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#14*4] + + add r3,r3,r0 + eor r0,r11,r11,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r11,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#5*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 21==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? 
+#endif +#if 21<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#7*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#4*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#7*4] @ 22 + @ ldr r1,[sp,#4*4] + mov r0,r2,ror#7 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#6*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#15*4] + + add r12,r12,r0 + eor r0,r10,r10,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r10,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#6*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 22==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? 
+#endif +#if 22<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#8*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#5*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#8*4] @ 23 + @ ldr r1,[sp,#5*4] + mov r0,r2,ror#7 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#7*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#0*4] + + add r3,r3,r0 + eor r0,r9,r9,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r9,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#7*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 23==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? 
+#endif +#if 23<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#9*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#6*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#9*4] @ 24 + @ ldr r1,[sp,#6*4] + mov r0,r2,ror#7 + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#8*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#1*4] + + add r12,r12,r0 + eor r0,r8,r8,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r8,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r11,r11,r2 @ h+=X[i] + str r2,[sp,#8*4] + eor r2,r9,r10 + add r11,r11,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r8 + add r11,r11,r12 @ h+=K256[i] + eor r2,r2,r10 @ Ch(e,f,g) + eor r0,r4,r4,ror#11 + add r11,r11,r2 @ h+=Ch(e,f,g) +#if 24==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? 
+#endif +#if 24<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r4,r5 @ a^b, b^c in next round +#else + ldr r2,[sp,#10*4] @ from future BODY_16_xx + eor r12,r4,r5 @ a^b, b^c in next round + ldr r1,[sp,#7*4] @ from future BODY_16_xx +#endif + eor r0,r0,r4,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r7,r7,r11 @ d+=h + eor r3,r3,r5 @ Maj(a,b,c) + add r11,r11,r0,ror#2 @ h+=Sigma0(a) + @ add r11,r11,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#10*4] @ 25 + @ ldr r1,[sp,#7*4] + mov r0,r2,ror#7 + add r11,r11,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#9*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#2*4] + + add r3,r3,r0 + eor r0,r7,r7,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r7,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r10,r10,r2 @ h+=X[i] + str r2,[sp,#9*4] + eor r2,r8,r9 + add r10,r10,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r7 + add r10,r10,r3 @ h+=K256[i] + eor r2,r2,r9 @ Ch(e,f,g) + eor r0,r11,r11,ror#11 + add r10,r10,r2 @ h+=Ch(e,f,g) +#if 25==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? 
+#endif +#if 25<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r11,r4 @ a^b, b^c in next round +#else + ldr r2,[sp,#11*4] @ from future BODY_16_xx + eor r3,r11,r4 @ a^b, b^c in next round + ldr r1,[sp,#8*4] @ from future BODY_16_xx +#endif + eor r0,r0,r11,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r6,r6,r10 @ d+=h + eor r12,r12,r4 @ Maj(a,b,c) + add r10,r10,r0,ror#2 @ h+=Sigma0(a) + @ add r10,r10,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#11*4] @ 26 + @ ldr r1,[sp,#8*4] + mov r0,r2,ror#7 + add r10,r10,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#10*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#3*4] + + add r12,r12,r0 + eor r0,r6,r6,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r6,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r9,r9,r2 @ h+=X[i] + str r2,[sp,#10*4] + eor r2,r7,r8 + add r9,r9,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r6 + add r9,r9,r12 @ h+=K256[i] + eor r2,r2,r8 @ Ch(e,f,g) + eor r0,r10,r10,ror#11 + add r9,r9,r2 @ h+=Ch(e,f,g) +#if 26==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? 
+#endif +#if 26<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r10,r11 @ a^b, b^c in next round +#else + ldr r2,[sp,#12*4] @ from future BODY_16_xx + eor r12,r10,r11 @ a^b, b^c in next round + ldr r1,[sp,#9*4] @ from future BODY_16_xx +#endif + eor r0,r0,r10,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r5,r5,r9 @ d+=h + eor r3,r3,r11 @ Maj(a,b,c) + add r9,r9,r0,ror#2 @ h+=Sigma0(a) + @ add r9,r9,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#12*4] @ 27 + @ ldr r1,[sp,#9*4] + mov r0,r2,ror#7 + add r9,r9,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#11*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#4*4] + + add r3,r3,r0 + eor r0,r5,r5,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r5,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r8,r8,r2 @ h+=X[i] + str r2,[sp,#11*4] + eor r2,r6,r7 + add r8,r8,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r5 + add r8,r8,r3 @ h+=K256[i] + eor r2,r2,r7 @ Ch(e,f,g) + eor r0,r9,r9,ror#11 + add r8,r8,r2 @ h+=Ch(e,f,g) +#if 27==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? 
+#endif +#if 27<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r9,r10 @ a^b, b^c in next round +#else + ldr r2,[sp,#13*4] @ from future BODY_16_xx + eor r3,r9,r10 @ a^b, b^c in next round + ldr r1,[sp,#10*4] @ from future BODY_16_xx +#endif + eor r0,r0,r9,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r4,r4,r8 @ d+=h + eor r12,r12,r10 @ Maj(a,b,c) + add r8,r8,r0,ror#2 @ h+=Sigma0(a) + @ add r8,r8,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#13*4] @ 28 + @ ldr r1,[sp,#10*4] + mov r0,r2,ror#7 + add r8,r8,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#12*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#5*4] + + add r12,r12,r0 + eor r0,r4,r4,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r4,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r7,r7,r2 @ h+=X[i] + str r2,[sp,#12*4] + eor r2,r5,r6 + add r7,r7,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r4 + add r7,r7,r12 @ h+=K256[i] + eor r2,r2,r6 @ Ch(e,f,g) + eor r0,r8,r8,ror#11 + add r7,r7,r2 @ h+=Ch(e,f,g) +#if 28==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? 
+#endif +#if 28<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r8,r9 @ a^b, b^c in next round +#else + ldr r2,[sp,#14*4] @ from future BODY_16_xx + eor r12,r8,r9 @ a^b, b^c in next round + ldr r1,[sp,#11*4] @ from future BODY_16_xx +#endif + eor r0,r0,r8,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r11,r11,r7 @ d+=h + eor r3,r3,r9 @ Maj(a,b,c) + add r7,r7,r0,ror#2 @ h+=Sigma0(a) + @ add r7,r7,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#14*4] @ 29 + @ ldr r1,[sp,#11*4] + mov r0,r2,ror#7 + add r7,r7,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#13*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#6*4] + + add r3,r3,r0 + eor r0,r11,r11,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r11,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r6,r6,r2 @ h+=X[i] + str r2,[sp,#13*4] + eor r2,r4,r5 + add r6,r6,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r11 + add r6,r6,r3 @ h+=K256[i] + eor r2,r2,r5 @ Ch(e,f,g) + eor r0,r7,r7,ror#11 + add r6,r6,r2 @ h+=Ch(e,f,g) +#if 29==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? 
+#endif +#if 29<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r7,r8 @ a^b, b^c in next round +#else + ldr r2,[sp,#15*4] @ from future BODY_16_xx + eor r3,r7,r8 @ a^b, b^c in next round + ldr r1,[sp,#12*4] @ from future BODY_16_xx +#endif + eor r0,r0,r7,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r10,r10,r6 @ d+=h + eor r12,r12,r8 @ Maj(a,b,c) + add r6,r6,r0,ror#2 @ h+=Sigma0(a) + @ add r6,r6,r12 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#15*4] @ 30 + @ ldr r1,[sp,#12*4] + mov r0,r2,ror#7 + add r6,r6,r12 @ h+=Maj(a,b,c) from the past + mov r12,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r12,r12,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#14*4] + eor r12,r12,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#7*4] + + add r12,r12,r0 + eor r0,r10,r10,ror#5 @ from BODY_00_15 + add r2,r2,r12 + eor r0,r0,r10,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r12,[r14],#4 @ *K256++ + add r5,r5,r2 @ h+=X[i] + str r2,[sp,#14*4] + eor r2,r11,r4 + add r5,r5,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r10 + add r5,r5,r12 @ h+=K256[i] + eor r2,r2,r4 @ Ch(e,f,g) + eor r0,r6,r6,ror#11 + add r5,r5,r2 @ h+=Ch(e,f,g) +#if 30==31 + and r12,r12,#0xff + cmp r12,#0xf2 @ done? 
+#endif +#if 30<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r12,r6,r7 @ a^b, b^c in next round +#else + ldr r2,[sp,#0*4] @ from future BODY_16_xx + eor r12,r6,r7 @ a^b, b^c in next round + ldr r1,[sp,#13*4] @ from future BODY_16_xx +#endif + eor r0,r0,r6,ror#20 @ Sigma0(a) + and r3,r3,r12 @ (b^c)&=(a^b) + add r9,r9,r5 @ d+=h + eor r3,r3,r7 @ Maj(a,b,c) + add r5,r5,r0,ror#2 @ h+=Sigma0(a) + @ add r5,r5,r3 @ h+=Maj(a,b,c) + @ ldr r2,[sp,#0*4] @ 31 + @ ldr r1,[sp,#13*4] + mov r0,r2,ror#7 + add r5,r5,r3 @ h+=Maj(a,b,c) from the past + mov r3,r1,ror#17 + eor r0,r0,r2,ror#18 + eor r3,r3,r1,ror#19 + eor r0,r0,r2,lsr#3 @ sigma0(X[i+1]) + ldr r2,[sp,#15*4] + eor r3,r3,r1,lsr#10 @ sigma1(X[i+14]) + ldr r1,[sp,#8*4] + + add r3,r3,r0 + eor r0,r9,r9,ror#5 @ from BODY_00_15 + add r2,r2,r3 + eor r0,r0,r9,ror#19 @ Sigma1(e) + add r2,r2,r1 @ X[i] + ldr r3,[r14],#4 @ *K256++ + add r4,r4,r2 @ h+=X[i] + str r2,[sp,#15*4] + eor r2,r10,r11 + add r4,r4,r0,ror#6 @ h+=Sigma1(e) + and r2,r2,r9 + add r4,r4,r3 @ h+=K256[i] + eor r2,r2,r11 @ Ch(e,f,g) + eor r0,r5,r5,ror#11 + add r4,r4,r2 @ h+=Ch(e,f,g) +#if 31==31 + and r3,r3,#0xff + cmp r3,#0xf2 @ done? 
+#endif +#if 31<15 +# if __ARM_ARCH__>=7 + ldr r2,[r1],#4 @ prefetch +# else + ldrb r2,[r1,#3] +# endif + eor r3,r5,r6 @ a^b, b^c in next round +#else + ldr r2,[sp,#1*4] @ from future BODY_16_xx + eor r3,r5,r6 @ a^b, b^c in next round + ldr r1,[sp,#14*4] @ from future BODY_16_xx +#endif + eor r0,r0,r5,ror#20 @ Sigma0(a) + and r12,r12,r3 @ (b^c)&=(a^b) + add r8,r8,r4 @ d+=h + eor r12,r12,r6 @ Maj(a,b,c) + add r4,r4,r0,ror#2 @ h+=Sigma0(a) + @ add r4,r4,r12 @ h+=Maj(a,b,c) +#ifdef __thumb2__ + ite eq @ Thumb2 thing, sanity check in ARM +#endif + ldreq r3,[sp,#16*4] @ pull ctx + bne .Lrounds_16_xx + + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldr r0,[r3,#0] + ldr r2,[r3,#4] + ldr r12,[r3,#8] + add r4,r4,r0 + ldr r0,[r3,#12] + add r5,r5,r2 + ldr r2,[r3,#16] + add r6,r6,r12 + ldr r12,[r3,#20] + add r7,r7,r0 + ldr r0,[r3,#24] + add r8,r8,r2 + ldr r2,[r3,#28] + add r9,r9,r12 + ldr r1,[sp,#17*4] @ pull inp + ldr r12,[sp,#18*4] @ pull inp+len + add r10,r10,r0 + add r11,r11,r2 + stmia r3,{r4,r5,r6,r7,r8,r9,r10,r11} + cmp r1,r12 + sub r14,r14,#256 @ rewind Ktbl + bne .Loop + + add sp,sp,#19*4 @ destroy frame +#if __ARM_ARCH__>=5 + ldmia sp!,{r4-r11,pc} +#else + ldmia sp!,{r4-r11,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet + .word 0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size zfs_sha256_block_armv7,.-zfs_sha256_block_armv7 + +#if __ARM_ARCH__ >= 7 +.arch armv7-a +.fpu neon + +.globl zfs_sha256_block_neon +.type zfs_sha256_block_neon,%function +.align 5 +.skip 16 +zfs_sha256_block_neon: +.LNEON: + stmdb sp!,{r4-r12,lr} + + sub r11,sp,#16*4+16 + adr r14,K256 + bic r11,r11,#15 @ align for 128-bit stores + mov r12,sp + mov sp,r11 @ alloca + add r2,r1,r2,lsl#6 @ len to point at the end of inp + + vld1.8 {q0},[r1]! + vld1.8 {q1},[r1]! + vld1.8 {q2},[r1]! + vld1.8 {q3},[r1]! + vld1.32 {q8},[r14,:128]! + vld1.32 {q9},[r14,:128]! + vld1.32 {q10},[r14,:128]! + vld1.32 {q11},[r14,:128]! 
+ vrev32.8 q0,q0 @ yes, even on + str r0,[sp,#64] + vrev32.8 q1,q1 @ big-endian + str r1,[sp,#68] + mov r1,sp + vrev32.8 q2,q2 + str r2,[sp,#72] + vrev32.8 q3,q3 + str r12,[sp,#76] @ save original sp + vadd.i32 q8,q8,q0 + vadd.i32 q9,q9,q1 + vst1.32 {q8},[r1,:128]! + vadd.i32 q10,q10,q2 + vst1.32 {q9},[r1,:128]! + vadd.i32 q11,q11,q3 + vst1.32 {q10},[r1,:128]! + vst1.32 {q11},[r1,:128]! + + ldmia r0,{r4-r11} + sub r1,r1,#64 + ldr r2,[sp,#0] + eor r12,r12,r12 + eor r3,r5,r6 + b .L_00_48 + +.align 4 +.L_00_48: + vext.8 q8,q0,q1,#4 + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + vext.8 q9,q2,q3,#4 + add r4,r4,r12 + and r2,r2,r8 + eor r12,r0,r8,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vadd.i32 q0,q0,q9 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + vshr.u32 q9,q8,#3 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#4] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + veor q9,q9,q10 + add r10,r10,r2 + vsli.32 q11,q8,#14 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + vshr.u32 d24,d7,#17 + add r11,r11,r3 + and r2,r2,r7 + veor q9,q9,q11 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + vsli.32 d24,d7,#15 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + vshr.u32 d25,d7,#10 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + vadd.i32 q0,q0,q9 + add r10,r10,r2 + ldr r2,[sp,#8] + veor d25,d25,d24 + and r12,r12,r3 + add r6,r6,r10 + vshr.u32 d24,d7,#19 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + vsli.32 d24,d7,#13 + add r9,r9,r2 + eor r2,r7,r8 + veor d25,d25,d24 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + vadd.i32 d0,d0,d25 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + vshr.u32 d24,d0,#17 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + vsli.32 d24,d0,#15 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + vshr.u32 d25,d0,#10 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + veor d25,d25,d24 + ldr r2,[sp,#12] + and r3,r3,r12 + vshr.u32 d24,d0,#19 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + vld1.32 {q8},[r14,:128]! 
+ add r8,r8,r2 + vsli.32 d24,d0,#13 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + veor d25,d25,d24 + add r9,r9,r3 + and r2,r2,r5 + vadd.i32 d1,d1,d25 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + vadd.i32 q8,q8,q0 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#16] + and r12,r12,r3 + add r4,r4,r8 + vst1.32 {q8},[r1,:128]! + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vext.8 q8,q1,q2,#4 + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + vext.8 q9,q3,q0,#4 + add r8,r8,r12 + and r2,r2,r4 + eor r12,r0,r4,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vadd.i32 q1,q1,q9 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + vshr.u32 q9,q8,#3 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#20] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + veor q9,q9,q10 + add r6,r6,r2 + vsli.32 q11,q8,#14 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + vshr.u32 d24,d1,#17 + add r7,r7,r3 + and r2,r2,r11 + veor q9,q9,q11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + vsli.32 d24,d1,#15 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + vshr.u32 d25,d1,#10 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + vadd.i32 q1,q1,q9 + add r6,r6,r2 + ldr r2,[sp,#24] + veor d25,d25,d24 + and r12,r12,r3 + add r10,r10,r6 + vshr.u32 d24,d1,#19 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + vsli.32 d24,d1,#13 + add r5,r5,r2 + eor r2,r11,r4 + veor d25,d25,d24 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + vadd.i32 d2,d2,d25 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + vshr.u32 d24,d2,#17 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + vsli.32 d24,d2,#15 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + vshr.u32 d25,d2,#10 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + veor d25,d25,d24 + ldr r2,[sp,#28] + and r3,r3,r12 + vshr.u32 d24,d2,#19 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + vld1.32 {q8},[r14,:128]! 
+ add r4,r4,r2 + vsli.32 d24,d2,#13 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + veor d25,d25,d24 + add r5,r5,r3 + and r2,r2,r9 + vadd.i32 d3,d3,d25 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + vadd.i32 q8,q8,q1 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[sp,#32] + and r12,r12,r3 + add r8,r8,r4 + vst1.32 {q8},[r1,:128]! + add r4,r4,r0,ror#2 + eor r12,r12,r6 + vext.8 q8,q2,q3,#4 + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + vext.8 q9,q0,q1,#4 + add r4,r4,r12 + and r2,r2,r8 + eor r12,r0,r8,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vadd.i32 q2,q2,q9 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + vshr.u32 q9,q8,#3 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#36] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + veor q9,q9,q10 + add r10,r10,r2 + vsli.32 q11,q8,#14 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + vshr.u32 d24,d3,#17 + add r11,r11,r3 + and r2,r2,r7 + veor q9,q9,q11 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + vsli.32 d24,d3,#15 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + vshr.u32 d25,d3,#10 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + vadd.i32 q2,q2,q9 + add r10,r10,r2 + ldr r2,[sp,#40] + veor d25,d25,d24 + and r12,r12,r3 + add r6,r6,r10 + vshr.u32 d24,d3,#19 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + vsli.32 d24,d3,#13 + add r9,r9,r2 + eor r2,r7,r8 + veor d25,d25,d24 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + vadd.i32 d4,d4,d25 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + vshr.u32 d24,d4,#17 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + vsli.32 d24,d4,#15 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + vshr.u32 d25,d4,#10 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + veor d25,d25,d24 + ldr r2,[sp,#44] + and r3,r3,r12 + vshr.u32 d24,d4,#19 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + vld1.32 {q8},[r14,:128]! 
+ add r8,r8,r2 + vsli.32 d24,d4,#13 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + veor d25,d25,d24 + add r9,r9,r3 + and r2,r2,r5 + vadd.i32 d5,d5,d25 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + vadd.i32 q8,q8,q2 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#48] + and r12,r12,r3 + add r4,r4,r8 + vst1.32 {q8},[r1,:128]! + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vext.8 q8,q3,q0,#4 + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + vext.8 q9,q1,q2,#4 + add r8,r8,r12 + and r2,r2,r4 + eor r12,r0,r4,ror#19 + vshr.u32 q10,q8,#7 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vadd.i32 q3,q3,q9 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + vshr.u32 q9,q8,#3 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vsli.32 q10,q8,#25 + ldr r2,[sp,#52] + and r3,r3,r12 + vshr.u32 q11,q8,#18 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + veor q9,q9,q10 + add r6,r6,r2 + vsli.32 q11,q8,#14 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + vshr.u32 d24,d5,#17 + add r7,r7,r3 + and r2,r2,r11 + veor q9,q9,q11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + vsli.32 d24,d5,#15 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + vshr.u32 d25,d5,#10 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + vadd.i32 q3,q3,q9 + add r6,r6,r2 + ldr r2,[sp,#56] + veor d25,d25,d24 + and r12,r12,r3 + add r10,r10,r6 + vshr.u32 d24,d5,#19 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + vsli.32 d24,d5,#13 + add r5,r5,r2 + eor r2,r11,r4 + veor d25,d25,d24 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + vadd.i32 d6,d6,d25 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + vshr.u32 d24,d6,#17 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + vsli.32 d24,d6,#15 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + vshr.u32 d25,d6,#10 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + veor d25,d25,d24 + ldr r2,[sp,#60] + and r3,r3,r12 + vshr.u32 d24,d6,#19 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + vld1.32 {q8},[r14,:128]! 
+ add r4,r4,r2 + vsli.32 d24,d6,#13 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + veor d25,d25,d24 + add r5,r5,r3 + and r2,r2,r9 + vadd.i32 d7,d7,d25 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + vadd.i32 q8,q8,q3 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[r14] + and r12,r12,r3 + add r8,r8,r4 + vst1.32 {q8},[r1,:128]! + add r4,r4,r0,ror#2 + eor r12,r12,r6 + teq r2,#0 @ check for K256 terminator + ldr r2,[sp,#0] + sub r1,r1,#64 + bne .L_00_48 + + ldr r1,[sp,#68] + ldr r0,[sp,#72] + sub r14,r14,#256 @ rewind r14 + teq r1,r0 + it eq + subeq r1,r1,#64 @ avoid SEGV + vld1.8 {q0},[r1]! @ load next input block + vld1.8 {q1},[r1]! + vld1.8 {q2},[r1]! + vld1.8 {q3},[r1]! + it ne + strne r1,[sp,#68] + mov r1,sp + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + add r4,r4,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r8 + eor r12,r0,r8,ror#19 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vrev32.8 q0,q0 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vadd.i32 q8,q8,q0 + ldr r2,[sp,#4] + and r3,r3,r12 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + add r10,r10,r2 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + add r11,r11,r3 + and r2,r2,r7 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + add r10,r10,r2 + ldr r2,[sp,#8] + and r12,r12,r3 + add r6,r6,r10 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + add r9,r9,r2 + eor r2,r7,r8 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + ldr r2,[sp,#12] + and r3,r3,r12 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + add r8,r8,r2 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + add r9,r9,r3 + and r2,r2,r5 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#16] + and 
r12,r12,r3 + add r4,r4,r8 + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vst1.32 {q8},[r1,:128]! + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + add r8,r8,r12 + vld1.32 {q8},[r14,:128]! + and r2,r2,r4 + eor r12,r0,r4,ror#19 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vrev32.8 q1,q1 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vadd.i32 q8,q8,q1 + ldr r2,[sp,#20] + and r3,r3,r12 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + add r6,r6,r2 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + add r7,r7,r3 + and r2,r2,r11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + add r6,r6,r2 + ldr r2,[sp,#24] + and r12,r12,r3 + add r10,r10,r6 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + add r5,r5,r2 + eor r2,r11,r4 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + ldr r2,[sp,#28] + and r3,r3,r12 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + add r4,r4,r2 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + add r5,r5,r3 + and r2,r2,r9 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[sp,#32] + and r12,r12,r3 + add r8,r8,r4 + add r4,r4,r0,ror#2 + eor r12,r12,r6 + vst1.32 {q8},[r1,:128]! + add r11,r11,r2 + eor r2,r9,r10 + eor r0,r8,r8,ror#5 + add r4,r4,r12 + vld1.32 {q8},[r14,:128]! 
+ and r2,r2,r8 + eor r12,r0,r8,ror#19 + eor r0,r4,r4,ror#11 + eor r2,r2,r10 + vrev32.8 q2,q2 + add r11,r11,r12,ror#6 + eor r12,r4,r5 + eor r0,r0,r4,ror#20 + add r11,r11,r2 + vadd.i32 q8,q8,q2 + ldr r2,[sp,#36] + and r3,r3,r12 + add r7,r7,r11 + add r11,r11,r0,ror#2 + eor r3,r3,r5 + add r10,r10,r2 + eor r2,r8,r9 + eor r0,r7,r7,ror#5 + add r11,r11,r3 + and r2,r2,r7 + eor r3,r0,r7,ror#19 + eor r0,r11,r11,ror#11 + eor r2,r2,r9 + add r10,r10,r3,ror#6 + eor r3,r11,r4 + eor r0,r0,r11,ror#20 + add r10,r10,r2 + ldr r2,[sp,#40] + and r12,r12,r3 + add r6,r6,r10 + add r10,r10,r0,ror#2 + eor r12,r12,r4 + add r9,r9,r2 + eor r2,r7,r8 + eor r0,r6,r6,ror#5 + add r10,r10,r12 + and r2,r2,r6 + eor r12,r0,r6,ror#19 + eor r0,r10,r10,ror#11 + eor r2,r2,r8 + add r9,r9,r12,ror#6 + eor r12,r10,r11 + eor r0,r0,r10,ror#20 + add r9,r9,r2 + ldr r2,[sp,#44] + and r3,r3,r12 + add r5,r5,r9 + add r9,r9,r0,ror#2 + eor r3,r3,r11 + add r8,r8,r2 + eor r2,r6,r7 + eor r0,r5,r5,ror#5 + add r9,r9,r3 + and r2,r2,r5 + eor r3,r0,r5,ror#19 + eor r0,r9,r9,ror#11 + eor r2,r2,r7 + add r8,r8,r3,ror#6 + eor r3,r9,r10 + eor r0,r0,r9,ror#20 + add r8,r8,r2 + ldr r2,[sp,#48] + and r12,r12,r3 + add r4,r4,r8 + add r8,r8,r0,ror#2 + eor r12,r12,r10 + vst1.32 {q8},[r1,:128]! + add r7,r7,r2 + eor r2,r5,r6 + eor r0,r4,r4,ror#5 + add r8,r8,r12 + vld1.32 {q8},[r14,:128]! 
+ and r2,r2,r4 + eor r12,r0,r4,ror#19 + eor r0,r8,r8,ror#11 + eor r2,r2,r6 + vrev32.8 q3,q3 + add r7,r7,r12,ror#6 + eor r12,r8,r9 + eor r0,r0,r8,ror#20 + add r7,r7,r2 + vadd.i32 q8,q8,q3 + ldr r2,[sp,#52] + and r3,r3,r12 + add r11,r11,r7 + add r7,r7,r0,ror#2 + eor r3,r3,r9 + add r6,r6,r2 + eor r2,r4,r5 + eor r0,r11,r11,ror#5 + add r7,r7,r3 + and r2,r2,r11 + eor r3,r0,r11,ror#19 + eor r0,r7,r7,ror#11 + eor r2,r2,r5 + add r6,r6,r3,ror#6 + eor r3,r7,r8 + eor r0,r0,r7,ror#20 + add r6,r6,r2 + ldr r2,[sp,#56] + and r12,r12,r3 + add r10,r10,r6 + add r6,r6,r0,ror#2 + eor r12,r12,r8 + add r5,r5,r2 + eor r2,r11,r4 + eor r0,r10,r10,ror#5 + add r6,r6,r12 + and r2,r2,r10 + eor r12,r0,r10,ror#19 + eor r0,r6,r6,ror#11 + eor r2,r2,r4 + add r5,r5,r12,ror#6 + eor r12,r6,r7 + eor r0,r0,r6,ror#20 + add r5,r5,r2 + ldr r2,[sp,#60] + and r3,r3,r12 + add r9,r9,r5 + add r5,r5,r0,ror#2 + eor r3,r3,r7 + add r4,r4,r2 + eor r2,r10,r11 + eor r0,r9,r9,ror#5 + add r5,r5,r3 + and r2,r2,r9 + eor r3,r0,r9,ror#19 + eor r0,r5,r5,ror#11 + eor r2,r2,r11 + add r4,r4,r3,ror#6 + eor r3,r5,r6 + eor r0,r0,r5,ror#20 + add r4,r4,r2 + ldr r2,[sp,#64] + and r12,r12,r3 + add r8,r8,r4 + add r4,r4,r0,ror#2 + eor r12,r12,r6 + vst1.32 {q8},[r1,:128]! 
+ ldr r0,[r2,#0] /* from here to the stmia: add the eight 32-bit words stored at [r2] to r4-r11 and write the sums back to the same eight words */ + add r4,r4,r12 @ h+=Maj(a,b,c) from the past + ldr r12,[r2,#4] + ldr r3,[r2,#8] + ldr r1,[r2,#12] + add r4,r4,r0 @ accumulate + ldr r0,[r2,#16] + add r5,r5,r12 + ldr r12,[r2,#20] + add r6,r6,r3 + ldr r3,[r2,#24] + add r7,r7,r1 + ldr r1,[r2,#28] + add r8,r8,r0 + str r4,[r2],#4 + add r9,r9,r12 + str r5,[r2],#4 + add r10,r10,r3 + str r6,[r2],#4 + add r11,r11,r1 + str r7,[r2],#4 + stmia r2,{r8-r11} + + ittte ne + movne r1,sp + ldrne r2,[sp,#0] + eorne r12,r12,r12 + ldreq sp,[sp,#76] @ restore original sp + itt ne + eorne r3,r5,r6 + bne .L_00_48 + + ldmia sp!,{r4-r12,pc} +.size zfs_sha256_block_neon,.-zfs_sha256_block_neon + /* INST(a,b,c,d) emits one hand-encoded 4-byte instruction as raw bytes; the Thumb-2 variant emits the bytes in the order c,d,a,b and ORs 0xc into d. The intended mnemonic is given in the @ comment after each use. */ +# if defined(__thumb2__) +# define INST(a,b,c,d) .byte c,d|0xc,a,b +# else +# define INST(a,b,c,d) .byte a,b,c,d +# endif + /* zfs_sha256_block_armv8: block routine built on the sha256h/sha256h2/sha256su0/sha256su1 instructions emitted through INST. As used below: r0 points at eight 32-bit state words (loaded into q0/q1 on entry, stored back from q0/q1 on exit), r1 points at the input, and r2 is a count of 64-byte blocks (converted to an end pointer as r1 + (r2 << 6)). The loop body runs once before r1 is compared with r2; there is no check for a zero block count. */ +.globl zfs_sha256_block_armv8 +.type zfs_sha256_block_armv8,%function +.align 5 +zfs_sha256_block_armv8: +.LARMv8: + vld1.32 {q0,q1},[r0] + sub r3,r3,#256+32 /* NOTE(review): r3 is the constant-table pointer for every vld1.32 {..},[r3] below, but nothing in this function writes r3 before this subtraction, so the pointer is derived from whatever r3 holds on entry. Presumably the caller is expected to supply an address 256+32 bytes past the constant table - confirm, or load the table address explicitly here. */ + add r2,r1,r2,lsl#6 @ len to point at the end of inp + b .Loop_v8 + +.align 4 +.Loop_v8: /* per iteration: load 64 input bytes into q8-q11 and byte-swap each 32-bit word, keep a copy of the incoming state in q14/q15, then run 16 groups that each add four words loaded from [r3] to four message words (q12/q13) and feed the sum to sha256h/sha256h2 */ + vld1.8 {q8-q9},[r1]! + vld1.8 {q10-q11},[r1]! + vld1.32 {q12},[r3]! + vrev32.8 q8,q8 + vrev32.8 q9,q9 + vrev32.8 q10,q10 + vrev32.8 q11,q11 + vmov q14,q0 @ offload + vmov q15,q1 + teq r1,r2 /* r1 has already been advanced past this block; no later instruction in the loop is a compare, so this is the test the bne .Loop_v8 at the bottom acts on */ + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q10 + INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 + vld1.32 {q12},[r3]! 
+ vadd.i32 q13,q13,q11 + INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q10 + INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q11 + INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + INST(0xe2,0x03,0xfa,0xf3) @ sha256su0 q8,q9 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe6,0x0c,0x64,0xf3) @ sha256su1 q8,q10,q11 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + INST(0xe4,0x23,0xfa,0xf3) @ sha256su0 q9,q10 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe0,0x2c,0x66,0xf3) @ sha256su1 q9,q11,q8 + vld1.32 {q13},[r3]! 
+ vadd.i32 q12,q12,q10 + INST(0xe6,0x43,0xfa,0xf3) @ sha256su0 q10,q11 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + INST(0xe2,0x4c,0x60,0xf3) @ sha256su1 q10,q8,q9 + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q11 + INST(0xe0,0x63,0xfa,0xf3) @ sha256su0 q11,q8 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + INST(0xe4,0x6c,0x62,0xf3) @ sha256su1 q11,q9,q10 + vld1.32 {q13},[r3]! + vadd.i32 q12,q12,q8 + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + + vld1.32 {q12},[r3]! + vadd.i32 q13,q13,q9 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + + vld1.32 {q13},[r3] + vadd.i32 q12,q12,q10 + sub r3,r3,#256-16 @ rewind /* the 15 post-incremented 16-byte loads in this iteration advanced r3 by 240 = 256-16 bytes; the 16th load just above has no writeback, so this returns r3 to its value at the top of the loop */ + vmov q2,q0 + INST(0x68,0x0c,0x02,0xf3) @ sha256h q0,q1,q12 + INST(0x68,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q12 + + vadd.i32 q13,q13,q11 + vmov q2,q0 + INST(0x6a,0x0c,0x02,0xf3) @ sha256h q0,q1,q13 + INST(0x6a,0x2c,0x14,0xf3) @ sha256h2 q1,q2,q13 + + vadd.i32 q0,q0,q14 + vadd.i32 q1,q1,q15 /* add back the state copy saved in q14/q15 at the top of the iteration */ + it ne + bne .Loop_v8 + + vst1.32 {q0,q1},[r0] /* write the updated eight state words back through r0 */ + + bx lr @ bx lr +.size zfs_sha256_block_armv8,.-zfs_sha256_block_armv8 + +#endif // #if __ARM_ARCH__ >= 7 +#endif // #if defined(__arm__) diff --git a/sys/contrib/openzfs/module/icp/asm-arm/sha2/sha512-armv7.S b/sys/contrib/openzfs/module/icp/asm-arm/sha2/sha512-armv7.S new file mode 100644 index 000000000000..66d7dd3cf0f7 --- /dev/null +++ b/sys/contrib/openzfs/module/icp/asm-arm/sha2/sha512-armv7.S @@ -0,0 +1,1827 @@ +/* + * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de> + * - modified assembly to fit into OpenZFS + */ + +#if defined(__arm__) + /* __ARM_ARCH__ mirrors the compiler-provided __ARM_ARCH and falls back to 7 when that macro is not defined */ +#ifndef __ARM_ARCH +# define __ARM_ARCH__ 7 +#else +# define __ARM_ARCH__ __ARM_ARCH +#endif + /* VFP_ABI_PUSH/VFP_ABI_POP save and restore d8-d15 on the stack; both expand to nothing when __KERNEL__ is defined */ +#ifndef __KERNEL__ +# define VFP_ABI_PUSH vstmdb sp!,{d8-d15} +# define VFP_ABI_POP vldmia sp!,{d8-d15} +#else +# define VFP_ABI_PUSH +# define VFP_ABI_POP +#endif + /* LO/HI are the byte offsets of the low and high 32-bit halves of a 64-bit value in memory. WORD64 emits two 64-bit constants as .word pairs with the halves in the matching order: low half first when __ARMEL__ is defined, high half first otherwise. */ +#ifdef __ARMEL__ +# define LO 0 +# define HI 4 +# define WORD64(hi0,lo0,hi1,lo1) .word lo0,hi0, lo1,hi1 +#else +# define HI 0 +# define LO 4 +# define WORD64(hi0,lo0,hi1,lo1) .word hi0,lo0, hi1,lo1 +#endif + /* Thumb-2 builds use unified syntax and map adrl onto adr; other builds assemble as 32-bit ARM code */ +#if defined(__thumb2__) +.syntax unified +.thumb +# define adrl adr +#else +.code 32 +#endif + +.text + /* K512: 32-byte-aligned table of 40 WORD64 rows, i.e. 80 64-bit constants (640 bytes), followed by a zero terminator word. zfs_sha512_block_armv7 reaches it through r14. */ +.type K512,%object +.align 5 +K512: + WORD64(0x428a2f98,0xd728ae22, 0x71374491,0x23ef65cd) + WORD64(0xb5c0fbcf,0xec4d3b2f, 0xe9b5dba5,0x8189dbbc) + WORD64(0x3956c25b,0xf348b538, 0x59f111f1,0xb605d019) + WORD64(0x923f82a4,0xaf194f9b, 0xab1c5ed5,0xda6d8118) + WORD64(0xd807aa98,0xa3030242, 0x12835b01,0x45706fbe) + WORD64(0x243185be,0x4ee4b28c, 0x550c7dc3,0xd5ffb4e2) + WORD64(0x72be5d74,0xf27b896f, 0x80deb1fe,0x3b1696b1) + WORD64(0x9bdc06a7,0x25c71235, 0xc19bf174,0xcf692694) + WORD64(0xe49b69c1,0x9ef14ad2, 0xefbe4786,0x384f25e3) + WORD64(0x0fc19dc6,0x8b8cd5b5, 0x240ca1cc,0x77ac9c65) + WORD64(0x2de92c6f,0x592b0275, 0x4a7484aa,0x6ea6e483) + WORD64(0x5cb0a9dc,0xbd41fbd4, 0x76f988da,0x831153b5) + WORD64(0x983e5152,0xee66dfab, 0xa831c66d,0x2db43210) + WORD64(0xb00327c8,0x98fb213f, 0xbf597fc7,0xbeef0ee4) + WORD64(0xc6e00bf3,0x3da88fc2, 
0xd5a79147,0x930aa725) + WORD64(0x06ca6351,0xe003826f, 0x14292967,0x0a0e6e70) + WORD64(0x27b70a85,0x46d22ffc, 0x2e1b2138,0x5c26c926) + WORD64(0x4d2c6dfc,0x5ac42aed, 0x53380d13,0x9d95b3df) + WORD64(0x650a7354,0x8baf63de, 0x766a0abb,0x3c77b2a8) + WORD64(0x81c2c92e,0x47edaee6, 0x92722c85,0x1482353b) + WORD64(0xa2bfe8a1,0x4cf10364, 0xa81a664b,0xbc423001) + WORD64(0xc24b8b70,0xd0f89791, 0xc76c51a3,0x0654be30) + WORD64(0xd192e819,0xd6ef5218, 0xd6990624,0x5565a910) + WORD64(0xf40e3585,0x5771202a, 0x106aa070,0x32bbd1b8) + WORD64(0x19a4c116,0xb8d2d0c8, 0x1e376c08,0x5141ab53) + WORD64(0x2748774c,0xdf8eeb99, 0x34b0bcb5,0xe19b48a8) + WORD64(0x391c0cb3,0xc5c95a63, 0x4ed8aa4a,0xe3418acb) + WORD64(0x5b9cca4f,0x7763e373, 0x682e6ff3,0xd6b2b8a3) + WORD64(0x748f82ee,0x5defb2fc, 0x78a5636f,0x43172f60) + WORD64(0x84c87814,0xa1f0ab72, 0x8cc70208,0x1a6439ec) + WORD64(0x90befffa,0x23631e28, 0xa4506ceb,0xde82bde9) + WORD64(0xbef9a3f7,0xb2c67915, 0xc67178f2,0xe372532b) + WORD64(0xca273ece,0xea26619c, 0xd186b8c7,0x21c0c207) + WORD64(0xeada7dd6,0xcde0eb1e, 0xf57d4f7f,0xee6ed178) + WORD64(0x06f067aa,0x72176fba, 0x0a637dc5,0xa2c898a6) + WORD64(0x113f9804,0xbef90dae, 0x1b710b35,0x131c471b) + WORD64(0x28db77f5,0x23047d84, 0x32caab7b,0x40c72493) + WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c) + WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a) + WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817) +.size K512,.-K512 +.word 0 @ terminator + /* layout note: 640 table bytes + the 4-byte terminator = 644, which the .align 5 below pads to 672; the function entry label therefore sits 672 bytes after K512, and that is the constant subtracted from r3 below. Changing the table, the terminator or the alignment requires updating that constant. */ +.align 5 +.globl zfs_sha512_block_armv7 +.type zfs_sha512_block_armv7,%function +zfs_sha512_block_armv7: +.Lzfs_sha512_block_armv7: + /* r3 = address of the entry label above; it is only used to locate K512 */ +#if __ARM_ARCH__<7 && !defined(__thumb2__) + sub r3,pc,#8 @ zfs_sha512_block_armv7 +#else + adr r3,.Lzfs_sha512_block_armv7 +#endif + + add r2,r1,r2,lsl#7 @ len to point at the end of inp /* r2 is scaled by 128, i.e. it counts 128-byte blocks on entry */ + stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} + sub r14,r3,#672 @ K512 + sub sp,sp,#9*8 + + ldr r7,[r0,#32+LO] + ldr r8,[r0,#32+HI] + ldr r9, [r0,#48+LO] + ldr r10, [r0,#48+HI] + ldr r11, [r0,#56+LO] + ldr r12, [r0,#56+HI] +.Loop: + 
str r9, [sp,#48+0] + str r10, [sp,#48+4] + str r11, [sp,#56+0] + str r12, [sp,#56+4] + ldr r5,[r0,#0+LO] + ldr r6,[r0,#0+HI] + ldr r3,[r0,#8+LO] + ldr r4,[r0,#8+HI] + ldr r9, [r0,#16+LO] + ldr r10, [r0,#16+HI] + ldr r11, [r0,#24+LO] + ldr r12, [r0,#24+HI] + str r3,[sp,#8+0] + str r4,[sp,#8+4] + str r9, [sp,#16+0] + str r10, [sp,#16+4] + str r11, [sp,#24+0] + str r12, [sp,#24+4] + ldr r3,[r0,#40+LO] + ldr r4,[r0,#40+HI] + str r3,[sp,#40+0] + str r4,[sp,#40+4] + +.L00_15: +#if __ARM_ARCH__<7 + ldrb r3,[r1,#7] + ldrb r9, [r1,#6] + ldrb r10, [r1,#5] + ldrb r11, [r1,#4] + ldrb r4,[r1,#3] + ldrb r12, [r1,#2] + orr r3,r3,r9,lsl#8 + ldrb r9, [r1,#1] + orr r3,r3,r10,lsl#16 + ldrb r10, [r1],#8 + orr r3,r3,r11,lsl#24 + orr r4,r4,r12,lsl#8 + orr r4,r4,r9,lsl#16 + orr r4,r4,r10,lsl#24 +#else + ldr r3,[r1,#4] + ldr r4,[r1],#8 +#ifdef __ARMEL__ + rev r3,r3 + rev r4,r4 +#endif +#endif + @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41)) + @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23 + @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23 + mov r9,r7,lsr#14 + str r3,[sp,#64+0] + mov r10,r8,lsr#14 + str r4,[sp,#64+4] + eor r9,r9,r8,lsl#18 + ldr r11,[sp,#56+0] @ h.lo + eor r10,r10,r7,lsl#18 + ldr r12,[sp,#56+4] @ h.hi + eor r9,r9,r7,lsr#18 + eor r10,r10,r8,lsr#18 + eor r9,r9,r8,lsl#14 + eor r10,r10,r7,lsl#14 + eor r9,r9,r8,lsr#9 + eor r10,r10,r7,lsr#9 + eor r9,r9,r7,lsl#23 + eor r10,r10,r8,lsl#23 @ Sigma1(e) + adds r3,r3,r9 + ldr r9,[sp,#40+0] @ f.lo + adc r4,r4,r10 @ T += Sigma1(e) + ldr r10,[sp,#40+4] @ f.hi + adds r3,r3,r11 + ldr r11,[sp,#48+0] @ g.lo + adc r4,r4,r12 @ T += h + ldr r12,[sp,#48+4] @ g.hi + + eor r9,r9,r11 + str r7,[sp,#32+0] + eor r10,r10,r12 + str r8,[sp,#32+4] + and r9,r9,r7 + str r5,[sp,#0+0] + and r10,r10,r8 + str r6,[sp,#0+4] + eor r9,r9,r11 + ldr r11,[r14,#LO] @ K[i].lo + eor r10,r10,r12 @ Ch(e,f,g) + ldr r12,[r14,#HI] @ K[i].hi + + adds r3,r3,r9 + ldr r7,[sp,#24+0] @ d.lo + adc r4,r4,r10 @ T += Ch(e,f,g) + ldr r8,[sp,#24+4] @ d.hi + adds r3,r3,r11 + 
and r9,r11,#0xff + adc r4,r4,r12 @ T += K[i] + adds r7,r7,r3 + ldr r11,[sp,#8+0] @ b.lo + adc r8,r8,r4 @ d += T + teq r9,#148 + + ldr r12,[sp,#16+0] @ c.lo +#ifdef __thumb2__ + it eq @ Thumb2 thing, sanity check in ARM +#endif + orreq r14,r14,#1 + @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) + @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25 + @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25 + mov r9,r5,lsr#28 + mov r10,r6,lsr#28 + eor r9,r9,r6,lsl#4 + eor r10,r10,r5,lsl#4 + eor r9,r9,r6,lsr#2 + eor r10,r10,r5,lsr#2 + eor r9,r9,r5,lsl#30 + eor r10,r10,r6,lsl#30 + eor r9,r9,r6,lsr#7 + eor r10,r10,r5,lsr#7 + eor r9,r9,r5,lsl#25 + eor r10,r10,r6,lsl#25 @ Sigma0(a) + adds r3,r3,r9 + and r9,r5,r11 + adc r4,r4,r10 @ T += Sigma0(a) + + ldr r10,[sp,#8+4] @ b.hi + orr r5,r5,r11 + ldr r11,[sp,#16+4] @ c.hi + and r5,r5,r12 + and r12,r6,r10 + orr r6,r6,r10 + orr r5,r5,r9 @ Maj(a,b,c).lo + and r6,r6,r11 + adds r5,r5,r3 + orr r6,r6,r12 @ Maj(a,b,c).hi + sub sp,sp,#8 + adc r6,r6,r4 @ h += T + tst r14,#1 + add r14,r14,#8 + tst r14,#1 + beq .L00_15 + ldr r9,[sp,#184+0] + ldr r10,[sp,#184+4] + bic r14,r14,#1 +.L16_79: + @ sigma0(x) (ROTR((x),1) ^ ROTR((x),8) ^ ((x)>>7)) + @ LO lo>>1^hi<<31 ^ lo>>8^hi<<24 ^ lo>>7^hi<<25 + @ HI hi>>1^lo<<31 ^ hi>>8^lo<<24 ^ hi>>7 + mov r3,r9,lsr#1 + ldr r11,[sp,#80+0] + mov r4,r10,lsr#1 + ldr r12,[sp,#80+4] + eor r3,r3,r10,lsl#31 + eor r4,r4,r9,lsl#31 + eor r3,r3,r9,lsr#8 + eor r4,r4,r10,lsr#8 + eor r3,r3,r10,lsl#24 + eor r4,r4,r9,lsl#24 + eor r3,r3,r9,lsr#7 + eor r4,r4,r10,lsr#7 + eor r3,r3,r10,lsl#25 + + @ sigma1(x) (ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6)) + @ LO lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26 + @ HI hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6 + mov r9,r11,lsr#19 + mov r10,r12,lsr#19 + eor r9,r9,r12,lsl#13 + eor r10,r10,r11,lsl#13 + eor r9,r9,r12,lsr#29 + eor r10,r10,r11,lsr#29 + eor r9,r9,r11,lsl#3 + eor r10,r10,r12,lsl#3 + eor r9,r9,r11,lsr#6 + eor r10,r10,r12,lsr#6 + ldr r11,[sp,#120+0] + eor r9,r9,r12,lsl#26 + + ldr 
r12,[sp,#120+4] + adds r3,r3,r9 + ldr r9,[sp,#192+0] + adc r4,r4,r10 + + ldr r10,[sp,#192+4] + adds r3,r3,r11 + adc r4,r4,r12 + adds r3,r3,r9 + adc r4,r4,r10 + @ Sigma1(x) (ROTR((x),14) ^ ROTR((x),18) ^ ROTR((x),41)) + @ LO lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23 + @ HI hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23 + mov r9,r7,lsr#14 + str r3,[sp,#64+0] + mov r10,r8,lsr#14 + str r4,[sp,#64+4] + eor r9,r9,r8,lsl#18 + ldr r11,[sp,#56+0] @ h.lo + eor r10,r10,r7,lsl#18 + ldr r12,[sp,#56+4] @ h.hi + eor r9,r9,r7,lsr#18 + eor r10,r10,r8,lsr#18 + eor r9,r9,r8,lsl#14 + eor r10,r10,r7,lsl#14 + eor r9,r9,r8,lsr#9 + eor r10,r10,r7,lsr#9 + eor r9,r9,r7,lsl#23 + eor r10,r10,r8,lsl#23 @ Sigma1(e) + adds r3,r3,r9 + ldr r9,[sp,#40+0] @ f.lo + adc r4,r4,r10 @ T += Sigma1(e) + ldr r10,[sp,#40+4] @ f.hi + adds r3,r3,r11 + ldr r11,[sp,#48+0] @ g.lo + adc r4,r4,r12 @ T += h + ldr r12,[sp,#48+4] @ g.hi + + eor r9,r9,r11 + str r7,[sp,#32+0] + eor r10,r10,r12 + str r8,[sp,#32+4] + and r9,r9,r7 + str r5,[sp,#0+0] + and r10,r10,r8 + str r6,[sp,#0+4] + eor r9,r9,r11 + ldr r11,[r14,#LO] @ K[i].lo + eor r10,r10,r12 @ Ch(e,f,g) + ldr r12,[r14,#HI] @ K[i].hi + + adds r3,r3,r9 + ldr r7,[sp,#24+0] @ d.lo + adc r4,r4,r10 @ T += Ch(e,f,g) + ldr r8,[sp,#24+4] @ d.hi + adds r3,r3,r11 + and r9,r11,#0xff + adc r4,r4,r12 @ T += K[i] + adds r7,r7,r3 + ldr r11,[sp,#8+0] @ b.lo + adc r8,r8,r4 @ d += T + teq r9,#23 + + ldr r12,[sp,#16+0] @ c.lo +#ifdef __thumb2__ + it eq @ Thumb2 thing, sanity check in ARM +#endif + orreq r14,r14,#1 + @ Sigma0(x) (ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39)) + @ LO lo>>28^hi<<4 ^ hi>>2^lo<<30 ^ hi>>7^lo<<25 + @ HI hi>>28^lo<<4 ^ lo>>2^hi<<30 ^ lo>>7^hi<<25 + mov r9,r5,lsr#28 + mov r10,r6,lsr#28 + eor r9,r9,r6,lsl#4 + eor r10,r10,r5,lsl#4 + eor r9,r9,r6,lsr#2 + eor r10,r10,r5,lsr#2 + eor r9,r9,r5,lsl#30 + eor r10,r10,r6,lsl#30 + eor r9,r9,r6,lsr#7 + eor r10,r10,r5,lsr#7 + eor r9,r9,r5,lsl#25 + eor r10,r10,r6,lsl#25 @ Sigma0(a) + adds r3,r3,r9 + and r9,r5,r11 + adc 
r4,r4,r10 @ T += Sigma0(a) + + ldr r10,[sp,#8+4] @ b.hi + orr r5,r5,r11 + ldr r11,[sp,#16+4] @ c.hi + and r5,r5,r12 + and r12,r6,r10 + orr r6,r6,r10 + orr r5,r5,r9 @ Maj(a,b,c).lo + and r6,r6,r11 + adds r5,r5,r3 + orr r6,r6,r12 @ Maj(a,b,c).hi + sub sp,sp,#8 + adc r6,r6,r4 @ h += T + tst r14,#1 + add r14,r14,#8 +#ifdef __thumb2__ + ittt eq @ Thumb2 thing, sanity check in ARM +#endif + ldreq r9,[sp,#184+0] + ldreq r10,[sp,#184+4] + beq .L16_79 + bic r14,r14,#1 + + ldr r3,[sp,#8+0] + ldr r4,[sp,#8+4] + ldr r9, [r0,#0+LO] + ldr r10, [r0,#0+HI] + ldr r11, [r0,#8+LO] + ldr r12, [r0,#8+HI] + adds r9,r5,r9 + str r9, [r0,#0+LO] + adc r10,r6,r10 + str r10, [r0,#0+HI] + adds r11,r3,r11 + str r11, [r0,#8+LO] + adc r12,r4,r12 + str r12, [r0,#8+HI] + + ldr r5,[sp,#16+0] + ldr r6,[sp,#16+4] + ldr r3,[sp,#24+0] + ldr r4,[sp,#24+4] + ldr r9, [r0,#16+LO] + ldr r10, [r0,#16+HI] + ldr r11, [r0,#24+LO] + ldr r12, [r0,#24+HI] + adds r9,r5,r9 + str r9, [r0,#16+LO] + adc r10,r6,r10 + str r10, [r0,#16+HI] + adds r11,r3,r11 + str r11, [r0,#24+LO] + adc r12,r4,r12 + str r12, [r0,#24+HI] + + ldr r3,[sp,#40+0] + ldr r4,[sp,#40+4] + ldr r9, [r0,#32+LO] + ldr r10, [r0,#32+HI] + ldr r11, [r0,#40+LO] + ldr r12, [r0,#40+HI] + adds r7,r7,r9 + str r7,[r0,#32+LO] + adc r8,r8,r10 + str r8,[r0,#32+HI] + adds r11,r3,r11 + str r11, [r0,#40+LO] + adc r12,r4,r12 + str r12, [r0,#40+HI] + + ldr r5,[sp,#48+0] + ldr r6,[sp,#48+4] + ldr r3,[sp,#56+0] + ldr r4,[sp,#56+4] + ldr r9, [r0,#48+LO] + ldr r10, [r0,#48+HI] + ldr r11, [r0,#56+LO] + ldr r12, [r0,#56+HI] + adds r9,r5,r9 + str r9, [r0,#48+LO] + adc r10,r6,r10 + str r10, [r0,#48+HI] + adds r11,r3,r11 + str r11, [r0,#56+LO] + adc r12,r4,r12 + str r12, [r0,#56+HI] + + add sp,sp,#640 + sub r14,r14,#640 + + teq r1,r2 + bne .Loop + + add sp,sp,#8*9 @ destroy frame + +#if __ARM_ARCH__>=5 + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc} +#else + ldmia sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} + tst lr,#1 + moveq pc,lr @ be binary compatible with V4, yet +.word 
0xe12fff1e @ interoperable with Thumb ISA:-) +#endif +.size zfs_sha512_block_armv7,.-zfs_sha512_block_armv7 + +#if __ARM_ARCH__ >= 7 +.arch armv7-a +.fpu neon + +.globl zfs_sha512_block_neon +.type zfs_sha512_block_neon,%function +.align 4 +zfs_sha512_block_neon: +.LNEON: + dmb @ errata #451034 on early Cortex A8 + add r2,r1,r2,lsl#7 @ len to point at the end of inp + adr r3,K512 + VFP_ABI_PUSH + vldmia r0,{d16,d17,d18,d19,d20,d21,d22,d23} @ load context +.Loop_neon: + vshr.u64 d24,d20,#14 @ 0 +#if 0<16 + vld1.64 {d0},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d20,#18 +#if 0>0 + vadd.i64 d16,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d20,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d20,#50 + vsli.64 d25,d20,#46 + vmov d29,d20 + vsli.64 d26,d20,#23 +#if 0<16 && defined(__ARMEL__) + vrev64.8 d0,d0 +#endif + veor d25,d24 + vbsl d29,d21,d22 @ Ch(e,f,g) + vshr.u64 d24,d16,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d23 + vshr.u64 d25,d16,#34 + vsli.64 d24,d16,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d16,#39 + vadd.i64 d28,d0 + vsli.64 d25,d16,#30 + veor d30,d16,d17 + vsli.64 d26,d16,#25 + veor d23,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d18,d17 @ Maj(a,b,c) + veor d23,d26 @ Sigma0(a) + vadd.i64 d19,d27 + vadd.i64 d30,d27 + @ vadd.i64 d23,d30 + vshr.u64 d24,d19,#14 @ 1 +#if 1<16 + vld1.64 {d1},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d19,#18 +#if 1>0 + vadd.i64 d23,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d19,#41 + vld1.64 {d28},[r3,:64]! 
@ K[i++] + vsli.64 d24,d19,#50 + vsli.64 d25,d19,#46 + vmov d29,d19 + vsli.64 d26,d19,#23 +#if 1<16 && defined(__ARMEL__) + vrev64.8 d1,d1 +#endif + veor d25,d24 + vbsl d29,d20,d21 @ Ch(e,f,g) + vshr.u64 d24,d23,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d22 + vshr.u64 d25,d23,#34 + vsli.64 d24,d23,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d23,#39 + vadd.i64 d28,d1 + vsli.64 d25,d23,#30 + veor d30,d23,d16 + vsli.64 d26,d23,#25 + veor d22,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d17,d16 @ Maj(a,b,c) + veor d22,d26 @ Sigma0(a) + vadd.i64 d18,d27 + vadd.i64 d30,d27 + @ vadd.i64 d22,d30 + vshr.u64 d24,d18,#14 @ 2 +#if 2<16 + vld1.64 {d2},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d18,#18 +#if 2>0 + vadd.i64 d22,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d18,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d18,#50 + vsli.64 d25,d18,#46 + vmov d29,d18 + vsli.64 d26,d18,#23 +#if 2<16 && defined(__ARMEL__) + vrev64.8 d2,d2 +#endif + veor d25,d24 + vbsl d29,d19,d20 @ Ch(e,f,g) + vshr.u64 d24,d22,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d21 + vshr.u64 d25,d22,#34 + vsli.64 d24,d22,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d22,#39 + vadd.i64 d28,d2 + vsli.64 d25,d22,#30 + veor d30,d22,d23 + vsli.64 d26,d22,#25 + veor d21,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d16,d23 @ Maj(a,b,c) + veor d21,d26 @ Sigma0(a) + vadd.i64 d17,d27 + vadd.i64 d30,d27 + @ vadd.i64 d21,d30 + vshr.u64 d24,d17,#14 @ 3 +#if 3<16 + vld1.64 {d3},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d17,#18 +#if 3>0 + vadd.i64 d21,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d17,#41 + vld1.64 {d28},[r3,:64]! 
@ K[i++] + vsli.64 d24,d17,#50 + vsli.64 d25,d17,#46 + vmov d29,d17 + vsli.64 d26,d17,#23 +#if 3<16 && defined(__ARMEL__) + vrev64.8 d3,d3 +#endif + veor d25,d24 + vbsl d29,d18,d19 @ Ch(e,f,g) + vshr.u64 d24,d21,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d20 + vshr.u64 d25,d21,#34 + vsli.64 d24,d21,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d21,#39 + vadd.i64 d28,d3 + vsli.64 d25,d21,#30 + veor d30,d21,d22 + vsli.64 d26,d21,#25 + veor d20,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d23,d22 @ Maj(a,b,c) + veor d20,d26 @ Sigma0(a) + vadd.i64 d16,d27 + vadd.i64 d30,d27 + @ vadd.i64 d20,d30 + vshr.u64 d24,d16,#14 @ 4 +#if 4<16 + vld1.64 {d4},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d16,#18 +#if 4>0 + vadd.i64 d20,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d16,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d16,#50 + vsli.64 d25,d16,#46 + vmov d29,d16 + vsli.64 d26,d16,#23 +#if 4<16 && defined(__ARMEL__) + vrev64.8 d4,d4 +#endif + veor d25,d24 + vbsl d29,d17,d18 @ Ch(e,f,g) + vshr.u64 d24,d20,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d19 + vshr.u64 d25,d20,#34 + vsli.64 d24,d20,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d20,#39 + vadd.i64 d28,d4 + vsli.64 d25,d20,#30 + veor d30,d20,d21 + vsli.64 d26,d20,#25 + veor d19,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d22,d21 @ Maj(a,b,c) + veor d19,d26 @ Sigma0(a) + vadd.i64 d23,d27 + vadd.i64 d30,d27 + @ vadd.i64 d19,d30 + vshr.u64 d24,d23,#14 @ 5 +#if 5<16 + vld1.64 {d5},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d23,#18 +#if 5>0 + vadd.i64 d19,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d23,#41 + vld1.64 {d28},[r3,:64]! 
@ K[i++] + vsli.64 d24,d23,#50 + vsli.64 d25,d23,#46 + vmov d29,d23 + vsli.64 d26,d23,#23 +#if 5<16 && defined(__ARMEL__) + vrev64.8 d5,d5 +#endif + veor d25,d24 + vbsl d29,d16,d17 @ Ch(e,f,g) + vshr.u64 d24,d19,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d18 + vshr.u64 d25,d19,#34 + vsli.64 d24,d19,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d19,#39 + vadd.i64 d28,d5 + vsli.64 d25,d19,#30 + veor d30,d19,d20 + vsli.64 d26,d19,#25 + veor d18,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d21,d20 @ Maj(a,b,c) + veor d18,d26 @ Sigma0(a) + vadd.i64 d22,d27 + vadd.i64 d30,d27 + @ vadd.i64 d18,d30 + vshr.u64 d24,d22,#14 @ 6 +#if 6<16 + vld1.64 {d6},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d22,#18 +#if 6>0 + vadd.i64 d18,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d22,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d22,#50 + vsli.64 d25,d22,#46 + vmov d29,d22 + vsli.64 d26,d22,#23 +#if 6<16 && defined(__ARMEL__) + vrev64.8 d6,d6 +#endif + veor d25,d24 + vbsl d29,d23,d16 @ Ch(e,f,g) + vshr.u64 d24,d18,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d17 + vshr.u64 d25,d18,#34 + vsli.64 d24,d18,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d18,#39 + vadd.i64 d28,d6 + vsli.64 d25,d18,#30 + veor d30,d18,d19 + vsli.64 d26,d18,#25 + veor d17,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d20,d19 @ Maj(a,b,c) + veor d17,d26 @ Sigma0(a) + vadd.i64 d21,d27 + vadd.i64 d30,d27 + @ vadd.i64 d17,d30 + vshr.u64 d24,d21,#14 @ 7 +#if 7<16 + vld1.64 {d7},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d21,#18 +#if 7>0 + vadd.i64 d17,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d21,#41 + vld1.64 {d28},[r3,:64]! 
@ K[i++] + vsli.64 d24,d21,#50 + vsli.64 d25,d21,#46 + vmov d29,d21 + vsli.64 d26,d21,#23 +#if 7<16 && defined(__ARMEL__) + vrev64.8 d7,d7 +#endif + veor d25,d24 + vbsl d29,d22,d23 @ Ch(e,f,g) + vshr.u64 d24,d17,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d16 + vshr.u64 d25,d17,#34 + vsli.64 d24,d17,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d17,#39 + vadd.i64 d28,d7 + vsli.64 d25,d17,#30 + veor d30,d17,d18 + vsli.64 d26,d17,#25 + veor d16,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d19,d18 @ Maj(a,b,c) + veor d16,d26 @ Sigma0(a) + vadd.i64 d20,d27 + vadd.i64 d30,d27 + @ vadd.i64 d16,d30 + vshr.u64 d24,d20,#14 @ 8 +#if 8<16 + vld1.64 {d8},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d20,#18 +#if 8>0 + vadd.i64 d16,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d20,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d20,#50 + vsli.64 d25,d20,#46 + vmov d29,d20 + vsli.64 d26,d20,#23 +#if 8<16 && defined(__ARMEL__) + vrev64.8 d8,d8 +#endif + veor d25,d24 + vbsl d29,d21,d22 @ Ch(e,f,g) + vshr.u64 d24,d16,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d23 + vshr.u64 d25,d16,#34 + vsli.64 d24,d16,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d16,#39 + vadd.i64 d28,d8 + vsli.64 d25,d16,#30 + veor d30,d16,d17 + vsli.64 d26,d16,#25 + veor d23,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d18,d17 @ Maj(a,b,c) + veor d23,d26 @ Sigma0(a) + vadd.i64 d19,d27 + vadd.i64 d30,d27 + @ vadd.i64 d23,d30 + vshr.u64 d24,d19,#14 @ 9 +#if 9<16 + vld1.64 {d9},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d19,#18 +#if 9>0 + vadd.i64 d23,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d19,#41 + vld1.64 {d28},[r3,:64]! 
@ K[i++] + vsli.64 d24,d19,#50 + vsli.64 d25,d19,#46 + vmov d29,d19 + vsli.64 d26,d19,#23 +#if 9<16 && defined(__ARMEL__) + vrev64.8 d9,d9 +#endif + veor d25,d24 + vbsl d29,d20,d21 @ Ch(e,f,g) + vshr.u64 d24,d23,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d22 + vshr.u64 d25,d23,#34 + vsli.64 d24,d23,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d23,#39 + vadd.i64 d28,d9 + vsli.64 d25,d23,#30 + veor d30,d23,d16 + vsli.64 d26,d23,#25 + veor d22,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d17,d16 @ Maj(a,b,c) + veor d22,d26 @ Sigma0(a) + vadd.i64 d18,d27 + vadd.i64 d30,d27 + @ vadd.i64 d22,d30 + vshr.u64 d24,d18,#14 @ 10 +#if 10<16 + vld1.64 {d10},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d18,#18 +#if 10>0 + vadd.i64 d22,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d18,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d18,#50 + vsli.64 d25,d18,#46 + vmov d29,d18 + vsli.64 d26,d18,#23 +#if 10<16 && defined(__ARMEL__) + vrev64.8 d10,d10 +#endif + veor d25,d24 + vbsl d29,d19,d20 @ Ch(e,f,g) + vshr.u64 d24,d22,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d21 + vshr.u64 d25,d22,#34 + vsli.64 d24,d22,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d22,#39 + vadd.i64 d28,d10 + vsli.64 d25,d22,#30 + veor d30,d22,d23 + vsli.64 d26,d22,#25 + veor d21,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d16,d23 @ Maj(a,b,c) + veor d21,d26 @ Sigma0(a) + vadd.i64 d17,d27 + vadd.i64 d30,d27 + @ vadd.i64 d21,d30 + vshr.u64 d24,d17,#14 @ 11 +#if 11<16 + vld1.64 {d11},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d17,#18 +#if 11>0 + vadd.i64 d21,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d17,#41 + vld1.64 {d28},[r3,:64]! 
@ K[i++] + vsli.64 d24,d17,#50 + vsli.64 d25,d17,#46 + vmov d29,d17 + vsli.64 d26,d17,#23 +#if 11<16 && defined(__ARMEL__) + vrev64.8 d11,d11 +#endif + veor d25,d24 + vbsl d29,d18,d19 @ Ch(e,f,g) + vshr.u64 d24,d21,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d20 + vshr.u64 d25,d21,#34 + vsli.64 d24,d21,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d21,#39 + vadd.i64 d28,d11 + vsli.64 d25,d21,#30 + veor d30,d21,d22 + vsli.64 d26,d21,#25 + veor d20,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d23,d22 @ Maj(a,b,c) + veor d20,d26 @ Sigma0(a) + vadd.i64 d16,d27 + vadd.i64 d30,d27 + @ vadd.i64 d20,d30 + vshr.u64 d24,d16,#14 @ 12 +#if 12<16 + vld1.64 {d12},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d16,#18 +#if 12>0 + vadd.i64 d20,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d16,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d16,#50 + vsli.64 d25,d16,#46 + vmov d29,d16 + vsli.64 d26,d16,#23 +#if 12<16 && defined(__ARMEL__) + vrev64.8 d12,d12 +#endif + veor d25,d24 + vbsl d29,d17,d18 @ Ch(e,f,g) + vshr.u64 d24,d20,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d19 + vshr.u64 d25,d20,#34 + vsli.64 d24,d20,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d20,#39 + vadd.i64 d28,d12 + vsli.64 d25,d20,#30 + veor d30,d20,d21 + vsli.64 d26,d20,#25 + veor d19,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d22,d21 @ Maj(a,b,c) + veor d19,d26 @ Sigma0(a) + vadd.i64 d23,d27 + vadd.i64 d30,d27 + @ vadd.i64 d19,d30 + vshr.u64 d24,d23,#14 @ 13 +#if 13<16 + vld1.64 {d13},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d23,#18 +#if 13>0 + vadd.i64 d19,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d23,#41 + vld1.64 {d28},[r3,:64]! 
@ K[i++] + vsli.64 d24,d23,#50 + vsli.64 d25,d23,#46 + vmov d29,d23 + vsli.64 d26,d23,#23 +#if 13<16 && defined(__ARMEL__) + vrev64.8 d13,d13 +#endif + veor d25,d24 + vbsl d29,d16,d17 @ Ch(e,f,g) + vshr.u64 d24,d19,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d18 + vshr.u64 d25,d19,#34 + vsli.64 d24,d19,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d19,#39 + vadd.i64 d28,d13 + vsli.64 d25,d19,#30 + veor d30,d19,d20 + vsli.64 d26,d19,#25 + veor d18,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d21,d20 @ Maj(a,b,c) + veor d18,d26 @ Sigma0(a) + vadd.i64 d22,d27 + vadd.i64 d30,d27 + @ vadd.i64 d18,d30 + vshr.u64 d24,d22,#14 @ 14 +#if 14<16 + vld1.64 {d14},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d22,#18 +#if 14>0 + vadd.i64 d18,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d22,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d22,#50 + vsli.64 d25,d22,#46 + vmov d29,d22 + vsli.64 d26,d22,#23 +#if 14<16 && defined(__ARMEL__) + vrev64.8 d14,d14 +#endif + veor d25,d24 + vbsl d29,d23,d16 @ Ch(e,f,g) + vshr.u64 d24,d18,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d17 + vshr.u64 d25,d18,#34 + vsli.64 d24,d18,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d18,#39 + vadd.i64 d28,d14 + vsli.64 d25,d18,#30 + veor d30,d18,d19 + vsli.64 d26,d18,#25 + veor d17,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d20,d19 @ Maj(a,b,c) + veor d17,d26 @ Sigma0(a) + vadd.i64 d21,d27 + vadd.i64 d30,d27 + @ vadd.i64 d17,d30 + vshr.u64 d24,d21,#14 @ 15 +#if 15<16 + vld1.64 {d15},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d21,#18 +#if 15>0 + vadd.i64 d17,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d21,#41 + vld1.64 {d28},[r3,:64]! 
@ K[i++] + vsli.64 d24,d21,#50 + vsli.64 d25,d21,#46 + vmov d29,d21 + vsli.64 d26,d21,#23 +#if 15<16 && defined(__ARMEL__) + vrev64.8 d15,d15 +#endif + veor d25,d24 + vbsl d29,d22,d23 @ Ch(e,f,g) + vshr.u64 d24,d17,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d16 + vshr.u64 d25,d17,#34 + vsli.64 d24,d17,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d17,#39 + vadd.i64 d28,d15 + vsli.64 d25,d17,#30 + veor d30,d17,d18 + vsli.64 d26,d17,#25 + veor d16,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d19,d18 @ Maj(a,b,c) + veor d16,d26 @ Sigma0(a) + vadd.i64 d20,d27 + vadd.i64 d30,d27 + @ vadd.i64 d16,d30 + mov r12,#4 +.L16_79_neon: + subs r12,#1 + vshr.u64 q12,q7,#19 + vshr.u64 q13,q7,#61 + vadd.i64 d16,d30 @ h+=Maj from the past + vshr.u64 q15,q7,#6 + vsli.64 q12,q7,#45 + vext.8 q14,q0,q1,#8 @ X[i+1] + vsli.64 q13,q7,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q0,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q4,q5,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d20,#14 @ from NEON_00_15 + vadd.i64 q0,q14 + vshr.u64 d25,d20,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d20,#41 @ from NEON_00_15 + vadd.i64 q0,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d20,#50 + vsli.64 d25,d20,#46 + vmov d29,d20 + vsli.64 d26,d20,#23 +#if 16<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d21,d22 @ Ch(e,f,g) + vshr.u64 d24,d16,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d23 + vshr.u64 d25,d16,#34 + vsli.64 d24,d16,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d16,#39 + vadd.i64 d28,d0 + vsli.64 d25,d16,#30 + veor d30,d16,d17 + vsli.64 d26,d16,#25 + veor d23,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d18,d17 @ Maj(a,b,c) + veor d23,d26 @ Sigma0(a) + vadd.i64 d19,d27 + vadd.i64 d30,d27 + @ vadd.i64 d23,d30 + vshr.u64 d24,d19,#14 @ 17 +#if 17<16 + vld1.64 {d1},[r1]! 
@ handles unaligned +#endif + vshr.u64 d25,d19,#18 +#if 17>0 + vadd.i64 d23,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d19,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d19,#50 + vsli.64 d25,d19,#46 + vmov d29,d19 + vsli.64 d26,d19,#23 +#if 17<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d20,d21 @ Ch(e,f,g) + vshr.u64 d24,d23,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d22 + vshr.u64 d25,d23,#34 + vsli.64 d24,d23,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d23,#39 + vadd.i64 d28,d1 + vsli.64 d25,d23,#30 + veor d30,d23,d16 + vsli.64 d26,d23,#25 + veor d22,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d17,d16 @ Maj(a,b,c) + veor d22,d26 @ Sigma0(a) + vadd.i64 d18,d27 + vadd.i64 d30,d27 + @ vadd.i64 d22,d30 + vshr.u64 q12,q0,#19 + vshr.u64 q13,q0,#61 + vadd.i64 d22,d30 @ h+=Maj from the past + vshr.u64 q15,q0,#6 + vsli.64 q12,q0,#45 + vext.8 q14,q1,q2,#8 @ X[i+1] + vsli.64 q13,q0,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q1,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q5,q6,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d18,#14 @ from NEON_00_15 + vadd.i64 q1,q14 + vshr.u64 d25,d18,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d18,#41 @ from NEON_00_15 + vadd.i64 q1,q15 + vld1.64 {d28},[r3,:64]! 
@ K[i++] + vsli.64 d24,d18,#50 + vsli.64 d25,d18,#46 + vmov d29,d18 + vsli.64 d26,d18,#23 +#if 18<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d19,d20 @ Ch(e,f,g) + vshr.u64 d24,d22,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d21 + vshr.u64 d25,d22,#34 + vsli.64 d24,d22,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d22,#39 + vadd.i64 d28,d2 + vsli.64 d25,d22,#30 + veor d30,d22,d23 + vsli.64 d26,d22,#25 + veor d21,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d16,d23 @ Maj(a,b,c) + veor d21,d26 @ Sigma0(a) + vadd.i64 d17,d27 + vadd.i64 d30,d27 + @ vadd.i64 d21,d30 + vshr.u64 d24,d17,#14 @ 19 +#if 19<16 + vld1.64 {d3},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d17,#18 +#if 19>0 + vadd.i64 d21,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d17,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d17,#50 + vsli.64 d25,d17,#46 + vmov d29,d17 + vsli.64 d26,d17,#23 +#if 19<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d18,d19 @ Ch(e,f,g) + vshr.u64 d24,d21,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d20 + vshr.u64 d25,d21,#34 + vsli.64 d24,d21,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d21,#39 + vadd.i64 d28,d3 + vsli.64 d25,d21,#30 + veor d30,d21,d22 + vsli.64 d26,d21,#25 + veor d20,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d23,d22 @ Maj(a,b,c) + veor d20,d26 @ Sigma0(a) + vadd.i64 d16,d27 + vadd.i64 d30,d27 + @ vadd.i64 d20,d30 + vshr.u64 q12,q1,#19 + vshr.u64 q13,q1,#61 + vadd.i64 d20,d30 @ h+=Maj from the past + vshr.u64 q15,q1,#6 + vsli.64 q12,q1,#45 + vext.8 q14,q2,q3,#8 @ X[i+1] + vsli.64 q13,q1,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q2,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q6,q7,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d16,#14 @ from NEON_00_15 + vadd.i64 q2,q14 + vshr.u64 d25,d16,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d16,#41 @ from NEON_00_15 + vadd.i64 q2,q15 + vld1.64 
{d28},[r3,:64]! @ K[i++] + vsli.64 d24,d16,#50 + vsli.64 d25,d16,#46 + vmov d29,d16 + vsli.64 d26,d16,#23 +#if 20<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d17,d18 @ Ch(e,f,g) + vshr.u64 d24,d20,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d19 + vshr.u64 d25,d20,#34 + vsli.64 d24,d20,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d20,#39 + vadd.i64 d28,d4 + vsli.64 d25,d20,#30 + veor d30,d20,d21 + vsli.64 d26,d20,#25 + veor d19,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d22,d21 @ Maj(a,b,c) + veor d19,d26 @ Sigma0(a) + vadd.i64 d23,d27 + vadd.i64 d30,d27 + @ vadd.i64 d19,d30 + vshr.u64 d24,d23,#14 @ 21 +#if 21<16 + vld1.64 {d5},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d23,#18 +#if 21>0 + vadd.i64 d19,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d23,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d23,#50 + vsli.64 d25,d23,#46 + vmov d29,d23 + vsli.64 d26,d23,#23 +#if 21<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d16,d17 @ Ch(e,f,g) + vshr.u64 d24,d19,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d18 + vshr.u64 d25,d19,#34 + vsli.64 d24,d19,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d19,#39 + vadd.i64 d28,d5 + vsli.64 d25,d19,#30 + veor d30,d19,d20 + vsli.64 d26,d19,#25 + veor d18,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d21,d20 @ Maj(a,b,c) + veor d18,d26 @ Sigma0(a) + vadd.i64 d22,d27 + vadd.i64 d30,d27 + @ vadd.i64 d18,d30 + vshr.u64 q12,q2,#19 + vshr.u64 q13,q2,#61 + vadd.i64 d18,d30 @ h+=Maj from the past + vshr.u64 q15,q2,#6 + vsli.64 q12,q2,#45 + vext.8 q14,q3,q4,#8 @ X[i+1] + vsli.64 q13,q2,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q3,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q7,q0,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d22,#14 @ from NEON_00_15 + vadd.i64 q3,q14 + vshr.u64 d25,d22,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d22,#41 @ from NEON_00_15 + vadd.i64 
q3,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d22,#50 + vsli.64 d25,d22,#46 + vmov d29,d22 + vsli.64 d26,d22,#23 +#if 22<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d23,d16 @ Ch(e,f,g) + vshr.u64 d24,d18,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d17 + vshr.u64 d25,d18,#34 + vsli.64 d24,d18,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d18,#39 + vadd.i64 d28,d6 + vsli.64 d25,d18,#30 + veor d30,d18,d19 + vsli.64 d26,d18,#25 + veor d17,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d20,d19 @ Maj(a,b,c) + veor d17,d26 @ Sigma0(a) + vadd.i64 d21,d27 + vadd.i64 d30,d27 + @ vadd.i64 d17,d30 + vshr.u64 d24,d21,#14 @ 23 +#if 23<16 + vld1.64 {d7},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d21,#18 +#if 23>0 + vadd.i64 d17,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d21,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d21,#50 + vsli.64 d25,d21,#46 + vmov d29,d21 + vsli.64 d26,d21,#23 +#if 23<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d22,d23 @ Ch(e,f,g) + vshr.u64 d24,d17,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d16 + vshr.u64 d25,d17,#34 + vsli.64 d24,d17,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d17,#39 + vadd.i64 d28,d7 + vsli.64 d25,d17,#30 + veor d30,d17,d18 + vsli.64 d26,d17,#25 + veor d16,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d19,d18 @ Maj(a,b,c) + veor d16,d26 @ Sigma0(a) + vadd.i64 d20,d27 + vadd.i64 d30,d27 + @ vadd.i64 d16,d30 + vshr.u64 q12,q3,#19 + vshr.u64 q13,q3,#61 + vadd.i64 d16,d30 @ h+=Maj from the past + vshr.u64 q15,q3,#6 + vsli.64 q12,q3,#45 + vext.8 q14,q4,q5,#8 @ X[i+1] + vsli.64 q13,q3,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q4,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q0,q1,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d20,#14 @ from NEON_00_15 + vadd.i64 q4,q14 + vshr.u64 d25,d20,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d20,#41 @ from 
NEON_00_15 + vadd.i64 q4,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d20,#50 + vsli.64 d25,d20,#46 + vmov d29,d20 + vsli.64 d26,d20,#23 +#if 24<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d21,d22 @ Ch(e,f,g) + vshr.u64 d24,d16,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d23 + vshr.u64 d25,d16,#34 + vsli.64 d24,d16,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d16,#39 + vadd.i64 d28,d8 + vsli.64 d25,d16,#30 + veor d30,d16,d17 + vsli.64 d26,d16,#25 + veor d23,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d18,d17 @ Maj(a,b,c) + veor d23,d26 @ Sigma0(a) + vadd.i64 d19,d27 + vadd.i64 d30,d27 + @ vadd.i64 d23,d30 + vshr.u64 d24,d19,#14 @ 25 +#if 25<16 + vld1.64 {d9},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d19,#18 +#if 25>0 + vadd.i64 d23,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d19,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d19,#50 + vsli.64 d25,d19,#46 + vmov d29,d19 + vsli.64 d26,d19,#23 +#if 25<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d20,d21 @ Ch(e,f,g) + vshr.u64 d24,d23,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d22 + vshr.u64 d25,d23,#34 + vsli.64 d24,d23,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d23,#39 + vadd.i64 d28,d9 + vsli.64 d25,d23,#30 + veor d30,d23,d16 + vsli.64 d26,d23,#25 + veor d22,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d17,d16 @ Maj(a,b,c) + veor d22,d26 @ Sigma0(a) + vadd.i64 d18,d27 + vadd.i64 d30,d27 + @ vadd.i64 d22,d30 + vshr.u64 q12,q4,#19 + vshr.u64 q13,q4,#61 + vadd.i64 d22,d30 @ h+=Maj from the past + vshr.u64 q15,q4,#6 + vsli.64 q12,q4,#45 + vext.8 q14,q5,q6,#8 @ X[i+1] + vsli.64 q13,q4,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q5,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q1,q2,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d18,#14 @ from NEON_00_15 + vadd.i64 q5,q14 + vshr.u64 d25,d18,#18 @ from NEON_00_15 + veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 
d26,d18,#41 @ from NEON_00_15 + vadd.i64 q5,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d18,#50 + vsli.64 d25,d18,#46 + vmov d29,d18 + vsli.64 d26,d18,#23 +#if 26<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d19,d20 @ Ch(e,f,g) + vshr.u64 d24,d22,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d21 + vshr.u64 d25,d22,#34 + vsli.64 d24,d22,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d22,#39 + vadd.i64 d28,d10 + vsli.64 d25,d22,#30 + veor d30,d22,d23 + vsli.64 d26,d22,#25 + veor d21,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d16,d23 @ Maj(a,b,c) + veor d21,d26 @ Sigma0(a) + vadd.i64 d17,d27 + vadd.i64 d30,d27 + @ vadd.i64 d21,d30 + vshr.u64 d24,d17,#14 @ 27 +#if 27<16 + vld1.64 {d11},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d17,#18 +#if 27>0 + vadd.i64 d21,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d17,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d17,#50 + vsli.64 d25,d17,#46 + vmov d29,d17 + vsli.64 d26,d17,#23 +#if 27<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d18,d19 @ Ch(e,f,g) + vshr.u64 d24,d21,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d20 + vshr.u64 d25,d21,#34 + vsli.64 d24,d21,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d21,#39 + vadd.i64 d28,d11 + vsli.64 d25,d21,#30 + veor d30,d21,d22 + vsli.64 d26,d21,#25 + veor d20,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d23,d22 @ Maj(a,b,c) + veor d20,d26 @ Sigma0(a) + vadd.i64 d16,d27 + vadd.i64 d30,d27 + @ vadd.i64 d20,d30 + vshr.u64 q12,q5,#19 + vshr.u64 q13,q5,#61 + vadd.i64 d20,d30 @ h+=Maj from the past + vshr.u64 q15,q5,#6 + vsli.64 q12,q5,#45 + vext.8 q14,q6,q7,#8 @ X[i+1] + vsli.64 q13,q5,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q6,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q2,q3,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d16,#14 @ from NEON_00_15 + vadd.i64 q6,q14 + vshr.u64 d25,d16,#18 @ from NEON_00_15 + veor q15,q13 @ 
sigma0(X[i+1]) + vshr.u64 d26,d16,#41 @ from NEON_00_15 + vadd.i64 q6,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d16,#50 + vsli.64 d25,d16,#46 + vmov d29,d16 + vsli.64 d26,d16,#23 +#if 28<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d17,d18 @ Ch(e,f,g) + vshr.u64 d24,d20,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d19 + vshr.u64 d25,d20,#34 + vsli.64 d24,d20,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d20,#39 + vadd.i64 d28,d12 + vsli.64 d25,d20,#30 + veor d30,d20,d21 + vsli.64 d26,d20,#25 + veor d19,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d22,d21 @ Maj(a,b,c) + veor d19,d26 @ Sigma0(a) + vadd.i64 d23,d27 + vadd.i64 d30,d27 + @ vadd.i64 d19,d30 + vshr.u64 d24,d23,#14 @ 29 +#if 29<16 + vld1.64 {d13},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d23,#18 +#if 29>0 + vadd.i64 d19,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d23,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d23,#50 + vsli.64 d25,d23,#46 + vmov d29,d23 + vsli.64 d26,d23,#23 +#if 29<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d16,d17 @ Ch(e,f,g) + vshr.u64 d24,d19,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d18 + vshr.u64 d25,d19,#34 + vsli.64 d24,d19,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d19,#39 + vadd.i64 d28,d13 + vsli.64 d25,d19,#30 + veor d30,d19,d20 + vsli.64 d26,d19,#25 + veor d18,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d21,d20 @ Maj(a,b,c) + veor d18,d26 @ Sigma0(a) + vadd.i64 d22,d27 + vadd.i64 d30,d27 + @ vadd.i64 d18,d30 + vshr.u64 q12,q6,#19 + vshr.u64 q13,q6,#61 + vadd.i64 d18,d30 @ h+=Maj from the past + vshr.u64 q15,q6,#6 + vsli.64 q12,q6,#45 + vext.8 q14,q7,q0,#8 @ X[i+1] + vsli.64 q13,q6,#3 + veor q15,q12 + vshr.u64 q12,q14,#1 + veor q15,q13 @ sigma1(X[i+14]) + vshr.u64 q13,q14,#8 + vadd.i64 q7,q15 + vshr.u64 q15,q14,#7 + vsli.64 q12,q14,#63 + vsli.64 q13,q14,#56 + vext.8 q14,q3,q4,#8 @ X[i+9] + veor q15,q12 + vshr.u64 d24,d22,#14 @ from NEON_00_15 + vadd.i64 q7,q14 + vshr.u64 d25,d22,#18 @ from NEON_00_15 + 
veor q15,q13 @ sigma0(X[i+1]) + vshr.u64 d26,d22,#41 @ from NEON_00_15 + vadd.i64 q7,q15 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d22,#50 + vsli.64 d25,d22,#46 + vmov d29,d22 + vsli.64 d26,d22,#23 +#if 30<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d23,d16 @ Ch(e,f,g) + vshr.u64 d24,d18,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d17 + vshr.u64 d25,d18,#34 + vsli.64 d24,d18,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d18,#39 + vadd.i64 d28,d14 + vsli.64 d25,d18,#30 + veor d30,d18,d19 + vsli.64 d26,d18,#25 + veor d17,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d20,d19 @ Maj(a,b,c) + veor d17,d26 @ Sigma0(a) + vadd.i64 d21,d27 + vadd.i64 d30,d27 + @ vadd.i64 d17,d30 + vshr.u64 d24,d21,#14 @ 31 +#if 31<16 + vld1.64 {d15},[r1]! @ handles unaligned +#endif + vshr.u64 d25,d21,#18 +#if 31>0 + vadd.i64 d17,d30 @ h+=Maj from the past +#endif + vshr.u64 d26,d21,#41 + vld1.64 {d28},[r3,:64]! @ K[i++] + vsli.64 d24,d21,#50 + vsli.64 d25,d21,#46 + vmov d29,d21 + vsli.64 d26,d21,#23 +#if 31<16 && defined(__ARMEL__) + vrev64.8 , +#endif + veor d25,d24 + vbsl d29,d22,d23 @ Ch(e,f,g) + vshr.u64 d24,d17,#28 + veor d26,d25 @ Sigma1(e) + vadd.i64 d27,d29,d16 + vshr.u64 d25,d17,#34 + vsli.64 d24,d17,#36 + vadd.i64 d27,d26 + vshr.u64 d26,d17,#39 + vadd.i64 d28,d15 + vsli.64 d25,d17,#30 + veor d30,d17,d18 + vsli.64 d26,d17,#25 + veor d16,d24,d25 + vadd.i64 d27,d28 + vbsl d30,d19,d18 @ Maj(a,b,c) + veor d16,d26 @ Sigma0(a) + vadd.i64 d20,d27 + vadd.i64 d30,d27 + @ vadd.i64 d16,d30 + bne .L16_79_neon + + vadd.i64 d16,d30 @ h+=Maj from the past + vldmia r0,{d24,d25,d26,d27,d28,d29,d30,d31} @ load context to temp + vadd.i64 q8,q12 @ vectorized accumulate + vadd.i64 q9,q13 + vadd.i64 q10,q14 + vadd.i64 q11,q15 + vstmia r0,{d16,d17,d18,d19,d20,d21,d22,d23} @ save context + teq r1,r2 + sub r3,#640 @ rewind K512 + bne .Loop_neon + + VFP_ABI_POP + bx lr @ .word 0xe12fff1e +.size zfs_sha512_block_neon,.-zfs_sha512_block_neon +#endif // #if __ARM_ARCH__ >= 7 +#endif // 
#if defined(__arm__) diff --git a/sys/contrib/openzfs/module/icp/asm-ppc64/blake3/b3_ppc64le_sse2.S b/sys/contrib/openzfs/module/icp/asm-ppc64/blake3/b3_ppc64le_sse2.S new file mode 100644 index 000000000000..ae8d0fad7c83 --- /dev/null +++ b/sys/contrib/openzfs/module/icp/asm-ppc64/blake3/b3_ppc64le_sse2.S @@ -0,0 +1,2823 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3 + * Copyright (c) 2019-2022 Samuel Neves and Matthew Krupcale + * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de> + * + * This is converted assembly: SSE2 -> POWER8 PPC64 Little Endian + * Used tools: SIMDe https://github.com/simd-everywhere/simde + */ + +#if (defined(__PPC64__) && defined(__LITTLE_ENDIAN__)) + .text + .abiversion 2 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4 +.LCPI0_0: + .byte 29 + .byte 28 + .byte 31 + .byte 30 + .byte 25 + .byte 24 + .byte 27 + .byte 26 + .byte 21 + .byte 20 + .byte 23 + .byte 22 + .byte 17 + .byte 16 + .byte 19 + .byte 18 +.LCPI0_1: + .long 1779033703 + .long 3144134277 + .long 1013904242 + .long 2773480762 +.LCPI0_2: + .byte 27 + .byte 26 + .byte 25 + .byte 24 + .byte 19 + .byte 18 + .byte 
17 + .byte 16 + .byte 11 + .byte 10 + .byte 9 + .byte 8 + .byte 3 + .byte 2 + .byte 1 + .byte 0 +.LCPI0_3: + .byte 19 + .byte 18 + .byte 17 + .byte 16 + .byte 11 + .byte 10 + .byte 9 + .byte 8 + .byte 3 + .byte 2 + .byte 1 + .byte 0 + .byte 27 + .byte 26 + .byte 25 + .byte 24 +.LCPI0_4: + .byte 23 + .byte 22 + .byte 21 + .byte 20 + .byte 27 + .byte 26 + .byte 25 + .byte 24 + .byte 11 + .byte 10 + .byte 9 + .byte 8 + .byte 3 + .byte 2 + .byte 1 + .byte 0 +.LCPI0_5: + .byte 23 + .byte 22 + .byte 21 + .byte 20 + .byte 23 + .byte 22 + .byte 21 + .byte 20 + .byte 3 + .byte 2 + .byte 1 + .byte 0 + .byte 3 + .byte 2 + .byte 1 + .byte 0 +.LCPI0_6: + .short 1 + .short 2 + .short 4 + .short 8 + .short 16 + .short 32 + .short 64 + .short 128 +.LCPI0_7: + .short 0 + .short 0 + .short 4 + .short 8 + .short 0 + .short 0 + .short 64 + .short 128 +.LCPI0_8: + .byte 19 + .byte 18 + .byte 17 + .byte 16 + .byte 19 + .byte 18 + .byte 17 + .byte 16 + .byte 31 + .byte 30 + .byte 29 + .byte 28 + .byte 31 + .byte 30 + .byte 29 + .byte 28 +.LCPI0_9: + .short 0 + .short 0 + .short 0 + .short 0 + .short 0 + .short 0 + .short 64 + .short 128 +.LCPI0_10: + .byte 31 + .byte 30 + .byte 29 + .byte 28 + .byte 7 + .byte 6 + .byte 5 + .byte 4 + .byte 3 + .byte 2 + .byte 1 + .byte 0 + .byte 27 + .byte 26 + .byte 25 + .byte 24 +.LCPI0_11: + .byte 31 + .byte 30 + .byte 29 + .byte 28 + .byte 23 + .byte 22 + .byte 21 + .byte 20 + .byte 19 + .byte 18 + .byte 17 + .byte 16 + .byte 27 + .byte 26 + .byte 25 + .byte 24 +.LCPI0_12: + .byte 27 + .byte 26 + .byte 25 + .byte 24 + .byte 11 + .byte 10 + .byte 9 + .byte 8 + .byte 15 + .byte 14 + .byte 13 + .byte 12 + .byte 31 + .byte 30 + .byte 29 + .byte 28 +.LCPI0_13: + .byte 31 + .byte 30 + .byte 29 + .byte 28 + .byte 15 + .byte 14 + .byte 13 + .byte 12 + .byte 11 + .byte 10 + .byte 9 + .byte 8 + .byte 27 + .byte 26 + .byte 25 + .byte 24 +.LCPI0_14: + .byte 27 + .byte 26 + .byte 25 + .byte 24 + .byte 11 + .byte 10 + .byte 9 + .byte 8 + .byte 3 + .byte 2 + .byte 1 
+ .byte 0 + .byte 23 + .byte 22 + .byte 21 + .byte 20 + .text + .globl zfs_blake3_compress_in_place_sse2 + .p2align 2 + .type zfs_blake3_compress_in_place_sse2,@function +zfs_blake3_compress_in_place_sse2: +.Lfunc_begin0: + .cfi_startproc +.Lfunc_gep0: + addis 2, 12, .TOC.-.Lfunc_gep0@ha + addi 2, 2, .TOC.-.Lfunc_gep0@l +.Lfunc_lep0: + .localentry zfs_blake3_compress_in_place_sse2, .Lfunc_lep0-.Lfunc_gep0 + li 8, -64 + mtvsrd 35, 5 + li 5, 16 + lfdx 0, 0, 4 + vspltisw 12, 9 + stxvd2x 60, 1, 8 + li 8, -48 + mtvsrd 36, 7 + lfd 2, 16(4) + stxvd2x 61, 1, 8 + li 8, -32 + lfd 1, 8(4) + mtvsrwz 37, 6 + rldicl 6, 6, 32, 32 + addis 7, 2, .LCPI0_2@toc@ha + stxvd2x 62, 1, 8 + li 8, -16 + addi 7, 7, .LCPI0_2@toc@l + stxvd2x 63, 1, 8 + li 8, 0 + lvx 9, 0, 7 + li 7, 48 + mtvsrd 34, 8 + xxmrghd 32, 1, 0 + lxvd2x 0, 0, 3 + lxvd2x 1, 3, 5 + lfd 3, 24(4) + addis 8, 2, .LCPI0_5@toc@ha + vmrghb 3, 2, 3 + addi 8, 8, .LCPI0_5@toc@l + vmrghb 4, 2, 4 + vspltb 2, 2, 7 + xxmrghd 33, 3, 2 + vpkudum 7, 1, 0 + vmrglh 3, 2, 3 + vmrglh 2, 2, 4 + mtvsrwz 36, 6 + addis 6, 2, .LCPI0_0@toc@ha + addi 6, 6, .LCPI0_0@toc@l + vperm 10, 1, 0, 9 + vmrghw 4, 4, 5 + xxswapd 37, 1 + lxvd2x 1, 4, 7 + addis 7, 2, .LCPI0_8@toc@ha + addi 7, 7, .LCPI0_8@toc@l + vmrglw 2, 2, 3 + xxswapd 35, 0 + xxswapd 41, 1 + xxspltd 62, 42, 1 + vadduwm 3, 7, 3 + vadduwm 6, 3, 5 + xxmrgld 36, 34, 36 + lvx 2, 0, 6 + addis 6, 2, .LCPI0_1@toc@ha + addi 6, 6, .LCPI0_1@toc@l + xxlxor 35, 38, 36 + lvx 4, 0, 6 + li 6, 32 + lxvd2x 0, 4, 6 + addis 4, 2, .LCPI0_3@toc@ha + addis 6, 2, .LCPI0_7@toc@ha + vperm 8, 3, 3, 2 + vspltisw 3, 10 + addi 4, 4, .LCPI0_3@toc@l + addi 6, 6, .LCPI0_7@toc@l + vadduwm 3, 3, 3 + vadduwm 11, 8, 4 + xxlxor 36, 43, 37 + vadduwm 5, 6, 10 + vrlw 0, 4, 3 + vspltisw 4, 12 + vadduwm 4, 4, 4 + vadduwm 1, 0, 5 + xxlxor 37, 33, 40 + xxswapd 40, 0 + vrlw 6, 5, 4 + vspltisw 5, -16 + vpkudum 13, 9, 8 + vsubuwm 5, 12, 5 + lvx 12, 0, 4 + addis 4, 2, .LCPI0_4@toc@ha + addi 4, 4, .LCPI0_4@toc@l + vadduwm 11, 6, 11 + xxswapd 0, 
38 + vadduwm 1, 1, 13 + xxsldwi 50, 45, 45, 1 + xxlxor 32, 43, 32 + xxsldwi 43, 43, 43, 3 + xxsldwi 33, 33, 33, 1 + vperm 12, 8, 9, 12 + vrlw 0, 0, 5 + vadduwm 1, 0, 1 + xxlxor 38, 33, 0 + vadduwm 1, 1, 12 + vperm 6, 6, 6, 2 + vadduwm 15, 6, 11 + lvx 11, 0, 4 + addis 4, 2, .LCPI0_6@toc@ha + addi 4, 4, .LCPI0_6@toc@l + xxlxor 32, 47, 32 + lvx 17, 0, 4 + addis 4, 2, .LCPI0_9@toc@ha + vperm 14, 10, 7, 11 + addi 4, 4, .LCPI0_9@toc@l + vrlw 0, 0, 3 + vadduwm 1, 0, 1 + xxlxor 38, 33, 38 + vrlw 6, 6, 4 + vadduwm 8, 6, 15 + xxswapd 0, 38 + lvx 6, 0, 8 + xxlxor 32, 40, 32 + xxsldwi 40, 40, 40, 1 + vperm 13, 12, 18, 6 + vrlw 9, 0, 5 + vadduwm 0, 1, 14 + lvx 1, 0, 7 + xxsldwi 46, 46, 46, 3 + xxsldwi 32, 32, 32, 3 + vperm 7, 7, 7, 1 + vadduwm 15, 9, 0 + xxlxor 32, 47, 0 + vperm 16, 0, 0, 2 + lvx 0, 0, 6 + addis 6, 2, .LCPI0_10@toc@ha + vcmpequh 0, 0, 17 + vadduwm 19, 16, 8 + xxlxor 40, 51, 41 + xxsel 45, 39, 45, 32 + vrlw 31, 8, 3 + lvx 8, 0, 4 + addis 4, 2, .LCPI0_11@toc@ha + addi 4, 4, .LCPI0_11@toc@l + vcmpequh 7, 8, 17 + vadduwm 8, 15, 13 + vadduwm 15, 31, 8 + lvx 8, 0, 4 + addi 4, 6, .LCPI0_10@toc@l + lvx 17, 0, 4 + addis 4, 2, .LCPI0_12@toc@ha + xxlxor 41, 47, 48 + xxsldwi 47, 47, 47, 1 + addi 4, 4, .LCPI0_12@toc@l + xxlnor 48, 39, 39 + vrlw 29, 9, 4 + vperm 9, 16, 16, 8 + xxland 48, 50, 39 + vperm 17, 30, 12, 17 + vperm 16, 16, 16, 8 + vmrghw 12, 12, 10 + lvx 10, 0, 4 + addis 4, 2, .LCPI0_13@toc@ha + vadduwm 19, 29, 19 + addi 4, 4, .LCPI0_13@toc@l + xxlxor 63, 51, 63 + xxsldwi 51, 51, 51, 3 + xxland 0, 49, 41 + vrlw 17, 31, 5 + xxlor 48, 0, 48 + xxswapd 0, 61 + vperm 18, 12, 18, 10 + vadduwm 15, 15, 16 + xxland 60, 48, 39 + vadduwm 15, 17, 15 + vperm 28, 28, 28, 8 + xxlxor 63, 47, 0 + vadduwm 15, 15, 18 + vperm 31, 31, 31, 2 + vperm 30, 18, 16, 6 + vadduwm 19, 31, 19 + xxlxor 44, 51, 49 + vrlw 12, 12, 3 + vadduwm 15, 12, 15 + xxlxor 49, 47, 63 + vperm 31, 13, 14, 11 + vrlw 17, 17, 4 + vperm 14, 14, 14, 1 + vadduwm 15, 15, 31 + vadduwm 19, 17, 19 + xxswapd 0, 49 + 
xxsldwi 47, 47, 47, 3 + xxsel 46, 46, 62, 32 + xxlxor 44, 51, 44 + xxsldwi 51, 51, 51, 1 + vrlw 12, 12, 5 + vadduwm 15, 12, 15 + xxlxor 49, 47, 0 + vperm 17, 17, 17, 2 + vadduwm 19, 17, 19 + xxlxor 44, 51, 44 + vrlw 29, 12, 3 + vadduwm 12, 15, 14 + vadduwm 15, 29, 12 + lvx 12, 0, 4 + addis 4, 2, .LCPI0_14@toc@ha + addi 4, 4, .LCPI0_14@toc@l + xxlxor 49, 47, 49 + xxsldwi 47, 47, 47, 1 + vperm 30, 13, 18, 12 + vrlw 17, 17, 4 + vmrghw 13, 18, 13 + xxland 0, 62, 41 + vadduwm 19, 17, 19 + vperm 16, 13, 16, 10 + xxlxor 61, 51, 61 + xxsldwi 50, 51, 51, 3 + xxsldwi 51, 63, 63, 3 + vrlw 30, 29, 5 + xxlor 61, 60, 0 + xxswapd 0, 49 + vperm 31, 14, 19, 11 + vadduwm 15, 15, 29 + vperm 19, 19, 19, 1 + vadduwm 15, 30, 15 + xxlxor 49, 47, 0 + vadduwm 15, 15, 16 + vperm 17, 17, 17, 2 + vadduwm 18, 17, 18 + xxlxor 45, 50, 62 + vperm 30, 16, 29, 6 + vrlw 13, 13, 3 + vadduwm 15, 13, 15 + xxlxor 49, 47, 49 + vadduwm 15, 15, 31 + xxsldwi 63, 63, 63, 3 + vrlw 17, 17, 4 + xxsldwi 47, 47, 47, 3 + vadduwm 18, 17, 18 + xxswapd 0, 49 + xxlxor 45, 50, 45 + xxsldwi 50, 50, 50, 1 + vrlw 13, 13, 5 + vadduwm 15, 13, 15 + xxlxor 49, 47, 0 + vperm 17, 17, 17, 2 + vadduwm 18, 17, 18 + xxlxor 45, 50, 45 + vrlw 28, 13, 3 + xxsel 45, 51, 62, 32 + xxland 51, 61, 39 + vperm 30, 14, 16, 12 + vadduwm 15, 15, 13 + vperm 19, 19, 19, 8 + vmrghw 14, 16, 14 + vadduwm 15, 28, 15 + xxlxor 49, 47, 49 + xxsldwi 47, 47, 47, 1 + xxland 0, 62, 41 + vrlw 17, 17, 4 + xxlor 51, 51, 0 + vadduwm 15, 15, 19 + vadduwm 18, 17, 18 + xxswapd 0, 49 + xxlxor 60, 50, 60 + xxsldwi 48, 50, 50, 3 + vperm 18, 14, 29, 10 + vrlw 30, 28, 5 + vperm 29, 18, 19, 6 + vadduwm 15, 30, 15 + xxlxor 49, 47, 0 + vadduwm 15, 15, 18 + vperm 17, 17, 17, 2 + vadduwm 16, 17, 16 + xxlxor 46, 48, 62 + vperm 30, 13, 31, 11 + vrlw 14, 14, 3 + vperm 31, 31, 31, 1 + vadduwm 15, 14, 15 + xxlxor 49, 47, 49 + vadduwm 15, 15, 30 + vrlw 17, 17, 4 + xxsldwi 47, 47, 47, 3 + vadduwm 16, 17, 16 + xxswapd 0, 49 + xxlxor 46, 48, 46 + xxsldwi 48, 48, 48, 1 + vrlw 14, 14, 
5 + vadduwm 15, 14, 15 + xxlxor 49, 47, 0 + vperm 17, 17, 17, 2 + vadduwm 16, 17, 16 + xxlxor 46, 48, 46 + vrlw 28, 14, 3 + xxsel 46, 63, 61, 32 + xxland 63, 51, 39 + vperm 29, 13, 18, 12 + vadduwm 15, 15, 14 + vperm 31, 31, 31, 8 + vmrghw 13, 18, 13 + vadduwm 15, 28, 15 + xxlxor 49, 47, 49 + xxsldwi 47, 47, 47, 1 + xxland 0, 61, 41 + vrlw 17, 17, 4 + xxlor 63, 63, 0 + vperm 13, 13, 19, 10 + xxsldwi 51, 62, 62, 3 + vadduwm 15, 15, 31 + vperm 30, 14, 19, 11 + vadduwm 16, 17, 16 + xxswapd 0, 49 + xxlxor 60, 48, 60 + xxsldwi 48, 48, 48, 3 + vrlw 29, 28, 5 + vadduwm 15, 29, 15 + xxlxor 49, 47, 0 + vadduwm 15, 15, 13 + vperm 17, 17, 17, 2 + vadduwm 16, 17, 16 + xxlxor 50, 48, 61 + vrlw 18, 18, 3 + vadduwm 15, 18, 15 + xxlxor 49, 47, 49 + vadduwm 15, 15, 30 + vrlw 17, 17, 4 + xxsldwi 47, 47, 47, 3 + vadduwm 11, 17, 16 + xxswapd 0, 49 + xxlxor 48, 43, 50 + xxsldwi 43, 43, 43, 1 + vperm 18, 19, 19, 1 + vrlw 16, 16, 5 + vperm 19, 13, 31, 6 + vadduwm 15, 16, 15 + xxlxor 49, 47, 0 + vperm 17, 17, 17, 2 + vadduwm 29, 17, 11 + xxlxor 43, 61, 48 + vrlw 16, 11, 3 + xxsel 43, 50, 51, 32 + xxland 50, 63, 39 + vperm 19, 14, 13, 12 + vadduwm 15, 15, 11 + vperm 18, 18, 18, 8 + vmrghw 13, 13, 14 + vadduwm 15, 16, 15 + xxlxor 49, 47, 49 + xxsldwi 47, 47, 47, 1 + xxland 0, 51, 41 + lvx 19, 0, 4 + vrlw 17, 17, 4 + xxlor 50, 50, 0 + vperm 13, 13, 31, 10 + xxsldwi 63, 62, 62, 3 + vadduwm 15, 15, 18 + vperm 19, 11, 31, 19 + vadduwm 29, 17, 29 + xxswapd 0, 49 + vperm 1, 31, 31, 1 + xxlxor 48, 61, 48 + xxsldwi 46, 61, 61, 3 + vperm 6, 13, 18, 6 + vrlw 16, 16, 5 + xxsel 32, 33, 38, 32 + xxland 38, 50, 39 + vadduwm 15, 16, 15 + vperm 7, 11, 13, 12 + xxlxor 49, 47, 0 + vadduwm 15, 15, 13 + vperm 17, 17, 17, 2 + vperm 6, 6, 6, 8 + vadduwm 14, 17, 14 + xxlxor 48, 46, 48 + vrlw 16, 16, 3 + vadduwm 15, 16, 15 + xxlxor 49, 47, 49 + xxsldwi 47, 47, 47, 3 + vrlw 17, 17, 4 + vadduwm 15, 15, 19 + vadduwm 14, 17, 14 + xxswapd 0, 49 + xxlxor 48, 46, 48 + xxsldwi 46, 46, 46, 1 + vrlw 16, 16, 5 + vadduwm 15, 
16, 15 + xxlxor 49, 47, 0 + vadduwm 0, 15, 0 + vperm 17, 17, 17, 2 + xxland 0, 39, 41 + xxlor 38, 38, 0 + vadduwm 14, 17, 14 + xxlxor 48, 46, 48 + vrlw 16, 16, 3 + vadduwm 0, 16, 0 + xxlxor 33, 32, 49 + xxsldwi 32, 32, 32, 1 + vrlw 1, 1, 4 + vadduwm 0, 0, 6 + vadduwm 8, 1, 14 + xxswapd 0, 33 + xxlxor 44, 40, 48 + xxsldwi 38, 40, 40, 3 + vrlw 7, 12, 5 + vadduwm 0, 7, 0 + xxlxor 33, 32, 0 + vperm 2, 1, 1, 2 + vmrghw 1, 13, 11 + vadduwm 6, 2, 6 + vperm 1, 1, 18, 10 + xxlxor 39, 38, 39 + vrlw 3, 7, 3 + vadduwm 0, 0, 1 + vadduwm 0, 3, 0 + xxlxor 34, 32, 34 + xxsldwi 0, 32, 32, 3 + vrlw 2, 2, 4 + vadduwm 4, 2, 6 + xxswapd 2, 34 + xxlxor 35, 36, 35 + xxsldwi 1, 36, 36, 1 + vrlw 3, 3, 5 + xxlxor 0, 1, 0 + xxswapd 0, 0 + xxlxor 1, 35, 2 + stxvd2x 0, 0, 3 + xxswapd 1, 1 + stxvd2x 1, 3, 5 + li 3, -16 + lxvd2x 63, 1, 3 + li 3, -32 + lxvd2x 62, 1, 3 + li 3, -48 + lxvd2x 61, 1, 3 + li 3, -64 + lxvd2x 60, 1, 3 + blr + .long 0 + .quad 0 +.Lfunc_end0: + .size zfs_blake3_compress_in_place_sse2, .Lfunc_end0-.Lfunc_begin0 + .cfi_endproc + + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4 +.LCPI1_0: + .byte 29 + .byte 28 + .byte 31 + .byte 30 + .byte 25 + .byte 24 + .byte 27 + .byte 26 + .byte 21 + .byte 20 + .byte 23 + .byte 22 + .byte 17 + .byte 16 + .byte 19 + .byte 18 +.LCPI1_1: + .long 1779033703 + .long 3144134277 + .long 1013904242 + .long 2773480762 +.LCPI1_2: + .byte 27 + .byte 26 + .byte 25 + .byte 24 + .byte 19 + .byte 18 + .byte 17 + .byte 16 + .byte 11 + .byte 10 + .byte 9 + .byte 8 + .byte 3 + .byte 2 + .byte 1 + .byte 0 +.LCPI1_3: + .byte 19 + .byte 18 + .byte 17 + .byte 16 + .byte 11 + .byte 10 + .byte 9 + .byte 8 + .byte 3 + .byte 2 + .byte 1 + .byte 0 + .byte 27 + .byte 26 + .byte 25 + .byte 24 +.LCPI1_4: + .byte 23 + .byte 22 + .byte 21 + .byte 20 + .byte 27 + .byte 26 + .byte 25 + .byte 24 + .byte 11 + .byte 10 + .byte 9 + .byte 8 + .byte 3 + .byte 2 + .byte 1 + .byte 0 +.LCPI1_5: + .byte 23 + .byte 22 + .byte 21 + .byte 20 + .byte 23 + .byte 22 + .byte 21 + 
.byte 20 + .byte 3 + .byte 2 + .byte 1 + .byte 0 + .byte 3 + .byte 2 + .byte 1 + .byte 0 +.LCPI1_6: + .short 1 + .short 2 + .short 4 + .short 8 + .short 16 + .short 32 + .short 64 + .short 128 +.LCPI1_7: + .short 0 + .short 0 + .short 4 + .short 8 + .short 0 + .short 0 + .short 64 + .short 128 +.LCPI1_8: + .byte 19 + .byte 18 + .byte 17 + .byte 16 + .byte 19 + .byte 18 + .byte 17 + .byte 16 + .byte 31 + .byte 30 + .byte 29 + .byte 28 + .byte 31 + .byte 30 + .byte 29 + .byte 28 +.LCPI1_9: + .short 0 + .short 0 + .short 0 + .short 0 + .short 0 + .short 0 + .short 64 + .short 128 +.LCPI1_10: + .byte 31 + .byte 30 + .byte 29 + .byte 28 + .byte 7 + .byte 6 + .byte 5 + .byte 4 + .byte 3 + .byte 2 + .byte 1 + .byte 0 + .byte 27 + .byte 26 + .byte 25 + .byte 24 +.LCPI1_11: + .byte 31 + .byte 30 + .byte 29 + .byte 28 + .byte 23 + .byte 22 + .byte 21 + .byte 20 + .byte 19 + .byte 18 + .byte 17 + .byte 16 + .byte 27 + .byte 26 + .byte 25 + .byte 24 +.LCPI1_12: + .byte 27 + .byte 26 + .byte 25 + .byte 24 + .byte 11 + .byte 10 + .byte 9 + .byte 8 + .byte 15 + .byte 14 + .byte 13 + .byte 12 + .byte 31 + .byte 30 + .byte 29 + .byte 28 +.LCPI1_13: + .byte 31 + .byte 30 + .byte 29 + .byte 28 + .byte 15 + .byte 14 + .byte 13 + .byte 12 + .byte 11 + .byte 10 + .byte 9 + .byte 8 + .byte 27 + .byte 26 + .byte 25 + .byte 24 +.LCPI1_14: + .byte 27 + .byte 26 + .byte 25 + .byte 24 + .byte 11 + .byte 10 + .byte 9 + .byte 8 + .byte 3 + .byte 2 + .byte 1 + .byte 0 + .byte 23 + .byte 22 + .byte 21 + .byte 20 + .text + .globl zfs_blake3_compress_xof_sse2 + .p2align 2 + .type zfs_blake3_compress_xof_sse2,@function +zfs_blake3_compress_xof_sse2: +.Lfunc_begin1: + .cfi_startproc +.Lfunc_gep1: + addis 2, 12, .TOC.-.Lfunc_gep1@ha + addi 2, 2, .TOC.-.Lfunc_gep1@l +.Lfunc_lep1: + .localentry zfs_blake3_compress_xof_sse2, .Lfunc_lep1-.Lfunc_gep1 + li 9, -80 + mtvsrd 35, 5 + li 5, 16 + lfdx 0, 0, 4 + addis 10, 2, .LCPI1_2@toc@ha + vspltisw 12, 9 + std 30, -16(1) + addis 12, 2, .LCPI1_8@toc@ha + addis 
30, 2, .LCPI1_5@toc@ha + addis 11, 2, .LCPI1_7@toc@ha + stxvd2x 60, 1, 9 + li 9, -64 + mtvsrd 36, 7 + lfd 2, 16(4) + addi 10, 10, .LCPI1_2@toc@l + addi 12, 12, .LCPI1_8@toc@l + addi 11, 11, .LCPI1_7@toc@l + stxvd2x 61, 1, 9 + li 9, -48 + lfd 3, 24(4) + mtvsrwz 37, 6 + rldicl 6, 6, 32, 32 + lvx 9, 0, 10 + stxvd2x 62, 1, 9 + li 9, -32 + li 10, 32 + stxvd2x 63, 1, 9 + li 9, 0 + mtvsrd 34, 9 + xxmrghd 33, 3, 2 + lfd 1, 8(4) + vmrghb 3, 2, 3 + vmrghb 4, 2, 4 + vspltb 2, 2, 7 + xxmrghd 32, 1, 0 + lxvd2x 0, 0, 3 + lxvd2x 1, 3, 5 + vpkudum 7, 1, 0 + vmrglh 3, 2, 3 + vmrglh 2, 2, 4 + mtvsrwz 36, 6 + addis 6, 2, .LCPI1_0@toc@ha + addi 6, 6, .LCPI1_0@toc@l + vperm 10, 1, 0, 9 + vmrghw 4, 4, 5 + xxswapd 37, 1 + vmrglw 2, 2, 3 + xxswapd 35, 0 + lxvd2x 0, 4, 10 + xxspltd 62, 42, 1 + vadduwm 3, 7, 3 + vadduwm 6, 3, 5 + xxmrgld 36, 34, 36 + lvx 2, 0, 6 + addis 6, 2, .LCPI1_1@toc@ha + addi 6, 6, .LCPI1_1@toc@l + xxlxor 35, 38, 36 + lvx 4, 0, 6 + li 6, 48 + lxvd2x 1, 4, 6 + addis 4, 2, .LCPI1_3@toc@ha + vperm 8, 3, 3, 2 + vspltisw 3, 10 + addi 4, 4, .LCPI1_3@toc@l + xxswapd 41, 1 + vadduwm 3, 3, 3 + vadduwm 11, 8, 4 + xxlxor 36, 43, 37 + vadduwm 5, 6, 10 + vrlw 0, 4, 3 + vspltisw 4, 12 + vadduwm 4, 4, 4 + vadduwm 1, 0, 5 + xxlxor 37, 33, 40 + xxswapd 40, 0 + vrlw 6, 5, 4 + vspltisw 5, -16 + vpkudum 13, 9, 8 + vsubuwm 5, 12, 5 + lvx 12, 0, 4 + addis 4, 2, .LCPI1_4@toc@ha + addi 4, 4, .LCPI1_4@toc@l + vadduwm 11, 6, 11 + xxswapd 0, 38 + vadduwm 1, 1, 13 + xxsldwi 50, 45, 45, 1 + xxlxor 32, 43, 32 + xxsldwi 43, 43, 43, 3 + xxsldwi 33, 33, 33, 1 + vperm 12, 8, 9, 12 + vrlw 0, 0, 5 + vadduwm 1, 0, 1 + xxlxor 38, 33, 0 + vadduwm 1, 1, 12 + vperm 6, 6, 6, 2 + vadduwm 15, 6, 11 + lvx 11, 0, 4 + addis 4, 2, .LCPI1_6@toc@ha + addi 4, 4, .LCPI1_6@toc@l + xxlxor 32, 47, 32 + lvx 17, 0, 4 + addi 4, 30, .LCPI1_5@toc@l + vperm 14, 10, 7, 11 + vrlw 0, 0, 3 + vadduwm 1, 0, 1 + xxlxor 38, 33, 38 + vrlw 6, 6, 4 + vadduwm 8, 6, 15 + xxswapd 0, 38 + lvx 6, 0, 4 + addis 4, 2, .LCPI1_9@toc@ha + addi 4, 4, 
.LCPI1_9@toc@l + xxlxor 32, 40, 32 + xxsldwi 40, 40, 40, 1 + vperm 13, 12, 18, 6 + vrlw 9, 0, 5 + vadduwm 0, 1, 14 + lvx 1, 0, 12 + xxsldwi 46, 46, 46, 3 + xxsldwi 32, 32, 32, 3 + vperm 7, 7, 7, 1 + vadduwm 15, 9, 0 + xxlxor 32, 47, 0 + vperm 16, 0, 0, 2 + lvx 0, 0, 11 + addis 11, 2, .LCPI1_10@toc@ha + vcmpequh 0, 0, 17 + vadduwm 19, 16, 8 + xxlxor 40, 51, 41 + xxsel 45, 39, 45, 32 + vrlw 31, 8, 3 + lvx 8, 0, 4 + addis 4, 2, .LCPI1_11@toc@ha + addi 4, 4, .LCPI1_11@toc@l + vcmpequh 7, 8, 17 + vadduwm 8, 15, 13 + vadduwm 15, 31, 8 + lvx 8, 0, 4 + addi 4, 11, .LCPI1_10@toc@l + lvx 17, 0, 4 + addis 4, 2, .LCPI1_12@toc@ha + xxlxor 41, 47, 48 + xxsldwi 47, 47, 47, 1 + addi 4, 4, .LCPI1_12@toc@l + xxlnor 48, 39, 39 + vrlw 29, 9, 4 + vperm 9, 16, 16, 8 + xxland 48, 50, 39 + vperm 17, 30, 12, 17 + vperm 16, 16, 16, 8 + vmrghw 12, 12, 10 + lvx 10, 0, 4 + addis 4, 2, .LCPI1_13@toc@ha + vadduwm 19, 29, 19 + addi 4, 4, .LCPI1_13@toc@l + xxlxor 63, 51, 63 + xxsldwi 51, 51, 51, 3 + xxland 0, 49, 41 + vrlw 17, 31, 5 + xxlor 48, 0, 48 + xxswapd 0, 61 + vperm 18, 12, 18, 10 + vadduwm 15, 15, 16 + xxland 60, 48, 39 + vadduwm 15, 17, 15 + vperm 28, 28, 28, 8 + xxlxor 63, 47, 0 + vadduwm 15, 15, 18 + vperm 31, 31, 31, 2 + vperm 30, 18, 16, 6 + vadduwm 19, 31, 19 + xxlxor 44, 51, 49 + vrlw 12, 12, 3 + vadduwm 15, 12, 15 + xxlxor 49, 47, 63 + vperm 31, 13, 14, 11 + vrlw 17, 17, 4 + vperm 14, 14, 14, 1 + vadduwm 15, 15, 31 + vadduwm 19, 17, 19 + xxswapd 0, 49 + xxsldwi 47, 47, 47, 3 + xxsel 46, 46, 62, 32 + xxlxor 44, 51, 44 + xxsldwi 51, 51, 51, 1 + vrlw 12, 12, 5 + vadduwm 15, 12, 15 + xxlxor 49, 47, 0 + vperm 17, 17, 17, 2 + vadduwm 19, 17, 19 + xxlxor 44, 51, 44 + vrlw 29, 12, 3 + vadduwm 12, 15, 14 + vadduwm 15, 29, 12 + lvx 12, 0, 4 + addis 4, 2, .LCPI1_14@toc@ha + addi 4, 4, .LCPI1_14@toc@l + xxlxor 49, 47, 49 + xxsldwi 47, 47, 47, 1 + vperm 30, 13, 18, 12 + vrlw 17, 17, 4 + vmrghw 13, 18, 13 + xxland 0, 62, 41 + vadduwm 19, 17, 19 + vperm 16, 13, 16, 10 + xxlxor 61, 51, 61 + 
xxsldwi 50, 51, 51, 3 + xxsldwi 51, 63, 63, 3 + vrlw 30, 29, 5 + xxlor 61, 60, 0 + xxswapd 0, 49 + vperm 31, 14, 19, 11 + vadduwm 15, 15, 29 + vperm 19, 19, 19, 1 + vadduwm 15, 30, 15 + xxlxor 49, 47, 0 + vadduwm 15, 15, 16 + vperm 17, 17, 17, 2 + vadduwm 18, 17, 18 + xxlxor 45, 50, 62 + vperm 30, 16, 29, 6 + vrlw 13, 13, 3 + vadduwm 15, 13, 15 + xxlxor 49, 47, 49 + vadduwm 15, 15, 31 + xxsldwi 63, 63, 63, 3 + vrlw 17, 17, 4 + xxsldwi 47, 47, 47, 3 + vadduwm 18, 17, 18 + xxswapd 0, 49 + xxlxor 45, 50, 45 + xxsldwi 50, 50, 50, 1 + vrlw 13, 13, 5 + vadduwm 15, 13, 15 + xxlxor 49, 47, 0 + vperm 17, 17, 17, 2 + vadduwm 18, 17, 18 + xxlxor 45, 50, 45 + vrlw 28, 13, 3 + xxsel 45, 51, 62, 32 + xxland 51, 61, 39 + vperm 30, 14, 16, 12 + vadduwm 15, 15, 13 + vperm 19, 19, 19, 8 + vmrghw 14, 16, 14 + vadduwm 15, 28, 15 + xxlxor 49, 47, 49 + xxsldwi 47, 47, 47, 1 + xxland 0, 62, 41 + vrlw 17, 17, 4 + xxlor 51, 51, 0 + vadduwm 15, 15, 19 + vadduwm 18, 17, 18 + xxswapd 0, 49 + xxlxor 60, 50, 60 + xxsldwi 48, 50, 50, 3 + vperm 18, 14, 29, 10 + vrlw 30, 28, 5 + vperm 29, 18, 19, 6 + vadduwm 15, 30, 15 + xxlxor 49, 47, 0 + vadduwm 15, 15, 18 + vperm 17, 17, 17, 2 + vadduwm 16, 17, 16 + xxlxor 46, 48, 62 + vperm 30, 13, 31, 11 + vrlw 14, 14, 3 + vperm 31, 31, 31, 1 + vadduwm 15, 14, 15 + xxlxor 49, 47, 49 + vadduwm 15, 15, 30 + vrlw 17, 17, 4 + xxsldwi 47, 47, 47, 3 + vadduwm 16, 17, 16 + xxswapd 0, 49 + xxlxor 46, 48, 46 + xxsldwi 48, 48, 48, 1 + vrlw 14, 14, 5 + vadduwm 15, 14, 15 + xxlxor 49, 47, 0 + vperm 17, 17, 17, 2 + vadduwm 16, 17, 16 + xxlxor 46, 48, 46 + vrlw 28, 14, 3 + xxsel 46, 63, 61, 32 + xxland 63, 51, 39 + vperm 29, 13, 18, 12 + vadduwm 15, 15, 14 + vperm 31, 31, 31, 8 + vmrghw 13, 18, 13 + vadduwm 15, 28, 15 + xxlxor 49, 47, 49 + xxsldwi 47, 47, 47, 1 + xxland 0, 61, 41 + vrlw 17, 17, 4 + xxlor 63, 63, 0 + vperm 13, 13, 19, 10 + xxsldwi 51, 62, 62, 3 + vadduwm 15, 15, 31 + vperm 30, 14, 19, 11 + vadduwm 16, 17, 16 + xxswapd 0, 49 + xxlxor 60, 48, 60 + xxsldwi 48, 
48, 48, 3 + vrlw 29, 28, 5 + vadduwm 15, 29, 15 + xxlxor 49, 47, 0 + vadduwm 15, 15, 13 + vperm 17, 17, 17, 2 + vadduwm 16, 17, 16 + xxlxor 50, 48, 61 + vrlw 18, 18, 3 + vadduwm 15, 18, 15 + xxlxor 49, 47, 49 + vadduwm 15, 15, 30 + vrlw 17, 17, 4 + xxsldwi 47, 47, 47, 3 + vadduwm 11, 17, 16 + xxswapd 0, 49 + xxlxor 48, 43, 50 + xxsldwi 43, 43, 43, 1 + vperm 18, 19, 19, 1 + vrlw 16, 16, 5 + vperm 19, 13, 31, 6 + vadduwm 15, 16, 15 + xxlxor 49, 47, 0 + vperm 17, 17, 17, 2 + vadduwm 29, 17, 11 + xxlxor 43, 61, 48 + vrlw 16, 11, 3 + xxsel 43, 50, 51, 32 + xxland 50, 63, 39 + vperm 19, 14, 13, 12 + vadduwm 15, 15, 11 + vperm 18, 18, 18, 8 + vmrghw 13, 13, 14 + vadduwm 15, 16, 15 + xxlxor 49, 47, 49 + xxsldwi 47, 47, 47, 1 + xxland 0, 51, 41 + lvx 19, 0, 4 + vrlw 17, 17, 4 + xxlor 50, 50, 0 + vperm 13, 13, 31, 10 + xxsldwi 63, 62, 62, 3 + vadduwm 15, 15, 18 + vperm 19, 11, 31, 19 + vadduwm 29, 17, 29 + xxswapd 0, 49 + vperm 1, 31, 31, 1 + xxlxor 48, 61, 48 + xxsldwi 46, 61, 61, 3 + vperm 6, 13, 18, 6 + vrlw 16, 16, 5 + xxsel 32, 33, 38, 32 + xxland 38, 50, 39 + vadduwm 15, 16, 15 + vperm 7, 11, 13, 12 + xxlxor 49, 47, 0 + vadduwm 15, 15, 13 + vperm 17, 17, 17, 2 + vperm 6, 6, 6, 8 + vadduwm 14, 17, 14 + xxlxor 48, 46, 48 + vrlw 16, 16, 3 + vadduwm 15, 16, 15 + xxlxor 49, 47, 49 + xxsldwi 47, 47, 47, 3 + vrlw 17, 17, 4 + vadduwm 15, 15, 19 + vadduwm 14, 17, 14 + xxswapd 0, 49 + xxlxor 48, 46, 48 + xxsldwi 46, 46, 46, 1 + vrlw 16, 16, 5 + vadduwm 15, 16, 15 + xxlxor 49, 47, 0 + vadduwm 0, 15, 0 + vperm 17, 17, 17, 2 + xxland 0, 39, 41 + xxlor 38, 38, 0 + vadduwm 14, 17, 14 + xxlxor 48, 46, 48 + vrlw 16, 16, 3 + vadduwm 0, 16, 0 + xxlxor 33, 32, 49 + xxsldwi 32, 32, 32, 1 + vrlw 1, 1, 4 + vadduwm 0, 0, 6 + vadduwm 8, 1, 14 + xxswapd 0, 33 + xxlxor 44, 40, 48 + xxsldwi 38, 40, 40, 3 + vrlw 7, 12, 5 + vadduwm 0, 7, 0 + xxlxor 33, 32, 0 + vperm 2, 1, 1, 2 + vmrghw 1, 13, 11 + vadduwm 6, 2, 6 + vperm 1, 1, 18, 10 + xxlxor 39, 38, 39 + vrlw 3, 7, 3 + vadduwm 0, 0, 1 + vadduwm 0, 
3, 0 + xxlxor 34, 32, 34 + xxsldwi 0, 32, 32, 3 + vrlw 2, 2, 4 + vadduwm 4, 2, 6 + xxswapd 2, 34 + xxlxor 35, 36, 35 + xxsldwi 1, 36, 36, 1 + vrlw 3, 3, 5 + xxlxor 0, 1, 0 + xxswapd 0, 0 + xxlxor 3, 35, 2 + stxvd2x 0, 0, 8 + xxswapd 3, 3 + stxvd2x 3, 8, 5 + lfdx 0, 0, 3 + lfd 3, 8(3) + xxmrghd 34, 3, 0 + xxlxor 0, 1, 34 + xxswapd 0, 0 + stxvd2x 0, 8, 10 + lfd 0, 16(3) + lfd 1, 24(3) + li 3, -32 + xxmrghd 34, 1, 0 + xxlxor 0, 2, 34 + xxswapd 0, 0 + stxvd2x 0, 8, 6 + lxvd2x 63, 1, 3 + li 3, -48 + ld 30, -16(1) + lxvd2x 62, 1, 3 + li 3, -64 + lxvd2x 61, 1, 3 + li 3, -80 + lxvd2x 60, 1, 3 + blr + .long 0 + .quad 0 +.Lfunc_end1: + .size zfs_blake3_compress_xof_sse2, .Lfunc_end1-.Lfunc_begin1 + .cfi_endproc + + .globl zfs_blake3_hash_many_sse2 + .p2align 2 + .type zfs_blake3_hash_many_sse2,@function +zfs_blake3_hash_many_sse2: +.Lfunc_begin2: + .cfi_startproc +.Lfunc_gep2: + addis 2, 12, .TOC.-.Lfunc_gep2@ha + addi 2, 2, .TOC.-.Lfunc_gep2@l +.Lfunc_lep2: + .localentry zfs_blake3_hash_many_sse2, .Lfunc_lep2-.Lfunc_gep2 + mfocrf 12, 32 + mflr 0 + std 0, 16(1) + stw 12, 8(1) + stdu 1, -256(1) + .cfi_def_cfa_offset 256 + .cfi_offset lr, 16 + .cfi_offset r17, -120 + .cfi_offset r18, -112 + .cfi_offset r19, -104 + .cfi_offset r20, -96 + .cfi_offset r21, -88 + .cfi_offset r22, -80 + .cfi_offset r23, -72 + .cfi_offset r24, -64 + .cfi_offset r25, -56 + .cfi_offset r26, -48 + .cfi_offset r27, -40 + .cfi_offset r28, -32 + .cfi_offset r29, -24 + .cfi_offset r30, -16 + .cfi_offset cr2, 8 + std 26, 208(1) + mr 26, 4 + cmpldi 1, 4, 4 + andi. 
4, 8, 1 + std 18, 144(1) + std 19, 152(1) + crmove 8, 1 + ld 19, 360(1) + lwz 18, 352(1) + std 24, 192(1) + std 25, 200(1) + std 27, 216(1) + std 28, 224(1) + mr 24, 10 + mr 28, 6 + mr 27, 5 + mr 25, 3 + std 29, 232(1) + std 30, 240(1) + mr 30, 9 + mr 29, 7 + std 17, 136(1) + std 20, 160(1) + std 21, 168(1) + std 22, 176(1) + std 23, 184(1) + blt 1, .LBB2_3 + li 3, 0 + li 4, 1 + clrldi 23, 30, 32 + isel 22, 4, 3, 8 + clrldi 21, 24, 32 + clrldi 20, 18, 32 +.LBB2_2: + mr 3, 25 + mr 4, 27 + mr 5, 28 + mr 6, 29 + mr 7, 22 + mr 8, 23 + mr 9, 21 + mr 10, 20 + std 19, 32(1) + bl blake3_hash4_sse2 + addi 26, 26, -4 + addi 3, 29, 4 + addi 25, 25, 32 + addi 19, 19, 128 + cmpldi 26, 3 + isel 29, 3, 29, 8 + bgt 0, .LBB2_2 +.LBB2_3: + cmpldi 26, 0 + beq 0, .LBB2_11 + li 3, 0 + li 4, 1 + or 21, 24, 30 + li 20, 16 + addi 24, 1, 96 + isel 22, 4, 3, 8 +.LBB2_5: + lxvd2x 0, 28, 20 + ld 23, 0(25) + mr 17, 27 + mr 3, 21 + stxvd2x 0, 24, 20 + lxvd2x 0, 0, 28 + stxvd2x 0, 0, 24 +.LBB2_6: + cmpldi 17, 1 + beq 0, .LBB2_8 + cmpldi 17, 0 + bne 0, .LBB2_9 + b .LBB2_10 +.LBB2_8: + or 3, 3, 18 +.LBB2_9: + clrldi 7, 3, 56 + mr 3, 24 + mr 4, 23 + li 5, 64 + mr 6, 29 + bl zfs_blake3_compress_in_place_sse2 + addi 23, 23, 64 + addi 17, 17, -1 + mr 3, 30 + b .LBB2_6 +.LBB2_10: + lxvd2x 0, 24, 20 + addi 26, 26, -1 + add 29, 29, 22 + addi 25, 25, 8 + cmpldi 26, 0 + stxvd2x 0, 19, 20 + lxvd2x 0, 0, 24 + stxvd2x 0, 0, 19 + addi 19, 19, 32 + bne 0, .LBB2_5 +.LBB2_11: + ld 30, 240(1) + ld 29, 232(1) + ld 28, 224(1) + ld 27, 216(1) + ld 26, 208(1) + ld 25, 200(1) + ld 24, 192(1) + ld 23, 184(1) + ld 22, 176(1) + ld 21, 168(1) + ld 20, 160(1) + ld 19, 152(1) + ld 18, 144(1) + ld 17, 136(1) + addi 1, 1, 256 + ld 0, 16(1) + lwz 12, 8(1) + mtocrf 32, 12 + mtlr 0 + blr + .long 0 + .quad 0 +.Lfunc_end2: + .size zfs_blake3_hash_many_sse2, .Lfunc_end2-.Lfunc_begin2 + .cfi_endproc + + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4 +.LCPI3_0: + .quad 4294967296 + .quad 12884901890 +.LCPI3_1: + .byte 29 + 
.byte 28 + .byte 31 + .byte 30 + .byte 25 + .byte 24 + .byte 27 + .byte 26 + .byte 21 + .byte 20 + .byte 23 + .byte 22 + .byte 17 + .byte 16 + .byte 19 + .byte 18 +.LCPI3_2: + .long 1779033703 + .long 1779033703 + .long 1779033703 + .long 1779033703 +.LCPI3_3: + .long 3144134277 + .long 3144134277 + .long 3144134277 + .long 3144134277 +.LCPI3_4: + .long 1013904242 + .long 1013904242 + .long 1013904242 + .long 1013904242 +.LCPI3_5: + .long 2773480762 + .long 2773480762 + .long 2773480762 + .long 2773480762 + .text + .p2align 2 + .type blake3_hash4_sse2,@function +blake3_hash4_sse2: +.Lfunc_begin3: + .cfi_startproc +.Lfunc_gep3: + addis 2, 12, .TOC.-.Lfunc_gep3@ha + addi 2, 2, .TOC.-.Lfunc_gep3@l +.Lfunc_lep3: + .localentry blake3_hash4_sse2, .Lfunc_lep3-.Lfunc_gep3 + stdu 1, -400(1) + .cfi_def_cfa_offset 400 + .cfi_offset r22, -152 + .cfi_offset r23, -144 + .cfi_offset r24, -136 + .cfi_offset r25, -128 + .cfi_offset r26, -120 + .cfi_offset r27, -112 + .cfi_offset r28, -104 + .cfi_offset r29, -96 + .cfi_offset r30, -88 + .cfi_offset f23, -72 + .cfi_offset f24, -64 + .cfi_offset f25, -56 + .cfi_offset f26, -48 + .cfi_offset f27, -40 + .cfi_offset f28, -32 + .cfi_offset f29, -24 + .cfi_offset f30, -16 + .cfi_offset f31, -8 + .cfi_offset v20, -352 + .cfi_offset v21, -336 + .cfi_offset v22, -320 + .cfi_offset v23, -304 + .cfi_offset v24, -288 + .cfi_offset v25, -272 + .cfi_offset v26, -256 + .cfi_offset v27, -240 + .cfi_offset v28, -224 + .cfi_offset v29, -208 + .cfi_offset v30, -192 + .cfi_offset v31, -176 + li 11, 48 + li 0, 8 + std 30, 312(1) + li 30, 12 + li 12, 4 + lfiwzx 0, 0, 5 + stxvd2x 52, 1, 11 + li 11, 64 + lfiwzx 2, 5, 0 + li 0, 20 + lfiwzx 3, 5, 30 + stxvd2x 53, 1, 11 + li 11, 80 + li 30, 24 + lfiwzx 4, 5, 0 + li 0, 28 + stxvd2x 54, 1, 11 + li 11, 96 + lfiwzx 1, 5, 12 + lfiwzx 6, 5, 30 + xxspltw 45, 0, 1 + cmpldi 4, 0 + std 22, 248(1) + stxvd2x 55, 1, 11 + li 11, 112 + lfiwzx 7, 5, 0 + xxspltw 40, 2, 1 + std 23, 256(1) + xxspltw 38, 3, 1 + xxspltw 50, 4, 1 + 
std 24, 264(1) + std 25, 272(1) + std 26, 280(1) + xxspltw 54, 7, 1 + std 27, 288(1) + std 28, 296(1) + std 29, 304(1) + stxvd2x 56, 1, 11 + li 11, 128 + stfd 23, 328(1) + stxvd2x 57, 1, 11 + li 11, 144 + stfd 24, 336(1) + stxvd2x 58, 1, 11 + li 11, 160 + stfd 25, 344(1) + stxvd2x 59, 1, 11 + li 11, 176 + xxspltw 59, 1, 1 + stxvd2x 60, 1, 11 + li 11, 192 + stfd 26, 352(1) + stxvd2x 61, 1, 11 + li 11, 208 + stfd 27, 360(1) + stxvd2x 62, 1, 11 + li 11, 224 + xxspltw 62, 6, 1 + stxvd2x 63, 1, 11 + li 11, 16 + stfd 28, 368(1) + lfiwzx 5, 5, 11 + ld 5, 432(1) + stfd 29, 376(1) + stfd 30, 384(1) + stfd 31, 392(1) + xxspltw 61, 5, 1 + beq 0, .LBB3_5 + addis 30, 2, .LCPI3_0@toc@ha + neg 7, 7 + xxleqv 34, 34, 34 + addis 28, 2, .LCPI3_2@toc@ha + addis 27, 2, .LCPI3_3@toc@ha + addis 26, 2, .LCPI3_4@toc@ha + addis 25, 2, .LCPI3_5@toc@ha + ld 29, 24(3) + addi 0, 30, .LCPI3_0@toc@l + mtfprwz 1, 7 + addis 7, 2, .LCPI3_1@toc@ha + ld 30, 16(3) + lxvd2x 0, 0, 0 + mtfprwz 2, 6 + rldicl 6, 6, 32, 32 + addi 0, 7, .LCPI3_1@toc@l + ld 7, 8(3) + vslw 2, 2, 2 + lvx 5, 0, 0 + addi 0, 28, .LCPI3_2@toc@l + addi 28, 27, .LCPI3_3@toc@l + addi 27, 26, .LCPI3_4@toc@l + addi 26, 25, .LCPI3_5@toc@l + or 25, 9, 8 + li 9, 0 + xxspltw 36, 2, 1 + xxswapd 35, 0 + xxspltw 0, 1, 1 + xxland 35, 0, 35 + mtfprwz 0, 6 + ld 6, 0(3) + addi 3, 3, -8 + vadduwm 4, 3, 4 + xxlor 35, 35, 34 + xxlxor 34, 36, 34 + xxlor 9, 36, 36 + vspltisw 4, 4 + vcmpgtsw 2, 3, 2 + xxspltw 35, 0, 1 + xxlor 10, 36, 36 + vsubuwm 2, 3, 2 + xxlor 11, 34, 34 + lvx 2, 0, 0 + li 0, 32 + xxlor 12, 34, 34 + lvx 2, 0, 28 + li 28, 48 + xxlor 13, 34, 34 + lvx 2, 0, 27 + li 27, 0 + xxlor 31, 34, 34 + lvx 2, 0, 26 + xxlor 30, 34, 34 +.LBB3_2: + mr 26, 27 + addi 27, 27, 1 + xxlor 28, 40, 40 + cmpld 27, 4 + sldi 26, 26, 6 + xxlor 24, 45, 45 + iseleq 24, 10, 9 + add 23, 6, 26 + add 22, 30, 26 + lxvd2x 0, 6, 26 + lxvd2x 1, 7, 26 + or 25, 24, 25 + add 24, 7, 26 + lxvd2x 2, 30, 26 + lxvd2x 3, 29, 26 + xxlor 29, 38, 38 + lxvd2x 4, 23, 11 + lxvd2x 6, 24, 
11 + clrlwi 25, 25, 24 + lxvd2x 7, 22, 11 + lxvd2x 8, 23, 0 + mtfprd 5, 25 + add 25, 29, 26 + xxswapd 34, 0 + lxvd2x 0, 25, 11 + xxswapd 36, 1 + xxswapd 33, 2 + lxvd2x 1, 24, 0 + lxvd2x 2, 22, 0 + xxswapd 39, 3 + xxswapd 32, 4 + lxvd2x 3, 25, 0 + lxvd2x 4, 23, 28 + xxswapd 49, 6 + xxswapd 51, 7 + lxvd2x 6, 24, 28 + xxswapd 58, 8 + lxvd2x 7, 22, 28 + lxvd2x 8, 25, 28 + xxswapd 60, 0 + mr 25, 3 + xxswapd 57, 1 + xxswapd 53, 2 + xxswapd 52, 3 + xxswapd 56, 4 + xxswapd 55, 6 + xxswapd 0, 5 + xxswapd 40, 7 + xxswapd 41, 8 + mtctr 12 +.LBB3_3: + ldu 24, 8(25) + add 24, 24, 26 + addi 24, 24, 256 + dcbt 0, 24 + bdnz .LBB3_3 + vmrgew 3, 4, 2 + vspltisw 31, 9 + mr 25, 8 + vmrglw 10, 4, 2 + vspltisw 14, 10 + vmrghw 6, 4, 2 + xxspltw 0, 0, 3 + vmrgew 4, 17, 0 + vmrglw 11, 17, 0 + vmrghw 16, 17, 0 + vmrgew 0, 25, 26 + vmrgew 13, 7, 1 + vmrglw 2, 7, 1 + vmrghw 7, 7, 1 + xxlor 25, 36, 36 + vmrgew 4, 28, 19 + xxlor 26, 32, 32 + vmrglw 0, 25, 26 + vmrglw 1, 28, 19 + xxmrgld 47, 34, 42 + xxlor 44, 28, 28 + vmrghw 25, 25, 26 + xxlor 23, 36, 36 + vmrghw 4, 28, 19 + vspltisw 19, -16 + xxlor 5, 32, 32 + vmrgew 0, 20, 21 + xxmrgld 34, 33, 43 + vmrglw 28, 20, 21 + vmrghw 21, 20, 21 + vmrglw 20, 23, 24 + vmrghw 26, 23, 24 + vmrglw 17, 9, 8 + xxlor 8, 32, 32 + vmrgew 0, 23, 24 + xxmrgld 56, 39, 38 + vmrgew 23, 9, 8 + xxlor 33, 24, 24 + xxlor 2, 34, 34 + vadduwm 11, 15, 1 + xxmrgld 33, 36, 48 + xxlor 6, 47, 47 + xxlor 27, 32, 32 + vmrghw 0, 9, 8 + vspltisw 9, 12 + vsubuwm 8, 31, 19 + xxmrgld 51, 23, 25 + vadduwm 31, 2, 12 + xxlor 34, 10, 10 + vadduwm 10, 14, 14 + vslw 15, 2, 2 + xxlor 34, 29, 29 + vadduwm 14, 24, 27 + xxlor 24, 48, 48 + vadduwm 16, 1, 2 + xxmrgld 34, 45, 35 + vadduwm 31, 31, 30 + xxmrghd 36, 36, 24 + vadduwm 11, 11, 29 + vadduwm 14, 14, 18 + vadduwm 13, 16, 22 + xxlxor 47, 63, 47 + xxlor 1, 9, 9 + xxlor 1, 11, 11 + xxlxor 48, 43, 9 + vadduwm 11, 11, 2 + xxlor 7, 34, 34 + xxmrghd 34, 39, 38 + xxlxor 39, 46, 11 + xxlor 1, 50, 50 + xxlxor 50, 45, 0 + vperm 15, 15, 15, 5 + vperm 
16, 16, 16, 5 + vperm 7, 7, 7, 5 + vperm 18, 18, 18, 5 + xxlor 4, 33, 33 + xxlor 33, 31, 31 + vadduwm 14, 14, 2 + xxlor 3, 34, 34 + xxlor 34, 12, 12 + xxlor 35, 13, 13 + vadduwm 6, 15, 1 + xxlor 33, 30, 30 + vadduwm 2, 16, 2 + vadduwm 3, 7, 3 + vadduwm 12, 18, 1 + xxlxor 59, 34, 61 + xxlxor 61, 35, 1 + xxlxor 33, 38, 62 + xxlxor 62, 44, 54 + vrlw 22, 27, 10 + vrlw 29, 29, 10 + vrlw 1, 1, 10 + vrlw 30, 30, 10 + vadduwm 31, 31, 19 + vadduwm 13, 13, 4 + vadduwm 11, 22, 11 + vadduwm 14, 29, 14 + vadduwm 31, 1, 31 + vadduwm 13, 30, 13 + vadduwm 9, 9, 9 + xxlor 1, 36, 36 + xxlxor 48, 43, 48 + xxlxor 36, 46, 39 + xxmrgld 39, 60, 5 + xxlxor 47, 63, 47 + xxlxor 50, 45, 50 + vrlw 16, 16, 9 + vrlw 28, 4, 9 + xxmrgld 36, 53, 57 + vrlw 15, 15, 9 + xxmrghd 57, 53, 57 + vrlw 18, 18, 9 + vadduwm 14, 14, 4 + xxlor 0, 36, 36 + xxmrgld 36, 49, 52 + vadduwm 2, 16, 2 + xxmrgld 49, 8, 26 + vadduwm 3, 28, 3 + vadduwm 6, 15, 6 + vadduwm 12, 18, 12 + xxlxor 54, 34, 54 + xxlxor 61, 35, 61 + xxlxor 33, 38, 33 + xxlxor 62, 44, 62 + vrlw 29, 29, 8 + vrlw 20, 1, 8 + xxmrgld 33, 55, 27 + vrlw 30, 30, 8 + vrlw 22, 22, 8 + vadduwm 11, 11, 7 + xxlor 5, 39, 39 + xxmrgld 39, 32, 58 + vadduwm 31, 31, 4 + vadduwm 11, 29, 11 + vadduwm 13, 13, 7 + vadduwm 14, 20, 14 + vadduwm 31, 30, 31 + vadduwm 13, 22, 13 + xxlor 28, 36, 36 + xxlxor 50, 43, 50 + xxlxor 48, 46, 48 + xxlxor 36, 63, 60 + xxlxor 47, 45, 47 + vperm 18, 18, 18, 5 + vperm 16, 16, 16, 5 + vperm 4, 4, 4, 5 + vperm 15, 15, 15, 5 + vadduwm 11, 11, 17 + vmr 28, 17 + xxmrghd 49, 32, 58 + vadduwm 6, 18, 6 + vadduwm 12, 16, 12 + vadduwm 21, 4, 2 + vadduwm 3, 15, 3 + xxlxor 34, 38, 61 + xxlxor 61, 44, 52 + xxlxor 62, 53, 62 + xxlxor 54, 35, 54 + vrlw 20, 2, 10 + vrlw 29, 29, 10 + vrlw 0, 30, 10 + vrlw 30, 22, 10 + vadduwm 14, 14, 25 + vadduwm 31, 31, 1 + vadduwm 13, 13, 17 + vadduwm 11, 20, 11 + vadduwm 14, 29, 14 + vadduwm 31, 0, 31 + vadduwm 13, 30, 13 + xxlxor 50, 43, 50 + xxlxor 48, 46, 48 + xxlxor 36, 63, 36 + xxlxor 47, 45, 47 + vrlw 18, 18, 9 + 
vrlw 16, 16, 9 + vrlw 4, 4, 9 + vrlw 15, 15, 9 + vadduwm 11, 11, 24 + xxlor 8, 56, 56 + vadduwm 6, 18, 6 + vadduwm 12, 16, 12 + vadduwm 24, 4, 21 + vadduwm 3, 15, 3 + xxlxor 55, 38, 52 + xxlxor 61, 44, 61 + xxlxor 62, 35, 62 + xxlxor 32, 56, 32 + vrlw 30, 30, 8 + vrlw 23, 23, 8 + vrlw 29, 29, 8 + vrlw 0, 0, 8 + xxlor 25, 51, 51 + vmr 26, 17 + xxlor 49, 3, 3 + xxlor 52, 1, 1 + xxlor 51, 2, 2 + vadduwm 14, 14, 17 + vadduwm 31, 31, 20 + vadduwm 13, 13, 19 + vadduwm 11, 30, 11 + vadduwm 14, 23, 14 + vadduwm 31, 29, 31 + vadduwm 13, 0, 13 + xxlxor 48, 43, 48 + xxlxor 36, 46, 36 + xxlxor 47, 63, 47 + xxlxor 50, 45, 50 + vperm 16, 16, 16, 5 + vperm 4, 4, 4, 5 + vperm 15, 15, 15, 5 + vperm 18, 18, 18, 5 + xxlor 29, 39, 39 + xxlor 59, 4, 4 + vadduwm 24, 16, 24 + vadduwm 3, 4, 3 + vadduwm 6, 15, 6 + vadduwm 12, 18, 12 + xxlxor 62, 56, 62 + xxlxor 55, 35, 55 + xxlxor 61, 38, 61 + xxlxor 32, 44, 32 + vrlw 30, 30, 10 + vrlw 23, 23, 10 + vrlw 29, 29, 10 + vrlw 0, 0, 10 + xxlor 53, 0, 0 + xxlor 39, 6, 6 + vadduwm 11, 11, 27 + vadduwm 14, 14, 21 + vadduwm 31, 31, 7 + vadduwm 13, 13, 1 + vadduwm 11, 30, 11 + vadduwm 14, 23, 14 + vadduwm 31, 29, 31 + vadduwm 13, 0, 13 + xxlxor 48, 43, 48 + xxlxor 36, 46, 36 + xxlxor 47, 63, 47 + xxlxor 50, 45, 50 + vrlw 16, 16, 9 + vrlw 4, 4, 9 + vrlw 15, 15, 9 + vrlw 18, 18, 9 + xxlor 34, 7, 7 + vadduwm 31, 31, 28 + vadduwm 24, 16, 24 + vadduwm 3, 4, 3 + vadduwm 6, 15, 6 + vadduwm 12, 18, 12 + xxlxor 62, 56, 62 + xxlxor 55, 35, 55 + xxlxor 61, 38, 61 + xxlxor 32, 44, 32 + vrlw 23, 23, 8 + vrlw 29, 29, 8 + vrlw 0, 0, 8 + vrlw 30, 30, 8 + vadduwm 11, 11, 2 + xxlor 34, 28, 28 + vadduwm 13, 13, 26 + vadduwm 14, 14, 2 + vadduwm 11, 23, 11 + vadduwm 14, 29, 14 + vadduwm 31, 0, 31 + vadduwm 13, 30, 13 + xxlxor 50, 43, 50 + xxlxor 48, 46, 48 + xxlxor 36, 63, 36 + xxlxor 47, 45, 47 + vperm 18, 18, 18, 5 + vperm 16, 16, 16, 5 + vperm 4, 4, 4, 5 + vperm 15, 15, 15, 5 + xxlor 2, 58, 58 + xxlor 39, 25, 25 + vadduwm 6, 18, 6 + vadduwm 12, 16, 12 + vadduwm 24, 4, 
24 + vadduwm 3, 15, 3 + xxlxor 55, 38, 55 + xxlxor 61, 44, 61 + xxlxor 32, 56, 32 + xxlxor 62, 35, 62 + vrlw 23, 23, 10 + vrlw 29, 29, 10 + vrlw 0, 0, 10 + vrlw 30, 30, 10 + xxlor 54, 29, 29 + xxlor 58, 5, 5 + vadduwm 11, 11, 25 + vadduwm 14, 14, 7 + vadduwm 31, 31, 22 + vadduwm 13, 13, 26 + vadduwm 11, 23, 11 + vadduwm 14, 29, 14 + vadduwm 31, 0, 31 + vadduwm 13, 30, 13 + xxlxor 50, 43, 50 + xxlxor 48, 46, 48 + xxlxor 36, 63, 36 + xxlxor 47, 45, 47 + vrlw 18, 18, 9 + vrlw 16, 16, 9 + vrlw 4, 4, 9 + vrlw 15, 15, 9 + vadduwm 11, 11, 17 + vadduwm 14, 14, 21 + vadduwm 6, 18, 6 + vadduwm 12, 16, 12 + vadduwm 24, 4, 24 + vadduwm 3, 15, 3 + xxlxor 55, 38, 55 + xxlxor 61, 44, 61 + xxlxor 62, 35, 62 + xxlxor 32, 56, 32 + vrlw 30, 30, 8 + vrlw 23, 23, 8 + vrlw 29, 29, 8 + vrlw 0, 0, 8 + vadduwm 31, 31, 1 + vadduwm 13, 13, 20 + vadduwm 11, 30, 11 + vadduwm 14, 23, 14 + vadduwm 31, 29, 31 + vadduwm 13, 0, 13 + xxlxor 48, 43, 48 + xxlxor 36, 46, 36 + xxlxor 47, 63, 47 + xxlxor 50, 45, 50 + vperm 16, 16, 16, 5 + vperm 4, 4, 4, 5 + vperm 15, 15, 15, 5 + vperm 18, 18, 18, 5 + xxlor 0, 33, 33 + xxlor 33, 8, 8 + vadduwm 24, 16, 24 + vadduwm 3, 4, 3 + vadduwm 6, 15, 6 + vadduwm 12, 18, 12 + xxlxor 62, 56, 62 + xxlxor 55, 35, 55 + xxlxor 61, 38, 61 + xxlxor 32, 44, 32 + vrlw 30, 30, 10 + vrlw 23, 23, 10 + vrlw 29, 29, 10 + vrlw 0, 0, 10 + vadduwm 11, 11, 19 + vadduwm 14, 14, 2 + vadduwm 31, 31, 1 + vadduwm 13, 13, 22 + vadduwm 11, 30, 11 + vadduwm 14, 23, 14 + vadduwm 31, 29, 31 + vadduwm 13, 0, 13 + xxlxor 48, 43, 48 + xxlxor 36, 46, 36 + xxlxor 47, 63, 47 + xxlxor 50, 45, 50 + vrlw 16, 16, 9 + vrlw 4, 4, 9 + vrlw 15, 15, 9 + vrlw 18, 18, 9 + vadduwm 11, 11, 27 + vadduwm 14, 14, 28 + vadduwm 24, 16, 24 + vadduwm 3, 4, 3 + vadduwm 6, 15, 6 + vadduwm 12, 18, 12 + xxlxor 62, 56, 62 + xxlxor 55, 35, 55 + xxlxor 61, 38, 61 + xxlxor 32, 44, 32 + vrlw 23, 23, 8 + vrlw 29, 29, 8 + vrlw 0, 0, 8 + vrlw 30, 30, 8 + vadduwm 31, 31, 25 + vadduwm 13, 13, 26 + vadduwm 11, 23, 11 + vadduwm 14, 29, 
14 + vadduwm 31, 0, 31 + vadduwm 13, 30, 13 + xxlxor 50, 43, 50 + xxlxor 48, 46, 48 + xxlxor 36, 63, 36 + xxlxor 47, 45, 47 + vperm 18, 18, 18, 5 + vperm 16, 16, 16, 5 + vperm 4, 4, 4, 5 + vperm 15, 15, 15, 5 + xxlor 3, 7, 7 + vadduwm 11, 11, 7 + vadduwm 6, 18, 6 + vadduwm 12, 16, 12 + vadduwm 24, 4, 24 + vadduwm 3, 15, 3 + xxlxor 55, 38, 55 + xxlxor 61, 44, 61 + xxlxor 32, 56, 32 + xxlxor 62, 35, 62 + vrlw 23, 23, 10 + vrlw 29, 29, 10 + vrlw 0, 0, 10 + vrlw 30, 30, 10 + xxlor 33, 6, 6 + xxlor 58, 2, 2 + xxlor 39, 3, 3 + vadduwm 14, 14, 1 + vadduwm 31, 31, 26 + vadduwm 13, 13, 7 + vadduwm 11, 23, 11 + vadduwm 14, 29, 14 + vadduwm 31, 0, 31 + vadduwm 13, 30, 13 + xxlxor 50, 43, 50 + xxlxor 48, 46, 48 + xxlxor 36, 63, 36 + xxlxor 47, 45, 47 + vrlw 18, 18, 9 + vrlw 16, 16, 9 + vrlw 4, 4, 9 + vrlw 15, 15, 9 + xxlor 52, 0, 0 + vadduwm 11, 11, 21 + vadduwm 6, 18, 6 + vadduwm 12, 16, 12 + vadduwm 24, 4, 24 + vadduwm 3, 15, 3 + xxlxor 55, 38, 55 + xxlxor 61, 44, 61 + xxlxor 62, 35, 62 + xxlxor 32, 56, 32 + vrlw 30, 30, 8 + vrlw 23, 23, 8 + vrlw 29, 29, 8 + vrlw 0, 0, 8 + vadduwm 14, 14, 2 + vadduwm 31, 31, 22 + vadduwm 13, 13, 20 + vadduwm 11, 30, 11 + vadduwm 14, 23, 14 + vadduwm 31, 29, 31 + vadduwm 13, 0, 13 + xxlxor 48, 43, 48 + xxlxor 36, 46, 36 + xxlxor 47, 63, 47 + xxlxor 50, 45, 50 + vperm 16, 16, 16, 5 + vperm 4, 4, 4, 5 + vperm 15, 15, 15, 5 + vperm 18, 18, 18, 5 + xxlor 7, 49, 49 + vmr 17, 2 + vadduwm 24, 16, 24 + vadduwm 3, 4, 3 + vadduwm 6, 15, 6 + vadduwm 12, 18, 12 + xxlxor 62, 56, 62 + xxlxor 55, 35, 55 + xxlxor 61, 38, 61 + xxlxor 32, 44, 32 + vrlw 30, 30, 10 + vrlw 23, 23, 10 + vrlw 29, 29, 10 + vrlw 0, 0, 10 + xxlor 54, 1, 1 + xxlor 34, 7, 7 + vadduwm 11, 11, 22 + vadduwm 14, 14, 28 + vadduwm 31, 31, 2 + vadduwm 13, 13, 26 + vadduwm 11, 30, 11 + vadduwm 14, 23, 14 + vadduwm 31, 29, 31 + vadduwm 13, 0, 13 + xxlxor 48, 43, 48 + xxlxor 36, 46, 36 + xxlxor 47, 63, 47 + xxlxor 50, 45, 50 + vrlw 16, 16, 9 + vrlw 4, 4, 9 + vrlw 15, 15, 9 + vrlw 18, 18, 9 + 
xxlor 59, 25, 25 + vadduwm 11, 11, 19 + vadduwm 24, 16, 24 + vadduwm 3, 4, 3 + vadduwm 6, 15, 6 + vadduwm 12, 18, 12 + xxlxor 62, 56, 62 + xxlxor 55, 35, 55 + xxlxor 61, 38, 61 + xxlxor 32, 44, 32 + vrlw 23, 23, 8 + vrlw 29, 29, 8 + vrlw 0, 0, 8 + vrlw 30, 30, 8 + vadduwm 14, 14, 25 + vadduwm 31, 31, 27 + vadduwm 13, 13, 7 + vadduwm 11, 23, 11 + vadduwm 14, 29, 14 + vadduwm 31, 0, 31 + vadduwm 13, 30, 13 + xxlxor 50, 43, 50 + xxlxor 48, 46, 48 + xxlxor 36, 63, 36 + xxlxor 47, 45, 47 + vperm 18, 18, 18, 5 + vperm 16, 16, 16, 5 + vperm 4, 4, 4, 5 + vperm 15, 15, 15, 5 + vmr 2, 19 + xxlor 0, 7, 7 + vadduwm 6, 18, 6 + vadduwm 12, 16, 12 + vadduwm 24, 4, 24 + vadduwm 3, 15, 3 + xxlxor 55, 38, 55 + xxlxor 61, 44, 61 + xxlxor 32, 56, 32 + xxlxor 62, 35, 62 + vrlw 23, 23, 10 + vrlw 29, 29, 10 + vrlw 0, 0, 10 + vrlw 30, 30, 10 + xxlor 1, 51, 51 + xxlor 7, 39, 39 + xxlor 51, 8, 8 + xxlor 39, 5, 5 + xxlor 34, 4, 4 + vadduwm 11, 11, 1 + vadduwm 14, 14, 19 + vadduwm 31, 31, 7 + vadduwm 13, 13, 2 + vadduwm 11, 23, 11 + vadduwm 14, 29, 14 + vadduwm 31, 0, 31 + vadduwm 13, 30, 13 + xxlxor 50, 43, 50 + xxlxor 48, 46, 48 + xxlxor 36, 63, 36 + xxlxor 47, 45, 47 + vrlw 18, 18, 9 + vrlw 16, 16, 9 + vrlw 4, 4, 9 + vrlw 15, 15, 9 + xxlor 2, 53, 53 + vmr 21, 28 + vadduwm 6, 18, 6 + vadduwm 12, 16, 12 + vadduwm 24, 4, 24 + vadduwm 3, 15, 3 + xxlxor 55, 38, 55 + xxlxor 61, 44, 61 + xxlxor 62, 35, 62 + xxlxor 32, 56, 32 + vrlw 30, 30, 8 + vrlw 23, 23, 8 + vrlw 29, 29, 8 + vrlw 0, 0, 8 + xxlor 53, 29, 29 + vadduwm 11, 11, 17 + vadduwm 14, 14, 28 + vadduwm 31, 31, 26 + vadduwm 13, 13, 21 + vadduwm 11, 30, 11 + vadduwm 14, 23, 14 + vadduwm 31, 29, 31 + vadduwm 13, 0, 13 + xxlxor 48, 43, 48 + xxlxor 36, 46, 36 + xxlxor 47, 63, 47 + xxlxor 50, 45, 50 + vperm 16, 16, 16, 5 + vperm 4, 4, 4, 5 + vperm 15, 15, 15, 5 + vperm 18, 18, 18, 5 + vadduwm 11, 11, 20 + xxlor 5, 52, 52 + vadduwm 24, 16, 24 + vadduwm 3, 4, 3 + vadduwm 6, 15, 6 + vadduwm 12, 18, 12 + xxlxor 62, 56, 62 + xxlxor 55, 35, 55 + 
xxlxor 61, 38, 61 + xxlxor 32, 44, 32 + vrlw 30, 30, 10 + vrlw 23, 23, 10 + vrlw 29, 29, 10 + vrlw 0, 0, 10 + xxlor 52, 2, 2 + vadduwm 14, 14, 25 + vadduwm 31, 31, 20 + vadduwm 13, 13, 7 + vadduwm 11, 30, 11 + vadduwm 14, 23, 14 + vadduwm 31, 29, 31 + vadduwm 13, 0, 13 + xxlxor 48, 43, 48 + xxlxor 36, 46, 36 + xxlxor 47, 63, 47 + xxlxor 50, 45, 50 + vrlw 16, 16, 9 + vrlw 4, 4, 9 + vrlw 15, 15, 9 + vrlw 18, 18, 9 + vadduwm 11, 11, 22 + vadduwm 14, 14, 27 + vadduwm 24, 16, 24 + vadduwm 3, 4, 3 + vadduwm 6, 15, 6 + vadduwm 12, 18, 12 + xxlxor 62, 56, 62 + xxlxor 55, 35, 55 + xxlxor 61, 38, 61 + xxlxor 32, 44, 32 + vrlw 23, 23, 8 + vrlw 29, 29, 8 + vrlw 0, 0, 8 + vrlw 30, 30, 8 + vadduwm 31, 31, 1 + vadduwm 13, 13, 2 + vadduwm 11, 23, 11 + vadduwm 14, 29, 14 + vadduwm 31, 0, 31 + vadduwm 13, 30, 13 + xxlxor 50, 43, 50 + xxlxor 48, 46, 48 + xxlxor 36, 63, 36 + xxlxor 47, 45, 47 + vperm 18, 18, 18, 5 + vperm 16, 16, 16, 5 + vperm 4, 4, 4, 5 + vperm 15, 15, 15, 5 + xxlor 3, 29, 29 + xxlor 4, 49, 49 + vadduwm 6, 18, 6 + vadduwm 12, 16, 12 + vadduwm 24, 4, 24 + vadduwm 3, 15, 3 + xxlxor 55, 38, 55 + xxlxor 61, 44, 61 + xxlxor 32, 56, 32 + xxlxor 62, 35, 62 + vrlw 23, 23, 10 + vrlw 29, 29, 10 + vrlw 0, 0, 10 + vrlw 30, 30, 10 + vmr 17, 28 + xxlor 2, 54, 54 + xxlor 3, 34, 34 + xxlor 34, 8, 8 + xxlor 51, 0, 0 + xxlor 60, 7, 7 + xxlor 54, 1, 1 + vadduwm 11, 11, 2 + vadduwm 14, 14, 19 + vadduwm 31, 31, 28 + vadduwm 13, 13, 22 + vadduwm 11, 23, 11 + vadduwm 14, 29, 14 + vadduwm 31, 0, 31 + vadduwm 13, 30, 13 + xxlxor 50, 43, 50 + xxlxor 48, 46, 48 + xxlxor 36, 63, 36 + xxlxor 47, 45, 47 + vrlw 18, 18, 9 + vrlw 16, 16, 9 + vrlw 4, 4, 9 + vrlw 15, 15, 9 + vadduwm 11, 11, 17 + vadduwm 14, 14, 25 + vadduwm 6, 18, 6 + vadduwm 12, 16, 12 + vadduwm 24, 4, 24 + vadduwm 3, 15, 3 + xxlxor 55, 38, 55 + xxlxor 61, 44, 61 + xxlxor 62, 35, 62 + xxlxor 32, 56, 32 + vrlw 30, 30, 8 + vrlw 23, 23, 8 + vrlw 29, 29, 8 + vrlw 0, 0, 8 + vadduwm 31, 31, 7 + vadduwm 13, 13, 26 + vadduwm 11, 30, 11 + 
vadduwm 14, 23, 14 + vadduwm 31, 29, 31 + vadduwm 13, 0, 13 + xxlxor 48, 43, 48 + xxlxor 36, 46, 36 + xxlxor 47, 63, 47 + xxlxor 50, 45, 50 + vperm 16, 16, 16, 5 + vperm 4, 4, 4, 5 + vperm 15, 15, 15, 5 + vperm 18, 18, 18, 5 + xxlor 6, 39, 39 + xxlor 39, 4, 4 + vadduwm 24, 16, 24 + vadduwm 3, 4, 3 + vadduwm 6, 15, 6 + vadduwm 12, 18, 12 + xxlxor 62, 56, 62 + xxlxor 55, 35, 55 + xxlxor 61, 38, 61 + xxlxor 32, 44, 32 + vrlw 30, 30, 10 + vrlw 23, 23, 10 + vrlw 29, 29, 10 + vrlw 0, 0, 10 + vadduwm 11, 11, 21 + vadduwm 14, 14, 27 + vadduwm 31, 31, 7 + vadduwm 13, 13, 28 + vadduwm 11, 30, 11 + vadduwm 14, 23, 14 + vadduwm 31, 29, 31 + vadduwm 13, 0, 13 + xxlxor 48, 43, 48 + xxlxor 36, 46, 36 + xxlxor 47, 63, 47 + xxlxor 50, 45, 50 + vrlw 16, 16, 9 + vrlw 4, 4, 9 + vrlw 15, 15, 9 + vrlw 18, 18, 9 + xxlor 0, 49, 49 + xxlor 49, 5, 5 + vadduwm 24, 16, 24 + vadduwm 3, 4, 3 + vadduwm 6, 15, 6 + vadduwm 12, 18, 12 + xxlxor 62, 56, 62 + xxlxor 55, 35, 55 + xxlxor 61, 38, 61 + xxlxor 32, 44, 32 + vrlw 23, 23, 8 + vrlw 29, 29, 8 + vrlw 0, 0, 8 + vrlw 30, 30, 8 + vadduwm 11, 11, 17 + vadduwm 14, 14, 1 + vadduwm 31, 31, 2 + vadduwm 13, 13, 22 + vadduwm 11, 23, 11 + vadduwm 14, 29, 14 + vadduwm 31, 0, 31 + vadduwm 13, 30, 13 + xxlxor 50, 43, 50 + xxlxor 48, 46, 48 + xxlxor 36, 63, 36 + xxlxor 47, 45, 47 + vperm 18, 18, 18, 5 + vperm 16, 16, 16, 5 + vperm 4, 4, 4, 5 + vperm 15, 15, 15, 5 + xxlor 34, 3, 3 + xxlor 49, 2, 2 + vadduwm 6, 18, 6 + vadduwm 12, 16, 12 + vadduwm 24, 4, 24 + vadduwm 3, 15, 3 + xxlxor 55, 38, 55 + xxlxor 61, 44, 61 + xxlxor 32, 56, 32 + xxlxor 62, 35, 62 + vrlw 23, 23, 10 + vrlw 29, 29, 10 + vrlw 0, 0, 10 + vrlw 30, 30, 10 + vadduwm 11, 11, 19 + vadduwm 14, 14, 20 + vadduwm 31, 31, 2 + vadduwm 13, 13, 17 + vadduwm 11, 23, 11 + vadduwm 14, 29, 14 + vadduwm 31, 0, 31 + vadduwm 13, 30, 13 + xxlxor 50, 43, 50 + xxlxor 48, 46, 48 + xxlxor 36, 63, 36 + xxlxor 47, 45, 47 + vrlw 18, 18, 9 + vrlw 16, 16, 9 + vrlw 4, 4, 9 + vrlw 15, 15, 9 + vadduwm 14, 14, 27 + vadduwm 
11, 11, 25 + vadduwm 6, 18, 6 + vadduwm 12, 16, 12 + vadduwm 27, 4, 24 + vadduwm 3, 15, 3 + xxlxor 57, 38, 55 + xxlxor 61, 44, 61 + xxlxor 62, 35, 62 + xxlxor 32, 59, 32 + xxlor 39, 7, 7 + vrlw 30, 30, 8 + vrlw 25, 25, 8 + vrlw 29, 29, 8 + vrlw 0, 0, 8 + xxlor 1, 58, 58 + vmr 26, 19 + vadduwm 19, 31, 7 + xxlor 39, 6, 6 + vadduwm 11, 30, 11 + vadduwm 7, 13, 7 + vadduwm 13, 25, 14 + vadduwm 14, 29, 19 + vadduwm 7, 0, 7 + xxlxor 48, 43, 48 + xxlxor 36, 45, 36 + xxlxor 47, 46, 47 + xxlxor 50, 39, 50 + vperm 16, 16, 16, 5 + vperm 4, 4, 4, 5 + vperm 15, 15, 15, 5 + vperm 18, 18, 18, 5 + xxlor 51, 1, 1 + vadduwm 13, 13, 1 + vadduwm 11, 11, 19 + vadduwm 19, 16, 27 + vadduwm 3, 4, 3 + vadduwm 6, 15, 6 + vadduwm 12, 18, 12 + xxlxor 63, 51, 62 + xxlxor 62, 35, 57 + xxlxor 61, 38, 61 + xxlxor 32, 44, 32 + vrlw 31, 31, 10 + vrlw 30, 30, 10 + vrlw 29, 29, 10 + vrlw 0, 0, 10 + xxlor 33, 0, 0 + vadduwm 7, 7, 2 + vadduwm 14, 14, 1 + vadduwm 11, 31, 11 + vadduwm 13, 30, 13 + vadduwm 14, 29, 14 + vadduwm 7, 0, 7 + xxlxor 48, 43, 48 + xxlxor 36, 45, 36 + xxlxor 47, 46, 47 + xxlxor 50, 39, 50 + vrlw 16, 16, 9 + vrlw 4, 4, 9 + vrlw 15, 15, 9 + vrlw 18, 18, 9 + xxlor 60, 8, 8 + vadduwm 1, 11, 21 + vadduwm 11, 13, 28 + vadduwm 13, 16, 19 + vadduwm 3, 4, 3 + vadduwm 6, 15, 6 + vadduwm 12, 18, 12 + xxlxor 51, 45, 63 + xxlxor 63, 35, 62 + xxlxor 62, 38, 61 + xxlxor 32, 44, 32 + vrlw 31, 31, 8 + vrlw 30, 30, 8 + vrlw 0, 0, 8 + vrlw 19, 19, 8 + vadduwm 14, 14, 26 + vadduwm 7, 7, 17 + vadduwm 1, 31, 1 + vadduwm 11, 30, 11 + vadduwm 14, 0, 14 + vadduwm 7, 19, 7 + xxlxor 50, 33, 50 + xxlxor 48, 43, 48 + xxlxor 36, 46, 36 + xxlxor 47, 39, 47 + vperm 18, 18, 18, 5 + vperm 16, 16, 16, 5 + vperm 4, 4, 4, 5 + vperm 15, 15, 15, 5 + xxlor 34, 4, 4 + vadduwm 14, 14, 22 + vadduwm 6, 18, 6 + vadduwm 12, 16, 12 + vadduwm 13, 4, 13 + vadduwm 3, 15, 3 + xxlxor 49, 38, 63 + xxlxor 63, 44, 62 + xxlxor 32, 45, 32 + xxlxor 51, 35, 51 + vrlw 17, 17, 10 + vrlw 31, 31, 10 + vrlw 0, 0, 10 + vrlw 10, 19, 10 + vadduwm 
11, 11, 2 + xxlor 34, 5, 5 + vadduwm 1, 1, 20 + vadduwm 2, 7, 2 + vadduwm 7, 31, 11 + vadduwm 11, 0, 14 + vadduwm 2, 10, 2 + vadduwm 1, 17, 1 + xxlxor 36, 43, 36 + xxlxor 46, 34, 47 + vrlw 4, 4, 9 + vrlw 14, 14, 9 + xxlxor 47, 33, 50 + xxlxor 48, 39, 48 + vrlw 15, 15, 9 + vrlw 9, 16, 9 + vadduwm 13, 4, 13 + vadduwm 3, 14, 3 + xxlxor 32, 45, 32 + xxlxor 45, 45, 33 + xxlxor 33, 35, 42 + xxlxor 59, 35, 39 + vadduwm 3, 15, 6 + vadduwm 6, 9, 12 + xxlxor 39, 35, 49 + xxlxor 42, 38, 63 + vrlw 1, 1, 8 + vrlw 7, 7, 8 + vrlw 10, 10, 8 + vrlw 0, 0, 8 + xxlxor 40, 35, 43 + xxlxor 38, 38, 34 + xxlxor 61, 33, 41 + xxlxor 50, 39, 36 + xxlxor 62, 42, 46 + xxlxor 54, 32, 47 + bne 0, .LBB3_2 +.LBB3_5: + vmrglw 2, 27, 13 + li 3, 32 + li 4, 48 + vmrglw 4, 6, 8 + vmrglw 0, 18, 29 + vmrglw 1, 22, 30 + vmrghw 3, 27, 13 + vmrghw 5, 6, 8 + vmrghw 6, 18, 29 + vmrghw 7, 22, 30 + xxmrgld 40, 36, 34 + xxmrghd 34, 36, 34 + xxmrgld 41, 33, 32 + xxswapd 0, 40 + xxmrgld 36, 37, 35 + xxmrghd 35, 37, 35 + xxmrghd 37, 33, 32 + xxswapd 1, 41 + xxmrgld 32, 39, 38 + xxmrghd 33, 39, 38 + xxswapd 2, 34 + xxswapd 4, 36 + xxswapd 3, 37 + stxvd2x 0, 0, 5 + xxswapd 5, 32 + stxvd2x 1, 5, 11 + xxswapd 0, 35 + xxswapd 1, 33 + stxvd2x 2, 5, 3 + li 3, 64 + stxvd2x 3, 5, 4 + li 4, 80 + stxvd2x 4, 5, 3 + li 3, 96 + stxvd2x 5, 5, 4 + li 4, 112 + stxvd2x 0, 5, 3 + stxvd2x 1, 5, 4 + li 3, 224 + lxvd2x 63, 1, 3 + li 3, 208 + lfd 31, 392(1) + ld 30, 312(1) + ld 29, 304(1) + lxvd2x 62, 1, 3 + li 3, 192 + lfd 30, 384(1) + ld 28, 296(1) + ld 27, 288(1) + lxvd2x 61, 1, 3 + li 3, 176 + lfd 29, 376(1) + ld 26, 280(1) + ld 25, 272(1) + lxvd2x 60, 1, 3 + li 3, 160 + lfd 28, 368(1) + ld 24, 264(1) + ld 23, 256(1) + lxvd2x 59, 1, 3 + li 3, 144 + lfd 27, 360(1) + ld 22, 248(1) + lxvd2x 58, 1, 3 + li 3, 128 + lfd 26, 352(1) + lxvd2x 57, 1, 3 + li 3, 112 + lfd 25, 344(1) + lxvd2x 56, 1, 3 + li 3, 96 + lfd 24, 336(1) + lxvd2x 55, 1, 3 + li 3, 80 + lfd 23, 328(1) + lxvd2x 54, 1, 3 + li 3, 64 + lxvd2x 53, 1, 3 + li 3, 48 + lxvd2x 52, 1, 
3 + addi 1, 1, 400 + blr + .long 0 + .quad 0 +.Lfunc_end3: + .size blake3_hash4_sse2, .Lfunc_end3-.Lfunc_begin3 + .cfi_endproc + .section ".note.GNU-stack","",@progbits +#endif diff --git a/sys/contrib/openzfs/module/icp/asm-ppc64/blake3/b3_ppc64le_sse41.S b/sys/contrib/openzfs/module/icp/asm-ppc64/blake3/b3_ppc64le_sse41.S new file mode 100644 index 000000000000..315561d4497a --- /dev/null +++ b/sys/contrib/openzfs/module/icp/asm-ppc64/blake3/b3_ppc64le_sse41.S @@ -0,0 +1,3064 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3 + * Copyright (c) 2019-2022 Samuel Neves + * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de> + * + * This is converted assembly: SSE4.1 -> POWER8 PPC64 Little Endian + * Used tools: SIMDe https://github.com/simd-everywhere/simde + */ + +#if (defined(__PPC64__) && defined(__LITTLE_ENDIAN__)) + .text + .abiversion 2 + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4 +.LCPI0_0: + .byte 31 + .byte 14 + .byte 13 + .byte 12 + .byte 30 + .byte 10 + .byte 9 + .byte 8 + .byte 29 + .byte 6 + .byte 5 + .byte 4 + .byte 28 + .byte 2 + .byte 1 + .byte 0 +.LCPI0_1: + .byte 2 + .byte 3 + .byte 0 + .byte 1 + .byte 6 + .byte 7 + .byte 4 + .byte 5 + .byte 10 + .byte 11 + .byte 8 + .byte 9 + .byte 14 + .byte 15 + .byte 12 + .byte 13 +.LCPI0_2: + .byte 29 + .byte 28 + .byte 31 + .byte 30 + .byte 25 + .byte 24 + .byte 27 + .byte 26 + .byte 21 + .byte 20 + .byte 23 + .byte 22 + .byte 17 + .byte 16 + .byte 19 + .byte 18 +.LCPI0_3: + .long 1779033703 + .long 3144134277 + .long 1013904242 + .long 2773480762 +.LCPI0_4: + .byte 27 + .byte 26 + .byte 25 + .byte 24 + .byte 19 + .byte 18 + .byte 17 + .byte 16 + .byte 11 + .byte 10 + .byte 9 + .byte 8 + .byte 3 + .byte 2 + .byte 1 + .byte 0 +.LCPI0_5: + .byte 1 + .byte 2 + .byte 3 + .byte 0 + .byte 5 + .byte 6 + .byte 7 + .byte 4 + .byte 9 + .byte 10 + .byte 11 + .byte 8 + .byte 13 + .byte 14 + .byte 15 + .byte 12 +.LCPI0_6: + .byte 30 + .byte 29 + .byte 28 + .byte 31 + .byte 26 + .byte 25 + .byte 24 + .byte 27 + .byte 22 + .byte 21 + .byte 20 + .byte 23 + .byte 18 + .byte 17 + .byte 16 + .byte 19 +.LCPI0_7: + .byte 19 + .byte 18 + .byte 17 + .byte 16 + .byte 11 + .byte 10 + .byte 9 + .byte 8 + .byte 3 + .byte 2 + .byte 1 + 
.byte 0 + .byte 27 + .byte 26 + .byte 25 + .byte 24 +.LCPI0_8: + .byte 23 + .byte 22 + .byte 21 + .byte 20 + .byte 27 + .byte 26 + .byte 25 + .byte 24 + .byte 11 + .byte 10 + .byte 9 + .byte 8 + .byte 3 + .byte 2 + .byte 1 + .byte 0 +.LCPI0_9: + .byte 31 + .byte 31 + .byte 31 + .byte 31 + .byte 23 + .byte 22 + .byte 21 + .byte 20 + .byte 31 + .byte 31 + .byte 31 + .byte 31 + .byte 3 + .byte 2 + .byte 1 + .byte 0 +.LCPI0_10: + .byte 19 + .byte 18 + .byte 17 + .byte 16 + .byte 31 + .byte 31 + .byte 31 + .byte 31 + .byte 31 + .byte 30 + .byte 29 + .byte 28 + .byte 31 + .byte 31 + .byte 31 + .byte 31 +.LCPI0_11: + .byte 31 + .byte 30 + .byte 29 + .byte 28 + .byte 11 + .byte 10 + .byte 9 + .byte 8 + .byte 23 + .byte 22 + .byte 21 + .byte 20 + .byte 3 + .byte 2 + .byte 1 + .byte 0 +.LCPI0_12: + .byte 31 + .byte 30 + .byte 29 + .byte 28 + .byte 23 + .byte 22 + .byte 21 + .byte 20 + .byte 3 + .byte 2 + .byte 1 + .byte 0 + .byte 27 + .byte 26 + .byte 25 + .byte 24 +.LCPI0_13: + .byte 27 + .byte 26 + .byte 25 + .byte 24 + .byte 11 + .byte 10 + .byte 9 + .byte 8 + .byte 15 + .byte 14 + .byte 13 + .byte 12 + .byte 31 + .byte 30 + .byte 29 + .byte 28 +.LCPI0_14: + .byte 23 + .byte 22 + .byte 21 + .byte 20 + .byte 23 + .byte 22 + .byte 21 + .byte 20 + .byte 3 + .byte 2 + .byte 1 + .byte 0 + .byte 3 + .byte 2 + .byte 1 + .byte 0 + .text + .globl zfs_blake3_compress_in_place_sse41 + .p2align 2 + .type zfs_blake3_compress_in_place_sse41,@function +zfs_blake3_compress_in_place_sse41: +.Lfunc_begin0: + .cfi_startproc +.Lfunc_gep0: + addis 2, 12, .TOC.-.Lfunc_gep0@ha + addi 2, 2, .TOC.-.Lfunc_gep0@l +.Lfunc_lep0: + .localentry zfs_blake3_compress_in_place_sse41, .Lfunc_lep0-.Lfunc_gep0 + li 8, -64 + mtvsrd 34, 5 + li 5, 16 + lfdx 0, 0, 4 + vspltisw 13, -16 + stxvd2x 60, 1, 8 + li 8, -48 + mtvsrd 35, 7 + lfd 2, 16(4) + lfd 3, 24(4) + addis 7, 2, .LCPI0_0@toc@ha + stxvd2x 61, 1, 8 + li 8, -32 + mtvsrwz 36, 6 + rldicl 6, 6, 32, 32 + stxvd2x 62, 1, 8 + li 8, -16 + vmrghb 2, 3, 2 + stxvd2x 
63, 1, 8 + mtvsrwz 35, 6 + addi 6, 7, .LCPI0_0@toc@l + addis 7, 2, .LCPI0_2@toc@ha + lfd 1, 8(4) + xxmrghd 32, 3, 2 + lvx 6, 0, 6 + xxlxor 33, 33, 33 + addis 6, 2, .LCPI0_1@toc@ha + addi 7, 7, .LCPI0_2@toc@l + vmrghw 3, 3, 4 + addi 6, 6, .LCPI0_1@toc@l + vspltisw 14, 9 + xxmrghd 37, 1, 0 + lxvd2x 0, 0, 3 + lxvd2x 1, 3, 5 + vperm 2, 1, 2, 6 + vpkudum 9, 0, 5 + xxswapd 36, 0 + xxswapd 38, 1 + xxmrgld 34, 34, 35 + lvx 3, 0, 7 + addis 7, 2, .LCPI0_4@toc@ha + addi 7, 7, .LCPI0_4@toc@l + vadduwm 4, 9, 4 + lvx 11, 0, 7 + addis 7, 2, .LCPI0_6@toc@ha + addi 7, 7, .LCPI0_6@toc@l + vadduwm 7, 4, 6 + lvx 4, 0, 6 + addis 6, 2, .LCPI0_3@toc@ha + addi 6, 6, .LCPI0_3@toc@l + vperm 11, 0, 5, 11 + lvx 0, 0, 7 + li 7, 48 + xxlxor 40, 39, 34 + lvx 10, 0, 6 + addis 6, 2, .LCPI0_5@toc@ha + lxvd2x 1, 4, 7 + vcmpgtsb 2, 1, 4 + addi 6, 6, .LCPI0_5@toc@l + vperm 4, 8, 8, 3 + vspltisw 8, 10 + xxlandc 44, 36, 34 + vadduwm 4, 8, 8 + vadduwm 8, 12, 10 + xxlxor 37, 40, 38 + vrlw 6, 5, 4 + vadduwm 5, 7, 11 + vadduwm 7, 6, 5 + lvx 5, 0, 6 + li 6, 32 + lxvd2x 0, 4, 6 + addis 4, 2, .LCPI0_7@toc@ha + addis 6, 2, .LCPI0_9@toc@ha + xxlxor 42, 39, 44 + xxswapd 44, 1 + addi 4, 4, .LCPI0_7@toc@l + addi 6, 6, .LCPI0_9@toc@l + vcmpgtsb 5, 1, 5 + vperm 1, 10, 10, 0 + xxswapd 42, 0 + vpkudum 16, 12, 10 + xxlandc 47, 33, 37 + vsubuwm 1, 14, 13 + lvx 14, 0, 4 + addis 4, 2, .LCPI0_8@toc@ha + vadduwm 8, 15, 8 + xxswapd 45, 47 + addi 4, 4, .LCPI0_8@toc@l + vadduwm 7, 7, 16 + xxsldwi 48, 48, 48, 1 + xxlxor 38, 40, 38 + xxsldwi 40, 40, 40, 3 + xxsldwi 39, 39, 39, 1 + vperm 14, 10, 12, 14 + vrlw 6, 6, 1 + vadduwm 7, 6, 7 + xxlxor 45, 39, 45 + vperm 13, 13, 13, 3 + xxlandc 45, 45, 34 + vadduwm 8, 13, 8 + xxlxor 38, 40, 38 + vrlw 10, 6, 4 + vadduwm 6, 7, 14 + vadduwm 7, 10, 6 + xxlxor 38, 39, 45 + vperm 12, 6, 6, 0 + lvx 6, 0, 4 + addis 4, 2, .LCPI0_10@toc@ha + addi 4, 4, .LCPI0_10@toc@l + vperm 13, 11, 9, 6 + xxlandc 44, 44, 37 + vadduwm 15, 12, 8 + vadduwm 7, 7, 13 + xxsldwi 45, 45, 45, 3 + xxlxor 40, 47, 42 + 
xxsldwi 47, 47, 47, 1 + xxsldwi 39, 39, 39, 3 + vrlw 10, 8, 1 + xxswapd 40, 44 + vadduwm 17, 10, 7 + lvx 7, 0, 4 + addis 4, 2, .LCPI0_11@toc@ha + addi 4, 4, .LCPI0_11@toc@l + xxlxor 44, 49, 40 + lvx 8, 0, 6 + vperm 18, 9, 9, 7 + lvx 9, 0, 4 + addis 4, 2, .LCPI0_12@toc@ha + vperm 12, 12, 12, 3 + addi 4, 4, .LCPI0_12@toc@l + vperm 19, 14, 16, 8 + xxlandc 63, 44, 34 + vperm 12, 19, 18, 9 + vadduwm 15, 31, 15 + xxlxor 42, 47, 42 + vrlw 18, 10, 4 + vadduwm 10, 17, 12 + vadduwm 17, 18, 10 + xxlxor 42, 49, 63 + xxmrgld 63, 43, 46 + xxsldwi 49, 49, 49, 1 + vmrghw 14, 14, 11 + vperm 19, 10, 10, 0 + lvx 10, 0, 4 + addis 4, 2, .LCPI0_13@toc@ha + addi 4, 4, .LCPI0_13@toc@l + lvx 11, 0, 4 + addis 4, 2, .LCPI0_14@toc@ha + vperm 31, 16, 31, 10 + addi 4, 4, .LCPI0_14@toc@l + vperm 14, 14, 16, 11 + xxlandc 51, 51, 37 + vadduwm 15, 19, 15 + xxswapd 51, 51 + vadduwm 17, 17, 31 + xxlxor 50, 47, 50 + xxsldwi 47, 47, 47, 3 + vperm 30, 14, 31, 8 + vrlw 18, 18, 1 + vadduwm 17, 18, 17 + xxlxor 51, 49, 51 + vadduwm 17, 17, 14 + vperm 19, 19, 19, 3 + xxlandc 51, 51, 34 + vadduwm 15, 19, 15 + xxlxor 48, 47, 50 + vrlw 16, 16, 4 + vadduwm 17, 16, 17 + xxlxor 50, 49, 51 + vperm 19, 12, 13, 6 + vperm 18, 18, 18, 0 + vperm 13, 13, 13, 7 + vadduwm 17, 17, 19 + xxlandc 50, 50, 37 + xxsldwi 49, 49, 49, 3 + vperm 13, 30, 13, 9 + vadduwm 15, 18, 15 + xxswapd 50, 50 + xxmrgld 62, 44, 46 + vmrghw 12, 14, 12 + xxlxor 48, 47, 48 + xxsldwi 47, 47, 47, 1 + vrlw 16, 16, 1 + vperm 30, 31, 30, 10 + vperm 12, 12, 31, 11 + vadduwm 17, 16, 17 + xxlxor 50, 49, 50 + vadduwm 17, 17, 13 + vperm 18, 18, 18, 3 + vperm 31, 12, 30, 8 + xxlandc 50, 50, 34 + vadduwm 15, 18, 15 + xxlxor 48, 47, 48 + vrlw 16, 16, 4 + vadduwm 17, 16, 17 + xxlxor 50, 49, 50 + xxsldwi 49, 49, 49, 1 + vperm 18, 18, 18, 0 + vadduwm 17, 17, 30 + xxlandc 50, 50, 37 + vadduwm 15, 18, 15 + xxswapd 50, 50 + xxlxor 48, 47, 48 + xxsldwi 46, 47, 47, 3 + vrlw 16, 16, 1 + vadduwm 17, 16, 17 + xxlxor 50, 49, 50 + vadduwm 17, 17, 12 + vperm 18, 18, 18, 3 + 
xxlandc 47, 50, 34 + xxsldwi 50, 51, 51, 3 + vadduwm 14, 15, 14 + vperm 19, 13, 18, 6 + xxlxor 48, 46, 48 + vperm 18, 18, 18, 7 + vrlw 16, 16, 4 + vadduwm 17, 16, 17 + xxlxor 47, 49, 47 + vadduwm 17, 17, 19 + vperm 15, 15, 15, 0 + xxsldwi 49, 49, 49, 3 + xxlandc 47, 47, 37 + vadduwm 14, 15, 14 + xxswapd 47, 47 + xxlxor 48, 46, 48 + xxsldwi 46, 46, 46, 1 + vrlw 16, 16, 1 + vadduwm 17, 16, 17 + xxlxor 47, 49, 47 + vperm 15, 15, 15, 3 + xxlandc 47, 47, 34 + vadduwm 29, 15, 14 + vperm 14, 31, 18, 9 + xxmrgld 50, 45, 44 + xxlxor 48, 61, 48 + vmrghw 12, 12, 13 + vrlw 16, 16, 4 + vperm 18, 30, 18, 10 + vadduwm 17, 17, 14 + vadduwm 17, 16, 17 + xxlxor 47, 49, 47 + xxsldwi 49, 49, 49, 1 + vperm 15, 15, 15, 0 + vadduwm 17, 17, 18 + xxlandc 47, 47, 37 + vadduwm 31, 15, 29 + xxswapd 47, 47 + xxlxor 48, 63, 48 + xxsldwi 45, 63, 63, 3 + vperm 31, 12, 30, 11 + vrlw 16, 16, 1 + vadduwm 17, 16, 17 + xxlxor 47, 49, 47 + vperm 15, 15, 15, 3 + xxlandc 47, 47, 34 + vadduwm 13, 15, 13 + xxlxor 44, 45, 48 + vadduwm 16, 17, 31 + xxsldwi 49, 51, 51, 3 + vrlw 12, 12, 4 + vperm 19, 14, 17, 6 + vadduwm 16, 12, 16 + xxlxor 47, 48, 47 + vperm 15, 15, 15, 0 + xxlandc 47, 47, 37 + vadduwm 13, 15, 13 + xxswapd 47, 47 + xxlxor 44, 45, 44 + xxsldwi 45, 45, 45, 1 + vrlw 30, 12, 1 + vadduwm 12, 16, 19 + xxsldwi 44, 44, 44, 3 + vadduwm 16, 30, 12 + xxlxor 44, 48, 47 + vperm 15, 17, 17, 7 + vperm 12, 12, 12, 3 + vperm 17, 31, 18, 8 + xxlandc 61, 44, 34 + vperm 12, 17, 15, 9 + vadduwm 13, 29, 13 + xxlxor 47, 45, 62 + xxmrgld 62, 46, 63 + vmrghw 14, 31, 14 + vrlw 15, 15, 4 + vadduwm 16, 16, 12 + vperm 30, 18, 30, 10 + vperm 14, 14, 18, 11 + xxsldwi 50, 51, 51, 3 + vadduwm 16, 15, 16 + xxlxor 49, 48, 61 + xxsldwi 48, 48, 48, 1 + vperm 19, 12, 18, 6 + vperm 17, 17, 17, 0 + vadduwm 16, 16, 30 + xxmrgld 60, 44, 46 + vmrghw 12, 14, 12 + vperm 28, 30, 28, 10 + xxlandc 49, 49, 37 + vadduwm 13, 17, 13 + xxswapd 49, 49 + vperm 12, 12, 30, 11 + xxlxor 47, 45, 47 + xxsldwi 45, 45, 45, 3 + vrlw 15, 15, 1 + vperm 8, 
12, 28, 8 + vadduwm 16, 15, 16 + xxlxor 49, 48, 49 + vadduwm 16, 16, 14 + vperm 17, 17, 17, 3 + xxlandc 49, 49, 34 + vadduwm 13, 17, 13 + xxlxor 47, 45, 47 + vrlw 15, 15, 4 + vadduwm 16, 15, 16 + xxlxor 49, 48, 49 + vperm 17, 17, 17, 0 + xxlandc 49, 49, 37 + vadduwm 31, 17, 13 + xxlxor 45, 63, 47 + vrlw 15, 13, 1 + vadduwm 13, 16, 19 + xxswapd 48, 49 + xxsldwi 51, 51, 51, 3 + xxsldwi 45, 45, 45, 3 + vadduwm 17, 15, 13 + xxlxor 45, 49, 48 + lvx 16, 0, 4 + vperm 29, 13, 13, 3 + vperm 13, 18, 18, 7 + xxsldwi 50, 63, 63, 1 + vperm 16, 14, 30, 16 + vperm 7, 19, 19, 7 + xxlandc 63, 61, 34 + vadduwm 18, 31, 18 + vperm 29, 16, 13, 9 + xxlxor 47, 50, 47 + vperm 6, 16, 19, 6 + vrlw 15, 15, 4 + vperm 7, 8, 7, 9 + vadduwm 17, 17, 29 + xxmrgld 41, 61, 44 + vadduwm 17, 15, 17 + vperm 9, 28, 9, 10 + xxlxor 63, 49, 63 + xxsldwi 49, 49, 49, 1 + vperm 31, 31, 31, 0 + vadduwm 17, 17, 28 + xxlandc 63, 63, 37 + vadduwm 18, 31, 18 + xxswapd 63, 63 + xxlxor 47, 50, 47 + xxsldwi 46, 50, 50, 3 + vrlw 15, 15, 1 + vadduwm 17, 15, 17 + xxlxor 63, 49, 63 + vadduwm 17, 17, 12 + vperm 31, 31, 31, 3 + xxlandc 50, 63, 34 + vadduwm 14, 18, 14 + xxlxor 47, 46, 47 + vrlw 15, 15, 4 + vadduwm 17, 15, 17 + xxlxor 50, 49, 50 + vadduwm 6, 17, 6 + vperm 18, 18, 18, 0 + xxsldwi 38, 38, 38, 3 + xxlandc 50, 50, 37 + vadduwm 14, 18, 14 + xxswapd 48, 50 + xxlxor 47, 46, 47 + xxsldwi 46, 46, 46, 1 + vrlw 15, 15, 1 + vadduwm 6, 15, 6 + xxlxor 48, 38, 48 + vadduwm 6, 6, 7 + vperm 16, 16, 16, 3 + xxlandc 48, 48, 34 + vadduwm 14, 16, 14 + xxlxor 40, 46, 47 + vrlw 8, 8, 4 + vadduwm 6, 8, 6 + xxlxor 39, 38, 48 + xxsldwi 38, 38, 38, 1 + vperm 7, 7, 7, 0 + vadduwm 6, 6, 9 + xxlandc 39, 39, 37 + vadduwm 14, 7, 14 + xxswapd 39, 39 + xxlxor 40, 46, 40 + xxsldwi 41, 46, 46, 3 + vrlw 8, 8, 1 + vadduwm 6, 8, 6 + xxlxor 39, 38, 39 + vperm 3, 7, 7, 3 + vmrghw 7, 12, 13 + xxlandc 34, 35, 34 + vperm 7, 7, 28, 11 + vadduwm 3, 2, 9 + xxlxor 40, 35, 40 + vrlw 4, 8, 4 + vadduwm 6, 6, 7 + vadduwm 6, 4, 6 + xxlxor 34, 38, 34 + xxsldwi 
0, 38, 38, 3 + vperm 2, 2, 2, 0 + xxlandc 34, 34, 37 + vadduwm 3, 2, 3 + xxswapd 34, 34 + xxlxor 36, 35, 36 + xxsldwi 1, 35, 35, 1 + vrlw 4, 4, 1 + xxlxor 0, 1, 0 + xxswapd 0, 0 + xxlxor 1, 36, 34 + stxvd2x 0, 0, 3 + xxswapd 1, 1 + stxvd2x 1, 3, 5 + li 3, -16 + lxvd2x 63, 1, 3 + li 3, -32 + lxvd2x 62, 1, 3 + li 3, -48 + lxvd2x 61, 1, 3 + li 3, -64 + lxvd2x 60, 1, 3 + blr + .long 0 + .quad 0 +.Lfunc_end0: + .size zfs_blake3_compress_in_place_sse41, .Lfunc_end0-.Lfunc_begin0 + .cfi_endproc + + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4 +.LCPI1_0: + .byte 31 + .byte 14 + .byte 13 + .byte 12 + .byte 30 + .byte 10 + .byte 9 + .byte 8 + .byte 29 + .byte 6 + .byte 5 + .byte 4 + .byte 28 + .byte 2 + .byte 1 + .byte 0 +.LCPI1_1: + .byte 2 + .byte 3 + .byte 0 + .byte 1 + .byte 6 + .byte 7 + .byte 4 + .byte 5 + .byte 10 + .byte 11 + .byte 8 + .byte 9 + .byte 14 + .byte 15 + .byte 12 + .byte 13 +.LCPI1_2: + .byte 29 + .byte 28 + .byte 31 + .byte 30 + .byte 25 + .byte 24 + .byte 27 + .byte 26 + .byte 21 + .byte 20 + .byte 23 + .byte 22 + .byte 17 + .byte 16 + .byte 19 + .byte 18 +.LCPI1_3: + .long 1779033703 + .long 3144134277 + .long 1013904242 + .long 2773480762 +.LCPI1_4: + .byte 27 + .byte 26 + .byte 25 + .byte 24 + .byte 19 + .byte 18 + .byte 17 + .byte 16 + .byte 11 + .byte 10 + .byte 9 + .byte 8 + .byte 3 + .byte 2 + .byte 1 + .byte 0 +.LCPI1_5: + .byte 1 + .byte 2 + .byte 3 + .byte 0 + .byte 5 + .byte 6 + .byte 7 + .byte 4 + .byte 9 + .byte 10 + .byte 11 + .byte 8 + .byte 13 + .byte 14 + .byte 15 + .byte 12 +.LCPI1_6: + .byte 30 + .byte 29 + .byte 28 + .byte 31 + .byte 26 + .byte 25 + .byte 24 + .byte 27 + .byte 22 + .byte 21 + .byte 20 + .byte 23 + .byte 18 + .byte 17 + .byte 16 + .byte 19 +.LCPI1_7: + .byte 19 + .byte 18 + .byte 17 + .byte 16 + .byte 11 + .byte 10 + .byte 9 + .byte 8 + .byte 3 + .byte 2 + .byte 1 + .byte 0 + .byte 27 + .byte 26 + .byte 25 + .byte 24 +.LCPI1_8: + .byte 23 + .byte 22 + .byte 21 + .byte 20 + .byte 27 + .byte 26 + .byte 25 + 
.byte 24 + .byte 11 + .byte 10 + .byte 9 + .byte 8 + .byte 3 + .byte 2 + .byte 1 + .byte 0 +.LCPI1_9: + .byte 31 + .byte 31 + .byte 31 + .byte 31 + .byte 23 + .byte 22 + .byte 21 + .byte 20 + .byte 31 + .byte 31 + .byte 31 + .byte 31 + .byte 3 + .byte 2 + .byte 1 + .byte 0 +.LCPI1_10: + .byte 19 + .byte 18 + .byte 17 + .byte 16 + .byte 31 + .byte 31 + .byte 31 + .byte 31 + .byte 31 + .byte 30 + .byte 29 + .byte 28 + .byte 31 + .byte 31 + .byte 31 + .byte 31 +.LCPI1_11: + .byte 31 + .byte 30 + .byte 29 + .byte 28 + .byte 11 + .byte 10 + .byte 9 + .byte 8 + .byte 23 + .byte 22 + .byte 21 + .byte 20 + .byte 3 + .byte 2 + .byte 1 + .byte 0 +.LCPI1_12: + .byte 31 + .byte 30 + .byte 29 + .byte 28 + .byte 23 + .byte 22 + .byte 21 + .byte 20 + .byte 3 + .byte 2 + .byte 1 + .byte 0 + .byte 27 + .byte 26 + .byte 25 + .byte 24 +.LCPI1_13: + .byte 27 + .byte 26 + .byte 25 + .byte 24 + .byte 11 + .byte 10 + .byte 9 + .byte 8 + .byte 15 + .byte 14 + .byte 13 + .byte 12 + .byte 31 + .byte 30 + .byte 29 + .byte 28 +.LCPI1_14: + .byte 23 + .byte 22 + .byte 21 + .byte 20 + .byte 23 + .byte 22 + .byte 21 + .byte 20 + .byte 3 + .byte 2 + .byte 1 + .byte 0 + .byte 3 + .byte 2 + .byte 1 + .byte 0 + .text + .globl zfs_blake3_compress_xof_sse41 + .p2align 2 + .type zfs_blake3_compress_xof_sse41,@function +zfs_blake3_compress_xof_sse41: +.Lfunc_begin1: + .cfi_startproc +.Lfunc_gep1: + addis 2, 12, .TOC.-.Lfunc_gep1@ha + addi 2, 2, .TOC.-.Lfunc_gep1@l +.Lfunc_lep1: + .localentry zfs_blake3_compress_xof_sse41, .Lfunc_lep1-.Lfunc_gep1 + li 9, -64 + mtvsrd 34, 5 + li 5, 16 + lfdx 0, 0, 4 + vspltisw 13, -16 + addis 11, 2, .LCPI1_9@toc@ha + stxvd2x 60, 1, 9 + li 9, -48 + mtvsrd 35, 7 + lfd 1, 8(4) + lfd 2, 16(4) + addis 7, 2, .LCPI1_0@toc@ha + stxvd2x 61, 1, 9 + li 9, -32 + mtvsrwz 36, 6 + rldicl 6, 6, 32, 32 + stxvd2x 62, 1, 9 + li 9, -16 + vmrghb 2, 3, 2 + stxvd2x 63, 1, 9 + mtvsrwz 35, 6 + addi 6, 7, .LCPI1_0@toc@l + addis 7, 2, .LCPI1_2@toc@ha + lfd 3, 24(4) + xxmrghd 37, 1, 0 + lvx 6, 0, 6 
+ xxlxor 33, 33, 33 + lxvd2x 0, 0, 3 + addis 6, 2, .LCPI1_1@toc@ha + addi 7, 7, .LCPI1_2@toc@l + vmrghw 3, 3, 4 + lxvd2x 1, 3, 5 + addi 6, 6, .LCPI1_1@toc@l + vspltisw 14, 9 + xxmrghd 32, 3, 2 + xxswapd 36, 0 + vperm 2, 1, 2, 6 + xxswapd 38, 1 + vpkudum 9, 0, 5 + xxmrgld 34, 34, 35 + lvx 3, 0, 7 + addis 7, 2, .LCPI1_4@toc@ha + addi 7, 7, .LCPI1_4@toc@l + vadduwm 4, 9, 4 + lvx 11, 0, 7 + addis 7, 2, .LCPI1_6@toc@ha + addi 7, 7, .LCPI1_6@toc@l + vadduwm 7, 4, 6 + lvx 4, 0, 6 + addis 6, 2, .LCPI1_3@toc@ha + addi 6, 6, .LCPI1_3@toc@l + vperm 11, 0, 5, 11 + lvx 0, 0, 7 + li 7, 32 + xxlxor 40, 39, 34 + lvx 10, 0, 6 + addis 6, 2, .LCPI1_5@toc@ha + lxvd2x 0, 4, 7 + vcmpgtsb 2, 1, 4 + addi 6, 6, .LCPI1_5@toc@l + vperm 4, 8, 8, 3 + vspltisw 8, 10 + xxlandc 44, 36, 34 + vadduwm 4, 8, 8 + vadduwm 8, 12, 10 + xxlxor 37, 40, 38 + vrlw 6, 5, 4 + vadduwm 5, 7, 11 + vadduwm 7, 6, 5 + lvx 5, 0, 6 + li 6, 48 + lxvd2x 1, 4, 6 + addis 4, 2, .LCPI1_7@toc@ha + xxlxor 42, 39, 44 + addi 4, 4, .LCPI1_7@toc@l + vcmpgtsb 5, 1, 5 + vperm 1, 10, 10, 0 + xxswapd 42, 0 + xxswapd 44, 1 + vpkudum 16, 12, 10 + xxlandc 47, 33, 37 + vsubuwm 1, 14, 13 + lvx 14, 0, 4 + addis 4, 2, .LCPI1_8@toc@ha + vadduwm 8, 15, 8 + xxswapd 45, 47 + addi 4, 4, .LCPI1_8@toc@l + xxlxor 38, 40, 38 + xxsldwi 40, 40, 40, 3 + vadduwm 7, 7, 16 + xxsldwi 48, 48, 48, 1 + vrlw 6, 6, 1 + xxsldwi 39, 39, 39, 1 + vperm 14, 10, 12, 14 + vadduwm 7, 6, 7 + xxlxor 45, 39, 45 + vperm 13, 13, 13, 3 + xxlandc 45, 45, 34 + vadduwm 8, 13, 8 + xxlxor 38, 40, 38 + vrlw 10, 6, 4 + vadduwm 6, 7, 14 + vadduwm 7, 10, 6 + xxlxor 38, 39, 45 + vperm 12, 6, 6, 0 + lvx 6, 0, 4 + addis 4, 2, .LCPI1_10@toc@ha + addi 4, 4, .LCPI1_10@toc@l + vperm 13, 11, 9, 6 + xxlandc 44, 44, 37 + vadduwm 15, 12, 8 + vadduwm 7, 7, 13 + xxsldwi 45, 45, 45, 3 + xxlxor 40, 47, 42 + xxsldwi 47, 47, 47, 1 + xxsldwi 39, 39, 39, 3 + vrlw 10, 8, 1 + xxswapd 40, 44 + vadduwm 17, 10, 7 + lvx 7, 0, 4 + addi 4, 11, .LCPI1_9@toc@l + xxlxor 44, 49, 40 + lvx 8, 0, 4 + addis 4, 2, 
.LCPI1_11@toc@ha + vperm 18, 9, 9, 7 + addi 4, 4, .LCPI1_11@toc@l + vperm 12, 12, 12, 3 + lvx 9, 0, 4 + addis 4, 2, .LCPI1_12@toc@ha + vperm 19, 14, 16, 8 + addi 4, 4, .LCPI1_12@toc@l + xxlandc 63, 44, 34 + vperm 12, 19, 18, 9 + vadduwm 15, 31, 15 + xxlxor 42, 47, 42 + vrlw 18, 10, 4 + vadduwm 10, 17, 12 + vadduwm 17, 18, 10 + xxlxor 42, 49, 63 + xxmrgld 63, 43, 46 + xxsldwi 49, 49, 49, 1 + vmrghw 14, 14, 11 + vperm 19, 10, 10, 0 + lvx 10, 0, 4 + addis 4, 2, .LCPI1_13@toc@ha + addi 4, 4, .LCPI1_13@toc@l + lvx 11, 0, 4 + addis 4, 2, .LCPI1_14@toc@ha + vperm 31, 16, 31, 10 + addi 4, 4, .LCPI1_14@toc@l + vperm 14, 14, 16, 11 + xxlandc 51, 51, 37 + vadduwm 15, 19, 15 + xxswapd 51, 51 + vadduwm 17, 17, 31 + xxlxor 50, 47, 50 + xxsldwi 47, 47, 47, 3 + vperm 30, 14, 31, 8 + vrlw 18, 18, 1 + vadduwm 17, 18, 17 + xxlxor 51, 49, 51 + vadduwm 17, 17, 14 + vperm 19, 19, 19, 3 + xxlandc 51, 51, 34 + vadduwm 15, 19, 15 + xxlxor 48, 47, 50 + vrlw 16, 16, 4 + vadduwm 17, 16, 17 + xxlxor 50, 49, 51 + vperm 19, 12, 13, 6 + vperm 18, 18, 18, 0 + vperm 13, 13, 13, 7 + vadduwm 17, 17, 19 + xxlandc 50, 50, 37 + xxsldwi 49, 49, 49, 3 + vperm 13, 30, 13, 9 + vadduwm 15, 18, 15 + xxswapd 50, 50 + xxmrgld 62, 44, 46 + vmrghw 12, 14, 12 + xxlxor 48, 47, 48 + xxsldwi 47, 47, 47, 1 + vrlw 16, 16, 1 + vperm 30, 31, 30, 10 + vperm 12, 12, 31, 11 + vadduwm 17, 16, 17 + xxlxor 50, 49, 50 + vadduwm 17, 17, 13 + vperm 18, 18, 18, 3 + vperm 31, 12, 30, 8 + xxlandc 50, 50, 34 + vadduwm 15, 18, 15 + xxlxor 48, 47, 48 + vrlw 16, 16, 4 + vadduwm 17, 16, 17 + xxlxor 50, 49, 50 + xxsldwi 49, 49, 49, 1 + vperm 18, 18, 18, 0 + vadduwm 17, 17, 30 + xxlandc 50, 50, 37 + vadduwm 15, 18, 15 + xxswapd 50, 50 + xxlxor 48, 47, 48 + xxsldwi 46, 47, 47, 3 + vrlw 16, 16, 1 + vadduwm 17, 16, 17 + xxlxor 50, 49, 50 + vadduwm 17, 17, 12 + vperm 18, 18, 18, 3 + xxlandc 47, 50, 34 + xxsldwi 50, 51, 51, 3 + vadduwm 14, 15, 14 + vperm 19, 13, 18, 6 + xxlxor 48, 46, 48 + vperm 18, 18, 18, 7 + vrlw 16, 16, 4 + vadduwm 17, 16, 
17 + xxlxor 47, 49, 47 + vadduwm 17, 17, 19 + vperm 15, 15, 15, 0 + xxsldwi 49, 49, 49, 3 + xxlandc 47, 47, 37 + vadduwm 14, 15, 14 + xxswapd 47, 47 + xxlxor 48, 46, 48 + xxsldwi 46, 46, 46, 1 + vrlw 16, 16, 1 + vadduwm 17, 16, 17 + xxlxor 47, 49, 47 + vperm 15, 15, 15, 3 + xxlandc 47, 47, 34 + vadduwm 29, 15, 14 + vperm 14, 31, 18, 9 + xxmrgld 50, 45, 44 + xxlxor 48, 61, 48 + vmrghw 12, 12, 13 + vrlw 16, 16, 4 + vperm 18, 30, 18, 10 + vadduwm 17, 17, 14 + vadduwm 17, 16, 17 + xxlxor 47, 49, 47 + xxsldwi 49, 49, 49, 1 + vperm 15, 15, 15, 0 + vadduwm 17, 17, 18 + xxlandc 47, 47, 37 + vadduwm 31, 15, 29 + xxswapd 47, 47 + xxlxor 48, 63, 48 + xxsldwi 45, 63, 63, 3 + vperm 31, 12, 30, 11 + vrlw 16, 16, 1 + vadduwm 17, 16, 17 + xxlxor 47, 49, 47 + vperm 15, 15, 15, 3 + xxlandc 47, 47, 34 + vadduwm 13, 15, 13 + xxlxor 44, 45, 48 + vadduwm 16, 17, 31 + xxsldwi 49, 51, 51, 3 + vrlw 12, 12, 4 + vperm 19, 14, 17, 6 + vadduwm 16, 12, 16 + xxlxor 47, 48, 47 + vperm 15, 15, 15, 0 + xxlandc 47, 47, 37 + vadduwm 13, 15, 13 + xxswapd 47, 47 + xxlxor 44, 45, 44 + xxsldwi 45, 45, 45, 1 + vrlw 30, 12, 1 + vadduwm 12, 16, 19 + xxsldwi 44, 44, 44, 3 + vadduwm 16, 30, 12 + xxlxor 44, 48, 47 + vperm 15, 17, 17, 7 + vperm 12, 12, 12, 3 + vperm 17, 31, 18, 8 + xxlandc 61, 44, 34 + vperm 12, 17, 15, 9 + vadduwm 13, 29, 13 + xxlxor 47, 45, 62 + xxmrgld 62, 46, 63 + vmrghw 14, 31, 14 + vrlw 15, 15, 4 + vadduwm 16, 16, 12 + vperm 30, 18, 30, 10 + vperm 14, 14, 18, 11 + xxsldwi 50, 51, 51, 3 + vadduwm 16, 15, 16 + xxlxor 49, 48, 61 + xxsldwi 48, 48, 48, 1 + vperm 19, 12, 18, 6 + vperm 17, 17, 17, 0 + vadduwm 16, 16, 30 + xxmrgld 60, 44, 46 + vmrghw 12, 14, 12 + vperm 28, 30, 28, 10 + xxlandc 49, 49, 37 + vadduwm 13, 17, 13 + xxswapd 49, 49 + vperm 12, 12, 30, 11 + xxlxor 47, 45, 47 + xxsldwi 45, 45, 45, 3 + vrlw 15, 15, 1 + vperm 8, 12, 28, 8 + vadduwm 16, 15, 16 + xxlxor 49, 48, 49 + vadduwm 16, 16, 14 + vperm 17, 17, 17, 3 + xxlandc 49, 49, 34 + vadduwm 13, 17, 13 + xxlxor 47, 45, 47 + vrlw 
15, 15, 4 + vadduwm 16, 15, 16 + xxlxor 49, 48, 49 + vperm 17, 17, 17, 0 + xxlandc 49, 49, 37 + vadduwm 31, 17, 13 + xxlxor 45, 63, 47 + vrlw 15, 13, 1 + vadduwm 13, 16, 19 + xxswapd 48, 49 + xxsldwi 51, 51, 51, 3 + xxsldwi 45, 45, 45, 3 + vadduwm 17, 15, 13 + xxlxor 45, 49, 48 + lvx 16, 0, 4 + vperm 29, 13, 13, 3 + vperm 13, 18, 18, 7 + xxsldwi 50, 63, 63, 1 + vperm 16, 14, 30, 16 + vperm 7, 19, 19, 7 + xxlandc 63, 61, 34 + vadduwm 18, 31, 18 + vperm 29, 16, 13, 9 + xxlxor 47, 50, 47 + vperm 6, 16, 19, 6 + vrlw 15, 15, 4 + vperm 7, 8, 7, 9 + vadduwm 17, 17, 29 + xxmrgld 41, 61, 44 + vadduwm 17, 15, 17 + vperm 9, 28, 9, 10 + xxlxor 63, 49, 63 + xxsldwi 49, 49, 49, 1 + vperm 31, 31, 31, 0 + vadduwm 17, 17, 28 + xxlandc 63, 63, 37 + vadduwm 18, 31, 18 + xxswapd 63, 63 + xxlxor 47, 50, 47 + xxsldwi 46, 50, 50, 3 + vrlw 15, 15, 1 + vadduwm 17, 15, 17 + xxlxor 63, 49, 63 + vadduwm 17, 17, 12 + vperm 31, 31, 31, 3 + xxlandc 50, 63, 34 + vadduwm 14, 18, 14 + xxlxor 47, 46, 47 + vrlw 15, 15, 4 + vadduwm 17, 15, 17 + xxlxor 50, 49, 50 + vadduwm 6, 17, 6 + vperm 18, 18, 18, 0 + xxsldwi 38, 38, 38, 3 + xxlandc 50, 50, 37 + vadduwm 14, 18, 14 + xxswapd 48, 50 + xxlxor 47, 46, 47 + xxsldwi 46, 46, 46, 1 + vrlw 15, 15, 1 + vadduwm 6, 15, 6 + xxlxor 48, 38, 48 + vadduwm 6, 6, 7 + vperm 16, 16, 16, 3 + xxlandc 48, 48, 34 + vadduwm 14, 16, 14 + xxlxor 40, 46, 47 + vrlw 8, 8, 4 + vadduwm 6, 8, 6 + xxlxor 39, 38, 48 + xxsldwi 38, 38, 38, 1 + vperm 7, 7, 7, 0 + vadduwm 6, 6, 9 + xxlandc 39, 39, 37 + vadduwm 14, 7, 14 + xxswapd 39, 39 + xxlxor 40, 46, 40 + xxsldwi 41, 46, 46, 3 + vrlw 8, 8, 1 + vadduwm 6, 8, 6 + xxlxor 39, 38, 39 + vperm 3, 7, 7, 3 + vmrghw 7, 12, 13 + xxlandc 34, 35, 34 + vperm 7, 7, 28, 11 + vadduwm 3, 2, 9 + xxlxor 40, 35, 40 + vrlw 4, 8, 4 + vadduwm 6, 6, 7 + vadduwm 6, 4, 6 + xxlxor 34, 38, 34 + xxsldwi 0, 38, 38, 3 + vperm 2, 2, 2, 0 + xxlandc 34, 34, 37 + vadduwm 3, 2, 3 + xxswapd 34, 34 + xxlxor 36, 35, 36 + xxsldwi 1, 35, 35, 1 + vrlw 4, 4, 1 + xxlxor 0, 1, 0 
+ xxswapd 0, 0 + xxlxor 2, 36, 34 + stxvd2x 0, 0, 8 + xxswapd 2, 2 + stxvd2x 2, 8, 5 + lfdx 0, 0, 3 + lfd 2, 8(3) + xxmrghd 35, 2, 0 + xxlxor 0, 1, 35 + xxswapd 0, 0 + stxvd2x 0, 8, 7 + lfd 0, 16(3) + lfd 1, 24(3) + li 3, -16 + xxmrghd 35, 1, 0 + xxlxor 0, 34, 35 + xxswapd 0, 0 + stxvd2x 0, 8, 6 + lxvd2x 63, 1, 3 + li 3, -32 + lxvd2x 62, 1, 3 + li 3, -48 + lxvd2x 61, 1, 3 + li 3, -64 + lxvd2x 60, 1, 3 + blr + .long 0 + .quad 0 +.Lfunc_end1: + .size zfs_blake3_compress_xof_sse41, .Lfunc_end1-.Lfunc_begin1 + .cfi_endproc + + .globl zfs_blake3_hash_many_sse41 + .p2align 2 + .type zfs_blake3_hash_many_sse41,@function +zfs_blake3_hash_many_sse41: +.Lfunc_begin2: + .cfi_startproc +.Lfunc_gep2: + addis 2, 12, .TOC.-.Lfunc_gep2@ha + addi 2, 2, .TOC.-.Lfunc_gep2@l +.Lfunc_lep2: + .localentry zfs_blake3_hash_many_sse41, .Lfunc_lep2-.Lfunc_gep2 + mfocrf 12, 32 + mflr 0 + std 0, 16(1) + stw 12, 8(1) + stdu 1, -256(1) + .cfi_def_cfa_offset 256 + .cfi_offset lr, 16 + .cfi_offset r17, -120 + .cfi_offset r18, -112 + .cfi_offset r19, -104 + .cfi_offset r20, -96 + .cfi_offset r21, -88 + .cfi_offset r22, -80 + .cfi_offset r23, -72 + .cfi_offset r24, -64 + .cfi_offset r25, -56 + .cfi_offset r26, -48 + .cfi_offset r27, -40 + .cfi_offset r28, -32 + .cfi_offset r29, -24 + .cfi_offset r30, -16 + .cfi_offset cr2, 8 + std 26, 208(1) + mr 26, 4 + cmpldi 1, 4, 4 + andi. 
4, 8, 1 + std 18, 144(1) + std 19, 152(1) + crmove 8, 1 + ld 19, 360(1) + lwz 18, 352(1) + std 24, 192(1) + std 25, 200(1) + std 27, 216(1) + std 28, 224(1) + mr 24, 10 + mr 28, 6 + mr 27, 5 + mr 25, 3 + std 29, 232(1) + std 30, 240(1) + mr 30, 9 + mr 29, 7 + std 17, 136(1) + std 20, 160(1) + std 21, 168(1) + std 22, 176(1) + std 23, 184(1) + blt 1, .LBB2_3 + li 3, 0 + li 4, 1 + clrldi 23, 30, 32 + isel 22, 4, 3, 8 + clrldi 21, 24, 32 + clrldi 20, 18, 32 +.LBB2_2: + mr 3, 25 + mr 4, 27 + mr 5, 28 + mr 6, 29 + mr 7, 22 + mr 8, 23 + mr 9, 21 + mr 10, 20 + std 19, 32(1) + bl blake3_hash4_sse41 + addi 26, 26, -4 + addi 3, 29, 4 + addi 25, 25, 32 + addi 19, 19, 128 + cmpldi 26, 3 + isel 29, 3, 29, 8 + bgt 0, .LBB2_2 +.LBB2_3: + cmpldi 26, 0 + beq 0, .LBB2_11 + li 3, 0 + li 4, 1 + or 21, 24, 30 + li 20, 16 + addi 24, 1, 96 + isel 22, 4, 3, 8 +.LBB2_5: + lxvd2x 0, 28, 20 + ld 23, 0(25) + mr 17, 27 + mr 3, 21 + stxvd2x 0, 24, 20 + lxvd2x 0, 0, 28 + stxvd2x 0, 0, 24 +.LBB2_6: + cmpldi 17, 1 + beq 0, .LBB2_8 + cmpldi 17, 0 + bne 0, .LBB2_9 + b .LBB2_10 +.LBB2_8: + or 3, 3, 18 +.LBB2_9: + clrldi 7, 3, 56 + mr 3, 24 + mr 4, 23 + li 5, 64 + mr 6, 29 + bl zfs_blake3_compress_in_place_sse41 + addi 23, 23, 64 + addi 17, 17, -1 + mr 3, 30 + b .LBB2_6 +.LBB2_10: + lxvd2x 0, 24, 20 + addi 26, 26, -1 + add 29, 29, 22 + addi 25, 25, 8 + cmpldi 26, 0 + stxvd2x 0, 19, 20 + lxvd2x 0, 0, 24 + stxvd2x 0, 0, 19 + addi 19, 19, 32 + bne 0, .LBB2_5 +.LBB2_11: + ld 30, 240(1) + ld 29, 232(1) + ld 28, 224(1) + ld 27, 216(1) + ld 26, 208(1) + ld 25, 200(1) + ld 24, 192(1) + ld 23, 184(1) + ld 22, 176(1) + ld 21, 168(1) + ld 20, 160(1) + ld 19, 152(1) + ld 18, 144(1) + ld 17, 136(1) + addi 1, 1, 256 + ld 0, 16(1) + lwz 12, 8(1) + mtocrf 32, 12 + mtlr 0 + blr + .long 0 + .quad 0 +.Lfunc_end2: + .size zfs_blake3_hash_many_sse41, .Lfunc_end2-.Lfunc_begin2 + .cfi_endproc + + .section .rodata.cst16,"aM",@progbits,16 + .p2align 4 +.LCPI3_0: + .quad 4294967296 + .quad 12884901890 +.LCPI3_1: + .byte 2 + 
.byte 3 + .byte 0 + .byte 1 + .byte 6 + .byte 7 + .byte 4 + .byte 5 + .byte 10 + .byte 11 + .byte 8 + .byte 9 + .byte 14 + .byte 15 + .byte 12 + .byte 13 +.LCPI3_2: + .byte 1 + .byte 2 + .byte 3 + .byte 0 + .byte 5 + .byte 6 + .byte 7 + .byte 4 + .byte 9 + .byte 10 + .byte 11 + .byte 8 + .byte 13 + .byte 14 + .byte 15 + .byte 12 +.LCPI3_3: + .byte 29 + .byte 28 + .byte 31 + .byte 30 + .byte 25 + .byte 24 + .byte 27 + .byte 26 + .byte 21 + .byte 20 + .byte 23 + .byte 22 + .byte 17 + .byte 16 + .byte 19 + .byte 18 +.LCPI3_4: + .long 1779033703 + .long 1779033703 + .long 1779033703 + .long 1779033703 +.LCPI3_5: + .long 3144134277 + .long 3144134277 + .long 3144134277 + .long 3144134277 +.LCPI3_6: + .long 1013904242 + .long 1013904242 + .long 1013904242 + .long 1013904242 +.LCPI3_7: + .long 2773480762 + .long 2773480762 + .long 2773480762 + .long 2773480762 +.LCPI3_8: + .byte 30 + .byte 29 + .byte 28 + .byte 31 + .byte 26 + .byte 25 + .byte 24 + .byte 27 + .byte 22 + .byte 21 + .byte 20 + .byte 23 + .byte 18 + .byte 17 + .byte 16 + .byte 19 + .text + .p2align 2 + .type blake3_hash4_sse41,@function +blake3_hash4_sse41: +.Lfunc_begin3: + .cfi_startproc +.Lfunc_gep3: + addis 2, 12, .TOC.-.Lfunc_gep3@ha + addi 2, 2, .TOC.-.Lfunc_gep3@l +.Lfunc_lep3: + .localentry blake3_hash4_sse41, .Lfunc_lep3-.Lfunc_gep3 + stdu 1, -416(1) + .cfi_def_cfa_offset 416 + .cfi_offset r22, -176 + .cfi_offset r23, -168 + .cfi_offset r24, -160 + .cfi_offset r25, -152 + .cfi_offset r26, -144 + .cfi_offset r27, -136 + .cfi_offset r28, -128 + .cfi_offset r29, -120 + .cfi_offset r30, -112 + .cfi_offset f20, -96 + .cfi_offset f21, -88 + .cfi_offset f22, -80 + .cfi_offset f23, -72 + .cfi_offset f24, -64 + .cfi_offset f25, -56 + .cfi_offset f26, -48 + .cfi_offset f27, -40 + .cfi_offset f28, -32 + .cfi_offset f29, -24 + .cfi_offset f30, -16 + .cfi_offset f31, -8 + .cfi_offset v20, -368 + .cfi_offset v21, -352 + .cfi_offset v22, -336 + .cfi_offset v23, -320 + .cfi_offset v24, -304 + .cfi_offset v25, -288 
+ .cfi_offset v26, -272 + .cfi_offset v27, -256 + .cfi_offset v28, -240 + .cfi_offset v29, -224 + .cfi_offset v30, -208 + .cfi_offset v31, -192 + li 11, 48 + li 0, 8 + std 30, 304(1) + li 30, 12 + li 12, 4 + lfiwzx 0, 0, 5 + stxvd2x 52, 1, 11 + li 11, 64 + lfiwzx 2, 5, 0 + li 0, 20 + lfiwzx 3, 5, 30 + stxvd2x 53, 1, 11 + li 11, 80 + li 30, 24 + lfiwzx 4, 5, 0 + li 0, 28 + stxvd2x 54, 1, 11 + li 11, 96 + lfiwzx 1, 5, 12 + lfiwzx 6, 5, 30 + xxspltw 47, 0, 1 + cmpldi 4, 0 + std 22, 240(1) + stxvd2x 55, 1, 11 + li 11, 112 + lfiwzx 7, 5, 0 + xxspltw 40, 2, 1 + std 23, 248(1) + xxspltw 39, 3, 1 + std 24, 256(1) + std 25, 264(1) + xxspltw 51, 1, 1 + xxspltw 43, 6, 1 + std 26, 272(1) + xxspltw 41, 7, 1 + std 27, 280(1) + std 28, 288(1) + std 29, 296(1) + stxvd2x 56, 1, 11 + li 11, 128 + stfd 20, 320(1) + stxvd2x 57, 1, 11 + li 11, 144 + stfd 21, 328(1) + stxvd2x 58, 1, 11 + li 11, 160 + stfd 22, 336(1) + stxvd2x 59, 1, 11 + li 11, 176 + stfd 23, 344(1) + stxvd2x 60, 1, 11 + li 11, 192 + stfd 24, 352(1) + stxvd2x 61, 1, 11 + li 11, 208 + stfd 25, 360(1) + stxvd2x 62, 1, 11 + li 11, 224 + stfd 26, 368(1) + stxvd2x 63, 1, 11 + li 11, 16 + xxspltw 63, 4, 1 + lfiwzx 5, 5, 11 + ld 5, 448(1) + stfd 27, 376(1) + stfd 28, 384(1) + stfd 29, 392(1) + stfd 30, 400(1) + stfd 31, 408(1) + xxspltw 50, 5, 1 + beq 0, .LBB3_5 + addis 30, 2, .LCPI3_0@toc@ha + neg 7, 7 + xxleqv 34, 34, 34 + addis 28, 2, .LCPI3_5@toc@ha + addis 27, 2, .LCPI3_6@toc@ha + addis 26, 2, .LCPI3_7@toc@ha + addis 29, 2, .LCPI3_4@toc@ha + addis 25, 2, .LCPI3_8@toc@ha + addi 0, 30, .LCPI3_0@toc@l + mtfprwz 2, 7 + addis 7, 2, .LCPI3_1@toc@ha + addis 30, 2, .LCPI3_3@toc@ha + addi 24, 29, .LCPI3_4@toc@l + ld 29, 24(3) + lxvd2x 1, 0, 0 + mtfprwz 0, 6 + rldicl 6, 6, 32, 32 + addi 0, 30, .LCPI3_3@toc@l + ld 30, 16(3) + xxspltw 2, 2, 1 + vslw 2, 2, 2 + xxspltw 37, 0, 1 + mtfprwz 0, 6 + addi 6, 7, .LCPI3_1@toc@l + addis 7, 2, .LCPI3_2@toc@ha + xxswapd 35, 1 + xxlxor 36, 36, 36 + xxspltw 33, 0, 1 + xxland 35, 2, 35 + vadduwm 0, 
3, 5 + lvx 5, 0, 6 + addi 6, 7, .LCPI3_2@toc@l + ld 7, 8(3) + xxlor 35, 35, 34 + xxlxor 34, 32, 34 + xxlor 9, 32, 32 + lvx 0, 0, 6 + ld 6, 0(3) + addi 3, 3, -8 + vcmpgtsw 2, 3, 2 + lvx 3, 0, 0 + addi 0, 28, .LCPI3_5@toc@l + addi 28, 27, .LCPI3_6@toc@l + addi 27, 26, .LCPI3_7@toc@l + addi 26, 25, .LCPI3_8@toc@l + or 25, 9, 8 + li 9, 0 + vcmpgtsb 5, 4, 5 + vcmpgtsb 0, 4, 0 + xxlor 11, 35, 35 + lvx 3, 0, 24 + xxlor 12, 35, 35 + vsubuwm 2, 1, 2 + xxlnor 10, 37, 37 + xxlor 13, 34, 34 + lvx 2, 0, 0 + li 0, 32 + xxlnor 31, 32, 32 + xxlor 30, 34, 34 + lvx 2, 0, 28 + li 28, 48 + xxlor 29, 34, 34 + lvx 2, 0, 27 + li 27, 0 + xxlor 28, 34, 34 + lvx 2, 0, 26 + xxlor 27, 34, 34 +.LBB3_2: + mr 26, 27 + addi 27, 27, 1 + xxlor 23, 39, 39 + cmpld 27, 4 + sldi 26, 26, 6 + xxlor 24, 40, 40 + iseleq 24, 10, 9 + add 23, 6, 26 + add 22, 30, 26 + lxvd2x 0, 6, 26 + lxvd2x 1, 7, 26 + or 25, 24, 25 + add 24, 7, 26 + lxvd2x 2, 30, 26 + lxvd2x 3, 29, 26 + xxlor 26, 47, 47 + lxvd2x 4, 23, 11 + lxvd2x 6, 24, 11 + clrlwi 25, 25, 24 + xxlor 25, 51, 51 + lxvd2x 7, 22, 11 + lxvd2x 8, 23, 0 + mtfprd 5, 25 + add 25, 29, 26 + xxswapd 34, 0 + lxvd2x 0, 25, 11 + xxswapd 38, 1 + xxswapd 32, 2 + lxvd2x 1, 24, 0 + lxvd2x 2, 22, 0 + xxswapd 40, 3 + xxswapd 39, 4 + lxvd2x 3, 25, 0 + lxvd2x 4, 23, 28 + xxswapd 60, 6 + xxswapd 47, 7 + lxvd2x 6, 24, 28 + xxswapd 57, 8 + lxvd2x 7, 22, 28 + lxvd2x 8, 25, 28 + xxswapd 58, 0 + mr 25, 3 + xxswapd 53, 1 + xxswapd 56, 2 + xxswapd 52, 3 + xxswapd 55, 4 + xxswapd 54, 6 + xxswapd 0, 5 + xxswapd 42, 7 + xxswapd 48, 8 + mtctr 12 +.LBB3_3: + ldu 24, 8(25) + add 24, 24, 26 + addi 24, 24, 256 + dcbt 0, 24 + bdnz .LBB3_3 + vmrgew 4, 28, 7 + vspltisw 14, 9 + mr 25, 8 + vmrgew 27, 6, 2 + vspltisw 17, 4 + vmrglw 12, 6, 2 + vspltisw 19, 10 + vmrghw 30, 6, 2 + xxspltw 0, 0, 3 + vmrglw 2, 8, 0 + vmrghw 13, 8, 0 + xxlor 7, 36, 36 + vmrgew 4, 21, 25 + vmrglw 29, 28, 7 + vmrghw 1, 28, 7 + vmrglw 28, 26, 15 + xxmrgld 37, 34, 44 + vmrgew 7, 26, 15 + vmrghw 15, 26, 15 + xxlor 21, 36, 36 + 
vmrglw 4, 21, 25 + vmrghw 21, 21, 25 + vmrglw 25, 20, 24 + xxmrgld 34, 60, 61 + vmrghw 26, 20, 24 + xxlor 38, 26, 26 + vmrgew 3, 8, 0 + xxlor 5, 36, 36 + vmrgew 4, 20, 24 + vspltisw 24, -16 + vmrglw 20, 22, 23 + xxmrgld 57, 57, 5 + vmrglw 8, 16, 10 + vmrghw 0, 16, 10 + vadduwm 12, 19, 19 + xxlor 8, 37, 37 + xxlor 20, 36, 36 + vmrgew 4, 22, 23 + vmrghw 23, 22, 23 + xxmrgld 40, 40, 52 + vmrgew 22, 16, 10 + vsubuwm 10, 14, 24 + vslw 14, 17, 17 + vadduwm 17, 5, 6 + xxmrgld 37, 47, 33 + xxlor 22, 36, 36 + xxmrgld 36, 45, 62 + xxlor 38, 25, 25 + xxlor 2, 34, 34 + vadduwm 19, 4, 6 + xxmrgld 38, 39, 7 + xxlor 3, 36, 36 + xxmrghd 39, 47, 33 + xxlor 36, 24, 24 + xxmrgld 33, 58, 53 + vadduwm 17, 17, 18 + vadduwm 29, 2, 4 + xxmrgld 36, 35, 59 + xxlor 34, 23, 23 + xxmrghd 35, 45, 62 + xxlor 1, 9, 9 + vadduwm 28, 5, 2 + xxlor 1, 13, 13 + vadduwm 19, 19, 31 + vadduwm 24, 29, 11 + vadduwm 28, 28, 9 + xxlxor 61, 49, 9 + xxlor 1, 41, 41 + xxlor 41, 11, 11 + xxlxor 34, 51, 13 + vperm 29, 29, 29, 9 + xxlxor 46, 56, 46 + vperm 2, 2, 2, 9 + xxlxor 59, 60, 0 + vperm 14, 14, 14, 9 + vperm 30, 27, 27, 9 + vadduwm 19, 19, 3 + xxlor 4, 35, 35 + xxland 61, 61, 10 + xxlor 35, 12, 12 + xxland 34, 34, 10 + vadduwm 27, 29, 3 + xxlor 35, 30, 30 + vadduwm 17, 17, 4 + xxlor 26, 36, 36 + xxland 46, 46, 10 + vadduwm 3, 2, 3 + xxlor 36, 29, 29 + xxland 62, 62, 10 + xxlxor 45, 59, 50 + xxlxor 50, 35, 63 + vadduwm 31, 14, 4 + xxlor 36, 28, 28 + xxlor 6, 37, 37 + vadduwm 16, 30, 4 + xxlxor 43, 63, 43 + xxlxor 37, 48, 1 + vrlw 4, 13, 12 + vrlw 18, 18, 12 + vrlw 11, 11, 12 + vrlw 5, 5, 12 + vadduwm 15, 24, 6 + vadduwm 28, 28, 7 + vadduwm 17, 4, 17 + vadduwm 19, 18, 19 + vadduwm 15, 11, 15 + vadduwm 28, 5, 28 + xxlor 25, 38, 38 + xxlxor 61, 49, 61 + xxlxor 34, 51, 34 + xxlxor 46, 47, 46 + xxlxor 62, 60, 62 + xxlor 38, 27, 27 + vadduwm 19, 19, 1 + vperm 29, 29, 29, 6 + vperm 2, 2, 2, 6 + vperm 24, 14, 14, 6 + vperm 30, 30, 30, 6 + xxlor 5, 33, 33 + vadduwm 17, 17, 25 + xxland 61, 61, 31 + xxland 34, 34, 31 + 
xxland 56, 56, 31 + xxland 62, 62, 31 + vadduwm 27, 29, 27 + vadduwm 3, 2, 3 + vadduwm 31, 24, 31 + vadduwm 16, 30, 16 + xxlxor 36, 59, 36 + xxlxor 50, 35, 50 + xxlxor 43, 63, 43 + xxlxor 37, 48, 37 + vrlw 1, 18, 10 + xxmrgld 50, 32, 55 + vrlw 11, 11, 10 + xxmrghd 55, 32, 55 + vrlw 5, 5, 10 + vrlw 4, 4, 10 + vadduwm 15, 15, 8 + vadduwm 28, 28, 18 + vadduwm 17, 1, 17 + vadduwm 19, 11, 19 + vadduwm 15, 5, 15 + vadduwm 28, 4, 28 + xxlor 7, 57, 57 + xxlxor 62, 49, 62 + xxlxor 61, 51, 61 + xxlxor 57, 47, 34 + xxlxor 34, 60, 56 + vperm 24, 30, 30, 9 + xxmrgld 62, 20, 21 + vperm 29, 29, 29, 9 + vperm 25, 25, 25, 9 + vperm 2, 2, 2, 9 + vmr 14, 8 + xxmrghd 40, 58, 53 + xxmrgld 58, 54, 22 + vadduwm 17, 17, 30 + xxland 56, 56, 10 + vadduwm 21, 19, 8 + xxland 61, 61, 10 + xxland 51, 57, 10 + xxland 34, 34, 10 + vadduwm 31, 24, 31 + vadduwm 16, 29, 16 + vadduwm 27, 19, 27 + vadduwm 3, 2, 3 + xxlxor 33, 63, 33 + xxlxor 43, 48, 43 + xxlxor 37, 59, 37 + xxlxor 36, 35, 36 + vrlw 1, 1, 12 + vrlw 11, 11, 12 + vrlw 5, 5, 12 + vrlw 4, 4, 12 + vadduwm 0, 15, 26 + vadduwm 15, 28, 23 + vadduwm 17, 1, 17 + vadduwm 28, 11, 21 + vadduwm 0, 5, 0 + vadduwm 15, 4, 15 + xxlxor 56, 49, 56 + xxlxor 61, 60, 61 + xxlxor 51, 32, 51 + xxlxor 34, 47, 34 + vperm 24, 24, 24, 6 + vperm 29, 29, 29, 6 + vperm 19, 19, 19, 6 + vperm 2, 2, 2, 6 + vmr 13, 8 + xxlor 53, 3, 3 + xxland 56, 56, 31 + xxland 61, 61, 31 + xxland 51, 51, 31 + xxland 34, 34, 31 + vadduwm 31, 24, 31 + vadduwm 16, 29, 16 + vadduwm 27, 19, 27 + vadduwm 3, 2, 3 + xxlxor 33, 63, 33 + xxlxor 43, 48, 43 + xxlxor 36, 35, 36 + xxlxor 37, 59, 37 + vrlw 4, 4, 10 + vrlw 1, 1, 10 + vrlw 11, 11, 10 + vrlw 5, 5, 10 + xxlor 52, 4, 4 + xxlor 40, 2, 2 + vadduwm 17, 17, 21 + vadduwm 28, 28, 20 + vadduwm 0, 0, 7 + vadduwm 15, 15, 8 + vadduwm 17, 4, 17 + vadduwm 28, 1, 28 + vadduwm 0, 11, 0 + vadduwm 15, 5, 15 + xxlxor 61, 49, 61 + xxlxor 51, 60, 51 + xxlxor 34, 32, 34 + xxlxor 56, 47, 56 + vperm 29, 29, 29, 9 + vperm 19, 19, 19, 9 + vperm 2, 2, 2, 9 + 
vperm 24, 24, 24, 9 + vmr 25, 26 + xxlor 3, 39, 39 + xxland 61, 61, 10 + xxland 51, 51, 10 + xxland 34, 34, 10 + xxland 56, 56, 10 + vadduwm 27, 29, 27 + vadduwm 3, 19, 3 + vadduwm 31, 2, 31 + vadduwm 16, 24, 16 + xxlxor 36, 59, 36 + xxlxor 33, 35, 33 + xxlxor 43, 63, 43 + xxlxor 37, 48, 37 + vrlw 4, 4, 12 + vrlw 1, 1, 12 + vrlw 11, 11, 12 + vrlw 5, 5, 12 + xxlor 54, 6, 6 + xxlor 58, 5, 5 + xxlor 39, 8, 8 + vadduwm 17, 17, 22 + vadduwm 28, 28, 26 + vadduwm 0, 0, 7 + vadduwm 15, 15, 25 + vadduwm 17, 4, 17 + vadduwm 28, 1, 28 + vadduwm 0, 11, 0 + vadduwm 15, 5, 15 + xxlxor 61, 49, 61 + xxlxor 51, 60, 51 + xxlxor 34, 32, 34 + xxlxor 56, 47, 56 + vperm 29, 29, 29, 6 + vperm 19, 19, 19, 6 + vperm 2, 2, 2, 6 + vperm 24, 24, 24, 6 + xxlor 39, 26, 26 + vadduwm 28, 28, 14 + xxland 61, 61, 31 + xxland 51, 51, 31 + xxland 34, 34, 31 + xxland 56, 56, 31 + vadduwm 27, 29, 27 + vadduwm 3, 19, 3 + vadduwm 31, 2, 31 + vadduwm 16, 24, 16 + xxlxor 36, 59, 36 + xxlxor 33, 35, 33 + xxlxor 43, 63, 43 + xxlxor 37, 48, 37 + vrlw 1, 1, 10 + vrlw 11, 11, 10 + vrlw 5, 5, 10 + vrlw 4, 4, 10 + vadduwm 17, 17, 7 + vadduwm 0, 0, 30 + vadduwm 15, 15, 23 + vadduwm 17, 1, 17 + vadduwm 28, 11, 28 + vadduwm 0, 5, 0 + vadduwm 15, 4, 15 + xxlxor 56, 49, 56 + xxlxor 61, 60, 61 + xxlxor 51, 32, 51 + xxlxor 34, 47, 34 + vperm 24, 24, 24, 9 + vperm 29, 29, 29, 9 + vperm 19, 19, 19, 9 + vperm 2, 2, 2, 9 + xxlor 24, 55, 55 + vadduwm 17, 17, 13 + xxland 56, 56, 10 + xxland 61, 61, 10 + xxland 51, 51, 10 + xxland 34, 34, 10 + vadduwm 31, 24, 31 + vadduwm 16, 29, 16 + vadduwm 27, 19, 27 + vadduwm 3, 2, 3 + xxlxor 33, 63, 33 + xxlxor 43, 48, 43 + xxlxor 37, 59, 37 + xxlxor 36, 35, 36 + vrlw 1, 1, 12 + vrlw 11, 11, 12 + vrlw 5, 5, 12 + vrlw 4, 4, 12 + vmr 23, 13 + xxlor 45, 25, 25 + xxlor 39, 7, 7 + vadduwm 28, 28, 13 + vadduwm 0, 0, 18 + vadduwm 15, 15, 7 + vadduwm 17, 1, 17 + vadduwm 28, 11, 28 + vadduwm 0, 5, 0 + vadduwm 15, 4, 15 + xxlxor 56, 49, 56 + xxlxor 61, 60, 61 + xxlxor 51, 32, 51 + xxlxor 34, 47, 34 
+ vperm 24, 24, 24, 6 + vperm 29, 29, 29, 6 + vperm 19, 19, 19, 6 + vperm 2, 2, 2, 6 + xxlor 2, 46, 46 + xxlor 46, 3, 3 + xxland 56, 56, 31 + xxland 61, 61, 31 + xxland 51, 51, 31 + xxland 34, 34, 31 + vadduwm 31, 24, 31 + vadduwm 16, 29, 16 + vadduwm 27, 19, 27 + vadduwm 3, 2, 3 + xxlxor 33, 63, 33 + xxlxor 43, 48, 43 + xxlxor 36, 35, 36 + xxlxor 37, 59, 37 + vrlw 4, 4, 10 + vrlw 1, 1, 10 + vrlw 11, 11, 10 + vrlw 5, 5, 10 + vadduwm 17, 17, 20 + vadduwm 28, 28, 26 + vadduwm 0, 0, 25 + vadduwm 15, 15, 14 + vadduwm 17, 4, 17 + vadduwm 28, 1, 28 + vadduwm 0, 11, 0 + vadduwm 15, 5, 15 + xxlxor 61, 49, 61 + xxlxor 51, 60, 51 + xxlxor 34, 32, 34 + xxlxor 56, 47, 56 + vperm 29, 29, 29, 9 + vperm 19, 19, 19, 9 + vperm 2, 2, 2, 9 + vperm 24, 24, 24, 9 + xxlor 52, 2, 2 + vadduwm 17, 17, 8 + xxland 61, 61, 10 + xxland 51, 51, 10 + xxland 34, 34, 10 + xxland 56, 56, 10 + vadduwm 27, 29, 27 + vadduwm 3, 19, 3 + vadduwm 31, 2, 31 + vadduwm 16, 24, 16 + xxlxor 36, 59, 36 + xxlxor 33, 35, 33 + xxlxor 43, 63, 43 + xxlxor 37, 48, 37 + vrlw 4, 4, 12 + vrlw 1, 1, 12 + vrlw 11, 11, 12 + vrlw 5, 5, 12 + vadduwm 28, 28, 20 + vadduwm 0, 0, 21 + vadduwm 15, 15, 18 + vadduwm 17, 4, 17 + vadduwm 28, 1, 28 + vadduwm 0, 11, 0 + vadduwm 15, 5, 15 + xxlxor 61, 49, 61 + xxlxor 51, 60, 51 + xxlxor 34, 32, 34 + xxlxor 56, 47, 56 + vperm 29, 29, 29, 6 + vperm 19, 19, 19, 6 + vperm 2, 2, 2, 6 + vperm 24, 24, 24, 6 + vadduwm 17, 17, 22 + vadduwm 28, 28, 30 + xxland 61, 61, 31 + xxland 51, 51, 31 + xxland 34, 34, 31 + xxland 56, 56, 31 + vadduwm 27, 29, 27 + vadduwm 3, 19, 3 + vadduwm 31, 2, 31 + vadduwm 16, 24, 16 + xxlxor 36, 59, 36 + xxlxor 33, 35, 33 + xxlxor 43, 63, 43 + xxlxor 37, 48, 37 + vrlw 1, 1, 10 + vrlw 11, 11, 10 + vrlw 5, 5, 10 + vrlw 4, 4, 10 + vadduwm 0, 0, 23 + vadduwm 15, 15, 7 + vadduwm 17, 1, 17 + vadduwm 28, 11, 28 + vadduwm 0, 5, 0 + vadduwm 15, 4, 15 + xxlxor 56, 49, 56 + xxlxor 61, 60, 61 + xxlxor 51, 32, 51 + xxlxor 34, 47, 34 + vperm 24, 24, 24, 9 + vperm 29, 29, 29, 9 + 
vperm 19, 19, 19, 9 + vperm 2, 2, 2, 9 + xxlor 5, 4, 4 + xxlor 4, 58, 58 + xxland 56, 56, 10 + xxland 61, 61, 10 + xxland 51, 51, 10 + xxland 34, 34, 10 + vadduwm 31, 24, 31 + vadduwm 16, 29, 16 + vadduwm 27, 19, 27 + vadduwm 3, 2, 3 + xxlxor 33, 63, 33 + xxlxor 43, 48, 43 + xxlxor 37, 59, 37 + xxlxor 36, 35, 36 + vrlw 1, 1, 12 + vrlw 11, 11, 12 + vrlw 5, 5, 12 + vrlw 4, 4, 12 + xxlor 39, 8, 8 + xxlor 54, 24, 24 + xxlor 58, 26, 26 + vadduwm 17, 17, 13 + vadduwm 28, 28, 7 + vadduwm 0, 0, 22 + vadduwm 15, 15, 26 + vadduwm 17, 1, 17 + vadduwm 28, 11, 28 + vadduwm 0, 5, 0 + vadduwm 15, 4, 15 + xxlxor 56, 49, 56 + xxlxor 61, 60, 61 + xxlxor 51, 32, 51 + xxlxor 34, 47, 34 + vperm 24, 24, 24, 6 + vperm 29, 29, 29, 6 + vperm 19, 19, 19, 6 + vperm 2, 2, 2, 6 + xxlor 3, 53, 53 + xxlor 53, 4, 4 + xxland 56, 56, 31 + xxland 61, 61, 31 + xxland 51, 51, 31 + xxland 34, 34, 31 + vadduwm 31, 24, 31 + vadduwm 16, 29, 16 + vadduwm 27, 19, 27 + vadduwm 3, 2, 3 + xxlxor 33, 63, 33 + xxlxor 43, 48, 43 + xxlxor 36, 35, 36 + xxlxor 37, 59, 37 + vrlw 4, 4, 10 + vrlw 1, 1, 10 + vrlw 11, 11, 10 + vrlw 5, 5, 10 + vadduwm 17, 17, 21 + vadduwm 28, 28, 20 + vadduwm 0, 0, 18 + vadduwm 15, 15, 25 + vadduwm 17, 4, 17 + vadduwm 28, 1, 28 + vadduwm 0, 11, 0 + vadduwm 15, 5, 15 + xxlxor 61, 49, 61 + xxlxor 51, 60, 51 + xxlxor 34, 32, 34 + xxlxor 56, 47, 56 + vperm 29, 29, 29, 9 + vperm 19, 19, 19, 9 + vperm 2, 2, 2, 9 + vperm 24, 24, 24, 9 + xxlor 2, 55, 55 + vmr 23, 18 + xxland 61, 61, 10 + xxland 51, 51, 10 + xxland 34, 34, 10 + xxland 56, 56, 10 + vadduwm 27, 29, 27 + vadduwm 3, 19, 3 + vadduwm 31, 2, 31 + vadduwm 16, 24, 16 + xxlxor 36, 59, 36 + xxlxor 33, 35, 33 + xxlxor 43, 63, 43 + xxlxor 37, 48, 37 + vrlw 4, 4, 12 + vrlw 1, 1, 12 + vrlw 11, 11, 12 + vrlw 5, 5, 12 + xxlor 50, 5, 5 + vadduwm 17, 17, 14 + vadduwm 28, 28, 30 + vadduwm 0, 0, 18 + vadduwm 15, 15, 22 + vadduwm 17, 4, 17 + vadduwm 28, 1, 28 + vadduwm 0, 11, 0 + vadduwm 15, 5, 15 + xxlxor 61, 49, 61 + xxlxor 51, 60, 51 + xxlxor 34, 
32, 34 + xxlxor 56, 47, 56 + vperm 29, 29, 29, 6 + vperm 19, 19, 19, 6 + vperm 2, 2, 2, 6 + vperm 24, 24, 24, 6 + xxlor 25, 40, 40 + vmr 8, 13 + xxland 61, 61, 31 + xxland 51, 51, 31 + xxland 34, 34, 31 + xxland 56, 56, 31 + vadduwm 27, 29, 27 + vadduwm 3, 19, 3 + vadduwm 31, 2, 31 + vadduwm 16, 24, 16 + xxlxor 36, 59, 36 + xxlxor 33, 35, 33 + xxlxor 43, 63, 43 + xxlxor 37, 48, 37 + xxlor 45, 25, 25 + vrlw 1, 1, 10 + vrlw 11, 11, 10 + vrlw 5, 5, 10 + vrlw 4, 4, 10 + vadduwm 17, 17, 13 + xxlor 45, 2, 2 + vadduwm 0, 0, 8 + vadduwm 28, 28, 13 + vadduwm 15, 15, 26 + vadduwm 17, 1, 17 + vadduwm 28, 11, 28 + vadduwm 0, 5, 0 + vadduwm 15, 4, 15 + xxlxor 56, 49, 56 + xxlxor 61, 60, 61 + xxlxor 51, 32, 51 + xxlxor 34, 47, 34 + vperm 24, 24, 24, 9 + vperm 29, 29, 29, 9 + vperm 19, 19, 19, 9 + vperm 2, 2, 2, 9 + xxlor 4, 57, 57 + xxlor 26, 46, 46 + xxland 56, 56, 10 + xxland 61, 61, 10 + xxland 51, 51, 10 + xxland 34, 34, 10 + vadduwm 31, 24, 31 + vadduwm 16, 29, 16 + vadduwm 27, 19, 27 + vadduwm 3, 2, 3 + xxlxor 33, 63, 33 + xxlxor 43, 48, 43 + xxlxor 37, 59, 37 + xxlxor 36, 35, 36 + vrlw 1, 1, 12 + vrlw 11, 11, 12 + vrlw 5, 5, 12 + vrlw 4, 4, 12 + xxlor 8, 62, 62 + xxlor 57, 3, 3 + xxlor 46, 7, 7 + xxlor 62, 6, 6 + vadduwm 17, 17, 7 + vadduwm 28, 28, 25 + vadduwm 0, 0, 14 + vadduwm 15, 15, 30 + vadduwm 17, 1, 17 + vadduwm 28, 11, 28 + vadduwm 0, 5, 0 + vadduwm 15, 4, 15 + xxlxor 56, 49, 56 + xxlxor 61, 60, 61 + xxlxor 51, 32, 51 + xxlxor 34, 47, 34 + vperm 24, 24, 24, 6 + vperm 29, 29, 29, 6 + vperm 19, 19, 19, 6 + vperm 2, 2, 2, 6 + vadduwm 17, 17, 20 + xxlor 3, 52, 52 + xxland 56, 56, 31 + xxland 61, 61, 31 + xxland 51, 51, 31 + xxland 34, 34, 31 + vadduwm 31, 24, 31 + vadduwm 16, 29, 16 + vadduwm 27, 19, 27 + vadduwm 3, 2, 3 + xxlxor 33, 63, 33 + xxlxor 43, 48, 43 + xxlxor 36, 35, 36 + xxlxor 37, 59, 37 + vrlw 4, 4, 10 + vrlw 1, 1, 10 + vrlw 11, 11, 10 + vrlw 5, 5, 10 + xxlor 52, 8, 8 + vadduwm 0, 0, 22 + vadduwm 28, 28, 20 + vadduwm 15, 15, 23 + vadduwm 17, 4, 17 + 
vadduwm 28, 1, 28 + vadduwm 0, 11, 0 + vadduwm 15, 5, 15 + xxlxor 61, 49, 61 + xxlxor 51, 60, 51 + xxlxor 34, 32, 34 + xxlxor 56, 47, 56 + vperm 29, 29, 29, 9 + vperm 19, 19, 19, 9 + vperm 2, 2, 2, 9 + vperm 24, 24, 24, 9 + xxlor 6, 55, 55 + xxlor 55, 4, 4 + xxland 61, 61, 10 + xxland 51, 51, 10 + xxland 34, 34, 10 + xxland 56, 56, 10 + vadduwm 27, 29, 27 + vadduwm 3, 19, 3 + vadduwm 31, 2, 31 + vadduwm 16, 24, 16 + xxlxor 36, 59, 36 + xxlxor 33, 35, 33 + xxlxor 43, 63, 43 + xxlxor 37, 48, 37 + vrlw 4, 4, 12 + vrlw 1, 1, 12 + vrlw 11, 11, 12 + vrlw 5, 5, 12 + vadduwm 17, 17, 23 + vadduwm 28, 28, 13 + vadduwm 0, 0, 21 + vadduwm 15, 15, 14 + vadduwm 17, 4, 17 + vadduwm 28, 1, 28 + vadduwm 0, 11, 0 + vadduwm 15, 5, 15 + xxlxor 61, 49, 61 + xxlxor 51, 60, 51 + xxlxor 34, 32, 34 + xxlxor 56, 47, 56 + vperm 29, 29, 29, 6 + vperm 19, 19, 19, 6 + vperm 2, 2, 2, 6 + vperm 24, 24, 24, 6 + xxlor 4, 53, 53 + xxlor 53, 26, 26 + xxland 61, 61, 31 + xxland 51, 51, 31 + xxland 34, 34, 31 + xxland 56, 56, 31 + vadduwm 27, 29, 27 + vadduwm 3, 19, 3 + vadduwm 31, 2, 31 + vadduwm 16, 24, 16 + xxlxor 36, 59, 36 + xxlxor 33, 35, 33 + xxlxor 43, 63, 43 + xxlxor 37, 48, 37 + vrlw 1, 1, 10 + vrlw 11, 11, 10 + vrlw 5, 5, 10 + vrlw 4, 4, 10 + vadduwm 17, 17, 21 + vadduwm 28, 28, 8 + vadduwm 0, 0, 7 + vadduwm 15, 15, 30 + vadduwm 17, 1, 17 + vadduwm 28, 11, 28 + vadduwm 0, 5, 0 + vadduwm 15, 4, 15 + xxlxor 56, 49, 56 + xxlxor 61, 60, 61 + xxlxor 51, 32, 51 + xxlxor 34, 47, 34 + vperm 24, 24, 24, 9 + vperm 29, 29, 29, 9 + vperm 19, 19, 19, 9 + vperm 2, 2, 2, 9 + xxlor 5, 25, 25 + xxlor 2, 58, 58 + xxland 56, 56, 10 + xxland 61, 61, 10 + xxland 51, 51, 10 + xxland 34, 34, 10 + vadduwm 31, 24, 31 + vadduwm 16, 29, 16 + vadduwm 27, 19, 27 + vadduwm 3, 2, 3 + xxlxor 33, 63, 33 + xxlxor 43, 48, 43 + xxlxor 37, 59, 37 + xxlxor 36, 35, 36 + vrlw 1, 1, 12 + vrlw 11, 11, 12 + vrlw 5, 5, 12 + vrlw 4, 4, 12 + vmr 22, 26 + vadduwm 0, 0, 26 + xxlor 58, 5, 5 + vadduwm 17, 17, 25 + vadduwm 28, 28, 18 + 
vadduwm 15, 15, 26 + vadduwm 17, 1, 17 + vadduwm 28, 11, 28 + vadduwm 0, 5, 0 + vadduwm 15, 4, 15 + xxlxor 56, 49, 56 + xxlxor 61, 60, 61 + xxlxor 51, 32, 51 + xxlxor 34, 47, 34 + vperm 24, 24, 24, 6 + vperm 29, 29, 29, 6 + vperm 19, 19, 19, 6 + vperm 2, 2, 2, 6 + xxlor 7, 24, 24 + xxlor 8, 57, 57 + xxland 56, 56, 31 + xxland 61, 61, 31 + xxland 51, 51, 31 + xxland 34, 34, 31 + vadduwm 31, 24, 31 + vadduwm 16, 29, 16 + vadduwm 27, 19, 27 + vadduwm 3, 2, 3 + xxlxor 33, 63, 33 + xxlxor 43, 48, 43 + xxlxor 36, 35, 36 + xxlxor 37, 59, 37 + vrlw 4, 4, 10 + vrlw 1, 1, 10 + vrlw 11, 11, 10 + vrlw 5, 5, 10 + xxlor 57, 7, 7 + vadduwm 17, 17, 20 + vadduwm 28, 28, 13 + vadduwm 0, 0, 14 + vadduwm 15, 15, 25 + vadduwm 17, 4, 17 + vadduwm 28, 1, 28 + vadduwm 0, 11, 0 + vadduwm 15, 5, 15 + xxlxor 61, 49, 61 + xxlxor 51, 60, 51 + xxlxor 34, 32, 34 + xxlxor 56, 47, 56 + vperm 29, 29, 29, 9 + vperm 19, 19, 19, 9 + vperm 2, 2, 2, 9 + vperm 24, 24, 24, 9 + xxlor 5, 52, 52 + xxlor 23, 45, 45 + xxland 61, 61, 10 + xxland 51, 51, 10 + xxland 34, 34, 10 + xxland 56, 56, 10 + vadduwm 27, 29, 27 + vadduwm 3, 19, 3 + vadduwm 31, 2, 31 + vadduwm 16, 24, 16 + xxlxor 36, 59, 36 + xxlxor 33, 35, 33 + xxlxor 43, 63, 43 + xxlxor 37, 48, 37 + vrlw 4, 4, 12 + vrlw 1, 1, 12 + vrlw 11, 11, 12 + vrlw 5, 5, 12 + xxlor 52, 6, 6 + vadduwm 28, 28, 8 + vmr 13, 8 + xxlor 40, 3, 3 + vadduwm 17, 17, 20 + vadduwm 0, 0, 8 + vadduwm 15, 15, 22 + vadduwm 17, 4, 17 + vadduwm 28, 1, 28 + vadduwm 0, 11, 0 + vadduwm 15, 5, 15 + xxlxor 61, 49, 61 + xxlxor 51, 60, 51 + xxlxor 34, 32, 34 + xxlxor 56, 47, 56 + vperm 29, 29, 29, 6 + vperm 19, 19, 19, 6 + vperm 2, 2, 2, 6 + vperm 24, 24, 24, 6 + xxlor 25, 39, 39 + vmr 7, 30 + xxland 61, 61, 31 + xxland 51, 51, 31 + xxland 34, 34, 31 + xxland 56, 56, 31 + vadduwm 27, 29, 27 + vadduwm 3, 19, 3 + vadduwm 31, 2, 31 + vadduwm 16, 24, 16 + xxlxor 36, 59, 36 + xxlxor 33, 35, 33 + xxlxor 43, 63, 43 + xxlxor 37, 48, 37 + vrlw 1, 1, 10 + vrlw 11, 11, 10 + vrlw 5, 5, 10 + vrlw 4, 4, 
10 + vmr 30, 18 + xxlor 24, 46, 46 + xxlor 46, 25, 25 + xxlor 50, 8, 8 + vadduwm 17, 17, 23 + vadduwm 28, 28, 14 + vadduwm 0, 0, 18 + vadduwm 15, 15, 26 + vadduwm 17, 1, 17 + vadduwm 28, 11, 28 + vadduwm 0, 5, 0 + vadduwm 15, 4, 15 + xxlxor 56, 49, 56 + xxlxor 61, 60, 61 + xxlxor 51, 32, 51 + xxlxor 34, 47, 34 + vperm 24, 24, 24, 9 + vperm 29, 29, 29, 9 + vperm 19, 19, 19, 9 + vperm 2, 2, 2, 9 + xxlor 6, 58, 58 + xxlor 58, 4, 4 + xxland 56, 56, 10 + xxland 61, 61, 10 + xxland 51, 51, 10 + xxland 34, 34, 10 + vadduwm 31, 24, 31 + vadduwm 16, 29, 16 + vadduwm 27, 19, 27 + vadduwm 3, 2, 3 + xxlxor 33, 63, 33 + xxlxor 43, 48, 43 + xxlxor 37, 59, 37 + xxlxor 36, 35, 36 + vrlw 1, 1, 12 + vrlw 11, 11, 12 + vrlw 5, 5, 12 + vrlw 4, 4, 12 + vadduwm 17, 17, 30 + vadduwm 28, 28, 26 + vadduwm 0, 0, 7 + vadduwm 15, 15, 21 + vadduwm 17, 1, 17 + vadduwm 28, 11, 28 + vadduwm 0, 5, 0 + vadduwm 15, 4, 15 + xxlxor 56, 49, 56 + xxlxor 61, 60, 61 + xxlxor 51, 32, 51 + xxlxor 34, 47, 34 + vperm 24, 24, 24, 6 + vperm 29, 29, 29, 6 + vperm 19, 19, 19, 6 + vperm 2, 2, 2, 6 + xxlor 40, 23, 23 + vadduwm 13, 28, 13 + vadduwm 8, 17, 8 + xxland 49, 56, 31 + xxland 61, 61, 31 + xxland 51, 51, 31 + xxland 34, 34, 31 + vadduwm 31, 17, 31 + vadduwm 16, 29, 16 + vadduwm 28, 19, 27 + vadduwm 3, 2, 3 + xxlxor 33, 63, 33 + xxlxor 43, 48, 43 + xxlxor 36, 35, 36 + xxlxor 37, 60, 37 + vrlw 4, 4, 10 + vrlw 1, 1, 10 + vrlw 11, 11, 10 + vrlw 5, 5, 10 + xxlor 2, 55, 55 + vmr 23, 30 + xxlor 62, 24, 24 + vadduwm 0, 0, 22 + vadduwm 15, 15, 30 + vadduwm 8, 4, 8 + vadduwm 13, 1, 13 + vadduwm 0, 11, 0 + vadduwm 15, 5, 15 + xxlxor 61, 40, 61 + xxlxor 51, 45, 51 + xxlxor 34, 32, 34 + xxlxor 49, 47, 49 + vperm 29, 29, 29, 9 + vperm 19, 19, 19, 9 + vperm 2, 2, 2, 9 + vperm 17, 17, 17, 9 + vadduwm 13, 13, 14 + xxlor 46, 5, 5 + xxland 61, 61, 10 + xxland 51, 51, 10 + xxland 34, 34, 10 + xxland 49, 49, 10 + vadduwm 28, 29, 28 + vadduwm 3, 19, 3 + vadduwm 31, 2, 31 + vadduwm 16, 17, 16 + xxlxor 36, 60, 36 + xxlxor 33, 35, 
33 + xxlxor 43, 63, 43 + xxlxor 37, 48, 37 + vrlw 4, 4, 12 + vrlw 1, 1, 12 + vrlw 11, 11, 12 + vrlw 5, 5, 12 + vadduwm 8, 8, 25 + vadduwm 0, 0, 14 + vadduwm 15, 15, 7 + vadduwm 8, 4, 8 + vadduwm 13, 1, 13 + vadduwm 0, 11, 0 + vadduwm 15, 5, 15 + xxlxor 62, 40, 61 + xxlxor 51, 45, 51 + xxlxor 34, 32, 34 + xxlxor 49, 47, 49 + vperm 30, 30, 30, 6 + vperm 19, 19, 19, 6 + vperm 2, 2, 2, 6 + vperm 17, 17, 17, 6 + vadduwm 29, 8, 20 + vadduwm 8, 13, 18 + xxland 45, 62, 31 + xxland 51, 51, 31 + xxland 34, 34, 31 + xxland 49, 49, 31 + vadduwm 30, 13, 28 + vadduwm 3, 19, 3 + vadduwm 31, 2, 31 + vadduwm 16, 17, 16 + xxlxor 36, 62, 36 + xxlxor 33, 35, 33 + xxlxor 43, 63, 43 + xxlxor 37, 48, 37 + vrlw 1, 1, 10 + vrlw 11, 11, 10 + vrlw 5, 5, 10 + vrlw 4, 4, 10 + vadduwm 0, 0, 23 + vadduwm 7, 15, 21 + vadduwm 29, 1, 29 + vadduwm 8, 11, 8 + vadduwm 0, 5, 0 + vadduwm 7, 4, 7 + xxlxor 47, 61, 49 + xxlxor 45, 40, 45 + xxlxor 49, 32, 51 + xxlxor 34, 39, 34 + vperm 15, 15, 15, 9 + vperm 13, 13, 13, 9 + vperm 17, 17, 17, 9 + vperm 2, 2, 2, 9 + xxlor 46, 3, 3 + vadduwm 9, 29, 26 + vadduwm 8, 8, 14 + xxland 46, 47, 10 + xxland 45, 45, 10 + xxland 47, 49, 10 + xxland 34, 34, 10 + vadduwm 17, 14, 31 + vadduwm 16, 13, 16 + vadduwm 18, 15, 30 + vadduwm 3, 2, 3 + xxlxor 33, 49, 33 + xxlxor 43, 48, 43 + xxlxor 37, 50, 37 + xxlxor 36, 35, 36 + vrlw 1, 1, 12 + vrlw 11, 11, 12 + vrlw 5, 5, 12 + vrlw 4, 4, 12 + xxlor 44, 6, 6 + xxlor 0, 10, 10 + vadduwm 0, 0, 12 + xxlor 44, 2, 2 + vadduwm 9, 1, 9 + vadduwm 7, 7, 12 + vadduwm 8, 11, 8 + vadduwm 7, 4, 7 + vadduwm 0, 5, 0 + xxlxor 34, 39, 34 + xxlxor 44, 32, 47 + vperm 2, 2, 2, 6 + xxlxor 46, 41, 46 + xxlxor 45, 40, 45 + vperm 12, 12, 12, 6 + vperm 14, 14, 14, 6 + vperm 13, 13, 13, 6 + xxland 34, 34, 31 + xxlor 1, 31, 31 + vadduwm 3, 2, 3 + xxland 44, 44, 31 + xxlxor 36, 35, 36 + xxlxor 51, 35, 40 + xxland 35, 46, 31 + xxland 38, 45, 31 + vadduwm 15, 12, 18 + vadduwm 8, 3, 17 + vadduwm 13, 6, 16 + xxlxor 37, 47, 37 + xxlxor 33, 40, 33 + xxlxor 43, 45, 
43 + vrlw 4, 4, 10 + vrlw 1, 1, 10 + vrlw 11, 11, 10 + vrlw 5, 5, 10 + xxlxor 47, 47, 41 + xxlxor 40, 40, 32 + xxlxor 39, 45, 39 + xxlxor 50, 36, 38 + xxlxor 63, 33, 44 + xxlxor 43, 43, 34 + xxlxor 41, 37, 35 + bne 0, .LBB3_2 +.LBB3_5: + vmrglw 2, 19, 15 + li 3, 32 + li 4, 48 + vmrglw 4, 7, 8 + vmrglw 0, 31, 18 + vmrglw 1, 9, 11 + vmrghw 3, 19, 15 + vmrghw 5, 7, 8 + vmrghw 6, 31, 18 + vmrghw 7, 9, 11 + xxmrgld 40, 36, 34 + xxmrghd 34, 36, 34 + xxmrgld 41, 33, 32 + xxswapd 0, 40 + xxmrgld 36, 37, 35 + xxmrghd 35, 37, 35 + xxmrghd 37, 33, 32 + xxswapd 1, 41 + xxmrgld 32, 39, 38 + xxmrghd 33, 39, 38 + xxswapd 2, 34 + xxswapd 4, 36 + xxswapd 3, 37 + stxvd2x 0, 0, 5 + xxswapd 5, 32 + stxvd2x 1, 5, 11 + xxswapd 0, 35 + xxswapd 1, 33 + stxvd2x 2, 5, 3 + li 3, 64 + stxvd2x 3, 5, 4 + li 4, 80 + stxvd2x 4, 5, 3 + li 3, 96 + stxvd2x 5, 5, 4 + li 4, 112 + stxvd2x 0, 5, 3 + stxvd2x 1, 5, 4 + li 3, 224 + lxvd2x 63, 1, 3 + li 3, 208 + lfd 31, 408(1) + ld 30, 304(1) + ld 29, 296(1) + lxvd2x 62, 1, 3 + li 3, 192 + lfd 30, 400(1) + ld 28, 288(1) + ld 27, 280(1) + lxvd2x 61, 1, 3 + li 3, 176 + lfd 29, 392(1) + ld 26, 272(1) + ld 25, 264(1) + lxvd2x 60, 1, 3 + li 3, 160 + lfd 28, 384(1) + ld 24, 256(1) + ld 23, 248(1) + lxvd2x 59, 1, 3 + li 3, 144 + lfd 27, 376(1) + ld 22, 240(1) + lxvd2x 58, 1, 3 + li 3, 128 + lfd 26, 368(1) + lxvd2x 57, 1, 3 + li 3, 112 + lfd 25, 360(1) + lxvd2x 56, 1, 3 + li 3, 96 + lfd 24, 352(1) + lxvd2x 55, 1, 3 + li 3, 80 + lfd 23, 344(1) + lxvd2x 54, 1, 3 + li 3, 64 + lfd 22, 336(1) + lxvd2x 53, 1, 3 + li 3, 48 + lfd 21, 328(1) + lxvd2x 52, 1, 3 + lfd 20, 320(1) + addi 1, 1, 416 + blr + .long 0 + .quad 0 +.Lfunc_end3: + .size blake3_hash4_sse41, .Lfunc_end3-.Lfunc_begin3 + .cfi_endproc + .section ".note.GNU-stack","",@progbits +#endif diff --git a/sys/contrib/openzfs/module/icp/asm-ppc64/sha2/sha256-p8.S b/sys/contrib/openzfs/module/icp/asm-ppc64/sha2/sha256-p8.S new file mode 100644 index 000000000000..dc3c4cea669c --- /dev/null +++ 
b/sys/contrib/openzfs/module/icp/asm-ppc64/sha2/sha256-p8.S @@ -0,0 +1,1520 @@ +/* + * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de> + * - modified assembly to fit into OpenZFS + */ + +#if (defined(__PPC64__) && defined(__BIG_ENDIAN__)) + +#if (!defined(_CALL_ELF) || _CALL_ELF == 1) +.text + +.globl zfs_sha256_power8 +.globl .zfs_sha256_power8 +.type zfs_sha256_power8,@function +.section ".opd","aw" +.align 3 +zfs_sha256_power8: +.quad .zfs_sha256_power8,.TOC.@tocbase,0 +.previous +.align 6 +.zfs_sha256_power8: +#else +.abiversion 2 +.text + +.globl zfs_sha256_power8 +.type zfs_sha256_power8,@function +.align 6 +zfs_sha256_power8: +.localentry zfs_sha256_power8,0 +#endif + /* zfs_sha256_power8, big-endian build. Prologue: allocate a 384-byte frame, keep the caller's LR in r8 (also stored at 400(1)), save v24-v31 and r26-r31, and load r10 and r26-r31 with the offsets 0x10..0x70 that serve as lvx/stvx index registers below. SPR 256 (VRSAVE) is read into r12 and overwritten with -4096+255. NOTE(review): r3/r4/r5 look like state pointer, input pointer and block count (r3 is the base of the raw-encoded loads/stores, r4 steps by 16 after each raw-encoded load, r5 is counted down before bne .Loop) - confirm against the C prototype. */ stdu 1,-384(1) + mflr 8 + li 10,207 + li 11,223 + stvx 24,10,1 + addi 10,10,32 + mfspr 12,256 + stvx 25,11,1 + addi 11,11,32 + stvx 26,10,1 + addi 10,10,32 + stvx 27,11,1 + addi 11,11,32 + stvx 28,10,1 + addi 10,10,32 + stvx 29,11,1 + addi 11,11,32 + stvx 30,10,1 + stvx 31,11,1 + li 11,-4096+255 + stw 12,332(1) + li 10,0x10 + std 26,336(1) + li 26,0x20 + std 27,344(1) + li 27,0x30 + std 28,352(1) + li 28,0x40 + std 29,360(1) + li 29,0x50 + std 30,368(1) + li 30,0x60 + std 31,376(1) + li 31,0x70 + std 8,400(1) + mtspr 256,11 + + /* r6 = address of the constant table placed after .LPICmeup; r11 = r1+79 is the base of the stack scratch area written at the top of .Loop */ bl .LPICmeup + addi 11,1,79 + /* NOTE(review): the next two words are raw-encoded instructions; hand-decoding gives lxvw4x loads into v0 from (r3) and into v4 from (r10+r3) - confirm against the Power ISA. The vsldoi rotations that follow derive v1-v3 from v0 and v5-v7 from v4 by 4/8/12 bytes. */ .long 0x7C001E19 + .long 0x7C8A1E19 + vsldoi 1,0,0,4 + vsldoi 2,0,0,8 + vsldoi 
3,0,0,12 + vsldoi 5,4,4,4 + vsldoi 6,4,4,8 + vsldoi 7,4,4,12 + /* r0 = 3 is moved to CTR before .L16_xx */ li 0,3 + b .Loop +.align 5 +.Loop: /* Per-iteration setup: v28 = first table row, r7 = working copy of the table pointer r6, and v0-v7 are copied to the scratch area at r11 so they can be added back after the rounds. Each round also pre-adds the next table row (v28) to the register that becomes the next round's accumulator. NOTE(review): the .long 0x7D002699/0x7D802699/0x7E002699/0x7F002699 words hand-decode to lxvd2x loads through r4 into v8/v12/v16/v24, and the .long 0x13C...../0x13D..... words to vshasigmaw with target v30 - confirm against the Power ISA. */ + lvx 28,0,6 + .long 0x7D002699 + addi 4,4,16 + mr 7,6 + stvx 0,0,11 + stvx 1,10,11 + stvx 2,26,11 + stvx 3,27,11 + stvx 4,28,11 + stvx 5,29,11 + stvx 6,30,11 + stvx 7,31,11 + vadduwm 7,7,28 + lvx 28,10,6 + vadduwm 7,7,8 + vsel 29,6,5,4 + vadduwm 6,6,28 + vadduwm 7,7,29 + .long 0x13C4FE82 + vadduwm 7,7,30 + vxor 29,0,1 + vsel 29,1,2,29 + vadduwm 3,3,7 + .long 0x13C08682 + vadduwm 30,30,29 + vadduwm 7,7,30 + lvx 28,26,7 + vsldoi 9,8,8,4 + vadduwm 6,6,9 + vsel 29,5,4,3 + vadduwm 5,5,28 + vadduwm 6,6,29 + .long 0x13C3FE82 + vadduwm 6,6,30 + vxor 29,7,0 + vsel 29,0,1,29 + vadduwm 2,2,6 + .long 0x13C78682 + vadduwm 30,30,29 + vadduwm 6,6,30 + lvx 28,27,7 + vsldoi 10,9,9,4 + vadduwm 5,5,10 + vsel 29,4,3,2 + vadduwm 4,4,28 + vadduwm 5,5,29 + .long 0x13C2FE82 + vadduwm 5,5,30 + vxor 29,6,7 + vsel 29,7,0,29 + vadduwm 1,1,5 + .long 0x13C68682 + vadduwm 30,30,29 + vadduwm 5,5,30 + lvx 28,28,7 + .long 0x7D802699 + addi 4,4,16 + vsldoi 11,10,10,4 + vadduwm 4,4,11 + vsel 29,3,2,1 + vadduwm 3,3,28 + vadduwm 4,4,29 + .long 0x13C1FE82 + vadduwm 4,4,30 + vxor 29,5,6 + vsel 29,6,7,29 + vadduwm 0,0,4 + .long 0x13C58682 + vadduwm 30,30,29 + vadduwm 4,4,30 + lvx 28,29,7 + vadduwm 3,3,12 + vsel 29,2,1,0 + vadduwm 2,2,28 + vadduwm 3,3,29 + .long 0x13C0FE82 + vadduwm 3,3,30 + vxor 29,4,5 + vsel 29,5,6,29 + vadduwm 7,7,3 + .long 0x13C48682 + vadduwm 30,30,29 + vadduwm 3,3,30 + lvx 28,30,7 + vsldoi 13,12,12,4 + vadduwm 2,2,13 + vsel 29,1,0,7 + vadduwm 1,1,28 + vadduwm 2,2,29 + .long 0x13C7FE82 + vadduwm 2,2,30 + vxor 29,3,4 + vsel 29,4,5,29 + vadduwm 6,6,2 + .long 0x13C38682 + vadduwm 30,30,29 + vadduwm 2,2,30 + lvx 28,31,7 + addi 7,7,0x80 + vsldoi 14,13,13,4 + vadduwm 1,1,14 + vsel 29,0,7,6 + vadduwm 0,0,28 + vadduwm 1,1,29 + .long 0x13C6FE82 + vadduwm 1,1,30 + vxor 29,2,3 + vsel 29,3,4,29 + vadduwm 5,5,1 + .long 0x13C28682 + vadduwm 30,30,29 + vadduwm 1,1,30 + lvx 28,0,7 + .long 0x7E002699 + 
addi 4,4,16 + vsldoi 15,14,14,4 + vadduwm 0,0,15 + vsel 29,7,6,5 + vadduwm 7,7,28 + vadduwm 0,0,29 + .long 0x13C5FE82 + vadduwm 0,0,30 + vxor 29,1,2 + vsel 29,2,3,29 + vadduwm 4,4,0 + .long 0x13C18682 + vadduwm 30,30,29 + vadduwm 0,0,30 + lvx 28,10,7 + vadduwm 7,7,16 + vsel 29,6,5,4 + vadduwm 6,6,28 + vadduwm 7,7,29 + .long 0x13C4FE82 + vadduwm 7,7,30 + vxor 29,0,1 + vsel 29,1,2,29 + vadduwm 3,3,7 + .long 0x13C08682 + vadduwm 30,30,29 + vadduwm 7,7,30 + lvx 28,26,7 + vsldoi 17,16,16,4 + vadduwm 6,6,17 + vsel 29,5,4,3 + vadduwm 5,5,28 + vadduwm 6,6,29 + .long 0x13C3FE82 + vadduwm 6,6,30 + vxor 29,7,0 + vsel 29,0,1,29 + vadduwm 2,2,6 + .long 0x13C78682 + vadduwm 30,30,29 + vadduwm 6,6,30 + lvx 28,27,7 + vsldoi 18,17,17,4 + vadduwm 5,5,18 + vsel 29,4,3,2 + vadduwm 4,4,28 + vadduwm 5,5,29 + .long 0x13C2FE82 + vadduwm 5,5,30 + vxor 29,6,7 + vsel 29,7,0,29 + vadduwm 1,1,5 + .long 0x13C68682 + vadduwm 30,30,29 + vadduwm 5,5,30 + lvx 28,28,7 + .long 0x7F002699 + addi 4,4,16 + vsldoi 19,18,18,4 + vadduwm 4,4,19 + vsel 29,3,2,1 + vadduwm 3,3,28 + vadduwm 4,4,29 + .long 0x13C1FE82 + vadduwm 4,4,30 + vxor 29,5,6 + vsel 29,6,7,29 + vadduwm 0,0,4 + .long 0x13C58682 + vadduwm 30,30,29 + vadduwm 4,4,30 + lvx 28,29,7 + vadduwm 3,3,24 + vsel 29,2,1,0 + vadduwm 2,2,28 + vadduwm 3,3,29 + .long 0x13C0FE82 + vadduwm 3,3,30 + vxor 29,4,5 + vsel 29,5,6,29 + vadduwm 7,7,3 + .long 0x13C48682 + vadduwm 30,30,29 + vadduwm 3,3,30 + lvx 28,30,7 + vsldoi 25,24,24,4 + vadduwm 2,2,25 + vsel 29,1,0,7 + vadduwm 1,1,28 + vadduwm 2,2,29 + .long 0x13C7FE82 + vadduwm 2,2,30 + vxor 29,3,4 + vsel 29,4,5,29 + vadduwm 6,6,2 + .long 0x13C38682 + vadduwm 30,30,29 + vadduwm 2,2,30 + lvx 28,31,7 + addi 7,7,0x80 + vsldoi 26,25,25,4 + vadduwm 1,1,26 + vsel 29,0,7,6 + vadduwm 0,0,28 + vadduwm 1,1,29 + .long 0x13C6FE82 + vadduwm 1,1,30 + vxor 29,2,3 + vsel 29,3,4,29 + vadduwm 5,5,1 + .long 0x13C28682 + vadduwm 30,30,29 + vadduwm 1,1,30 + lvx 28,0,7 + vsldoi 27,26,26,4 + .long 0x13C90682 + vadduwm 8,8,30 + .long 
0x13DA7E82 + vadduwm 8,8,30 + vadduwm 8,8,17 + vadduwm 0,0,27 + vsel 29,7,6,5 + vadduwm 7,7,28 + vadduwm 0,0,29 + .long 0x13C5FE82 + vadduwm 0,0,30 + vxor 29,1,2 + vsel 29,2,3,29 + vadduwm 4,4,0 + .long 0x13C18682 + vadduwm 30,30,29 + vadduwm 0,0,30 + lvx 28,10,7 + /* CTR = 3 (r0 from the li 0,3 before .Loop): the .L16_xx body runs three times per .Loop iteration */ mtctr 0 + b .L16_xx +.align 5 +.L16_xx: + .long 0x13CA0682 + vadduwm 9,9,30 + .long 0x13DB7E82 + vadduwm 9,9,30 + vadduwm 9,9,18 + vadduwm 7,7,8 + vsel 29,6,5,4 + vadduwm 6,6,28 + vadduwm 7,7,29 + .long 0x13C4FE82 + vadduwm 7,7,30 + vxor 29,0,1 + vsel 29,1,2,29 + vadduwm 3,3,7 + .long 0x13C08682 + vadduwm 30,30,29 + vadduwm 7,7,30 + lvx 28,26,7 + .long 0x13CB0682 + vadduwm 10,10,30 + .long 0x13C87E82 + vadduwm 10,10,30 + vadduwm 10,10,19 + vadduwm 6,6,9 + vsel 29,5,4,3 + vadduwm 5,5,28 + vadduwm 6,6,29 + .long 0x13C3FE82 + vadduwm 6,6,30 + vxor 29,7,0 + vsel 29,0,1,29 + vadduwm 2,2,6 + .long 0x13C78682 + vadduwm 30,30,29 + vadduwm 6,6,30 + lvx 28,27,7 + .long 0x13CC0682 + vadduwm 11,11,30 + .long 0x13C97E82 + vadduwm 11,11,30 + vadduwm 11,11,24 + vadduwm 5,5,10 + vsel 29,4,3,2 + vadduwm 4,4,28 + vadduwm 5,5,29 + .long 0x13C2FE82 + vadduwm 5,5,30 + vxor 29,6,7 + vsel 29,7,0,29 + vadduwm 1,1,5 + .long 0x13C68682 + vadduwm 30,30,29 + vadduwm 5,5,30 + lvx 28,28,7 + .long 0x13CD0682 + vadduwm 12,12,30 + .long 0x13CA7E82 + vadduwm 12,12,30 + vadduwm 12,12,25 + vadduwm 4,4,11 + vsel 29,3,2,1 + vadduwm 3,3,28 + vadduwm 4,4,29 + .long 0x13C1FE82 + vadduwm 4,4,30 + vxor 29,5,6 + vsel 29,6,7,29 + vadduwm 0,0,4 + .long 0x13C58682 + vadduwm 30,30,29 + vadduwm 4,4,30 + lvx 28,29,7 + .long 0x13CE0682 + vadduwm 13,13,30 + .long 0x13CB7E82 + vadduwm 13,13,30 + vadduwm 13,13,26 + vadduwm 3,3,12 + vsel 29,2,1,0 + vadduwm 2,2,28 + vadduwm 3,3,29 + .long 0x13C0FE82 + vadduwm 3,3,30 + vxor 29,4,5 + vsel 29,5,6,29 + vadduwm 7,7,3 + .long 0x13C48682 + vadduwm 30,30,29 + vadduwm 3,3,30 + lvx 28,30,7 + .long 0x13CF0682 + vadduwm 14,14,30 + .long 0x13CC7E82 + vadduwm 14,14,30 + vadduwm 14,14,27 + vadduwm 2,2,13 + vsel 29,1,0,7 + 
vadduwm 1,1,28 + vadduwm 2,2,29 + .long 0x13C7FE82 + vadduwm 2,2,30 + vxor 29,3,4 + vsel 29,4,5,29 + vadduwm 6,6,2 + .long 0x13C38682 + vadduwm 30,30,29 + vadduwm 2,2,30 + lvx 28,31,7 + addi 7,7,0x80 + .long 0x13D00682 + vadduwm 15,15,30 + .long 0x13CD7E82 + vadduwm 15,15,30 + vadduwm 15,15,8 + vadduwm 1,1,14 + vsel 29,0,7,6 + vadduwm 0,0,28 + vadduwm 1,1,29 + .long 0x13C6FE82 + vadduwm 1,1,30 + vxor 29,2,3 + vsel 29,3,4,29 + vadduwm 5,5,1 + .long 0x13C28682 + vadduwm 30,30,29 + vadduwm 1,1,30 + lvx 28,0,7 + .long 0x13D10682 + vadduwm 16,16,30 + .long 0x13CE7E82 + vadduwm 16,16,30 + vadduwm 16,16,9 + vadduwm 0,0,15 + vsel 29,7,6,5 + vadduwm 7,7,28 + vadduwm 0,0,29 + .long 0x13C5FE82 + vadduwm 0,0,30 + vxor 29,1,2 + vsel 29,2,3,29 + vadduwm 4,4,0 + .long 0x13C18682 + vadduwm 30,30,29 + vadduwm 0,0,30 + lvx 28,10,7 + .long 0x13D20682 + vadduwm 17,17,30 + .long 0x13CF7E82 + vadduwm 17,17,30 + vadduwm 17,17,10 + vadduwm 7,7,16 + vsel 29,6,5,4 + vadduwm 6,6,28 + vadduwm 7,7,29 + .long 0x13C4FE82 + vadduwm 7,7,30 + vxor 29,0,1 + vsel 29,1,2,29 + vadduwm 3,3,7 + .long 0x13C08682 + vadduwm 30,30,29 + vadduwm 7,7,30 + lvx 28,26,7 + .long 0x13D30682 + vadduwm 18,18,30 + .long 0x13D07E82 + vadduwm 18,18,30 + vadduwm 18,18,11 + vadduwm 6,6,17 + vsel 29,5,4,3 + vadduwm 5,5,28 + vadduwm 6,6,29 + .long 0x13C3FE82 + vadduwm 6,6,30 + vxor 29,7,0 + vsel 29,0,1,29 + vadduwm 2,2,6 + .long 0x13C78682 + vadduwm 30,30,29 + vadduwm 6,6,30 + lvx 28,27,7 + .long 0x13D80682 + vadduwm 19,19,30 + .long 0x13D17E82 + vadduwm 19,19,30 + vadduwm 19,19,12 + vadduwm 5,5,18 + vsel 29,4,3,2 + vadduwm 4,4,28 + vadduwm 5,5,29 + .long 0x13C2FE82 + vadduwm 5,5,30 + vxor 29,6,7 + vsel 29,7,0,29 + vadduwm 1,1,5 + .long 0x13C68682 + vadduwm 30,30,29 + vadduwm 5,5,30 + lvx 28,28,7 + .long 0x13D90682 + vadduwm 24,24,30 + .long 0x13D27E82 + vadduwm 24,24,30 + vadduwm 24,24,13 + vadduwm 4,4,19 + vsel 29,3,2,1 + vadduwm 3,3,28 + vadduwm 4,4,29 + .long 0x13C1FE82 + vadduwm 4,4,30 + vxor 29,5,6 + vsel 29,6,7,29 + 
vadduwm 0,0,4 + .long 0x13C58682 + vadduwm 30,30,29 + vadduwm 4,4,30 + lvx 28,29,7 + .long 0x13DA0682 + vadduwm 25,25,30 + .long 0x13D37E82 + vadduwm 25,25,30 + vadduwm 25,25,14 + vadduwm 3,3,24 + vsel 29,2,1,0 + vadduwm 2,2,28 + vadduwm 3,3,29 + .long 0x13C0FE82 + vadduwm 3,3,30 + vxor 29,4,5 + vsel 29,5,6,29 + vadduwm 7,7,3 + .long 0x13C48682 + vadduwm 30,30,29 + vadduwm 3,3,30 + lvx 28,30,7 + .long 0x13DB0682 + vadduwm 26,26,30 + .long 0x13D87E82 + vadduwm 26,26,30 + vadduwm 26,26,15 + vadduwm 2,2,25 + vsel 29,1,0,7 + vadduwm 1,1,28 + vadduwm 2,2,29 + .long 0x13C7FE82 + vadduwm 2,2,30 + vxor 29,3,4 + vsel 29,4,5,29 + vadduwm 6,6,2 + .long 0x13C38682 + vadduwm 30,30,29 + vadduwm 2,2,30 + lvx 28,31,7 + addi 7,7,0x80 + .long 0x13C80682 + vadduwm 27,27,30 + .long 0x13D97E82 + vadduwm 27,27,30 + vadduwm 27,27,16 + vadduwm 1,1,26 + vsel 29,0,7,6 + vadduwm 0,0,28 + vadduwm 1,1,29 + .long 0x13C6FE82 + vadduwm 1,1,30 + vxor 29,2,3 + vsel 29,3,4,29 + vadduwm 5,5,1 + .long 0x13C28682 + vadduwm 30,30,29 + vadduwm 1,1,30 + lvx 28,0,7 + .long 0x13C90682 + vadduwm 8,8,30 + .long 0x13DA7E82 + vadduwm 8,8,30 + vadduwm 8,8,17 + vadduwm 0,0,27 + vsel 29,7,6,5 + vadduwm 7,7,28 + vadduwm 0,0,29 + .long 0x13C5FE82 + vadduwm 0,0,30 + vxor 29,1,2 + vsel 29,2,3,29 + vadduwm 4,4,0 + .long 0x13C18682 + vadduwm 30,30,29 + vadduwm 0,0,30 + lvx 28,10,7 + bdnz .L16_xx + + /* Reload the copies stored at the top of .Loop (into v10-v17) and add them to v0-v7; subic. counts r5 down and sets CR0 for the bne .Loop below. */ lvx 10,0,11 + subic. 
5,5,1 + lvx 11,10,11 + vadduwm 0,0,10 + lvx 12,26,11 + vadduwm 1,1,11 + lvx 13,27,11 + vadduwm 2,2,12 + lvx 14,28,11 + vadduwm 3,3,13 + lvx 15,29,11 + vadduwm 4,4,14 + lvx 16,30,11 + vadduwm 5,5,15 + lvx 17,31,11 + vadduwm 6,6,16 + vadduwm 7,7,17 + bne .Loop + /* Done: v28 (from the last lvx 28,10,7 above), v8 and v9 hold the three selector rows at the end of the table; the vperm chain packs word 0 of v0-v3 into v0 and word 0 of v4-v7 into v4. NOTE(review): the two .long words hand-decode to stxvw4x stores of v0/v4 at (r3) and (r10+r3) - confirm against the Power ISA. The epilogue restores LR from r8, SPR 256 from r12, v24-v31 and r26-r31, then pops the 384-byte frame. */ lvx 8,26,7 + vperm 0,0,1,28 + lvx 9,27,7 + vperm 4,4,5,28 + vperm 0,0,2,8 + vperm 4,4,6,8 + vperm 0,0,3,9 + vperm 4,4,7,9 + .long 0x7C001F19 + .long 0x7C8A1F19 + addi 11,1,207 + mtlr 8 + mtspr 256,12 + lvx 24,0,11 + lvx 25,10,11 + lvx 26,26,11 + lvx 27,27,11 + lvx 28,28,11 + lvx 29,29,11 + lvx 30,30,11 + lvx 31,31,11 + ld 26,336(1) + ld 27,344(1) + ld 28,352(1) + ld 29,360(1) + ld 30,368(1) + ld 31,376(1) + addi 1,1,384 + blr +.long 0 +.byte 0,12,4,1,0x80,6,3,0 +.long 0 +#if (!defined(_CALL_ELF) || _CALL_ELF == 1) +.size .zfs_sha256_power8,.-.zfs_sha256_power8 +.size zfs_sha256_power8,.-.zfs_sha256_power8 +#else +.size zfs_sha256_power8,.-zfs_sha256_power8 +#endif +.align 6 +.LPICmeup: /* Position-independent table lookup: bcl 20,31,$+4 leaves the address of the following mflr 6 in LR; adding 56 (16 bytes of code, 12 bytes of .long/.byte, .space 28) gives the first table row in r6. The caller's LR is carried through r0 and restored before blr. */ + mflr 0 + bcl 20,31,$+4 + mflr 6 + addi 6,6,56 + mtlr 0 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +.space 28 /* Table: 64 rows of SHA-256 round constants, each replicated in all four 32-bit lanes, then an all-zero row (picked up by the final round's pre-add of the next row) and three vperm selector rows. */ +.long 0x428a2f98,0x428a2f98,0x428a2f98,0x428a2f98 +.long 0x71374491,0x71374491,0x71374491,0x71374491 +.long 0xb5c0fbcf,0xb5c0fbcf,0xb5c0fbcf,0xb5c0fbcf +.long 0xe9b5dba5,0xe9b5dba5,0xe9b5dba5,0xe9b5dba5 +.long 0x3956c25b,0x3956c25b,0x3956c25b,0x3956c25b +.long 0x59f111f1,0x59f111f1,0x59f111f1,0x59f111f1 +.long 0x923f82a4,0x923f82a4,0x923f82a4,0x923f82a4 +.long 0xab1c5ed5,0xab1c5ed5,0xab1c5ed5,0xab1c5ed5 +.long 0xd807aa98,0xd807aa98,0xd807aa98,0xd807aa98 +.long 0x12835b01,0x12835b01,0x12835b01,0x12835b01 +.long 0x243185be,0x243185be,0x243185be,0x243185be +.long 0x550c7dc3,0x550c7dc3,0x550c7dc3,0x550c7dc3 +.long 0x72be5d74,0x72be5d74,0x72be5d74,0x72be5d74 +.long 0x80deb1fe,0x80deb1fe,0x80deb1fe,0x80deb1fe +.long 0x9bdc06a7,0x9bdc06a7,0x9bdc06a7,0x9bdc06a7 +.long 0xc19bf174,0xc19bf174,0xc19bf174,0xc19bf174 +.long 0xe49b69c1,0xe49b69c1,0xe49b69c1,0xe49b69c1 +.long 0xefbe4786,0xefbe4786,0xefbe4786,0xefbe4786 +.long 
0x0fc19dc6,0x0fc19dc6,0x0fc19dc6,0x0fc19dc6 +.long 0x240ca1cc,0x240ca1cc,0x240ca1cc,0x240ca1cc +.long 0x2de92c6f,0x2de92c6f,0x2de92c6f,0x2de92c6f +.long 0x4a7484aa,0x4a7484aa,0x4a7484aa,0x4a7484aa +.long 0x5cb0a9dc,0x5cb0a9dc,0x5cb0a9dc,0x5cb0a9dc +.long 0x76f988da,0x76f988da,0x76f988da,0x76f988da +.long 0x983e5152,0x983e5152,0x983e5152,0x983e5152 +.long 0xa831c66d,0xa831c66d,0xa831c66d,0xa831c66d +.long 0xb00327c8,0xb00327c8,0xb00327c8,0xb00327c8 +.long 0xbf597fc7,0xbf597fc7,0xbf597fc7,0xbf597fc7 +.long 0xc6e00bf3,0xc6e00bf3,0xc6e00bf3,0xc6e00bf3 +.long 0xd5a79147,0xd5a79147,0xd5a79147,0xd5a79147 +.long 0x06ca6351,0x06ca6351,0x06ca6351,0x06ca6351 +.long 0x14292967,0x14292967,0x14292967,0x14292967 +.long 0x27b70a85,0x27b70a85,0x27b70a85,0x27b70a85 +.long 0x2e1b2138,0x2e1b2138,0x2e1b2138,0x2e1b2138 +.long 0x4d2c6dfc,0x4d2c6dfc,0x4d2c6dfc,0x4d2c6dfc +.long 0x53380d13,0x53380d13,0x53380d13,0x53380d13 +.long 0x650a7354,0x650a7354,0x650a7354,0x650a7354 +.long 0x766a0abb,0x766a0abb,0x766a0abb,0x766a0abb +.long 0x81c2c92e,0x81c2c92e,0x81c2c92e,0x81c2c92e +.long 0x92722c85,0x92722c85,0x92722c85,0x92722c85 +.long 0xa2bfe8a1,0xa2bfe8a1,0xa2bfe8a1,0xa2bfe8a1 +.long 0xa81a664b,0xa81a664b,0xa81a664b,0xa81a664b +.long 0xc24b8b70,0xc24b8b70,0xc24b8b70,0xc24b8b70 +.long 0xc76c51a3,0xc76c51a3,0xc76c51a3,0xc76c51a3 +.long 0xd192e819,0xd192e819,0xd192e819,0xd192e819 +.long 0xd6990624,0xd6990624,0xd6990624,0xd6990624 +.long 0xf40e3585,0xf40e3585,0xf40e3585,0xf40e3585 +.long 0x106aa070,0x106aa070,0x106aa070,0x106aa070 +.long 0x19a4c116,0x19a4c116,0x19a4c116,0x19a4c116 +.long 0x1e376c08,0x1e376c08,0x1e376c08,0x1e376c08 +.long 0x2748774c,0x2748774c,0x2748774c,0x2748774c +.long 0x34b0bcb5,0x34b0bcb5,0x34b0bcb5,0x34b0bcb5 +.long 0x391c0cb3,0x391c0cb3,0x391c0cb3,0x391c0cb3 +.long 0x4ed8aa4a,0x4ed8aa4a,0x4ed8aa4a,0x4ed8aa4a +.long 0x5b9cca4f,0x5b9cca4f,0x5b9cca4f,0x5b9cca4f +.long 0x682e6ff3,0x682e6ff3,0x682e6ff3,0x682e6ff3 +.long 0x748f82ee,0x748f82ee,0x748f82ee,0x748f82ee +.long 
0x78a5636f,0x78a5636f,0x78a5636f,0x78a5636f +.long 0x84c87814,0x84c87814,0x84c87814,0x84c87814 +.long 0x8cc70208,0x8cc70208,0x8cc70208,0x8cc70208 +.long 0x90befffa,0x90befffa,0x90befffa,0x90befffa +.long 0xa4506ceb,0xa4506ceb,0xa4506ceb,0xa4506ceb +.long 0xbef9a3f7,0xbef9a3f7,0xbef9a3f7,0xbef9a3f7 +.long 0xc67178f2,0xc67178f2,0xc67178f2,0xc67178f2 +.long 0,0,0,0 +.long 0x00010203,0x10111213,0x10111213,0x10111213 +.long 0x00010203,0x04050607,0x10111213,0x10111213 +.long 0x00010203,0x04050607,0x08090a0b,0x10111213 + +#elif (defined(__PPC64__) && defined(__LITTLE_ENDIAN__)) + +.abiversion 2 +.text + +.globl zfs_sha256_power8 +.type zfs_sha256_power8,@function +.align 6 +zfs_sha256_power8: +.localentry zfs_sha256_power8,0 + + stdu 1,-384(1) + mflr 8 + li 10,207 + li 11,223 + stvx 24,10,1 + addi 10,10,32 + li 12,-1 + stvx 25,11,1 + addi 11,11,32 + stvx 26,10,1 + addi 10,10,32 + stvx 27,11,1 + addi 11,11,32 + stvx 28,10,1 + addi 10,10,32 + stvx 29,11,1 + addi 11,11,32 + stvx 30,10,1 + stvx 31,11,1 + li 11,-4096+255 + stw 12,332(1) + li 10,0x10 + std 26,336(1) + li 26,0x20 + std 27,344(1) + li 27,0x30 + std 28,352(1) + li 28,0x40 + std 29,360(1) + li 29,0x50 + std 30,368(1) + li 30,0x60 + std 31,376(1) + li 31,0x70 + std 8,400(1) + or 11,11,11 + + bl .LPICmeup + addi 11,1,79 + li 7,8 + lvsl 31,0,7 + vspltisb 28,0x0f + vxor 31,31,28 + .long 0x7C001E19 + .long 0x7C8A1E19 + vsldoi 1,0,0,4 + vsldoi 2,0,0,8 + vsldoi 3,0,0,12 + vsldoi 5,4,4,4 + vsldoi 6,4,4,8 + vsldoi 7,4,4,12 + li 0,3 + b .Loop +.align 5 +.Loop: + lvx 28,0,6 + .long 0x7D002699 + addi 4,4,16 + mr 7,6 + stvx 0,0,11 + stvx 1,10,11 + stvx 2,26,11 + stvx 3,27,11 + stvx 4,28,11 + stvx 5,29,11 + stvx 6,30,11 + stvx 7,31,11 + vadduwm 7,7,28 + lvx 28,10,6 + vperm 8,8,8,31 + vadduwm 7,7,8 + vsel 29,6,5,4 + vadduwm 6,6,28 + vadduwm 7,7,29 + .long 0x13C4FE82 + vadduwm 7,7,30 + vxor 29,0,1 + vsel 29,1,2,29 + vadduwm 3,3,7 + .long 0x13C08682 + vadduwm 30,30,29 + vadduwm 7,7,30 + lvx 28,26,7 + vsldoi 9,8,8,4 + vadduwm 6,6,9 
+ vsel 29,5,4,3 + vadduwm 5,5,28 + vadduwm 6,6,29 + .long 0x13C3FE82 + vadduwm 6,6,30 + vxor 29,7,0 + vsel 29,0,1,29 + vadduwm 2,2,6 + .long 0x13C78682 + vadduwm 30,30,29 + vadduwm 6,6,30 + lvx 28,27,7 + vsldoi 10,9,9,4 + vadduwm 5,5,10 + vsel 29,4,3,2 + vadduwm 4,4,28 + vadduwm 5,5,29 + .long 0x13C2FE82 + vadduwm 5,5,30 + vxor 29,6,7 + vsel 29,7,0,29 + vadduwm 1,1,5 + .long 0x13C68682 + vadduwm 30,30,29 + vadduwm 5,5,30 + lvx 28,28,7 + .long 0x7D802699 + addi 4,4,16 + vsldoi 11,10,10,4 + vadduwm 4,4,11 + vsel 29,3,2,1 + vadduwm 3,3,28 + vadduwm 4,4,29 + .long 0x13C1FE82 + vadduwm 4,4,30 + vxor 29,5,6 + vsel 29,6,7,29 + vadduwm 0,0,4 + .long 0x13C58682 + vadduwm 30,30,29 + vadduwm 4,4,30 + lvx 28,29,7 + vperm 12,12,12,31 + vadduwm 3,3,12 + vsel 29,2,1,0 + vadduwm 2,2,28 + vadduwm 3,3,29 + .long 0x13C0FE82 + vadduwm 3,3,30 + vxor 29,4,5 + vsel 29,5,6,29 + vadduwm 7,7,3 + .long 0x13C48682 + vadduwm 30,30,29 + vadduwm 3,3,30 + lvx 28,30,7 + vsldoi 13,12,12,4 + vadduwm 2,2,13 + vsel 29,1,0,7 + vadduwm 1,1,28 + vadduwm 2,2,29 + .long 0x13C7FE82 + vadduwm 2,2,30 + vxor 29,3,4 + vsel 29,4,5,29 + vadduwm 6,6,2 + .long 0x13C38682 + vadduwm 30,30,29 + vadduwm 2,2,30 + lvx 28,31,7 + addi 7,7,0x80 + vsldoi 14,13,13,4 + vadduwm 1,1,14 + vsel 29,0,7,6 + vadduwm 0,0,28 + vadduwm 1,1,29 + .long 0x13C6FE82 + vadduwm 1,1,30 + vxor 29,2,3 + vsel 29,3,4,29 + vadduwm 5,5,1 + .long 0x13C28682 + vadduwm 30,30,29 + vadduwm 1,1,30 + lvx 28,0,7 + .long 0x7E002699 + addi 4,4,16 + vsldoi 15,14,14,4 + vadduwm 0,0,15 + vsel 29,7,6,5 + vadduwm 7,7,28 + vadduwm 0,0,29 + .long 0x13C5FE82 + vadduwm 0,0,30 + vxor 29,1,2 + vsel 29,2,3,29 + vadduwm 4,4,0 + .long 0x13C18682 + vadduwm 30,30,29 + vadduwm 0,0,30 + lvx 28,10,7 + vperm 16,16,16,31 + vadduwm 7,7,16 + vsel 29,6,5,4 + vadduwm 6,6,28 + vadduwm 7,7,29 + .long 0x13C4FE82 + vadduwm 7,7,30 + vxor 29,0,1 + vsel 29,1,2,29 + vadduwm 3,3,7 + .long 0x13C08682 + vadduwm 30,30,29 + vadduwm 7,7,30 + lvx 28,26,7 + vsldoi 17,16,16,4 + vadduwm 6,6,17 + vsel 
29,5,4,3 + vadduwm 5,5,28 + vadduwm 6,6,29 + .long 0x13C3FE82 + vadduwm 6,6,30 + vxor 29,7,0 + vsel 29,0,1,29 + vadduwm 2,2,6 + .long 0x13C78682 + vadduwm 30,30,29 + vadduwm 6,6,30 + lvx 28,27,7 + vsldoi 18,17,17,4 + vadduwm 5,5,18 + vsel 29,4,3,2 + vadduwm 4,4,28 + vadduwm 5,5,29 + .long 0x13C2FE82 + vadduwm 5,5,30 + vxor 29,6,7 + vsel 29,7,0,29 + vadduwm 1,1,5 + .long 0x13C68682 + vadduwm 30,30,29 + vadduwm 5,5,30 + lvx 28,28,7 + .long 0x7F002699 + addi 4,4,16 + vsldoi 19,18,18,4 + vadduwm 4,4,19 + vsel 29,3,2,1 + vadduwm 3,3,28 + vadduwm 4,4,29 + .long 0x13C1FE82 + vadduwm 4,4,30 + vxor 29,5,6 + vsel 29,6,7,29 + vadduwm 0,0,4 + .long 0x13C58682 + vadduwm 30,30,29 + vadduwm 4,4,30 + lvx 28,29,7 + vperm 24,24,24,31 + vadduwm 3,3,24 + vsel 29,2,1,0 + vadduwm 2,2,28 + vadduwm 3,3,29 + .long 0x13C0FE82 + vadduwm 3,3,30 + vxor 29,4,5 + vsel 29,5,6,29 + vadduwm 7,7,3 + .long 0x13C48682 + vadduwm 30,30,29 + vadduwm 3,3,30 + lvx 28,30,7 + vsldoi 25,24,24,4 + vadduwm 2,2,25 + vsel 29,1,0,7 + vadduwm 1,1,28 + vadduwm 2,2,29 + .long 0x13C7FE82 + vadduwm 2,2,30 + vxor 29,3,4 + vsel 29,4,5,29 + vadduwm 6,6,2 + .long 0x13C38682 + vadduwm 30,30,29 + vadduwm 2,2,30 + lvx 28,31,7 + addi 7,7,0x80 + vsldoi 26,25,25,4 + vadduwm 1,1,26 + vsel 29,0,7,6 + vadduwm 0,0,28 + vadduwm 1,1,29 + .long 0x13C6FE82 + vadduwm 1,1,30 + vxor 29,2,3 + vsel 29,3,4,29 + vadduwm 5,5,1 + .long 0x13C28682 + vadduwm 30,30,29 + vadduwm 1,1,30 + lvx 28,0,7 + vsldoi 27,26,26,4 + .long 0x13C90682 + vadduwm 8,8,30 + .long 0x13DA7E82 + vadduwm 8,8,30 + vadduwm 8,8,17 + vadduwm 0,0,27 + vsel 29,7,6,5 + vadduwm 7,7,28 + vadduwm 0,0,29 + .long 0x13C5FE82 + vadduwm 0,0,30 + vxor 29,1,2 + vsel 29,2,3,29 + vadduwm 4,4,0 + .long 0x13C18682 + vadduwm 30,30,29 + vadduwm 0,0,30 + lvx 28,10,7 + mtctr 0 + b .L16_xx +.align 5 +.L16_xx: + .long 0x13CA0682 + vadduwm 9,9,30 + .long 0x13DB7E82 + vadduwm 9,9,30 + vadduwm 9,9,18 + vadduwm 7,7,8 + vsel 29,6,5,4 + vadduwm 6,6,28 + vadduwm 7,7,29 + .long 0x13C4FE82 + vadduwm 7,7,30 
+ vxor 29,0,1 + vsel 29,1,2,29 + vadduwm 3,3,7 + .long 0x13C08682 + vadduwm 30,30,29 + vadduwm 7,7,30 + lvx 28,26,7 + .long 0x13CB0682 + vadduwm 10,10,30 + .long 0x13C87E82 + vadduwm 10,10,30 + vadduwm 10,10,19 + vadduwm 6,6,9 + vsel 29,5,4,3 + vadduwm 5,5,28 + vadduwm 6,6,29 + .long 0x13C3FE82 + vadduwm 6,6,30 + vxor 29,7,0 + vsel 29,0,1,29 + vadduwm 2,2,6 + .long 0x13C78682 + vadduwm 30,30,29 + vadduwm 6,6,30 + lvx 28,27,7 + .long 0x13CC0682 + vadduwm 11,11,30 + .long 0x13C97E82 + vadduwm 11,11,30 + vadduwm 11,11,24 + vadduwm 5,5,10 + vsel 29,4,3,2 + vadduwm 4,4,28 + vadduwm 5,5,29 + .long 0x13C2FE82 + vadduwm 5,5,30 + vxor 29,6,7 + vsel 29,7,0,29 + vadduwm 1,1,5 + .long 0x13C68682 + vadduwm 30,30,29 + vadduwm 5,5,30 + lvx 28,28,7 + .long 0x13CD0682 + vadduwm 12,12,30 + .long 0x13CA7E82 + vadduwm 12,12,30 + vadduwm 12,12,25 + vadduwm 4,4,11 + vsel 29,3,2,1 + vadduwm 3,3,28 + vadduwm 4,4,29 + .long 0x13C1FE82 + vadduwm 4,4,30 + vxor 29,5,6 + vsel 29,6,7,29 + vadduwm 0,0,4 + .long 0x13C58682 + vadduwm 30,30,29 + vadduwm 4,4,30 + lvx 28,29,7 + .long 0x13CE0682 + vadduwm 13,13,30 + .long 0x13CB7E82 + vadduwm 13,13,30 + vadduwm 13,13,26 + vadduwm 3,3,12 + vsel 29,2,1,0 + vadduwm 2,2,28 + vadduwm 3,3,29 + .long 0x13C0FE82 + vadduwm 3,3,30 + vxor 29,4,5 + vsel 29,5,6,29 + vadduwm 7,7,3 + .long 0x13C48682 + vadduwm 30,30,29 + vadduwm 3,3,30 + lvx 28,30,7 + .long 0x13CF0682 + vadduwm 14,14,30 + .long 0x13CC7E82 + vadduwm 14,14,30 + vadduwm 14,14,27 + vadduwm 2,2,13 + vsel 29,1,0,7 + vadduwm 1,1,28 + vadduwm 2,2,29 + .long 0x13C7FE82 + vadduwm 2,2,30 + vxor 29,3,4 + vsel 29,4,5,29 + vadduwm 6,6,2 + .long 0x13C38682 + vadduwm 30,30,29 + vadduwm 2,2,30 + lvx 28,31,7 + addi 7,7,0x80 + .long 0x13D00682 + vadduwm 15,15,30 + .long 0x13CD7E82 + vadduwm 15,15,30 + vadduwm 15,15,8 + vadduwm 1,1,14 + vsel 29,0,7,6 + vadduwm 0,0,28 + vadduwm 1,1,29 + .long 0x13C6FE82 + vadduwm 1,1,30 + vxor 29,2,3 + vsel 29,3,4,29 + vadduwm 5,5,1 + .long 0x13C28682 + vadduwm 30,30,29 + vadduwm 1,1,30 
+ lvx 28,0,7 + .long 0x13D10682 + vadduwm 16,16,30 + .long 0x13CE7E82 + vadduwm 16,16,30 + vadduwm 16,16,9 + vadduwm 0,0,15 + vsel 29,7,6,5 + vadduwm 7,7,28 + vadduwm 0,0,29 + .long 0x13C5FE82 + vadduwm 0,0,30 + vxor 29,1,2 + vsel 29,2,3,29 + vadduwm 4,4,0 + .long 0x13C18682 + vadduwm 30,30,29 + vadduwm 0,0,30 + lvx 28,10,7 + .long 0x13D20682 + vadduwm 17,17,30 + .long 0x13CF7E82 + vadduwm 17,17,30 + vadduwm 17,17,10 + vadduwm 7,7,16 + vsel 29,6,5,4 + vadduwm 6,6,28 + vadduwm 7,7,29 + .long 0x13C4FE82 + vadduwm 7,7,30 + vxor 29,0,1 + vsel 29,1,2,29 + vadduwm 3,3,7 + .long 0x13C08682 + vadduwm 30,30,29 + vadduwm 7,7,30 + lvx 28,26,7 + .long 0x13D30682 + vadduwm 18,18,30 + .long 0x13D07E82 + vadduwm 18,18,30 + vadduwm 18,18,11 + vadduwm 6,6,17 + vsel 29,5,4,3 + vadduwm 5,5,28 + vadduwm 6,6,29 + .long 0x13C3FE82 + vadduwm 6,6,30 + vxor 29,7,0 + vsel 29,0,1,29 + vadduwm 2,2,6 + .long 0x13C78682 + vadduwm 30,30,29 + vadduwm 6,6,30 + lvx 28,27,7 + .long 0x13D80682 + vadduwm 19,19,30 + .long 0x13D17E82 + vadduwm 19,19,30 + vadduwm 19,19,12 + vadduwm 5,5,18 + vsel 29,4,3,2 + vadduwm 4,4,28 + vadduwm 5,5,29 + .long 0x13C2FE82 + vadduwm 5,5,30 + vxor 29,6,7 + vsel 29,7,0,29 + vadduwm 1,1,5 + .long 0x13C68682 + vadduwm 30,30,29 + vadduwm 5,5,30 + lvx 28,28,7 + .long 0x13D90682 + vadduwm 24,24,30 + .long 0x13D27E82 + vadduwm 24,24,30 + vadduwm 24,24,13 + vadduwm 4,4,19 + vsel 29,3,2,1 + vadduwm 3,3,28 + vadduwm 4,4,29 + .long 0x13C1FE82 + vadduwm 4,4,30 + vxor 29,5,6 + vsel 29,6,7,29 + vadduwm 0,0,4 + .long 0x13C58682 + vadduwm 30,30,29 + vadduwm 4,4,30 + lvx 28,29,7 + .long 0x13DA0682 + vadduwm 25,25,30 + .long 0x13D37E82 + vadduwm 25,25,30 + vadduwm 25,25,14 + vadduwm 3,3,24 + vsel 29,2,1,0 + vadduwm 2,2,28 + vadduwm 3,3,29 + .long 0x13C0FE82 + vadduwm 3,3,30 + vxor 29,4,5 + vsel 29,5,6,29 + vadduwm 7,7,3 + .long 0x13C48682 + vadduwm 30,30,29 + vadduwm 3,3,30 + lvx 28,30,7 + .long 0x13DB0682 + vadduwm 26,26,30 + .long 0x13D87E82 + vadduwm 26,26,30 + vadduwm 26,26,15 + 
vadduwm 2,2,25 + vsel 29,1,0,7 + vadduwm 1,1,28 + vadduwm 2,2,29 + .long 0x13C7FE82 + vadduwm 2,2,30 + vxor 29,3,4 + vsel 29,4,5,29 + vadduwm 6,6,2 + .long 0x13C38682 + vadduwm 30,30,29 + vadduwm 2,2,30 + lvx 28,31,7 + addi 7,7,0x80 + .long 0x13C80682 + vadduwm 27,27,30 + .long 0x13D97E82 + vadduwm 27,27,30 + vadduwm 27,27,16 + vadduwm 1,1,26 + vsel 29,0,7,6 + vadduwm 0,0,28 + vadduwm 1,1,29 + .long 0x13C6FE82 + vadduwm 1,1,30 + vxor 29,2,3 + vsel 29,3,4,29 + vadduwm 5,5,1 + .long 0x13C28682 + vadduwm 30,30,29 + vadduwm 1,1,30 + lvx 28,0,7 + .long 0x13C90682 + vadduwm 8,8,30 + .long 0x13DA7E82 + vadduwm 8,8,30 + vadduwm 8,8,17 + vadduwm 0,0,27 + vsel 29,7,6,5 + vadduwm 7,7,28 + vadduwm 0,0,29 + .long 0x13C5FE82 + vadduwm 0,0,30 + vxor 29,1,2 + vsel 29,2,3,29 + vadduwm 4,4,0 + .long 0x13C18682 + vadduwm 30,30,29 + vadduwm 0,0,30 + lvx 28,10,7 + bdnz .L16_xx + + lvx 10,0,11 + subic. 5,5,1 + lvx 11,10,11 + vadduwm 0,0,10 + lvx 12,26,11 + vadduwm 1,1,11 + lvx 13,27,11 + vadduwm 2,2,12 + lvx 14,28,11 + vadduwm 3,3,13 + lvx 15,29,11 + vadduwm 4,4,14 + lvx 16,30,11 + vadduwm 5,5,15 + lvx 17,31,11 + vadduwm 6,6,16 + vadduwm 7,7,17 + bne .Loop + lvx 8,26,7 + vperm 0,0,1,28 + lvx 9,27,7 + vperm 4,4,5,28 + vperm 0,0,2,8 + vperm 4,4,6,8 + vperm 0,0,3,9 + vperm 4,4,7,9 + .long 0x7C001F19 + .long 0x7C8A1F19 + addi 11,1,207 + mtlr 8 + or 12,12,12 + lvx 24,0,11 + lvx 25,10,11 + lvx 26,26,11 + lvx 27,27,11 + lvx 28,28,11 + lvx 29,29,11 + lvx 30,30,11 + lvx 31,31,11 + ld 26,336(1) + ld 27,344(1) + ld 28,352(1) + ld 29,360(1) + ld 30,368(1) + ld 31,376(1) + addi 1,1,384 + blr +.long 0 +.byte 0,12,4,1,0x80,6,3,0 +.long 0 +.size zfs_sha256_power8,.-zfs_sha256_power8 +.align 6 +.LPICmeup: + mflr 0 + bcl 20,31,$+4 + mflr 6 + addi 6,6,56 + mtlr 0 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +.space 28 +.long 0x428a2f98,0x428a2f98,0x428a2f98,0x428a2f98 +.long 0x71374491,0x71374491,0x71374491,0x71374491 +.long 0xb5c0fbcf,0xb5c0fbcf,0xb5c0fbcf,0xb5c0fbcf +.long 
0xe9b5dba5,0xe9b5dba5,0xe9b5dba5,0xe9b5dba5 +.long 0x3956c25b,0x3956c25b,0x3956c25b,0x3956c25b +.long 0x59f111f1,0x59f111f1,0x59f111f1,0x59f111f1 +.long 0x923f82a4,0x923f82a4,0x923f82a4,0x923f82a4 +.long 0xab1c5ed5,0xab1c5ed5,0xab1c5ed5,0xab1c5ed5 +.long 0xd807aa98,0xd807aa98,0xd807aa98,0xd807aa98 +.long 0x12835b01,0x12835b01,0x12835b01,0x12835b01 +.long 0x243185be,0x243185be,0x243185be,0x243185be +.long 0x550c7dc3,0x550c7dc3,0x550c7dc3,0x550c7dc3 +.long 0x72be5d74,0x72be5d74,0x72be5d74,0x72be5d74 +.long 0x80deb1fe,0x80deb1fe,0x80deb1fe,0x80deb1fe +.long 0x9bdc06a7,0x9bdc06a7,0x9bdc06a7,0x9bdc06a7 +.long 0xc19bf174,0xc19bf174,0xc19bf174,0xc19bf174 +.long 0xe49b69c1,0xe49b69c1,0xe49b69c1,0xe49b69c1 +.long 0xefbe4786,0xefbe4786,0xefbe4786,0xefbe4786 +.long 0x0fc19dc6,0x0fc19dc6,0x0fc19dc6,0x0fc19dc6 +.long 0x240ca1cc,0x240ca1cc,0x240ca1cc,0x240ca1cc +.long 0x2de92c6f,0x2de92c6f,0x2de92c6f,0x2de92c6f +.long 0x4a7484aa,0x4a7484aa,0x4a7484aa,0x4a7484aa +.long 0x5cb0a9dc,0x5cb0a9dc,0x5cb0a9dc,0x5cb0a9dc +.long 0x76f988da,0x76f988da,0x76f988da,0x76f988da +.long 0x983e5152,0x983e5152,0x983e5152,0x983e5152 +.long 0xa831c66d,0xa831c66d,0xa831c66d,0xa831c66d +.long 0xb00327c8,0xb00327c8,0xb00327c8,0xb00327c8 +.long 0xbf597fc7,0xbf597fc7,0xbf597fc7,0xbf597fc7 +.long 0xc6e00bf3,0xc6e00bf3,0xc6e00bf3,0xc6e00bf3 +.long 0xd5a79147,0xd5a79147,0xd5a79147,0xd5a79147 +.long 0x06ca6351,0x06ca6351,0x06ca6351,0x06ca6351 +.long 0x14292967,0x14292967,0x14292967,0x14292967 +.long 0x27b70a85,0x27b70a85,0x27b70a85,0x27b70a85 +.long 0x2e1b2138,0x2e1b2138,0x2e1b2138,0x2e1b2138 +.long 0x4d2c6dfc,0x4d2c6dfc,0x4d2c6dfc,0x4d2c6dfc +.long 0x53380d13,0x53380d13,0x53380d13,0x53380d13 +.long 0x650a7354,0x650a7354,0x650a7354,0x650a7354 +.long 0x766a0abb,0x766a0abb,0x766a0abb,0x766a0abb +.long 0x81c2c92e,0x81c2c92e,0x81c2c92e,0x81c2c92e +.long 0x92722c85,0x92722c85,0x92722c85,0x92722c85 +.long 0xa2bfe8a1,0xa2bfe8a1,0xa2bfe8a1,0xa2bfe8a1 +.long 0xa81a664b,0xa81a664b,0xa81a664b,0xa81a664b +.long 
0xc24b8b70,0xc24b8b70,0xc24b8b70,0xc24b8b70 +.long 0xc76c51a3,0xc76c51a3,0xc76c51a3,0xc76c51a3 +.long 0xd192e819,0xd192e819,0xd192e819,0xd192e819 +.long 0xd6990624,0xd6990624,0xd6990624,0xd6990624 +.long 0xf40e3585,0xf40e3585,0xf40e3585,0xf40e3585 +.long 0x106aa070,0x106aa070,0x106aa070,0x106aa070 +.long 0x19a4c116,0x19a4c116,0x19a4c116,0x19a4c116 +.long 0x1e376c08,0x1e376c08,0x1e376c08,0x1e376c08 +.long 0x2748774c,0x2748774c,0x2748774c,0x2748774c +.long 0x34b0bcb5,0x34b0bcb5,0x34b0bcb5,0x34b0bcb5 +.long 0x391c0cb3,0x391c0cb3,0x391c0cb3,0x391c0cb3 +.long 0x4ed8aa4a,0x4ed8aa4a,0x4ed8aa4a,0x4ed8aa4a +.long 0x5b9cca4f,0x5b9cca4f,0x5b9cca4f,0x5b9cca4f +.long 0x682e6ff3,0x682e6ff3,0x682e6ff3,0x682e6ff3 +.long 0x748f82ee,0x748f82ee,0x748f82ee,0x748f82ee +.long 0x78a5636f,0x78a5636f,0x78a5636f,0x78a5636f +.long 0x84c87814,0x84c87814,0x84c87814,0x84c87814 +.long 0x8cc70208,0x8cc70208,0x8cc70208,0x8cc70208 +.long 0x90befffa,0x90befffa,0x90befffa,0x90befffa +.long 0xa4506ceb,0xa4506ceb,0xa4506ceb,0xa4506ceb +.long 0xbef9a3f7,0xbef9a3f7,0xbef9a3f7,0xbef9a3f7 +.long 0xc67178f2,0xc67178f2,0xc67178f2,0xc67178f2 +.long 0,0,0,0 +.long 0x10111213,0x10111213,0x10111213,0x00010203 +.long 0x10111213,0x10111213,0x04050607,0x00010203 +.long 0x10111213,0x08090a0b,0x04050607,0x00010203 +#endif diff --git a/sys/contrib/openzfs/module/icp/asm-ppc64/sha2/sha256-ppc.S b/sys/contrib/openzfs/module/icp/asm-ppc64/sha2/sha256-ppc.S new file mode 100644 index 000000000000..d039bc36ee11 --- /dev/null +++ b/sys/contrib/openzfs/module/icp/asm-ppc64/sha2/sha256-ppc.S @@ -0,0 +1,2727 @@ +/* + * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* + * Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de> + * - modified assembly to fit into OpenZFS + */ + +#if (defined(__PPC64__) && defined(__BIG_ENDIAN__)) + +#if (!defined(_CALL_ELF) || _CALL_ELF == 1) +.text + +.globl zfs_sha256_ppc +.globl .zfs_sha256_ppc +.type zfs_sha256_ppc,@function +.section ".opd","aw" +.align 3 +zfs_sha256_ppc: +.quad .zfs_sha256_ppc,.TOC.@tocbase,0 +.previous +.align 6 +.zfs_sha256_ppc: +#else +.abiversion 2 +.text + +.globl zfs_sha256_ppc +.type zfs_sha256_ppc,@function +.align 6 +zfs_sha256_ppc: +.localentry zfs_sha256_ppc,0 +#endif + stdu 1,-320(1) + mflr 0 + sldi 5,5,6 + + std 3,144(1) + + std 14,176(1) + std 15,184(1) + std 16,192(1) + std 17,200(1) + std 18,208(1) + std 19,216(1) + std 20,224(1) + std 21,232(1) + std 22,240(1) + std 23,248(1) + std 24,256(1) + std 25,264(1) + std 26,272(1) + std 27,280(1) + std 28,288(1) + std 29,296(1) + std 30,304(1) + std 31,312(1) + std 0,336(1) + lwz 8,0(3) + mr 31,4 + lwz 9,4(3) + lwz 10,8(3) + lwz 11,12(3) + lwz 12,16(3) + lwz 6,20(3) + lwz 14,24(3) + lwz 15,28(3) + bl .LPICmeup +.LPICedup: + andi. 0,31,3 + bne .Lunaligned +.Laligned: + add 5,31,5 + std 5,128(1) + std 31,136(1) + bl .Lsha2_block_private + b .Ldone + +.align 4 +.Lunaligned: + subfic 0,31,4096 + andi. 
0,0,4032 + beq .Lcross_page + cmpld 5,0 + ble .Laligned + subfc 5,0,5 + add 0,31,0 + std 5,120(1) + std 0,128(1) + std 31,136(1) + bl .Lsha2_block_private + + ld 5,120(1) +.Lcross_page: + li 0,16 + mtctr 0 + addi 20,1,48 +.Lmemcpy: + lbz 16,0(31) + lbz 17,1(31) + lbz 18,2(31) + lbz 19,3(31) + addi 31,31,4 + stb 16,0(20) + stb 17,1(20) + stb 18,2(20) + stb 19,3(20) + addi 20,20,4 + bdnz .Lmemcpy + std 31,112(1) + addi 0,1,112 + addi 31,1,48 + std 5,120(1) + std 0,128(1) + std 31,136(1) + bl .Lsha2_block_private + ld 31,112(1) + ld 5,120(1) + addic. 5,5,-64 + bne .Lunaligned + +.Ldone: + ld 0,336(1) + ld 14,176(1) + ld 15,184(1) + ld 16,192(1) + ld 17,200(1) + ld 18,208(1) + ld 19,216(1) + ld 20,224(1) + ld 21,232(1) + ld 22,240(1) + ld 23,248(1) + ld 24,256(1) + ld 25,264(1) + ld 26,272(1) + ld 27,280(1) + ld 28,288(1) + ld 29,296(1) + ld 30,304(1) + ld 31,312(1) + mtlr 0 + addi 1,1,320 + blr +.long 0 +.byte 0,12,4,1,0x80,18,3,0 +.long 0 +.align 4 +.Lsha2_block_private: + lwz 0,0(7) + lwz 16,0(31) + rotrwi 3,12,6 + rotrwi 4,12,11 + and 5,6,12 + xor 3,3,4 + add 15,15,0 + andc 0,14,12 + rotrwi 4,4,14 + or 5,5,0 + add 15,15,16 + xor 3,3,4 + add 15,15,5 + add 15,15,3 + + rotrwi 3,8,2 + rotrwi 4,8,13 + and 5,8,9 + and 0,8,10 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,9,10 + xor 3,3,4 + add 11,11,15 + xor 5,5,0 + lwz 0,4(7) + add 15,15,3 + add 15,15,5 + + lwz 17,4(31) + rotrwi 3,11,6 + rotrwi 4,11,11 + and 5,12,11 + xor 3,3,4 + add 14,14,0 + andc 0,6,11 + rotrwi 4,4,14 + or 5,5,0 + add 14,14,17 + xor 3,3,4 + add 14,14,5 + add 14,14,3 + + rotrwi 3,15,2 + rotrwi 4,15,13 + and 5,15,8 + and 0,15,9 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,8,9 + xor 3,3,4 + add 10,10,14 + xor 5,5,0 + lwz 0,8(7) + add 14,14,3 + add 14,14,5 + + lwz 18,8(31) + rotrwi 3,10,6 + rotrwi 4,10,11 + and 5,11,10 + xor 3,3,4 + add 6,6,0 + andc 0,12,10 + rotrwi 4,4,14 + or 5,5,0 + add 6,6,18 + xor 3,3,4 + add 6,6,5 + add 6,6,3 + + rotrwi 3,14,2 + rotrwi 4,14,13 + and 5,14,15 + and 0,14,8 + xor 
3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,15,8 + xor 3,3,4 + add 9,9,6 + xor 5,5,0 + lwz 0,12(7) + add 6,6,3 + add 6,6,5 + + lwz 19,12(31) + rotrwi 3,9,6 + rotrwi 4,9,11 + and 5,10,9 + xor 3,3,4 + add 12,12,0 + andc 0,11,9 + rotrwi 4,4,14 + or 5,5,0 + add 12,12,19 + xor 3,3,4 + add 12,12,5 + add 12,12,3 + + rotrwi 3,6,2 + rotrwi 4,6,13 + and 5,6,14 + and 0,6,15 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,14,15 + xor 3,3,4 + add 8,8,12 + xor 5,5,0 + lwz 0,16(7) + add 12,12,3 + add 12,12,5 + + lwz 20,16(31) + rotrwi 3,8,6 + rotrwi 4,8,11 + and 5,9,8 + xor 3,3,4 + add 11,11,0 + andc 0,10,8 + rotrwi 4,4,14 + or 5,5,0 + add 11,11,20 + xor 3,3,4 + add 11,11,5 + add 11,11,3 + + rotrwi 3,12,2 + rotrwi 4,12,13 + and 5,12,6 + and 0,12,14 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,6,14 + xor 3,3,4 + add 15,15,11 + xor 5,5,0 + lwz 0,20(7) + add 11,11,3 + add 11,11,5 + + lwz 21,20(31) + rotrwi 3,15,6 + rotrwi 4,15,11 + and 5,8,15 + xor 3,3,4 + add 10,10,0 + andc 0,9,15 + rotrwi 4,4,14 + or 5,5,0 + add 10,10,21 + xor 3,3,4 + add 10,10,5 + add 10,10,3 + + rotrwi 3,11,2 + rotrwi 4,11,13 + and 5,11,12 + and 0,11,6 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,12,6 + xor 3,3,4 + add 14,14,10 + xor 5,5,0 + lwz 0,24(7) + add 10,10,3 + add 10,10,5 + + lwz 22,24(31) + rotrwi 3,14,6 + rotrwi 4,14,11 + and 5,15,14 + xor 3,3,4 + add 9,9,0 + andc 0,8,14 + rotrwi 4,4,14 + or 5,5,0 + add 9,9,22 + xor 3,3,4 + add 9,9,5 + add 9,9,3 + + rotrwi 3,10,2 + rotrwi 4,10,13 + and 5,10,11 + and 0,10,12 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,11,12 + xor 3,3,4 + add 6,6,9 + xor 5,5,0 + lwz 0,28(7) + add 9,9,3 + add 9,9,5 + + lwz 23,28(31) + rotrwi 3,6,6 + rotrwi 4,6,11 + and 5,14,6 + xor 3,3,4 + add 8,8,0 + andc 0,15,6 + rotrwi 4,4,14 + or 5,5,0 + add 8,8,23 + xor 3,3,4 + add 8,8,5 + add 8,8,3 + + rotrwi 3,9,2 + rotrwi 4,9,13 + and 5,9,10 + and 0,9,11 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,10,11 + xor 3,3,4 + add 12,12,8 + xor 5,5,0 + lwz 0,32(7) + add 8,8,3 + add 8,8,5 + + lwz 
24,32(31) + rotrwi 3,12,6 + rotrwi 4,12,11 + and 5,6,12 + xor 3,3,4 + add 15,15,0 + andc 0,14,12 + rotrwi 4,4,14 + or 5,5,0 + add 15,15,24 + xor 3,3,4 + add 15,15,5 + add 15,15,3 + + rotrwi 3,8,2 + rotrwi 4,8,13 + and 5,8,9 + and 0,8,10 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,9,10 + xor 3,3,4 + add 11,11,15 + xor 5,5,0 + lwz 0,36(7) + add 15,15,3 + add 15,15,5 + + lwz 25,36(31) + rotrwi 3,11,6 + rotrwi 4,11,11 + and 5,12,11 + xor 3,3,4 + add 14,14,0 + andc 0,6,11 + rotrwi 4,4,14 + or 5,5,0 + add 14,14,25 + xor 3,3,4 + add 14,14,5 + add 14,14,3 + + rotrwi 3,15,2 + rotrwi 4,15,13 + and 5,15,8 + and 0,15,9 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,8,9 + xor 3,3,4 + add 10,10,14 + xor 5,5,0 + lwz 0,40(7) + add 14,14,3 + add 14,14,5 + + lwz 26,40(31) + rotrwi 3,10,6 + rotrwi 4,10,11 + and 5,11,10 + xor 3,3,4 + add 6,6,0 + andc 0,12,10 + rotrwi 4,4,14 + or 5,5,0 + add 6,6,26 + xor 3,3,4 + add 6,6,5 + add 6,6,3 + + rotrwi 3,14,2 + rotrwi 4,14,13 + and 5,14,15 + and 0,14,8 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,15,8 + xor 3,3,4 + add 9,9,6 + xor 5,5,0 + lwz 0,44(7) + add 6,6,3 + add 6,6,5 + + lwz 27,44(31) + rotrwi 3,9,6 + rotrwi 4,9,11 + and 5,10,9 + xor 3,3,4 + add 12,12,0 + andc 0,11,9 + rotrwi 4,4,14 + or 5,5,0 + add 12,12,27 + xor 3,3,4 + add 12,12,5 + add 12,12,3 + + rotrwi 3,6,2 + rotrwi 4,6,13 + and 5,6,14 + and 0,6,15 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,14,15 + xor 3,3,4 + add 8,8,12 + xor 5,5,0 + lwz 0,48(7) + add 12,12,3 + add 12,12,5 + + lwz 28,48(31) + rotrwi 3,8,6 + rotrwi 4,8,11 + and 5,9,8 + xor 3,3,4 + add 11,11,0 + andc 0,10,8 + rotrwi 4,4,14 + or 5,5,0 + add 11,11,28 + xor 3,3,4 + add 11,11,5 + add 11,11,3 + + rotrwi 3,12,2 + rotrwi 4,12,13 + and 5,12,6 + and 0,12,14 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,6,14 + xor 3,3,4 + add 15,15,11 + xor 5,5,0 + lwz 0,52(7) + add 11,11,3 + add 11,11,5 + + lwz 29,52(31) + rotrwi 3,15,6 + rotrwi 4,15,11 + and 5,8,15 + xor 3,3,4 + add 10,10,0 + andc 0,9,15 + rotrwi 4,4,14 + or 
5,5,0 + add 10,10,29 + xor 3,3,4 + add 10,10,5 + add 10,10,3 + + rotrwi 3,11,2 + rotrwi 4,11,13 + and 5,11,12 + and 0,11,6 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,12,6 + xor 3,3,4 + add 14,14,10 + xor 5,5,0 + lwz 0,56(7) + add 10,10,3 + add 10,10,5 + + lwz 30,56(31) + rotrwi 3,14,6 + rotrwi 4,14,11 + and 5,15,14 + xor 3,3,4 + add 9,9,0 + andc 0,8,14 + rotrwi 4,4,14 + or 5,5,0 + add 9,9,30 + xor 3,3,4 + add 9,9,5 + add 9,9,3 + + rotrwi 3,10,2 + rotrwi 4,10,13 + and 5,10,11 + and 0,10,12 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,11,12 + xor 3,3,4 + add 6,6,9 + xor 5,5,0 + lwz 0,60(7) + add 9,9,3 + add 9,9,5 + + lwz 31,60(31) + rotrwi 3,6,6 + rotrwi 4,6,11 + and 5,14,6 + xor 3,3,4 + add 8,8,0 + andc 0,15,6 + rotrwi 4,4,14 + or 5,5,0 + add 8,8,31 + xor 3,3,4 + add 8,8,5 + add 8,8,3 + + rotrwi 3,9,2 + rotrwi 4,9,13 + and 5,9,10 + and 0,9,11 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,10,11 + xor 3,3,4 + add 12,12,8 + xor 5,5,0 + add 8,8,3 + add 8,8,5 + + li 5,3 + mtctr 5 +.align 4 +.Lrounds: + addi 7,7,64 + rotrwi 3,17,7 + rotrwi 4,17,18 + rotrwi 5,30,17 + rotrwi 0,30,19 + xor 3,3,4 + srwi 4,17,3 + xor 5,5,0 + srwi 0,30,10 + add 16,16,25 + xor 3,3,4 + xor 5,5,0 + lwz 0,0(7) + add 16,16,3 + add 16,16,5 + rotrwi 3,12,6 + rotrwi 4,12,11 + and 5,6,12 + xor 3,3,4 + add 15,15,0 + andc 0,14,12 + rotrwi 4,4,14 + or 5,5,0 + add 15,15,16 + xor 3,3,4 + add 15,15,5 + add 15,15,3 + + rotrwi 3,8,2 + rotrwi 4,8,13 + and 5,8,9 + and 0,8,10 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,9,10 + xor 3,3,4 + add 11,11,15 + xor 5,5,0 + add 15,15,3 + add 15,15,5 + + rotrwi 3,18,7 + rotrwi 4,18,18 + rotrwi 5,31,17 + rotrwi 0,31,19 + xor 3,3,4 + srwi 4,18,3 + xor 5,5,0 + srwi 0,31,10 + add 17,17,26 + xor 3,3,4 + xor 5,5,0 + lwz 0,4(7) + add 17,17,3 + add 17,17,5 + rotrwi 3,11,6 + rotrwi 4,11,11 + and 5,12,11 + xor 3,3,4 + add 14,14,0 + andc 0,6,11 + rotrwi 4,4,14 + or 5,5,0 + add 14,14,17 + xor 3,3,4 + add 14,14,5 + add 14,14,3 + + rotrwi 3,15,2 + rotrwi 4,15,13 + and 5,15,8 + 
and 0,15,9 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,8,9 + xor 3,3,4 + add 10,10,14 + xor 5,5,0 + add 14,14,3 + add 14,14,5 + + rotrwi 3,19,7 + rotrwi 4,19,18 + rotrwi 5,16,17 + rotrwi 0,16,19 + xor 3,3,4 + srwi 4,19,3 + xor 5,5,0 + srwi 0,16,10 + add 18,18,27 + xor 3,3,4 + xor 5,5,0 + lwz 0,8(7) + add 18,18,3 + add 18,18,5 + rotrwi 3,10,6 + rotrwi 4,10,11 + and 5,11,10 + xor 3,3,4 + add 6,6,0 + andc 0,12,10 + rotrwi 4,4,14 + or 5,5,0 + add 6,6,18 + xor 3,3,4 + add 6,6,5 + add 6,6,3 + + rotrwi 3,14,2 + rotrwi 4,14,13 + and 5,14,15 + and 0,14,8 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,15,8 + xor 3,3,4 + add 9,9,6 + xor 5,5,0 + add 6,6,3 + add 6,6,5 + + rotrwi 3,20,7 + rotrwi 4,20,18 + rotrwi 5,17,17 + rotrwi 0,17,19 + xor 3,3,4 + srwi 4,20,3 + xor 5,5,0 + srwi 0,17,10 + add 19,19,28 + xor 3,3,4 + xor 5,5,0 + lwz 0,12(7) + add 19,19,3 + add 19,19,5 + rotrwi 3,9,6 + rotrwi 4,9,11 + and 5,10,9 + xor 3,3,4 + add 12,12,0 + andc 0,11,9 + rotrwi 4,4,14 + or 5,5,0 + add 12,12,19 + xor 3,3,4 + add 12,12,5 + add 12,12,3 + + rotrwi 3,6,2 + rotrwi 4,6,13 + and 5,6,14 + and 0,6,15 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,14,15 + xor 3,3,4 + add 8,8,12 + xor 5,5,0 + add 12,12,3 + add 12,12,5 + + rotrwi 3,21,7 + rotrwi 4,21,18 + rotrwi 5,18,17 + rotrwi 0,18,19 + xor 3,3,4 + srwi 4,21,3 + xor 5,5,0 + srwi 0,18,10 + add 20,20,29 + xor 3,3,4 + xor 5,5,0 + lwz 0,16(7) + add 20,20,3 + add 20,20,5 + rotrwi 3,8,6 + rotrwi 4,8,11 + and 5,9,8 + xor 3,3,4 + add 11,11,0 + andc 0,10,8 + rotrwi 4,4,14 + or 5,5,0 + add 11,11,20 + xor 3,3,4 + add 11,11,5 + add 11,11,3 + + rotrwi 3,12,2 + rotrwi 4,12,13 + and 5,12,6 + and 0,12,14 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,6,14 + xor 3,3,4 + add 15,15,11 + xor 5,5,0 + add 11,11,3 + add 11,11,5 + + rotrwi 3,22,7 + rotrwi 4,22,18 + rotrwi 5,19,17 + rotrwi 0,19,19 + xor 3,3,4 + srwi 4,22,3 + xor 5,5,0 + srwi 0,19,10 + add 21,21,30 + xor 3,3,4 + xor 5,5,0 + lwz 0,20(7) + add 21,21,3 + add 21,21,5 + rotrwi 3,15,6 + rotrwi 4,15,11 + 
and 5,8,15 + xor 3,3,4 + add 10,10,0 + andc 0,9,15 + rotrwi 4,4,14 + or 5,5,0 + add 10,10,21 + xor 3,3,4 + add 10,10,5 + add 10,10,3 + + rotrwi 3,11,2 + rotrwi 4,11,13 + and 5,11,12 + and 0,11,6 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,12,6 + xor 3,3,4 + add 14,14,10 + xor 5,5,0 + add 10,10,3 + add 10,10,5 + + rotrwi 3,23,7 + rotrwi 4,23,18 + rotrwi 5,20,17 + rotrwi 0,20,19 + xor 3,3,4 + srwi 4,23,3 + xor 5,5,0 + srwi 0,20,10 + add 22,22,31 + xor 3,3,4 + xor 5,5,0 + lwz 0,24(7) + add 22,22,3 + add 22,22,5 + rotrwi 3,14,6 + rotrwi 4,14,11 + and 5,15,14 + xor 3,3,4 + add 9,9,0 + andc 0,8,14 + rotrwi 4,4,14 + or 5,5,0 + add 9,9,22 + xor 3,3,4 + add 9,9,5 + add 9,9,3 + + rotrwi 3,10,2 + rotrwi 4,10,13 + and 5,10,11 + and 0,10,12 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,11,12 + xor 3,3,4 + add 6,6,9 + xor 5,5,0 + add 9,9,3 + add 9,9,5 + + rotrwi 3,24,7 + rotrwi 4,24,18 + rotrwi 5,21,17 + rotrwi 0,21,19 + xor 3,3,4 + srwi 4,24,3 + xor 5,5,0 + srwi 0,21,10 + add 23,23,16 + xor 3,3,4 + xor 5,5,0 + lwz 0,28(7) + add 23,23,3 + add 23,23,5 + rotrwi 3,6,6 + rotrwi 4,6,11 + and 5,14,6 + xor 3,3,4 + add 8,8,0 + andc 0,15,6 + rotrwi 4,4,14 + or 5,5,0 + add 8,8,23 + xor 3,3,4 + add 8,8,5 + add 8,8,3 + + rotrwi 3,9,2 + rotrwi 4,9,13 + and 5,9,10 + and 0,9,11 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,10,11 + xor 3,3,4 + add 12,12,8 + xor 5,5,0 + add 8,8,3 + add 8,8,5 + + rotrwi 3,25,7 + rotrwi 4,25,18 + rotrwi 5,22,17 + rotrwi 0,22,19 + xor 3,3,4 + srwi 4,25,3 + xor 5,5,0 + srwi 0,22,10 + add 24,24,17 + xor 3,3,4 + xor 5,5,0 + lwz 0,32(7) + add 24,24,3 + add 24,24,5 + rotrwi 3,12,6 + rotrwi 4,12,11 + and 5,6,12 + xor 3,3,4 + add 15,15,0 + andc 0,14,12 + rotrwi 4,4,14 + or 5,5,0 + add 15,15,24 + xor 3,3,4 + add 15,15,5 + add 15,15,3 + + rotrwi 3,8,2 + rotrwi 4,8,13 + and 5,8,9 + and 0,8,10 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,9,10 + xor 3,3,4 + add 11,11,15 + xor 5,5,0 + add 15,15,3 + add 15,15,5 + + rotrwi 3,26,7 + rotrwi 4,26,18 + rotrwi 5,23,17 + rotrwi 
0,23,19 + xor 3,3,4 + srwi 4,26,3 + xor 5,5,0 + srwi 0,23,10 + add 25,25,18 + xor 3,3,4 + xor 5,5,0 + lwz 0,36(7) + add 25,25,3 + add 25,25,5 + rotrwi 3,11,6 + rotrwi 4,11,11 + and 5,12,11 + xor 3,3,4 + add 14,14,0 + andc 0,6,11 + rotrwi 4,4,14 + or 5,5,0 + add 14,14,25 + xor 3,3,4 + add 14,14,5 + add 14,14,3 + + rotrwi 3,15,2 + rotrwi 4,15,13 + and 5,15,8 + and 0,15,9 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,8,9 + xor 3,3,4 + add 10,10,14 + xor 5,5,0 + add 14,14,3 + add 14,14,5 + + rotrwi 3,27,7 + rotrwi 4,27,18 + rotrwi 5,24,17 + rotrwi 0,24,19 + xor 3,3,4 + srwi 4,27,3 + xor 5,5,0 + srwi 0,24,10 + add 26,26,19 + xor 3,3,4 + xor 5,5,0 + lwz 0,40(7) + add 26,26,3 + add 26,26,5 + rotrwi 3,10,6 + rotrwi 4,10,11 + and 5,11,10 + xor 3,3,4 + add 6,6,0 + andc 0,12,10 + rotrwi 4,4,14 + or 5,5,0 + add 6,6,26 + xor 3,3,4 + add 6,6,5 + add 6,6,3 + + rotrwi 3,14,2 + rotrwi 4,14,13 + and 5,14,15 + and 0,14,8 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,15,8 + xor 3,3,4 + add 9,9,6 + xor 5,5,0 + add 6,6,3 + add 6,6,5 + + rotrwi 3,28,7 + rotrwi 4,28,18 + rotrwi 5,25,17 + rotrwi 0,25,19 + xor 3,3,4 + srwi 4,28,3 + xor 5,5,0 + srwi 0,25,10 + add 27,27,20 + xor 3,3,4 + xor 5,5,0 + lwz 0,44(7) + add 27,27,3 + add 27,27,5 + rotrwi 3,9,6 + rotrwi 4,9,11 + and 5,10,9 + xor 3,3,4 + add 12,12,0 + andc 0,11,9 + rotrwi 4,4,14 + or 5,5,0 + add 12,12,27 + xor 3,3,4 + add 12,12,5 + add 12,12,3 + + rotrwi 3,6,2 + rotrwi 4,6,13 + and 5,6,14 + and 0,6,15 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,14,15 + xor 3,3,4 + add 8,8,12 + xor 5,5,0 + add 12,12,3 + add 12,12,5 + + rotrwi 3,29,7 + rotrwi 4,29,18 + rotrwi 5,26,17 + rotrwi 0,26,19 + xor 3,3,4 + srwi 4,29,3 + xor 5,5,0 + srwi 0,26,10 + add 28,28,21 + xor 3,3,4 + xor 5,5,0 + lwz 0,48(7) + add 28,28,3 + add 28,28,5 + rotrwi 3,8,6 + rotrwi 4,8,11 + and 5,9,8 + xor 3,3,4 + add 11,11,0 + andc 0,10,8 + rotrwi 4,4,14 + or 5,5,0 + add 11,11,28 + xor 3,3,4 + add 11,11,5 + add 11,11,3 + + rotrwi 3,12,2 + rotrwi 4,12,13 + and 5,12,6 + and 
0,12,14 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,6,14 + xor 3,3,4 + add 15,15,11 + xor 5,5,0 + add 11,11,3 + add 11,11,5 + + rotrwi 3,30,7 + rotrwi 4,30,18 + rotrwi 5,27,17 + rotrwi 0,27,19 + xor 3,3,4 + srwi 4,30,3 + xor 5,5,0 + srwi 0,27,10 + add 29,29,22 + xor 3,3,4 + xor 5,5,0 + lwz 0,52(7) + add 29,29,3 + add 29,29,5 + rotrwi 3,15,6 + rotrwi 4,15,11 + and 5,8,15 + xor 3,3,4 + add 10,10,0 + andc 0,9,15 + rotrwi 4,4,14 + or 5,5,0 + add 10,10,29 + xor 3,3,4 + add 10,10,5 + add 10,10,3 + + rotrwi 3,11,2 + rotrwi 4,11,13 + and 5,11,12 + and 0,11,6 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,12,6 + xor 3,3,4 + add 14,14,10 + xor 5,5,0 + add 10,10,3 + add 10,10,5 + + rotrwi 3,31,7 + rotrwi 4,31,18 + rotrwi 5,28,17 + rotrwi 0,28,19 + xor 3,3,4 + srwi 4,31,3 + xor 5,5,0 + srwi 0,28,10 + add 30,30,23 + xor 3,3,4 + xor 5,5,0 + lwz 0,56(7) + add 30,30,3 + add 30,30,5 + rotrwi 3,14,6 + rotrwi 4,14,11 + and 5,15,14 + xor 3,3,4 + add 9,9,0 + andc 0,8,14 + rotrwi 4,4,14 + or 5,5,0 + add 9,9,30 + xor 3,3,4 + add 9,9,5 + add 9,9,3 + + rotrwi 3,10,2 + rotrwi 4,10,13 + and 5,10,11 + and 0,10,12 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,11,12 + xor 3,3,4 + add 6,6,9 + xor 5,5,0 + add 9,9,3 + add 9,9,5 + + rotrwi 3,16,7 + rotrwi 4,16,18 + rotrwi 5,29,17 + rotrwi 0,29,19 + xor 3,3,4 + srwi 4,16,3 + xor 5,5,0 + srwi 0,29,10 + add 31,31,24 + xor 3,3,4 + xor 5,5,0 + lwz 0,60(7) + add 31,31,3 + add 31,31,5 + rotrwi 3,6,6 + rotrwi 4,6,11 + and 5,14,6 + xor 3,3,4 + add 8,8,0 + andc 0,15,6 + rotrwi 4,4,14 + or 5,5,0 + add 8,8,31 + xor 3,3,4 + add 8,8,5 + add 8,8,3 + + rotrwi 3,9,2 + rotrwi 4,9,13 + and 5,9,10 + and 0,9,11 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,10,11 + xor 3,3,4 + add 12,12,8 + xor 5,5,0 + add 8,8,3 + add 8,8,5 + + bdnz .Lrounds + + ld 3,144(1) + ld 31,136(1) + ld 5,128(1) + subi 7,7,192 + + lwz 16,0(3) + lwz 17,4(3) + lwz 18,8(3) + lwz 19,12(3) + lwz 20,16(3) + lwz 21,20(3) + lwz 22,24(3) + addi 31,31,64 + lwz 23,28(3) + add 8,8,16 + add 9,9,17 + std 
31,136(1) + add 10,10,18 + stw 8,0(3) + add 11,11,19 + stw 9,4(3) + add 12,12,20 + stw 10,8(3) + add 6,6,21 + stw 11,12(3) + add 14,14,22 + stw 12,16(3) + add 15,15,23 + stw 6,20(3) + stw 14,24(3) + cmpld 31,5 + stw 15,28(3) + bne .Lsha2_block_private + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +#if (!defined(_CALL_ELF) || _CALL_ELF == 1) +.size .zfs_sha256_ppc,.-.zfs_sha256_ppc +.size zfs_sha256_ppc,.-.zfs_sha256_ppc +#else +.size zfs_sha256_ppc,.-zfs_sha256_ppc +#endif +.align 6 +.LPICmeup: + mflr 0 + bcl 20,31,$+4 + mflr 7 + addi 7,7,56 + mtlr 0 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +.space 28 +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +#elif (defined(__PPC64__) && defined(__LITTLE_ENDIAN__)) + +.abiversion 2 +.text + +.globl zfs_sha256_ppc +.type zfs_sha256_ppc,@function +.align 6 +zfs_sha256_ppc: +.localentry zfs_sha256_ppc,0 + + stdu 1,-320(1) + mflr 0 + sldi 5,5,6 + + std 3,144(1) + + std 14,176(1) + std 15,184(1) + std 16,192(1) + std 17,200(1) + std 18,208(1) + std 19,216(1) + std 20,224(1) + std 21,232(1) + std 22,240(1) + std 23,248(1) + std 24,256(1) + std 25,264(1) + std 26,272(1) + std 27,280(1) + std 28,288(1) + std 29,296(1) + std 30,304(1) + std 31,312(1) + std 0,336(1) + lwz 8,0(3) + 
mr 31,4 + lwz 9,4(3) + lwz 10,8(3) + lwz 11,12(3) + lwz 12,16(3) + lwz 6,20(3) + lwz 14,24(3) + lwz 15,28(3) + bl .LPICmeup +.LPICedup: + andi. 0,31,3 + bne .Lunaligned +.Laligned: + add 5,31,5 + std 5,128(1) + std 31,136(1) + bl .Lsha2_block_private + b .Ldone + +.align 4 +.Lunaligned: + subfic 0,31,4096 + andi. 0,0,4032 + beq .Lcross_page + cmpld 5,0 + ble .Laligned + subfc 5,0,5 + add 0,31,0 + std 5,120(1) + std 0,128(1) + std 31,136(1) + bl .Lsha2_block_private + + ld 5,120(1) +.Lcross_page: + li 0,16 + mtctr 0 + addi 20,1,48 +.Lmemcpy: + lbz 16,0(31) + lbz 17,1(31) + lbz 18,2(31) + lbz 19,3(31) + addi 31,31,4 + stb 16,0(20) + stb 17,1(20) + stb 18,2(20) + stb 19,3(20) + addi 20,20,4 + bdnz .Lmemcpy + std 31,112(1) + addi 0,1,112 + addi 31,1,48 + std 5,120(1) + std 0,128(1) + std 31,136(1) + bl .Lsha2_block_private + ld 31,112(1) + ld 5,120(1) + addic. 5,5,-64 + bne .Lunaligned + +.Ldone: + ld 0,336(1) + ld 14,176(1) + ld 15,184(1) + ld 16,192(1) + ld 17,200(1) + ld 18,208(1) + ld 19,216(1) + ld 20,224(1) + ld 21,232(1) + ld 22,240(1) + ld 23,248(1) + ld 24,256(1) + ld 25,264(1) + ld 26,272(1) + ld 27,280(1) + ld 28,288(1) + ld 29,296(1) + ld 30,304(1) + ld 31,312(1) + mtlr 0 + addi 1,1,320 + blr +.long 0 +.byte 0,12,4,1,0x80,18,3,0 +.long 0 +.align 4 +.Lsha2_block_private: + lwz 0,0(7) + lwz 3,0(31) + rotlwi 16,3,8 + rlwimi 16,3,24,0,7 + rlwimi 16,3,24,16,23 + rotrwi 3,12,6 + rotrwi 4,12,11 + and 5,6,12 + xor 3,3,4 + add 15,15,0 + andc 0,14,12 + rotrwi 4,4,14 + or 5,5,0 + add 15,15,16 + xor 3,3,4 + add 15,15,5 + add 15,15,3 + + rotrwi 3,8,2 + rotrwi 4,8,13 + and 5,8,9 + and 0,8,10 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,9,10 + xor 3,3,4 + add 11,11,15 + xor 5,5,0 + lwz 0,4(7) + add 15,15,3 + add 15,15,5 + + lwz 3,4(31) + rotlwi 17,3,8 + rlwimi 17,3,24,0,7 + rlwimi 17,3,24,16,23 + rotrwi 3,11,6 + rotrwi 4,11,11 + and 5,12,11 + xor 3,3,4 + add 14,14,0 + andc 0,6,11 + rotrwi 4,4,14 + or 5,5,0 + add 14,14,17 + xor 3,3,4 + add 14,14,5 + add 14,14,3 + + 
rotrwi 3,15,2 + rotrwi 4,15,13 + and 5,15,8 + and 0,15,9 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,8,9 + xor 3,3,4 + add 10,10,14 + xor 5,5,0 + lwz 0,8(7) + add 14,14,3 + add 14,14,5 + + lwz 3,8(31) + rotlwi 18,3,8 + rlwimi 18,3,24,0,7 + rlwimi 18,3,24,16,23 + rotrwi 3,10,6 + rotrwi 4,10,11 + and 5,11,10 + xor 3,3,4 + add 6,6,0 + andc 0,12,10 + rotrwi 4,4,14 + or 5,5,0 + add 6,6,18 + xor 3,3,4 + add 6,6,5 + add 6,6,3 + + rotrwi 3,14,2 + rotrwi 4,14,13 + and 5,14,15 + and 0,14,8 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,15,8 + xor 3,3,4 + add 9,9,6 + xor 5,5,0 + lwz 0,12(7) + add 6,6,3 + add 6,6,5 + + lwz 3,12(31) + rotlwi 19,3,8 + rlwimi 19,3,24,0,7 + rlwimi 19,3,24,16,23 + rotrwi 3,9,6 + rotrwi 4,9,11 + and 5,10,9 + xor 3,3,4 + add 12,12,0 + andc 0,11,9 + rotrwi 4,4,14 + or 5,5,0 + add 12,12,19 + xor 3,3,4 + add 12,12,5 + add 12,12,3 + + rotrwi 3,6,2 + rotrwi 4,6,13 + and 5,6,14 + and 0,6,15 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,14,15 + xor 3,3,4 + add 8,8,12 + xor 5,5,0 + lwz 0,16(7) + add 12,12,3 + add 12,12,5 + + lwz 3,16(31) + rotlwi 20,3,8 + rlwimi 20,3,24,0,7 + rlwimi 20,3,24,16,23 + rotrwi 3,8,6 + rotrwi 4,8,11 + and 5,9,8 + xor 3,3,4 + add 11,11,0 + andc 0,10,8 + rotrwi 4,4,14 + or 5,5,0 + add 11,11,20 + xor 3,3,4 + add 11,11,5 + add 11,11,3 + + rotrwi 3,12,2 + rotrwi 4,12,13 + and 5,12,6 + and 0,12,14 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,6,14 + xor 3,3,4 + add 15,15,11 + xor 5,5,0 + lwz 0,20(7) + add 11,11,3 + add 11,11,5 + + lwz 3,20(31) + rotlwi 21,3,8 + rlwimi 21,3,24,0,7 + rlwimi 21,3,24,16,23 + rotrwi 3,15,6 + rotrwi 4,15,11 + and 5,8,15 + xor 3,3,4 + add 10,10,0 + andc 0,9,15 + rotrwi 4,4,14 + or 5,5,0 + add 10,10,21 + xor 3,3,4 + add 10,10,5 + add 10,10,3 + + rotrwi 3,11,2 + rotrwi 4,11,13 + and 5,11,12 + and 0,11,6 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,12,6 + xor 3,3,4 + add 14,14,10 + xor 5,5,0 + lwz 0,24(7) + add 10,10,3 + add 10,10,5 + + lwz 3,24(31) + rotlwi 22,3,8 + rlwimi 22,3,24,0,7 + rlwimi 
22,3,24,16,23 + rotrwi 3,14,6 + rotrwi 4,14,11 + and 5,15,14 + xor 3,3,4 + add 9,9,0 + andc 0,8,14 + rotrwi 4,4,14 + or 5,5,0 + add 9,9,22 + xor 3,3,4 + add 9,9,5 + add 9,9,3 + + rotrwi 3,10,2 + rotrwi 4,10,13 + and 5,10,11 + and 0,10,12 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,11,12 + xor 3,3,4 + add 6,6,9 + xor 5,5,0 + lwz 0,28(7) + add 9,9,3 + add 9,9,5 + + lwz 3,28(31) + rotlwi 23,3,8 + rlwimi 23,3,24,0,7 + rlwimi 23,3,24,16,23 + rotrwi 3,6,6 + rotrwi 4,6,11 + and 5,14,6 + xor 3,3,4 + add 8,8,0 + andc 0,15,6 + rotrwi 4,4,14 + or 5,5,0 + add 8,8,23 + xor 3,3,4 + add 8,8,5 + add 8,8,3 + + rotrwi 3,9,2 + rotrwi 4,9,13 + and 5,9,10 + and 0,9,11 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,10,11 + xor 3,3,4 + add 12,12,8 + xor 5,5,0 + lwz 0,32(7) + add 8,8,3 + add 8,8,5 + + lwz 3,32(31) + rotlwi 24,3,8 + rlwimi 24,3,24,0,7 + rlwimi 24,3,24,16,23 + rotrwi 3,12,6 + rotrwi 4,12,11 + and 5,6,12 + xor 3,3,4 + add 15,15,0 + andc 0,14,12 + rotrwi 4,4,14 + or 5,5,0 + add 15,15,24 + xor 3,3,4 + add 15,15,5 + add 15,15,3 + + rotrwi 3,8,2 + rotrwi 4,8,13 + and 5,8,9 + and 0,8,10 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,9,10 + xor 3,3,4 + add 11,11,15 + xor 5,5,0 + lwz 0,36(7) + add 15,15,3 + add 15,15,5 + + lwz 3,36(31) + rotlwi 25,3,8 + rlwimi 25,3,24,0,7 + rlwimi 25,3,24,16,23 + rotrwi 3,11,6 + rotrwi 4,11,11 + and 5,12,11 + xor 3,3,4 + add 14,14,0 + andc 0,6,11 + rotrwi 4,4,14 + or 5,5,0 + add 14,14,25 + xor 3,3,4 + add 14,14,5 + add 14,14,3 + + rotrwi 3,15,2 + rotrwi 4,15,13 + and 5,15,8 + and 0,15,9 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,8,9 + xor 3,3,4 + add 10,10,14 + xor 5,5,0 + lwz 0,40(7) + add 14,14,3 + add 14,14,5 + + lwz 3,40(31) + rotlwi 26,3,8 + rlwimi 26,3,24,0,7 + rlwimi 26,3,24,16,23 + rotrwi 3,10,6 + rotrwi 4,10,11 + and 5,11,10 + xor 3,3,4 + add 6,6,0 + andc 0,12,10 + rotrwi 4,4,14 + or 5,5,0 + add 6,6,26 + xor 3,3,4 + add 6,6,5 + add 6,6,3 + + rotrwi 3,14,2 + rotrwi 4,14,13 + and 5,14,15 + and 0,14,8 + xor 3,3,4 + rotrwi 4,4,9 + xor 
5,5,0 + and 0,15,8 + xor 3,3,4 + add 9,9,6 + xor 5,5,0 + lwz 0,44(7) + add 6,6,3 + add 6,6,5 + + lwz 3,44(31) + rotlwi 27,3,8 + rlwimi 27,3,24,0,7 + rlwimi 27,3,24,16,23 + rotrwi 3,9,6 + rotrwi 4,9,11 + and 5,10,9 + xor 3,3,4 + add 12,12,0 + andc 0,11,9 + rotrwi 4,4,14 + or 5,5,0 + add 12,12,27 + xor 3,3,4 + add 12,12,5 + add 12,12,3 + + rotrwi 3,6,2 + rotrwi 4,6,13 + and 5,6,14 + and 0,6,15 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,14,15 + xor 3,3,4 + add 8,8,12 + xor 5,5,0 + lwz 0,48(7) + add 12,12,3 + add 12,12,5 + + lwz 3,48(31) + rotlwi 28,3,8 + rlwimi 28,3,24,0,7 + rlwimi 28,3,24,16,23 + rotrwi 3,8,6 + rotrwi 4,8,11 + and 5,9,8 + xor 3,3,4 + add 11,11,0 + andc 0,10,8 + rotrwi 4,4,14 + or 5,5,0 + add 11,11,28 + xor 3,3,4 + add 11,11,5 + add 11,11,3 + + rotrwi 3,12,2 + rotrwi 4,12,13 + and 5,12,6 + and 0,12,14 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,6,14 + xor 3,3,4 + add 15,15,11 + xor 5,5,0 + lwz 0,52(7) + add 11,11,3 + add 11,11,5 + + lwz 3,52(31) + rotlwi 29,3,8 + rlwimi 29,3,24,0,7 + rlwimi 29,3,24,16,23 + rotrwi 3,15,6 + rotrwi 4,15,11 + and 5,8,15 + xor 3,3,4 + add 10,10,0 + andc 0,9,15 + rotrwi 4,4,14 + or 5,5,0 + add 10,10,29 + xor 3,3,4 + add 10,10,5 + add 10,10,3 + + rotrwi 3,11,2 + rotrwi 4,11,13 + and 5,11,12 + and 0,11,6 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,12,6 + xor 3,3,4 + add 14,14,10 + xor 5,5,0 + lwz 0,56(7) + add 10,10,3 + add 10,10,5 + + lwz 3,56(31) + rotlwi 30,3,8 + rlwimi 30,3,24,0,7 + rlwimi 30,3,24,16,23 + rotrwi 3,14,6 + rotrwi 4,14,11 + and 5,15,14 + xor 3,3,4 + add 9,9,0 + andc 0,8,14 + rotrwi 4,4,14 + or 5,5,0 + add 9,9,30 + xor 3,3,4 + add 9,9,5 + add 9,9,3 + + rotrwi 3,10,2 + rotrwi 4,10,13 + and 5,10,11 + and 0,10,12 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,11,12 + xor 3,3,4 + add 6,6,9 + xor 5,5,0 + lwz 0,60(7) + add 9,9,3 + add 9,9,5 + + lwz 3,60(31) + rotlwi 31,3,8 + rlwimi 31,3,24,0,7 + rlwimi 31,3,24,16,23 + rotrwi 3,6,6 + rotrwi 4,6,11 + and 5,14,6 + xor 3,3,4 + add 8,8,0 + andc 0,15,6 + rotrwi 
4,4,14 + or 5,5,0 + add 8,8,31 + xor 3,3,4 + add 8,8,5 + add 8,8,3 + + rotrwi 3,9,2 + rotrwi 4,9,13 + and 5,9,10 + and 0,9,11 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,10,11 + xor 3,3,4 + add 12,12,8 + xor 5,5,0 + add 8,8,3 + add 8,8,5 + + li 5,3 + mtctr 5 +.align 4 +.Lrounds: + addi 7,7,64 + rotrwi 3,17,7 + rotrwi 4,17,18 + rotrwi 5,30,17 + rotrwi 0,30,19 + xor 3,3,4 + srwi 4,17,3 + xor 5,5,0 + srwi 0,30,10 + add 16,16,25 + xor 3,3,4 + xor 5,5,0 + lwz 0,0(7) + add 16,16,3 + add 16,16,5 + rotrwi 3,12,6 + rotrwi 4,12,11 + and 5,6,12 + xor 3,3,4 + add 15,15,0 + andc 0,14,12 + rotrwi 4,4,14 + or 5,5,0 + add 15,15,16 + xor 3,3,4 + add 15,15,5 + add 15,15,3 + + rotrwi 3,8,2 + rotrwi 4,8,13 + and 5,8,9 + and 0,8,10 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,9,10 + xor 3,3,4 + add 11,11,15 + xor 5,5,0 + add 15,15,3 + add 15,15,5 + + rotrwi 3,18,7 + rotrwi 4,18,18 + rotrwi 5,31,17 + rotrwi 0,31,19 + xor 3,3,4 + srwi 4,18,3 + xor 5,5,0 + srwi 0,31,10 + add 17,17,26 + xor 3,3,4 + xor 5,5,0 + lwz 0,4(7) + add 17,17,3 + add 17,17,5 + rotrwi 3,11,6 + rotrwi 4,11,11 + and 5,12,11 + xor 3,3,4 + add 14,14,0 + andc 0,6,11 + rotrwi 4,4,14 + or 5,5,0 + add 14,14,17 + xor 3,3,4 + add 14,14,5 + add 14,14,3 + + rotrwi 3,15,2 + rotrwi 4,15,13 + and 5,15,8 + and 0,15,9 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,8,9 + xor 3,3,4 + add 10,10,14 + xor 5,5,0 + add 14,14,3 + add 14,14,5 + + rotrwi 3,19,7 + rotrwi 4,19,18 + rotrwi 5,16,17 + rotrwi 0,16,19 + xor 3,3,4 + srwi 4,19,3 + xor 5,5,0 + srwi 0,16,10 + add 18,18,27 + xor 3,3,4 + xor 5,5,0 + lwz 0,8(7) + add 18,18,3 + add 18,18,5 + rotrwi 3,10,6 + rotrwi 4,10,11 + and 5,11,10 + xor 3,3,4 + add 6,6,0 + andc 0,12,10 + rotrwi 4,4,14 + or 5,5,0 + add 6,6,18 + xor 3,3,4 + add 6,6,5 + add 6,6,3 + + rotrwi 3,14,2 + rotrwi 4,14,13 + and 5,14,15 + and 0,14,8 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,15,8 + xor 3,3,4 + add 9,9,6 + xor 5,5,0 + add 6,6,3 + add 6,6,5 + + rotrwi 3,20,7 + rotrwi 4,20,18 + rotrwi 5,17,17 + rotrwi 0,17,19 
+ xor 3,3,4 + srwi 4,20,3 + xor 5,5,0 + srwi 0,17,10 + add 19,19,28 + xor 3,3,4 + xor 5,5,0 + lwz 0,12(7) + add 19,19,3 + add 19,19,5 + rotrwi 3,9,6 + rotrwi 4,9,11 + and 5,10,9 + xor 3,3,4 + add 12,12,0 + andc 0,11,9 + rotrwi 4,4,14 + or 5,5,0 + add 12,12,19 + xor 3,3,4 + add 12,12,5 + add 12,12,3 + + rotrwi 3,6,2 + rotrwi 4,6,13 + and 5,6,14 + and 0,6,15 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,14,15 + xor 3,3,4 + add 8,8,12 + xor 5,5,0 + add 12,12,3 + add 12,12,5 + + rotrwi 3,21,7 + rotrwi 4,21,18 + rotrwi 5,18,17 + rotrwi 0,18,19 + xor 3,3,4 + srwi 4,21,3 + xor 5,5,0 + srwi 0,18,10 + add 20,20,29 + xor 3,3,4 + xor 5,5,0 + lwz 0,16(7) + add 20,20,3 + add 20,20,5 + rotrwi 3,8,6 + rotrwi 4,8,11 + and 5,9,8 + xor 3,3,4 + add 11,11,0 + andc 0,10,8 + rotrwi 4,4,14 + or 5,5,0 + add 11,11,20 + xor 3,3,4 + add 11,11,5 + add 11,11,3 + + rotrwi 3,12,2 + rotrwi 4,12,13 + and 5,12,6 + and 0,12,14 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,6,14 + xor 3,3,4 + add 15,15,11 + xor 5,5,0 + add 11,11,3 + add 11,11,5 + + rotrwi 3,22,7 + rotrwi 4,22,18 + rotrwi 5,19,17 + rotrwi 0,19,19 + xor 3,3,4 + srwi 4,22,3 + xor 5,5,0 + srwi 0,19,10 + add 21,21,30 + xor 3,3,4 + xor 5,5,0 + lwz 0,20(7) + add 21,21,3 + add 21,21,5 + rotrwi 3,15,6 + rotrwi 4,15,11 + and 5,8,15 + xor 3,3,4 + add 10,10,0 + andc 0,9,15 + rotrwi 4,4,14 + or 5,5,0 + add 10,10,21 + xor 3,3,4 + add 10,10,5 + add 10,10,3 + + rotrwi 3,11,2 + rotrwi 4,11,13 + and 5,11,12 + and 0,11,6 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,12,6 + xor 3,3,4 + add 14,14,10 + xor 5,5,0 + add 10,10,3 + add 10,10,5 + + rotrwi 3,23,7 + rotrwi 4,23,18 + rotrwi 5,20,17 + rotrwi 0,20,19 + xor 3,3,4 + srwi 4,23,3 + xor 5,5,0 + srwi 0,20,10 + add 22,22,31 + xor 3,3,4 + xor 5,5,0 + lwz 0,24(7) + add 22,22,3 + add 22,22,5 + rotrwi 3,14,6 + rotrwi 4,14,11 + and 5,15,14 + xor 3,3,4 + add 9,9,0 + andc 0,8,14 + rotrwi 4,4,14 + or 5,5,0 + add 9,9,22 + xor 3,3,4 + add 9,9,5 + add 9,9,3 + + rotrwi 3,10,2 + rotrwi 4,10,13 + and 5,10,11 + and 
0,10,12 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,11,12 + xor 3,3,4 + add 6,6,9 + xor 5,5,0 + add 9,9,3 + add 9,9,5 + + rotrwi 3,24,7 + rotrwi 4,24,18 + rotrwi 5,21,17 + rotrwi 0,21,19 + xor 3,3,4 + srwi 4,24,3 + xor 5,5,0 + srwi 0,21,10 + add 23,23,16 + xor 3,3,4 + xor 5,5,0 + lwz 0,28(7) + add 23,23,3 + add 23,23,5 + rotrwi 3,6,6 + rotrwi 4,6,11 + and 5,14,6 + xor 3,3,4 + add 8,8,0 + andc 0,15,6 + rotrwi 4,4,14 + or 5,5,0 + add 8,8,23 + xor 3,3,4 + add 8,8,5 + add 8,8,3 + + rotrwi 3,9,2 + rotrwi 4,9,13 + and 5,9,10 + and 0,9,11 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,10,11 + xor 3,3,4 + add 12,12,8 + xor 5,5,0 + add 8,8,3 + add 8,8,5 + + rotrwi 3,25,7 + rotrwi 4,25,18 + rotrwi 5,22,17 + rotrwi 0,22,19 + xor 3,3,4 + srwi 4,25,3 + xor 5,5,0 + srwi 0,22,10 + add 24,24,17 + xor 3,3,4 + xor 5,5,0 + lwz 0,32(7) + add 24,24,3 + add 24,24,5 + rotrwi 3,12,6 + rotrwi 4,12,11 + and 5,6,12 + xor 3,3,4 + add 15,15,0 + andc 0,14,12 + rotrwi 4,4,14 + or 5,5,0 + add 15,15,24 + xor 3,3,4 + add 15,15,5 + add 15,15,3 + + rotrwi 3,8,2 + rotrwi 4,8,13 + and 5,8,9 + and 0,8,10 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,9,10 + xor 3,3,4 + add 11,11,15 + xor 5,5,0 + add 15,15,3 + add 15,15,5 + + rotrwi 3,26,7 + rotrwi 4,26,18 + rotrwi 5,23,17 + rotrwi 0,23,19 + xor 3,3,4 + srwi 4,26,3 + xor 5,5,0 + srwi 0,23,10 + add 25,25,18 + xor 3,3,4 + xor 5,5,0 + lwz 0,36(7) + add 25,25,3 + add 25,25,5 + rotrwi 3,11,6 + rotrwi 4,11,11 + and 5,12,11 + xor 3,3,4 + add 14,14,0 + andc 0,6,11 + rotrwi 4,4,14 + or 5,5,0 + add 14,14,25 + xor 3,3,4 + add 14,14,5 + add 14,14,3 + + rotrwi 3,15,2 + rotrwi 4,15,13 + and 5,15,8 + and 0,15,9 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,8,9 + xor 3,3,4 + add 10,10,14 + xor 5,5,0 + add 14,14,3 + add 14,14,5 + + rotrwi 3,27,7 + rotrwi 4,27,18 + rotrwi 5,24,17 + rotrwi 0,24,19 + xor 3,3,4 + srwi 4,27,3 + xor 5,5,0 + srwi 0,24,10 + add 26,26,19 + xor 3,3,4 + xor 5,5,0 + lwz 0,40(7) + add 26,26,3 + add 26,26,5 + rotrwi 3,10,6 + rotrwi 4,10,11 + and 
5,11,10 + xor 3,3,4 + add 6,6,0 + andc 0,12,10 + rotrwi 4,4,14 + or 5,5,0 + add 6,6,26 + xor 3,3,4 + add 6,6,5 + add 6,6,3 + + rotrwi 3,14,2 + rotrwi 4,14,13 + and 5,14,15 + and 0,14,8 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,15,8 + xor 3,3,4 + add 9,9,6 + xor 5,5,0 + add 6,6,3 + add 6,6,5 + + rotrwi 3,28,7 + rotrwi 4,28,18 + rotrwi 5,25,17 + rotrwi 0,25,19 + xor 3,3,4 + srwi 4,28,3 + xor 5,5,0 + srwi 0,25,10 + add 27,27,20 + xor 3,3,4 + xor 5,5,0 + lwz 0,44(7) + add 27,27,3 + add 27,27,5 + rotrwi 3,9,6 + rotrwi 4,9,11 + and 5,10,9 + xor 3,3,4 + add 12,12,0 + andc 0,11,9 + rotrwi 4,4,14 + or 5,5,0 + add 12,12,27 + xor 3,3,4 + add 12,12,5 + add 12,12,3 + + rotrwi 3,6,2 + rotrwi 4,6,13 + and 5,6,14 + and 0,6,15 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,14,15 + xor 3,3,4 + add 8,8,12 + xor 5,5,0 + add 12,12,3 + add 12,12,5 + + rotrwi 3,29,7 + rotrwi 4,29,18 + rotrwi 5,26,17 + rotrwi 0,26,19 + xor 3,3,4 + srwi 4,29,3 + xor 5,5,0 + srwi 0,26,10 + add 28,28,21 + xor 3,3,4 + xor 5,5,0 + lwz 0,48(7) + add 28,28,3 + add 28,28,5 + rotrwi 3,8,6 + rotrwi 4,8,11 + and 5,9,8 + xor 3,3,4 + add 11,11,0 + andc 0,10,8 + rotrwi 4,4,14 + or 5,5,0 + add 11,11,28 + xor 3,3,4 + add 11,11,5 + add 11,11,3 + + rotrwi 3,12,2 + rotrwi 4,12,13 + and 5,12,6 + and 0,12,14 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,6,14 + xor 3,3,4 + add 15,15,11 + xor 5,5,0 + add 11,11,3 + add 11,11,5 + + rotrwi 3,30,7 + rotrwi 4,30,18 + rotrwi 5,27,17 + rotrwi 0,27,19 + xor 3,3,4 + srwi 4,30,3 + xor 5,5,0 + srwi 0,27,10 + add 29,29,22 + xor 3,3,4 + xor 5,5,0 + lwz 0,52(7) + add 29,29,3 + add 29,29,5 + rotrwi 3,15,6 + rotrwi 4,15,11 + and 5,8,15 + xor 3,3,4 + add 10,10,0 + andc 0,9,15 + rotrwi 4,4,14 + or 5,5,0 + add 10,10,29 + xor 3,3,4 + add 10,10,5 + add 10,10,3 + + rotrwi 3,11,2 + rotrwi 4,11,13 + and 5,11,12 + and 0,11,6 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,12,6 + xor 3,3,4 + add 14,14,10 + xor 5,5,0 + add 10,10,3 + add 10,10,5 + + rotrwi 3,31,7 + rotrwi 4,31,18 + rotrwi 5,28,17 + 
rotrwi 0,28,19 + xor 3,3,4 + srwi 4,31,3 + xor 5,5,0 + srwi 0,28,10 + add 30,30,23 + xor 3,3,4 + xor 5,5,0 + lwz 0,56(7) + add 30,30,3 + add 30,30,5 + rotrwi 3,14,6 + rotrwi 4,14,11 + and 5,15,14 + xor 3,3,4 + add 9,9,0 + andc 0,8,14 + rotrwi 4,4,14 + or 5,5,0 + add 9,9,30 + xor 3,3,4 + add 9,9,5 + add 9,9,3 + + rotrwi 3,10,2 + rotrwi 4,10,13 + and 5,10,11 + and 0,10,12 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,11,12 + xor 3,3,4 + add 6,6,9 + xor 5,5,0 + add 9,9,3 + add 9,9,5 + + rotrwi 3,16,7 + rotrwi 4,16,18 + rotrwi 5,29,17 + rotrwi 0,29,19 + xor 3,3,4 + srwi 4,16,3 + xor 5,5,0 + srwi 0,29,10 + add 31,31,24 + xor 3,3,4 + xor 5,5,0 + lwz 0,60(7) + add 31,31,3 + add 31,31,5 + rotrwi 3,6,6 + rotrwi 4,6,11 + and 5,14,6 + xor 3,3,4 + add 8,8,0 + andc 0,15,6 + rotrwi 4,4,14 + or 5,5,0 + add 8,8,31 + xor 3,3,4 + add 8,8,5 + add 8,8,3 + + rotrwi 3,9,2 + rotrwi 4,9,13 + and 5,9,10 + and 0,9,11 + xor 3,3,4 + rotrwi 4,4,9 + xor 5,5,0 + and 0,10,11 + xor 3,3,4 + add 12,12,8 + xor 5,5,0 + add 8,8,3 + add 8,8,5 + + bdnz .Lrounds + + ld 3,144(1) + ld 31,136(1) + ld 5,128(1) + subi 7,7,192 + + lwz 16,0(3) + lwz 17,4(3) + lwz 18,8(3) + lwz 19,12(3) + lwz 20,16(3) + lwz 21,20(3) + lwz 22,24(3) + addi 31,31,64 + lwz 23,28(3) + add 8,8,16 + add 9,9,17 + std 31,136(1) + add 10,10,18 + stw 8,0(3) + add 11,11,19 + stw 9,4(3) + add 12,12,20 + stw 10,8(3) + add 6,6,21 + stw 11,12(3) + add 14,14,22 + stw 12,16(3) + add 15,15,23 + stw 6,20(3) + stw 14,24(3) + cmpld 31,5 + stw 15,28(3) + bne .Lsha2_block_private + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +.size zfs_sha256_ppc,.-zfs_sha256_ppc +.align 6 +.LPICmeup: + mflr 0 + bcl 20,31,$+4 + mflr 7 + addi 7,7,56 + mtlr 0 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +.space 28 +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 
0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +#endif diff --git a/sys/contrib/openzfs/module/icp/asm-ppc64/sha2/sha512-p8.S b/sys/contrib/openzfs/module/icp/asm-ppc64/sha2/sha512-p8.S new file mode 100644 index 000000000000..2409c53385d6 --- /dev/null +++ b/sys/contrib/openzfs/module/icp/asm-ppc64/sha2/sha512-p8.S @@ -0,0 +1,1722 @@ +/* + * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de> + * - modified assembly to fit into OpenZFS + */ + +#if (defined(__PPC64__) && defined(__BIG_ENDIAN__)) + +#if (!defined(_CALL_ELF) || _CALL_ELF == 1) +.text + +.globl zfs_sha512_power8 +.globl .zfs_sha512_power8 +.type zfs_sha512_power8,@function +.section ".opd","aw" +.align 3 +zfs_sha512_power8: +.quad .zfs_sha512_power8,.TOC.@tocbase,0 +.previous +.align 6 +.zfs_sha512_power8: +#else +.abiversion 2 +.text + +.globl zfs_sha512_power8 +.type zfs_sha512_power8,@function +.align 6 +zfs_sha512_power8: +.localentry zfs_sha512_power8,0 +#endif + + stdu 1,-384(1) + mflr 8 + li 10,207 + li 11,223 + stvx 24,10,1 + addi 10,10,32 + mfspr 12,256 + stvx 25,11,1 + addi 11,11,32 + stvx 26,10,1 + addi 10,10,32 + stvx 27,11,1 + addi 11,11,32 + stvx 28,10,1 + addi 10,10,32 + stvx 29,11,1 + addi 11,11,32 + stvx 30,10,1 + stvx 31,11,1 + li 11,-4096+255 + stw 12,332(1) + li 10,0x10 + std 26,336(1) + li 26,0x20 + std 27,344(1) + li 27,0x30 + std 28,352(1) + li 28,0x40 + std 29,360(1) + li 29,0x50 + std 30,368(1) + li 30,0x60 + std 31,376(1) + li 31,0x70 + std 8,400(1) + mtspr 256,11 + + bl .LPICmeup + addi 11,1,79 + .long 0x7C001E99 + .long 0x7C4A1E99 + .long 0x7C9A1E99 + vsldoi 1,0,0,8 + .long 0x7CDB1E99 + vsldoi 3,2,2,8 + vsldoi 5,4,4,8 + vsldoi 7,6,6,8 + li 0,4 + b .Loop +.align 5 +.Loop: + lvx 28,0,6 + .long 0x7D002699 + addi 4,4,16 + mr 7,6 + stvx 0,0,11 + stvx 1,10,11 + stvx 2,26,11 + stvx 3,27,11 + stvx 4,28,11 + stvx 5,29,11 + stvx 6,30,11 + stvx 7,31,11 + .long 0x10E7E0C0 + lvx 28,10,6 + .long 0x10E740C0 + vsel 29,6,5,4 + .long 0x10C6E0C0 + .long 0x10E7E8C0 + .long 0x13C4FEC2 + .long 0x10E7F0C0 + vxor 29,0,1 + vsel 29,1,2,29 + .long 0x106338C0 + .long 0x13C086C2 + .long 0x13DEE8C0 + .long 0x10E7F0C0 + lvx 28,26,7 + .long 0x7D402699 + addi 4,4,16 + vsldoi 9,8,8,8 + .long 0x10C648C0 + vsel 29,5,4,3 + .long 0x10A5E0C0 + .long 0x10C6E8C0 + .long 0x13C3FEC2 + .long 0x10C6F0C0 + vxor 29,7,0 + vsel 
29,0,1,29 + .long 0x104230C0 + .long 0x13C786C2 + .long 0x13DEE8C0 + .long 0x10C6F0C0 + lvx 28,27,7 + .long 0x10A550C0 + vsel 29,4,3,2 + .long 0x1084E0C0 + .long 0x10A5E8C0 + .long 0x13C2FEC2 + .long 0x10A5F0C0 + vxor 29,6,7 + vsel 29,7,0,29 + .long 0x102128C0 + .long 0x13C686C2 + .long 0x13DEE8C0 + .long 0x10A5F0C0 + lvx 28,28,7 + .long 0x7D802699 + addi 4,4,16 + vsldoi 11,10,10,8 + .long 0x108458C0 + vsel 29,3,2,1 + .long 0x1063E0C0 + .long 0x1084E8C0 + .long 0x13C1FEC2 + .long 0x1084F0C0 + vxor 29,5,6 + vsel 29,6,7,29 + .long 0x100020C0 + .long 0x13C586C2 + .long 0x13DEE8C0 + .long 0x1084F0C0 + lvx 28,29,7 + .long 0x106360C0 + vsel 29,2,1,0 + .long 0x1042E0C0 + .long 0x1063E8C0 + .long 0x13C0FEC2 + .long 0x1063F0C0 + vxor 29,4,5 + vsel 29,5,6,29 + .long 0x10E718C0 + .long 0x13C486C2 + .long 0x13DEE8C0 + .long 0x1063F0C0 + lvx 28,30,7 + .long 0x7DC02699 + addi 4,4,16 + vsldoi 13,12,12,8 + .long 0x104268C0 + vsel 29,1,0,7 + .long 0x1021E0C0 + .long 0x1042E8C0 + .long 0x13C7FEC2 + .long 0x1042F0C0 + vxor 29,3,4 + vsel 29,4,5,29 + .long 0x10C610C0 + .long 0x13C386C2 + .long 0x13DEE8C0 + .long 0x1042F0C0 + lvx 28,31,7 + addi 7,7,0x80 + .long 0x102170C0 + vsel 29,0,7,6 + .long 0x1000E0C0 + .long 0x1021E8C0 + .long 0x13C6FEC2 + .long 0x1021F0C0 + vxor 29,2,3 + vsel 29,3,4,29 + .long 0x10A508C0 + .long 0x13C286C2 + .long 0x13DEE8C0 + .long 0x1021F0C0 + lvx 28,0,7 + .long 0x7E002699 + addi 4,4,16 + vsldoi 15,14,14,8 + .long 0x100078C0 + vsel 29,7,6,5 + .long 0x10E7E0C0 + .long 0x1000E8C0 + .long 0x13C5FEC2 + .long 0x1000F0C0 + vxor 29,1,2 + vsel 29,2,3,29 + .long 0x108400C0 + .long 0x13C186C2 + .long 0x13DEE8C0 + .long 0x1000F0C0 + lvx 28,10,7 + .long 0x10E780C0 + vsel 29,6,5,4 + .long 0x10C6E0C0 + .long 0x10E7E8C0 + .long 0x13C4FEC2 + .long 0x10E7F0C0 + vxor 29,0,1 + vsel 29,1,2,29 + .long 0x106338C0 + .long 0x13C086C2 + .long 0x13DEE8C0 + .long 0x10E7F0C0 + lvx 28,26,7 + .long 0x7E402699 + addi 4,4,16 + vsldoi 17,16,16,8 + .long 0x10C688C0 + vsel 29,5,4,3 + .long 
0x10A5E0C0 + .long 0x10C6E8C0 + .long 0x13C3FEC2 + .long 0x10C6F0C0 + vxor 29,7,0 + vsel 29,0,1,29 + .long 0x104230C0 + .long 0x13C786C2 + .long 0x13DEE8C0 + .long 0x10C6F0C0 + lvx 28,27,7 + .long 0x10A590C0 + vsel 29,4,3,2 + .long 0x1084E0C0 + .long 0x10A5E8C0 + .long 0x13C2FEC2 + .long 0x10A5F0C0 + vxor 29,6,7 + vsel 29,7,0,29 + .long 0x102128C0 + .long 0x13C686C2 + .long 0x13DEE8C0 + .long 0x10A5F0C0 + lvx 28,28,7 + .long 0x7F002699 + addi 4,4,16 + vsldoi 19,18,18,8 + .long 0x108498C0 + vsel 29,3,2,1 + .long 0x1063E0C0 + .long 0x1084E8C0 + .long 0x13C1FEC2 + .long 0x1084F0C0 + vxor 29,5,6 + vsel 29,6,7,29 + .long 0x100020C0 + .long 0x13C586C2 + .long 0x13DEE8C0 + .long 0x1084F0C0 + lvx 28,29,7 + .long 0x1063C0C0 + vsel 29,2,1,0 + .long 0x1042E0C0 + .long 0x1063E8C0 + .long 0x13C0FEC2 + .long 0x1063F0C0 + vxor 29,4,5 + vsel 29,5,6,29 + .long 0x10E718C0 + .long 0x13C486C2 + .long 0x13DEE8C0 + .long 0x1063F0C0 + lvx 28,30,7 + .long 0x7F402699 + addi 4,4,16 + vsldoi 25,24,24,8 + .long 0x1042C8C0 + vsel 29,1,0,7 + .long 0x1021E0C0 + .long 0x1042E8C0 + .long 0x13C7FEC2 + .long 0x1042F0C0 + vxor 29,3,4 + vsel 29,4,5,29 + .long 0x10C610C0 + .long 0x13C386C2 + .long 0x13DEE8C0 + .long 0x1042F0C0 + lvx 28,31,7 + addi 7,7,0x80 + .long 0x1021D0C0 + vsel 29,0,7,6 + .long 0x1000E0C0 + .long 0x1021E8C0 + .long 0x13C6FEC2 + .long 0x1021F0C0 + vxor 29,2,3 + vsel 29,3,4,29 + .long 0x10A508C0 + .long 0x13C286C2 + .long 0x13DEE8C0 + .long 0x1021F0C0 + lvx 28,0,7 + vsldoi 27,26,26,8 + .long 0x13C906C2 + .long 0x1108F0C0 + .long 0x13DA7EC2 + .long 0x1108F0C0 + .long 0x110888C0 + .long 0x1000D8C0 + vsel 29,7,6,5 + .long 0x10E7E0C0 + .long 0x1000E8C0 + .long 0x13C5FEC2 + .long 0x1000F0C0 + vxor 29,1,2 + vsel 29,2,3,29 + .long 0x108400C0 + .long 0x13C186C2 + .long 0x13DEE8C0 + .long 0x1000F0C0 + lvx 28,10,7 + mtctr 0 + b .L16_xx +.align 5 +.L16_xx: + .long 0x13CA06C2 + .long 0x1129F0C0 + .long 0x13DB7EC2 + .long 0x1129F0C0 + .long 0x112990C0 + .long 0x10E740C0 + vsel 29,6,5,4 + .long 
0x10C6E0C0 + .long 0x10E7E8C0 + .long 0x13C4FEC2 + .long 0x10E7F0C0 + vxor 29,0,1 + vsel 29,1,2,29 + .long 0x106338C0 + .long 0x13C086C2 + .long 0x13DEE8C0 + .long 0x10E7F0C0 + lvx 28,26,7 + .long 0x13CB06C2 + .long 0x114AF0C0 + .long 0x13C87EC2 + .long 0x114AF0C0 + .long 0x114A98C0 + .long 0x10C648C0 + vsel 29,5,4,3 + .long 0x10A5E0C0 + .long 0x10C6E8C0 + .long 0x13C3FEC2 + .long 0x10C6F0C0 + vxor 29,7,0 + vsel 29,0,1,29 + .long 0x104230C0 + .long 0x13C786C2 + .long 0x13DEE8C0 + .long 0x10C6F0C0 + lvx 28,27,7 + .long 0x13CC06C2 + .long 0x116BF0C0 + .long 0x13C97EC2 + .long 0x116BF0C0 + .long 0x116BC0C0 + .long 0x10A550C0 + vsel 29,4,3,2 + .long 0x1084E0C0 + .long 0x10A5E8C0 + .long 0x13C2FEC2 + .long 0x10A5F0C0 + vxor 29,6,7 + vsel 29,7,0,29 + .long 0x102128C0 + .long 0x13C686C2 + .long 0x13DEE8C0 + .long 0x10A5F0C0 + lvx 28,28,7 + .long 0x13CD06C2 + .long 0x118CF0C0 + .long 0x13CA7EC2 + .long 0x118CF0C0 + .long 0x118CC8C0 + .long 0x108458C0 + vsel 29,3,2,1 + .long 0x1063E0C0 + .long 0x1084E8C0 + .long 0x13C1FEC2 + .long 0x1084F0C0 + vxor 29,5,6 + vsel 29,6,7,29 + .long 0x100020C0 + .long 0x13C586C2 + .long 0x13DEE8C0 + .long 0x1084F0C0 + lvx 28,29,7 + .long 0x13CE06C2 + .long 0x11ADF0C0 + .long 0x13CB7EC2 + .long 0x11ADF0C0 + .long 0x11ADD0C0 + .long 0x106360C0 + vsel 29,2,1,0 + .long 0x1042E0C0 + .long 0x1063E8C0 + .long 0x13C0FEC2 + .long 0x1063F0C0 + vxor 29,4,5 + vsel 29,5,6,29 + .long 0x10E718C0 + .long 0x13C486C2 + .long 0x13DEE8C0 + .long 0x1063F0C0 + lvx 28,30,7 + .long 0x13CF06C2 + .long 0x11CEF0C0 + .long 0x13CC7EC2 + .long 0x11CEF0C0 + .long 0x11CED8C0 + .long 0x104268C0 + vsel 29,1,0,7 + .long 0x1021E0C0 + .long 0x1042E8C0 + .long 0x13C7FEC2 + .long 0x1042F0C0 + vxor 29,3,4 + vsel 29,4,5,29 + .long 0x10C610C0 + .long 0x13C386C2 + .long 0x13DEE8C0 + .long 0x1042F0C0 + lvx 28,31,7 + addi 7,7,0x80 + .long 0x13D006C2 + .long 0x11EFF0C0 + .long 0x13CD7EC2 + .long 0x11EFF0C0 + .long 0x11EF40C0 + .long 0x102170C0 + vsel 29,0,7,6 + .long 0x1000E0C0 + .long 
0x1021E8C0 + .long 0x13C6FEC2 + .long 0x1021F0C0 + vxor 29,2,3 + vsel 29,3,4,29 + .long 0x10A508C0 + .long 0x13C286C2 + .long 0x13DEE8C0 + .long 0x1021F0C0 + lvx 28,0,7 + .long 0x13D106C2 + .long 0x1210F0C0 + .long 0x13CE7EC2 + .long 0x1210F0C0 + .long 0x121048C0 + .long 0x100078C0 + vsel 29,7,6,5 + .long 0x10E7E0C0 + .long 0x1000E8C0 + .long 0x13C5FEC2 + .long 0x1000F0C0 + vxor 29,1,2 + vsel 29,2,3,29 + .long 0x108400C0 + .long 0x13C186C2 + .long 0x13DEE8C0 + .long 0x1000F0C0 + lvx 28,10,7 + .long 0x13D206C2 + .long 0x1231F0C0 + .long 0x13CF7EC2 + .long 0x1231F0C0 + .long 0x123150C0 + .long 0x10E780C0 + vsel 29,6,5,4 + .long 0x10C6E0C0 + .long 0x10E7E8C0 + .long 0x13C4FEC2 + .long 0x10E7F0C0 + vxor 29,0,1 + vsel 29,1,2,29 + .long 0x106338C0 + .long 0x13C086C2 + .long 0x13DEE8C0 + .long 0x10E7F0C0 + lvx 28,26,7 + .long 0x13D306C2 + .long 0x1252F0C0 + .long 0x13D07EC2 + .long 0x1252F0C0 + .long 0x125258C0 + .long 0x10C688C0 + vsel 29,5,4,3 + .long 0x10A5E0C0 + .long 0x10C6E8C0 + .long 0x13C3FEC2 + .long 0x10C6F0C0 + vxor 29,7,0 + vsel 29,0,1,29 + .long 0x104230C0 + .long 0x13C786C2 + .long 0x13DEE8C0 + .long 0x10C6F0C0 + lvx 28,27,7 + .long 0x13D806C2 + .long 0x1273F0C0 + .long 0x13D17EC2 + .long 0x1273F0C0 + .long 0x127360C0 + .long 0x10A590C0 + vsel 29,4,3,2 + .long 0x1084E0C0 + .long 0x10A5E8C0 + .long 0x13C2FEC2 + .long 0x10A5F0C0 + vxor 29,6,7 + vsel 29,7,0,29 + .long 0x102128C0 + .long 0x13C686C2 + .long 0x13DEE8C0 + .long 0x10A5F0C0 + lvx 28,28,7 + .long 0x13D906C2 + .long 0x1318F0C0 + .long 0x13D27EC2 + .long 0x1318F0C0 + .long 0x131868C0 + .long 0x108498C0 + vsel 29,3,2,1 + .long 0x1063E0C0 + .long 0x1084E8C0 + .long 0x13C1FEC2 + .long 0x1084F0C0 + vxor 29,5,6 + vsel 29,6,7,29 + .long 0x100020C0 + .long 0x13C586C2 + .long 0x13DEE8C0 + .long 0x1084F0C0 + lvx 28,29,7 + .long 0x13DA06C2 + .long 0x1339F0C0 + .long 0x13D37EC2 + .long 0x1339F0C0 + .long 0x133970C0 + .long 0x1063C0C0 + vsel 29,2,1,0 + .long 0x1042E0C0 + .long 0x1063E8C0 + .long 0x13C0FEC2 + .long 
0x1063F0C0 + vxor 29,4,5 + vsel 29,5,6,29 + .long 0x10E718C0 + .long 0x13C486C2 + .long 0x13DEE8C0 + .long 0x1063F0C0 + lvx 28,30,7 + .long 0x13DB06C2 + .long 0x135AF0C0 + .long 0x13D87EC2 + .long 0x135AF0C0 + .long 0x135A78C0 + .long 0x1042C8C0 + vsel 29,1,0,7 + .long 0x1021E0C0 + .long 0x1042E8C0 + .long 0x13C7FEC2 + .long 0x1042F0C0 + vxor 29,3,4 + vsel 29,4,5,29 + .long 0x10C610C0 + .long 0x13C386C2 + .long 0x13DEE8C0 + .long 0x1042F0C0 + lvx 28,31,7 + addi 7,7,0x80 + .long 0x13C806C2 + .long 0x137BF0C0 + .long 0x13D97EC2 + .long 0x137BF0C0 + .long 0x137B80C0 + .long 0x1021D0C0 + vsel 29,0,7,6 + .long 0x1000E0C0 + .long 0x1021E8C0 + .long 0x13C6FEC2 + .long 0x1021F0C0 + vxor 29,2,3 + vsel 29,3,4,29 + .long 0x10A508C0 + .long 0x13C286C2 + .long 0x13DEE8C0 + .long 0x1021F0C0 + lvx 28,0,7 + .long 0x13C906C2 + .long 0x1108F0C0 + .long 0x13DA7EC2 + .long 0x1108F0C0 + .long 0x110888C0 + .long 0x1000D8C0 + vsel 29,7,6,5 + .long 0x10E7E0C0 + .long 0x1000E8C0 + .long 0x13C5FEC2 + .long 0x1000F0C0 + vxor 29,1,2 + vsel 29,2,3,29 + .long 0x108400C0 + .long 0x13C186C2 + .long 0x13DEE8C0 + .long 0x1000F0C0 + lvx 28,10,7 + bdnz .L16_xx + + lvx 10,0,11 + subic. 
5,5,1 + lvx 11,10,11 + .long 0x100050C0 + lvx 12,26,11 + .long 0x102158C0 + lvx 13,27,11 + .long 0x104260C0 + lvx 14,28,11 + .long 0x106368C0 + lvx 15,29,11 + .long 0x108470C0 + lvx 16,30,11 + .long 0x10A578C0 + lvx 17,31,11 + .long 0x10C680C0 + .long 0x10E788C0 + bne .Loop + vperm 0,0,1,28 + vperm 2,2,3,28 + vperm 4,4,5,28 + vperm 6,6,7,28 + .long 0x7C001F99 + .long 0x7C4A1F99 + .long 0x7C9A1F99 + .long 0x7CDB1F99 + addi 11,1,207 + mtlr 8 + mtspr 256,12 + lvx 24,0,11 + lvx 25,10,11 + lvx 26,26,11 + lvx 27,27,11 + lvx 28,28,11 + lvx 29,29,11 + lvx 30,30,11 + lvx 31,31,11 + ld 26,336(1) + ld 27,344(1) + ld 28,352(1) + ld 29,360(1) + ld 30,368(1) + ld 31,376(1) + addi 1,1,384 + blr +.long 0 +.byte 0,12,4,1,0x80,6,3,0 +.long 0 +#if (!defined(_CALL_ELF) || _CALL_ELF == 1) +.size .zfs_sha512_power8,.-.zfs_sha512_power8 +.size zfs_sha512_power8,.-.zfs_sha512_power8 +#else +.size zfs_sha512_power8,.-zfs_sha512_power8 +#endif +.align 6 +.LPICmeup: + mflr 0 + bcl 20,31,$+4 + mflr 6 + addi 6,6,56 + mtlr 0 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +.space 28 +.long 0x428a2f98,0xd728ae22 +.long 0x428a2f98,0xd728ae22 +.long 0x71374491,0x23ef65cd +.long 0x71374491,0x23ef65cd +.long 0xb5c0fbcf,0xec4d3b2f +.long 0xb5c0fbcf,0xec4d3b2f +.long 0xe9b5dba5,0x8189dbbc +.long 0xe9b5dba5,0x8189dbbc +.long 0x3956c25b,0xf348b538 +.long 0x3956c25b,0xf348b538 +.long 0x59f111f1,0xb605d019 +.long 0x59f111f1,0xb605d019 +.long 0x923f82a4,0xaf194f9b +.long 0x923f82a4,0xaf194f9b +.long 0xab1c5ed5,0xda6d8118 +.long 0xab1c5ed5,0xda6d8118 +.long 0xd807aa98,0xa3030242 +.long 0xd807aa98,0xa3030242 +.long 0x12835b01,0x45706fbe +.long 0x12835b01,0x45706fbe +.long 0x243185be,0x4ee4b28c +.long 0x243185be,0x4ee4b28c +.long 0x550c7dc3,0xd5ffb4e2 +.long 0x550c7dc3,0xd5ffb4e2 +.long 0x72be5d74,0xf27b896f +.long 0x72be5d74,0xf27b896f +.long 0x80deb1fe,0x3b1696b1 +.long 0x80deb1fe,0x3b1696b1 +.long 0x9bdc06a7,0x25c71235 +.long 0x9bdc06a7,0x25c71235 +.long 0xc19bf174,0xcf692694 +.long 0xc19bf174,0xcf692694 +.long 
0xe49b69c1,0x9ef14ad2 +.long 0xe49b69c1,0x9ef14ad2 +.long 0xefbe4786,0x384f25e3 +.long 0xefbe4786,0x384f25e3 +.long 0x0fc19dc6,0x8b8cd5b5 +.long 0x0fc19dc6,0x8b8cd5b5 +.long 0x240ca1cc,0x77ac9c65 +.long 0x240ca1cc,0x77ac9c65 +.long 0x2de92c6f,0x592b0275 +.long 0x2de92c6f,0x592b0275 +.long 0x4a7484aa,0x6ea6e483 +.long 0x4a7484aa,0x6ea6e483 +.long 0x5cb0a9dc,0xbd41fbd4 +.long 0x5cb0a9dc,0xbd41fbd4 +.long 0x76f988da,0x831153b5 +.long 0x76f988da,0x831153b5 +.long 0x983e5152,0xee66dfab +.long 0x983e5152,0xee66dfab +.long 0xa831c66d,0x2db43210 +.long 0xa831c66d,0x2db43210 +.long 0xb00327c8,0x98fb213f +.long 0xb00327c8,0x98fb213f +.long 0xbf597fc7,0xbeef0ee4 +.long 0xbf597fc7,0xbeef0ee4 +.long 0xc6e00bf3,0x3da88fc2 +.long 0xc6e00bf3,0x3da88fc2 +.long 0xd5a79147,0x930aa725 +.long 0xd5a79147,0x930aa725 +.long 0x06ca6351,0xe003826f +.long 0x06ca6351,0xe003826f +.long 0x14292967,0x0a0e6e70 +.long 0x14292967,0x0a0e6e70 +.long 0x27b70a85,0x46d22ffc +.long 0x27b70a85,0x46d22ffc +.long 0x2e1b2138,0x5c26c926 +.long 0x2e1b2138,0x5c26c926 +.long 0x4d2c6dfc,0x5ac42aed +.long 0x4d2c6dfc,0x5ac42aed +.long 0x53380d13,0x9d95b3df +.long 0x53380d13,0x9d95b3df +.long 0x650a7354,0x8baf63de +.long 0x650a7354,0x8baf63de +.long 0x766a0abb,0x3c77b2a8 +.long 0x766a0abb,0x3c77b2a8 +.long 0x81c2c92e,0x47edaee6 +.long 0x81c2c92e,0x47edaee6 +.long 0x92722c85,0x1482353b +.long 0x92722c85,0x1482353b +.long 0xa2bfe8a1,0x4cf10364 +.long 0xa2bfe8a1,0x4cf10364 +.long 0xa81a664b,0xbc423001 +.long 0xa81a664b,0xbc423001 +.long 0xc24b8b70,0xd0f89791 +.long 0xc24b8b70,0xd0f89791 +.long 0xc76c51a3,0x0654be30 +.long 0xc76c51a3,0x0654be30 +.long 0xd192e819,0xd6ef5218 +.long 0xd192e819,0xd6ef5218 +.long 0xd6990624,0x5565a910 +.long 0xd6990624,0x5565a910 +.long 0xf40e3585,0x5771202a +.long 0xf40e3585,0x5771202a +.long 0x106aa070,0x32bbd1b8 +.long 0x106aa070,0x32bbd1b8 +.long 0x19a4c116,0xb8d2d0c8 +.long 0x19a4c116,0xb8d2d0c8 +.long 0x1e376c08,0x5141ab53 +.long 0x1e376c08,0x5141ab53 +.long 0x2748774c,0xdf8eeb99 
+.long 0x2748774c,0xdf8eeb99 +.long 0x34b0bcb5,0xe19b48a8 +.long 0x34b0bcb5,0xe19b48a8 +.long 0x391c0cb3,0xc5c95a63 +.long 0x391c0cb3,0xc5c95a63 +.long 0x4ed8aa4a,0xe3418acb +.long 0x4ed8aa4a,0xe3418acb +.long 0x5b9cca4f,0x7763e373 +.long 0x5b9cca4f,0x7763e373 +.long 0x682e6ff3,0xd6b2b8a3 +.long 0x682e6ff3,0xd6b2b8a3 +.long 0x748f82ee,0x5defb2fc +.long 0x748f82ee,0x5defb2fc +.long 0x78a5636f,0x43172f60 +.long 0x78a5636f,0x43172f60 +.long 0x84c87814,0xa1f0ab72 +.long 0x84c87814,0xa1f0ab72 +.long 0x8cc70208,0x1a6439ec +.long 0x8cc70208,0x1a6439ec +.long 0x90befffa,0x23631e28 +.long 0x90befffa,0x23631e28 +.long 0xa4506ceb,0xde82bde9 +.long 0xa4506ceb,0xde82bde9 +.long 0xbef9a3f7,0xb2c67915 +.long 0xbef9a3f7,0xb2c67915 +.long 0xc67178f2,0xe372532b +.long 0xc67178f2,0xe372532b +.long 0xca273ece,0xea26619c +.long 0xca273ece,0xea26619c +.long 0xd186b8c7,0x21c0c207 +.long 0xd186b8c7,0x21c0c207 +.long 0xeada7dd6,0xcde0eb1e +.long 0xeada7dd6,0xcde0eb1e +.long 0xf57d4f7f,0xee6ed178 +.long 0xf57d4f7f,0xee6ed178 +.long 0x06f067aa,0x72176fba +.long 0x06f067aa,0x72176fba +.long 0x0a637dc5,0xa2c898a6 +.long 0x0a637dc5,0xa2c898a6 +.long 0x113f9804,0xbef90dae +.long 0x113f9804,0xbef90dae +.long 0x1b710b35,0x131c471b +.long 0x1b710b35,0x131c471b +.long 0x28db77f5,0x23047d84 +.long 0x28db77f5,0x23047d84 +.long 0x32caab7b,0x40c72493 +.long 0x32caab7b,0x40c72493 +.long 0x3c9ebe0a,0x15c9bebc +.long 0x3c9ebe0a,0x15c9bebc +.long 0x431d67c4,0x9c100d4c +.long 0x431d67c4,0x9c100d4c +.long 0x4cc5d4be,0xcb3e42b6 +.long 0x4cc5d4be,0xcb3e42b6 +.long 0x597f299c,0xfc657e2a +.long 0x597f299c,0xfc657e2a +.long 0x5fcb6fab,0x3ad6faec +.long 0x5fcb6fab,0x3ad6faec +.long 0x6c44198c,0x4a475817 +.long 0x6c44198c,0x4a475817 +.long 0,0 +.long 0,0 +.long 0x00010203,0x04050607 +.long 0x10111213,0x14151617 + +#elif (defined(__PPC64__) && defined(__LITTLE_ENDIAN__)) + +.abiversion 2 +.text + +.globl zfs_sha512_power8 +.type zfs_sha512_power8,@function +.align 6 +zfs_sha512_power8: +.localentry 
zfs_sha512_power8,0 + + stdu 1,-384(1) + mflr 8 + li 10,207 + li 11,223 + stvx 24,10,1 + addi 10,10,32 + li 12,-1 + stvx 25,11,1 + addi 11,11,32 + stvx 26,10,1 + addi 10,10,32 + stvx 27,11,1 + addi 11,11,32 + stvx 28,10,1 + addi 10,10,32 + stvx 29,11,1 + addi 11,11,32 + stvx 30,10,1 + stvx 31,11,1 + li 11,-4096+255 + stw 12,332(1) + li 10,0x10 + std 26,336(1) + li 26,0x20 + std 27,344(1) + li 27,0x30 + std 28,352(1) + li 28,0x40 + std 29,360(1) + li 29,0x50 + std 30,368(1) + li 30,0x60 + std 31,376(1) + li 31,0x70 + std 8,400(1) + or 11,11,11 + + bl .LPICmeup + addi 11,1,79 + li 7,8 + lvsl 31,0,7 + vspltisb 28,0x0f + vxor 31,31,28 + .long 0x7C001E99 + .long 0x7C4A1E99 + .long 0x7C9A1E99 + vsldoi 1,0,0,8 + .long 0x7CDB1E99 + vsldoi 3,2,2,8 + vsldoi 5,4,4,8 + vsldoi 7,6,6,8 + li 0,4 + b .Loop +.align 5 +.Loop: + lvx 28,0,6 + .long 0x7D002699 + addi 4,4,16 + mr 7,6 + stvx 0,0,11 + stvx 1,10,11 + stvx 2,26,11 + stvx 3,27,11 + stvx 4,28,11 + stvx 5,29,11 + stvx 6,30,11 + stvx 7,31,11 + .long 0x10E7E0C0 + lvx 28,10,6 + vperm 8,8,8,31 + .long 0x10E740C0 + vsel 29,6,5,4 + .long 0x10C6E0C0 + .long 0x10E7E8C0 + .long 0x13C4FEC2 + .long 0x10E7F0C0 + vxor 29,0,1 + vsel 29,1,2,29 + .long 0x106338C0 + .long 0x13C086C2 + .long 0x13DEE8C0 + .long 0x10E7F0C0 + lvx 28,26,7 + .long 0x7D402699 + addi 4,4,16 + vsldoi 9,8,8,8 + .long 0x10C648C0 + vsel 29,5,4,3 + .long 0x10A5E0C0 + .long 0x10C6E8C0 + .long 0x13C3FEC2 + .long 0x10C6F0C0 + vxor 29,7,0 + vsel 29,0,1,29 + .long 0x104230C0 + .long 0x13C786C2 + .long 0x13DEE8C0 + .long 0x10C6F0C0 + lvx 28,27,7 + vperm 10,10,10,31 + .long 0x10A550C0 + vsel 29,4,3,2 + .long 0x1084E0C0 + .long 0x10A5E8C0 + .long 0x13C2FEC2 + .long 0x10A5F0C0 + vxor 29,6,7 + vsel 29,7,0,29 + .long 0x102128C0 + .long 0x13C686C2 + .long 0x13DEE8C0 + .long 0x10A5F0C0 + lvx 28,28,7 + .long 0x7D802699 + addi 4,4,16 + vsldoi 11,10,10,8 + .long 0x108458C0 + vsel 29,3,2,1 + .long 0x1063E0C0 + .long 0x1084E8C0 + .long 0x13C1FEC2 + .long 0x1084F0C0 + vxor 29,5,6 + vsel 
29,6,7,29 + .long 0x100020C0 + .long 0x13C586C2 + .long 0x13DEE8C0 + .long 0x1084F0C0 + lvx 28,29,7 + vperm 12,12,12,31 + .long 0x106360C0 + vsel 29,2,1,0 + .long 0x1042E0C0 + .long 0x1063E8C0 + .long 0x13C0FEC2 + .long 0x1063F0C0 + vxor 29,4,5 + vsel 29,5,6,29 + .long 0x10E718C0 + .long 0x13C486C2 + .long 0x13DEE8C0 + .long 0x1063F0C0 + lvx 28,30,7 + .long 0x7DC02699 + addi 4,4,16 + vsldoi 13,12,12,8 + .long 0x104268C0 + vsel 29,1,0,7 + .long 0x1021E0C0 + .long 0x1042E8C0 + .long 0x13C7FEC2 + .long 0x1042F0C0 + vxor 29,3,4 + vsel 29,4,5,29 + .long 0x10C610C0 + .long 0x13C386C2 + .long 0x13DEE8C0 + .long 0x1042F0C0 + lvx 28,31,7 + addi 7,7,0x80 + vperm 14,14,14,31 + .long 0x102170C0 + vsel 29,0,7,6 + .long 0x1000E0C0 + .long 0x1021E8C0 + .long 0x13C6FEC2 + .long 0x1021F0C0 + vxor 29,2,3 + vsel 29,3,4,29 + .long 0x10A508C0 + .long 0x13C286C2 + .long 0x13DEE8C0 + .long 0x1021F0C0 + lvx 28,0,7 + .long 0x7E002699 + addi 4,4,16 + vsldoi 15,14,14,8 + .long 0x100078C0 + vsel 29,7,6,5 + .long 0x10E7E0C0 + .long 0x1000E8C0 + .long 0x13C5FEC2 + .long 0x1000F0C0 + vxor 29,1,2 + vsel 29,2,3,29 + .long 0x108400C0 + .long 0x13C186C2 + .long 0x13DEE8C0 + .long 0x1000F0C0 + lvx 28,10,7 + vperm 16,16,16,31 + .long 0x10E780C0 + vsel 29,6,5,4 + .long 0x10C6E0C0 + .long 0x10E7E8C0 + .long 0x13C4FEC2 + .long 0x10E7F0C0 + vxor 29,0,1 + vsel 29,1,2,29 + .long 0x106338C0 + .long 0x13C086C2 + .long 0x13DEE8C0 + .long 0x10E7F0C0 + lvx 28,26,7 + .long 0x7E402699 + addi 4,4,16 + vsldoi 17,16,16,8 + .long 0x10C688C0 + vsel 29,5,4,3 + .long 0x10A5E0C0 + .long 0x10C6E8C0 + .long 0x13C3FEC2 + .long 0x10C6F0C0 + vxor 29,7,0 + vsel 29,0,1,29 + .long 0x104230C0 + .long 0x13C786C2 + .long 0x13DEE8C0 + .long 0x10C6F0C0 + lvx 28,27,7 + vperm 18,18,18,31 + .long 0x10A590C0 + vsel 29,4,3,2 + .long 0x1084E0C0 + .long 0x10A5E8C0 + .long 0x13C2FEC2 + .long 0x10A5F0C0 + vxor 29,6,7 + vsel 29,7,0,29 + .long 0x102128C0 + .long 0x13C686C2 + .long 0x13DEE8C0 + .long 0x10A5F0C0 + lvx 28,28,7 + .long 0x7F002699 + 
addi 4,4,16 + vsldoi 19,18,18,8 + .long 0x108498C0 + vsel 29,3,2,1 + .long 0x1063E0C0 + .long 0x1084E8C0 + .long 0x13C1FEC2 + .long 0x1084F0C0 + vxor 29,5,6 + vsel 29,6,7,29 + .long 0x100020C0 + .long 0x13C586C2 + .long 0x13DEE8C0 + .long 0x1084F0C0 + lvx 28,29,7 + vperm 24,24,24,31 + .long 0x1063C0C0 + vsel 29,2,1,0 + .long 0x1042E0C0 + .long 0x1063E8C0 + .long 0x13C0FEC2 + .long 0x1063F0C0 + vxor 29,4,5 + vsel 29,5,6,29 + .long 0x10E718C0 + .long 0x13C486C2 + .long 0x13DEE8C0 + .long 0x1063F0C0 + lvx 28,30,7 + .long 0x7F402699 + addi 4,4,16 + vsldoi 25,24,24,8 + .long 0x1042C8C0 + vsel 29,1,0,7 + .long 0x1021E0C0 + .long 0x1042E8C0 + .long 0x13C7FEC2 + .long 0x1042F0C0 + vxor 29,3,4 + vsel 29,4,5,29 + .long 0x10C610C0 + .long 0x13C386C2 + .long 0x13DEE8C0 + .long 0x1042F0C0 + lvx 28,31,7 + addi 7,7,0x80 + vperm 26,26,26,31 + .long 0x1021D0C0 + vsel 29,0,7,6 + .long 0x1000E0C0 + .long 0x1021E8C0 + .long 0x13C6FEC2 + .long 0x1021F0C0 + vxor 29,2,3 + vsel 29,3,4,29 + .long 0x10A508C0 + .long 0x13C286C2 + .long 0x13DEE8C0 + .long 0x1021F0C0 + lvx 28,0,7 + vsldoi 27,26,26,8 + .long 0x13C906C2 + .long 0x1108F0C0 + .long 0x13DA7EC2 + .long 0x1108F0C0 + .long 0x110888C0 + .long 0x1000D8C0 + vsel 29,7,6,5 + .long 0x10E7E0C0 + .long 0x1000E8C0 + .long 0x13C5FEC2 + .long 0x1000F0C0 + vxor 29,1,2 + vsel 29,2,3,29 + .long 0x108400C0 + .long 0x13C186C2 + .long 0x13DEE8C0 + .long 0x1000F0C0 + lvx 28,10,7 + mtctr 0 + b .L16_xx +.align 5 +.L16_xx: + .long 0x13CA06C2 + .long 0x1129F0C0 + .long 0x13DB7EC2 + .long 0x1129F0C0 + .long 0x112990C0 + .long 0x10E740C0 + vsel 29,6,5,4 + .long 0x10C6E0C0 + .long 0x10E7E8C0 + .long 0x13C4FEC2 + .long 0x10E7F0C0 + vxor 29,0,1 + vsel 29,1,2,29 + .long 0x106338C0 + .long 0x13C086C2 + .long 0x13DEE8C0 + .long 0x10E7F0C0 + lvx 28,26,7 + .long 0x13CB06C2 + .long 0x114AF0C0 + .long 0x13C87EC2 + .long 0x114AF0C0 + .long 0x114A98C0 + .long 0x10C648C0 + vsel 29,5,4,3 + .long 0x10A5E0C0 + .long 0x10C6E8C0 + .long 0x13C3FEC2 + .long 0x10C6F0C0 + vxor 
29,7,0 + vsel 29,0,1,29 + .long 0x104230C0 + .long 0x13C786C2 + .long 0x13DEE8C0 + .long 0x10C6F0C0 + lvx 28,27,7 + .long 0x13CC06C2 + .long 0x116BF0C0 + .long 0x13C97EC2 + .long 0x116BF0C0 + .long 0x116BC0C0 + .long 0x10A550C0 + vsel 29,4,3,2 + .long 0x1084E0C0 + .long 0x10A5E8C0 + .long 0x13C2FEC2 + .long 0x10A5F0C0 + vxor 29,6,7 + vsel 29,7,0,29 + .long 0x102128C0 + .long 0x13C686C2 + .long 0x13DEE8C0 + .long 0x10A5F0C0 + lvx 28,28,7 + .long 0x13CD06C2 + .long 0x118CF0C0 + .long 0x13CA7EC2 + .long 0x118CF0C0 + .long 0x118CC8C0 + .long 0x108458C0 + vsel 29,3,2,1 + .long 0x1063E0C0 + .long 0x1084E8C0 + .long 0x13C1FEC2 + .long 0x1084F0C0 + vxor 29,5,6 + vsel 29,6,7,29 + .long 0x100020C0 + .long 0x13C586C2 + .long 0x13DEE8C0 + .long 0x1084F0C0 + lvx 28,29,7 + .long 0x13CE06C2 + .long 0x11ADF0C0 + .long 0x13CB7EC2 + .long 0x11ADF0C0 + .long 0x11ADD0C0 + .long 0x106360C0 + vsel 29,2,1,0 + .long 0x1042E0C0 + .long 0x1063E8C0 + .long 0x13C0FEC2 + .long 0x1063F0C0 + vxor 29,4,5 + vsel 29,5,6,29 + .long 0x10E718C0 + .long 0x13C486C2 + .long 0x13DEE8C0 + .long 0x1063F0C0 + lvx 28,30,7 + .long 0x13CF06C2 + .long 0x11CEF0C0 + .long 0x13CC7EC2 + .long 0x11CEF0C0 + .long 0x11CED8C0 + .long 0x104268C0 + vsel 29,1,0,7 + .long 0x1021E0C0 + .long 0x1042E8C0 + .long 0x13C7FEC2 + .long 0x1042F0C0 + vxor 29,3,4 + vsel 29,4,5,29 + .long 0x10C610C0 + .long 0x13C386C2 + .long 0x13DEE8C0 + .long 0x1042F0C0 + lvx 28,31,7 + addi 7,7,0x80 + .long 0x13D006C2 + .long 0x11EFF0C0 + .long 0x13CD7EC2 + .long 0x11EFF0C0 + .long 0x11EF40C0 + .long 0x102170C0 + vsel 29,0,7,6 + .long 0x1000E0C0 + .long 0x1021E8C0 + .long 0x13C6FEC2 + .long 0x1021F0C0 + vxor 29,2,3 + vsel 29,3,4,29 + .long 0x10A508C0 + .long 0x13C286C2 + .long 0x13DEE8C0 + .long 0x1021F0C0 + lvx 28,0,7 + .long 0x13D106C2 + .long 0x1210F0C0 + .long 0x13CE7EC2 + .long 0x1210F0C0 + .long 0x121048C0 + .long 0x100078C0 + vsel 29,7,6,5 + .long 0x10E7E0C0 + .long 0x1000E8C0 + .long 0x13C5FEC2 + .long 0x1000F0C0 + vxor 29,1,2 + vsel 
29,2,3,29 + .long 0x108400C0 + .long 0x13C186C2 + .long 0x13DEE8C0 + .long 0x1000F0C0 + lvx 28,10,7 + .long 0x13D206C2 + .long 0x1231F0C0 + .long 0x13CF7EC2 + .long 0x1231F0C0 + .long 0x123150C0 + .long 0x10E780C0 + vsel 29,6,5,4 + .long 0x10C6E0C0 + .long 0x10E7E8C0 + .long 0x13C4FEC2 + .long 0x10E7F0C0 + vxor 29,0,1 + vsel 29,1,2,29 + .long 0x106338C0 + .long 0x13C086C2 + .long 0x13DEE8C0 + .long 0x10E7F0C0 + lvx 28,26,7 + .long 0x13D306C2 + .long 0x1252F0C0 + .long 0x13D07EC2 + .long 0x1252F0C0 + .long 0x125258C0 + .long 0x10C688C0 + vsel 29,5,4,3 + .long 0x10A5E0C0 + .long 0x10C6E8C0 + .long 0x13C3FEC2 + .long 0x10C6F0C0 + vxor 29,7,0 + vsel 29,0,1,29 + .long 0x104230C0 + .long 0x13C786C2 + .long 0x13DEE8C0 + .long 0x10C6F0C0 + lvx 28,27,7 + .long 0x13D806C2 + .long 0x1273F0C0 + .long 0x13D17EC2 + .long 0x1273F0C0 + .long 0x127360C0 + .long 0x10A590C0 + vsel 29,4,3,2 + .long 0x1084E0C0 + .long 0x10A5E8C0 + .long 0x13C2FEC2 + .long 0x10A5F0C0 + vxor 29,6,7 + vsel 29,7,0,29 + .long 0x102128C0 + .long 0x13C686C2 + .long 0x13DEE8C0 + .long 0x10A5F0C0 + lvx 28,28,7 + .long 0x13D906C2 + .long 0x1318F0C0 + .long 0x13D27EC2 + .long 0x1318F0C0 + .long 0x131868C0 + .long 0x108498C0 + vsel 29,3,2,1 + .long 0x1063E0C0 + .long 0x1084E8C0 + .long 0x13C1FEC2 + .long 0x1084F0C0 + vxor 29,5,6 + vsel 29,6,7,29 + .long 0x100020C0 + .long 0x13C586C2 + .long 0x13DEE8C0 + .long 0x1084F0C0 + lvx 28,29,7 + .long 0x13DA06C2 + .long 0x1339F0C0 + .long 0x13D37EC2 + .long 0x1339F0C0 + .long 0x133970C0 + .long 0x1063C0C0 + vsel 29,2,1,0 + .long 0x1042E0C0 + .long 0x1063E8C0 + .long 0x13C0FEC2 + .long 0x1063F0C0 + vxor 29,4,5 + vsel 29,5,6,29 + .long 0x10E718C0 + .long 0x13C486C2 + .long 0x13DEE8C0 + .long 0x1063F0C0 + lvx 28,30,7 + .long 0x13DB06C2 + .long 0x135AF0C0 + .long 0x13D87EC2 + .long 0x135AF0C0 + .long 0x135A78C0 + .long 0x1042C8C0 + vsel 29,1,0,7 + .long 0x1021E0C0 + .long 0x1042E8C0 + .long 0x13C7FEC2 + .long 0x1042F0C0 + vxor 29,3,4 + vsel 29,4,5,29 + .long 0x10C610C0 + .long 
0x13C386C2 + .long 0x13DEE8C0 + .long 0x1042F0C0 + lvx 28,31,7 + addi 7,7,0x80 + .long 0x13C806C2 + .long 0x137BF0C0 + .long 0x13D97EC2 + .long 0x137BF0C0 + .long 0x137B80C0 + .long 0x1021D0C0 + vsel 29,0,7,6 + .long 0x1000E0C0 + .long 0x1021E8C0 + .long 0x13C6FEC2 + .long 0x1021F0C0 + vxor 29,2,3 + vsel 29,3,4,29 + .long 0x10A508C0 + .long 0x13C286C2 + .long 0x13DEE8C0 + .long 0x1021F0C0 + lvx 28,0,7 + .long 0x13C906C2 + .long 0x1108F0C0 + .long 0x13DA7EC2 + .long 0x1108F0C0 + .long 0x110888C0 + .long 0x1000D8C0 + vsel 29,7,6,5 + .long 0x10E7E0C0 + .long 0x1000E8C0 + .long 0x13C5FEC2 + .long 0x1000F0C0 + vxor 29,1,2 + vsel 29,2,3,29 + .long 0x108400C0 + .long 0x13C186C2 + .long 0x13DEE8C0 + .long 0x1000F0C0 + lvx 28,10,7 + bdnz .L16_xx + + lvx 10,0,11 + subic. 5,5,1 + lvx 11,10,11 + .long 0x100050C0 + lvx 12,26,11 + .long 0x102158C0 + lvx 13,27,11 + .long 0x104260C0 + lvx 14,28,11 + .long 0x106368C0 + lvx 15,29,11 + .long 0x108470C0 + lvx 16,30,11 + .long 0x10A578C0 + lvx 17,31,11 + .long 0x10C680C0 + .long 0x10E788C0 + bne .Loop + vperm 0,0,1,28 + vperm 2,2,3,28 + vperm 4,4,5,28 + vperm 6,6,7,28 + .long 0x7C001F99 + .long 0x7C4A1F99 + .long 0x7C9A1F99 + .long 0x7CDB1F99 + addi 11,1,207 + mtlr 8 + or 12,12,12 + lvx 24,0,11 + lvx 25,10,11 + lvx 26,26,11 + lvx 27,27,11 + lvx 28,28,11 + lvx 29,29,11 + lvx 30,30,11 + lvx 31,31,11 + ld 26,336(1) + ld 27,344(1) + ld 28,352(1) + ld 29,360(1) + ld 30,368(1) + ld 31,376(1) + addi 1,1,384 + blr +.long 0 +.byte 0,12,4,1,0x80,6,3,0 +.long 0 +.size zfs_sha512_power8,.-zfs_sha512_power8 +.align 6 +.LPICmeup: + mflr 0 + bcl 20,31,$+4 + mflr 6 + addi 6,6,56 + mtlr 0 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +.space 28 +.long 0xd728ae22,0x428a2f98 +.long 0xd728ae22,0x428a2f98 +.long 0x23ef65cd,0x71374491 +.long 0x23ef65cd,0x71374491 +.long 0xec4d3b2f,0xb5c0fbcf +.long 0xec4d3b2f,0xb5c0fbcf +.long 0x8189dbbc,0xe9b5dba5 +.long 0x8189dbbc,0xe9b5dba5 +.long 0xf348b538,0x3956c25b +.long 0xf348b538,0x3956c25b +.long 
0xb605d019,0x59f111f1 +.long 0xb605d019,0x59f111f1 +.long 0xaf194f9b,0x923f82a4 +.long 0xaf194f9b,0x923f82a4 +.long 0xda6d8118,0xab1c5ed5 +.long 0xda6d8118,0xab1c5ed5 +.long 0xa3030242,0xd807aa98 +.long 0xa3030242,0xd807aa98 +.long 0x45706fbe,0x12835b01 +.long 0x45706fbe,0x12835b01 +.long 0x4ee4b28c,0x243185be +.long 0x4ee4b28c,0x243185be +.long 0xd5ffb4e2,0x550c7dc3 +.long 0xd5ffb4e2,0x550c7dc3 +.long 0xf27b896f,0x72be5d74 +.long 0xf27b896f,0x72be5d74 +.long 0x3b1696b1,0x80deb1fe +.long 0x3b1696b1,0x80deb1fe +.long 0x25c71235,0x9bdc06a7 +.long 0x25c71235,0x9bdc06a7 +.long 0xcf692694,0xc19bf174 +.long 0xcf692694,0xc19bf174 +.long 0x9ef14ad2,0xe49b69c1 +.long 0x9ef14ad2,0xe49b69c1 +.long 0x384f25e3,0xefbe4786 +.long 0x384f25e3,0xefbe4786 +.long 0x8b8cd5b5,0x0fc19dc6 +.long 0x8b8cd5b5,0x0fc19dc6 +.long 0x77ac9c65,0x240ca1cc +.long 0x77ac9c65,0x240ca1cc +.long 0x592b0275,0x2de92c6f +.long 0x592b0275,0x2de92c6f +.long 0x6ea6e483,0x4a7484aa +.long 0x6ea6e483,0x4a7484aa +.long 0xbd41fbd4,0x5cb0a9dc +.long 0xbd41fbd4,0x5cb0a9dc +.long 0x831153b5,0x76f988da +.long 0x831153b5,0x76f988da +.long 0xee66dfab,0x983e5152 +.long 0xee66dfab,0x983e5152 +.long 0x2db43210,0xa831c66d +.long 0x2db43210,0xa831c66d +.long 0x98fb213f,0xb00327c8 +.long 0x98fb213f,0xb00327c8 +.long 0xbeef0ee4,0xbf597fc7 +.long 0xbeef0ee4,0xbf597fc7 +.long 0x3da88fc2,0xc6e00bf3 +.long 0x3da88fc2,0xc6e00bf3 +.long 0x930aa725,0xd5a79147 +.long 0x930aa725,0xd5a79147 +.long 0xe003826f,0x06ca6351 +.long 0xe003826f,0x06ca6351 +.long 0x0a0e6e70,0x14292967 +.long 0x0a0e6e70,0x14292967 +.long 0x46d22ffc,0x27b70a85 +.long 0x46d22ffc,0x27b70a85 +.long 0x5c26c926,0x2e1b2138 +.long 0x5c26c926,0x2e1b2138 +.long 0x5ac42aed,0x4d2c6dfc +.long 0x5ac42aed,0x4d2c6dfc +.long 0x9d95b3df,0x53380d13 +.long 0x9d95b3df,0x53380d13 +.long 0x8baf63de,0x650a7354 +.long 0x8baf63de,0x650a7354 +.long 0x3c77b2a8,0x766a0abb +.long 0x3c77b2a8,0x766a0abb +.long 0x47edaee6,0x81c2c92e +.long 0x47edaee6,0x81c2c92e +.long 0x1482353b,0x92722c85 
+.long 0x1482353b,0x92722c85 +.long 0x4cf10364,0xa2bfe8a1 +.long 0x4cf10364,0xa2bfe8a1 +.long 0xbc423001,0xa81a664b +.long 0xbc423001,0xa81a664b +.long 0xd0f89791,0xc24b8b70 +.long 0xd0f89791,0xc24b8b70 +.long 0x0654be30,0xc76c51a3 +.long 0x0654be30,0xc76c51a3 +.long 0xd6ef5218,0xd192e819 +.long 0xd6ef5218,0xd192e819 +.long 0x5565a910,0xd6990624 +.long 0x5565a910,0xd6990624 +.long 0x5771202a,0xf40e3585 +.long 0x5771202a,0xf40e3585 +.long 0x32bbd1b8,0x106aa070 +.long 0x32bbd1b8,0x106aa070 +.long 0xb8d2d0c8,0x19a4c116 +.long 0xb8d2d0c8,0x19a4c116 +.long 0x5141ab53,0x1e376c08 +.long 0x5141ab53,0x1e376c08 +.long 0xdf8eeb99,0x2748774c +.long 0xdf8eeb99,0x2748774c +.long 0xe19b48a8,0x34b0bcb5 +.long 0xe19b48a8,0x34b0bcb5 +.long 0xc5c95a63,0x391c0cb3 +.long 0xc5c95a63,0x391c0cb3 +.long 0xe3418acb,0x4ed8aa4a +.long 0xe3418acb,0x4ed8aa4a +.long 0x7763e373,0x5b9cca4f +.long 0x7763e373,0x5b9cca4f +.long 0xd6b2b8a3,0x682e6ff3 +.long 0xd6b2b8a3,0x682e6ff3 +.long 0x5defb2fc,0x748f82ee +.long 0x5defb2fc,0x748f82ee +.long 0x43172f60,0x78a5636f +.long 0x43172f60,0x78a5636f +.long 0xa1f0ab72,0x84c87814 +.long 0xa1f0ab72,0x84c87814 +.long 0x1a6439ec,0x8cc70208 +.long 0x1a6439ec,0x8cc70208 +.long 0x23631e28,0x90befffa +.long 0x23631e28,0x90befffa +.long 0xde82bde9,0xa4506ceb +.long 0xde82bde9,0xa4506ceb +.long 0xb2c67915,0xbef9a3f7 +.long 0xb2c67915,0xbef9a3f7 +.long 0xe372532b,0xc67178f2 +.long 0xe372532b,0xc67178f2 +.long 0xea26619c,0xca273ece +.long 0xea26619c,0xca273ece +.long 0x21c0c207,0xd186b8c7 +.long 0x21c0c207,0xd186b8c7 +.long 0xcde0eb1e,0xeada7dd6 +.long 0xcde0eb1e,0xeada7dd6 +.long 0xee6ed178,0xf57d4f7f +.long 0xee6ed178,0xf57d4f7f +.long 0x72176fba,0x06f067aa +.long 0x72176fba,0x06f067aa +.long 0xa2c898a6,0x0a637dc5 +.long 0xa2c898a6,0x0a637dc5 +.long 0xbef90dae,0x113f9804 +.long 0xbef90dae,0x113f9804 +.long 0x131c471b,0x1b710b35 +.long 0x131c471b,0x1b710b35 +.long 0x23047d84,0x28db77f5 +.long 0x23047d84,0x28db77f5 +.long 0x40c72493,0x32caab7b +.long 
0x40c72493,0x32caab7b +.long 0x15c9bebc,0x3c9ebe0a +.long 0x15c9bebc,0x3c9ebe0a +.long 0x9c100d4c,0x431d67c4 +.long 0x9c100d4c,0x431d67c4 +.long 0xcb3e42b6,0x4cc5d4be +.long 0xcb3e42b6,0x4cc5d4be +.long 0xfc657e2a,0x597f299c +.long 0xfc657e2a,0x597f299c +.long 0x3ad6faec,0x5fcb6fab +.long 0x3ad6faec,0x5fcb6fab +.long 0x4a475817,0x6c44198c +.long 0x4a475817,0x6c44198c +.long 0,0 +.long 0,0 +.long 0x14151617,0x10111213 +.long 0x04050607,0x00010203 + +#endif diff --git a/sys/contrib/openzfs/module/icp/asm-ppc64/sha2/sha512-ppc.S b/sys/contrib/openzfs/module/icp/asm-ppc64/sha2/sha512-ppc.S new file mode 100644 index 000000000000..57213f68abc5 --- /dev/null +++ b/sys/contrib/openzfs/module/icp/asm-ppc64/sha2/sha512-ppc.S @@ -0,0 +1,2973 @@ +/* + * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de> + * - modified assembly to fit into OpenZFS + */ + +#if (defined(__PPC64__) && defined(__BIG_ENDIAN__)) + +#if (!defined(_CALL_ELF) || _CALL_ELF == 1) +.text + +.globl zfs_sha512_ppc +.globl .zfs_sha512_ppc +.type zfs_sha512_ppc,@function +.section ".opd","aw" +.align 3 +zfs_sha512_ppc: +.quad .zfs_sha512_ppc,.TOC.@tocbase,0 +.previous +.align 6 +.zfs_sha512_ppc: +#else +.abiversion 2 +.text + +.globl zfs_sha512_ppc +.type zfs_sha512_ppc,@function +.align 6 +zfs_sha512_ppc: +.localentry zfs_sha512_ppc,0 +#endif + stdu 1,-384(1) + mflr 0 + sldi 5,5,7 + + std 3,208(1) + + std 14,240(1) + std 15,248(1) + std 16,256(1) + std 17,264(1) + std 18,272(1) + std 19,280(1) + std 20,288(1) + std 21,296(1) + std 22,304(1) + std 23,312(1) + std 24,320(1) + std 25,328(1) + std 26,336(1) + std 27,344(1) + std 28,352(1) + std 29,360(1) + std 30,368(1) + std 31,376(1) + std 0,400(1) + ld 8,0(3) + mr 31,4 + ld 9,8(3) + ld 10,16(3) + ld 11,24(3) + ld 12,32(3) + ld 6,40(3) + ld 14,48(3) + ld 15,56(3) + bl .LPICmeup +.LPICedup: + andi. 0,31,3 + bne .Lunaligned +.Laligned: + add 5,31,5 + std 5,192(1) + std 31,200(1) + bl .Lsha2_block_private + b .Ldone + + + + + + + +.align 4 +.Lunaligned: + subfic 0,31,4096 + andi. 0,0,3968 + beq .Lcross_page + cmpld 5,0 + ble .Laligned + subfc 5,0,5 + add 0,31,0 + std 5,184(1) + std 0,192(1) + std 31,200(1) + bl .Lsha2_block_private + + ld 5,184(1) +.Lcross_page: + li 0,32 + mtctr 0 + addi 20,1,48 +.Lmemcpy: + lbz 16,0(31) + lbz 17,1(31) + lbz 18,2(31) + lbz 19,3(31) + addi 31,31,4 + stb 16,0(20) + stb 17,1(20) + stb 18,2(20) + stb 19,3(20) + addi 20,20,4 + bdnz .Lmemcpy + std 31,176(1) + addi 0,1,176 + addi 31,1,48 + std 5,184(1) + std 0,192(1) + std 31,200(1) + bl .Lsha2_block_private + ld 31,176(1) + ld 5,184(1) + addic. 
5,5,-128 + bne .Lunaligned + +.Ldone: + ld 0,400(1) + ld 14,240(1) + ld 15,248(1) + ld 16,256(1) + ld 17,264(1) + ld 18,272(1) + ld 19,280(1) + ld 20,288(1) + ld 21,296(1) + ld 22,304(1) + ld 23,312(1) + ld 24,320(1) + ld 25,328(1) + ld 26,336(1) + ld 27,344(1) + ld 28,352(1) + ld 29,360(1) + ld 30,368(1) + ld 31,376(1) + mtlr 0 + addi 1,1,384 + blr +.long 0 +.byte 0,12,4,1,0x80,18,3,0 +.long 0 +.align 4 +.Lsha2_block_private: + ld 0,0(7) + lwz 5,0(31) + lwz 16,4(31) + insrdi 16,5,32,0 + rotrdi 3,12,14 + rotrdi 4,12,18 + and 5,6,12 + xor 3,3,4 + add 15,15,0 + andc 0,14,12 + rotrdi 4,4,23 + or 5,5,0 + add 15,15,16 + xor 3,3,4 + add 15,15,5 + add 15,15,3 + + rotrdi 3,8,28 + rotrdi 4,8,34 + and 5,8,9 + and 0,8,10 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,9,10 + xor 3,3,4 + add 11,11,15 + xor 5,5,0 + ld 0,8(7) + add 15,15,3 + add 15,15,5 + + lwz 5,8(31) + lwz 17,12(31) + insrdi 17,5,32,0 + rotrdi 3,11,14 + rotrdi 4,11,18 + and 5,12,11 + xor 3,3,4 + add 14,14,0 + andc 0,6,11 + rotrdi 4,4,23 + or 5,5,0 + add 14,14,17 + xor 3,3,4 + add 14,14,5 + add 14,14,3 + + rotrdi 3,15,28 + rotrdi 4,15,34 + and 5,15,8 + and 0,15,9 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,8,9 + xor 3,3,4 + add 10,10,14 + xor 5,5,0 + ld 0,16(7) + add 14,14,3 + add 14,14,5 + + lwz 5,16(31) + lwz 18,20(31) + insrdi 18,5,32,0 + rotrdi 3,10,14 + rotrdi 4,10,18 + and 5,11,10 + xor 3,3,4 + add 6,6,0 + andc 0,12,10 + rotrdi 4,4,23 + or 5,5,0 + add 6,6,18 + xor 3,3,4 + add 6,6,5 + add 6,6,3 + + rotrdi 3,14,28 + rotrdi 4,14,34 + and 5,14,15 + and 0,14,8 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,15,8 + xor 3,3,4 + add 9,9,6 + xor 5,5,0 + ld 0,24(7) + add 6,6,3 + add 6,6,5 + + lwz 5,24(31) + lwz 19,28(31) + insrdi 19,5,32,0 + rotrdi 3,9,14 + rotrdi 4,9,18 + and 5,10,9 + xor 3,3,4 + add 12,12,0 + andc 0,11,9 + rotrdi 4,4,23 + or 5,5,0 + add 12,12,19 + xor 3,3,4 + add 12,12,5 + add 12,12,3 + + rotrdi 3,6,28 + rotrdi 4,6,34 + and 5,6,14 + and 0,6,15 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,14,15 
+ xor 3,3,4 + add 8,8,12 + xor 5,5,0 + ld 0,32(7) + add 12,12,3 + add 12,12,5 + + lwz 5,32(31) + lwz 20,36(31) + insrdi 20,5,32,0 + rotrdi 3,8,14 + rotrdi 4,8,18 + and 5,9,8 + xor 3,3,4 + add 11,11,0 + andc 0,10,8 + rotrdi 4,4,23 + or 5,5,0 + add 11,11,20 + xor 3,3,4 + add 11,11,5 + add 11,11,3 + + rotrdi 3,12,28 + rotrdi 4,12,34 + and 5,12,6 + and 0,12,14 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,6,14 + xor 3,3,4 + add 15,15,11 + xor 5,5,0 + ld 0,40(7) + add 11,11,3 + add 11,11,5 + + lwz 5,40(31) + lwz 21,44(31) + insrdi 21,5,32,0 + rotrdi 3,15,14 + rotrdi 4,15,18 + and 5,8,15 + xor 3,3,4 + add 10,10,0 + andc 0,9,15 + rotrdi 4,4,23 + or 5,5,0 + add 10,10,21 + xor 3,3,4 + add 10,10,5 + add 10,10,3 + + rotrdi 3,11,28 + rotrdi 4,11,34 + and 5,11,12 + and 0,11,6 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,12,6 + xor 3,3,4 + add 14,14,10 + xor 5,5,0 + ld 0,48(7) + add 10,10,3 + add 10,10,5 + + lwz 5,48(31) + lwz 22,52(31) + insrdi 22,5,32,0 + rotrdi 3,14,14 + rotrdi 4,14,18 + and 5,15,14 + xor 3,3,4 + add 9,9,0 + andc 0,8,14 + rotrdi 4,4,23 + or 5,5,0 + add 9,9,22 + xor 3,3,4 + add 9,9,5 + add 9,9,3 + + rotrdi 3,10,28 + rotrdi 4,10,34 + and 5,10,11 + and 0,10,12 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,11,12 + xor 3,3,4 + add 6,6,9 + xor 5,5,0 + ld 0,56(7) + add 9,9,3 + add 9,9,5 + + lwz 5,56(31) + lwz 23,60(31) + insrdi 23,5,32,0 + rotrdi 3,6,14 + rotrdi 4,6,18 + and 5,14,6 + xor 3,3,4 + add 8,8,0 + andc 0,15,6 + rotrdi 4,4,23 + or 5,5,0 + add 8,8,23 + xor 3,3,4 + add 8,8,5 + add 8,8,3 + + rotrdi 3,9,28 + rotrdi 4,9,34 + and 5,9,10 + and 0,9,11 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,10,11 + xor 3,3,4 + add 12,12,8 + xor 5,5,0 + ld 0,64(7) + add 8,8,3 + add 8,8,5 + + lwz 5,64(31) + lwz 24,68(31) + insrdi 24,5,32,0 + rotrdi 3,12,14 + rotrdi 4,12,18 + and 5,6,12 + xor 3,3,4 + add 15,15,0 + andc 0,14,12 + rotrdi 4,4,23 + or 5,5,0 + add 15,15,24 + xor 3,3,4 + add 15,15,5 + add 15,15,3 + + rotrdi 3,8,28 + rotrdi 4,8,34 + and 5,8,9 + and 0,8,10 + xor 
3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,9,10 + xor 3,3,4 + add 11,11,15 + xor 5,5,0 + ld 0,72(7) + add 15,15,3 + add 15,15,5 + + lwz 5,72(31) + lwz 25,76(31) + insrdi 25,5,32,0 + rotrdi 3,11,14 + rotrdi 4,11,18 + and 5,12,11 + xor 3,3,4 + add 14,14,0 + andc 0,6,11 + rotrdi 4,4,23 + or 5,5,0 + add 14,14,25 + xor 3,3,4 + add 14,14,5 + add 14,14,3 + + rotrdi 3,15,28 + rotrdi 4,15,34 + and 5,15,8 + and 0,15,9 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,8,9 + xor 3,3,4 + add 10,10,14 + xor 5,5,0 + ld 0,80(7) + add 14,14,3 + add 14,14,5 + + lwz 5,80(31) + lwz 26,84(31) + insrdi 26,5,32,0 + rotrdi 3,10,14 + rotrdi 4,10,18 + and 5,11,10 + xor 3,3,4 + add 6,6,0 + andc 0,12,10 + rotrdi 4,4,23 + or 5,5,0 + add 6,6,26 + xor 3,3,4 + add 6,6,5 + add 6,6,3 + + rotrdi 3,14,28 + rotrdi 4,14,34 + and 5,14,15 + and 0,14,8 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,15,8 + xor 3,3,4 + add 9,9,6 + xor 5,5,0 + ld 0,88(7) + add 6,6,3 + add 6,6,5 + + lwz 5,88(31) + lwz 27,92(31) + insrdi 27,5,32,0 + rotrdi 3,9,14 + rotrdi 4,9,18 + and 5,10,9 + xor 3,3,4 + add 12,12,0 + andc 0,11,9 + rotrdi 4,4,23 + or 5,5,0 + add 12,12,27 + xor 3,3,4 + add 12,12,5 + add 12,12,3 + + rotrdi 3,6,28 + rotrdi 4,6,34 + and 5,6,14 + and 0,6,15 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,14,15 + xor 3,3,4 + add 8,8,12 + xor 5,5,0 + ld 0,96(7) + add 12,12,3 + add 12,12,5 + + lwz 5,96(31) + lwz 28,100(31) + insrdi 28,5,32,0 + rotrdi 3,8,14 + rotrdi 4,8,18 + and 5,9,8 + xor 3,3,4 + add 11,11,0 + andc 0,10,8 + rotrdi 4,4,23 + or 5,5,0 + add 11,11,28 + xor 3,3,4 + add 11,11,5 + add 11,11,3 + + rotrdi 3,12,28 + rotrdi 4,12,34 + and 5,12,6 + and 0,12,14 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,6,14 + xor 3,3,4 + add 15,15,11 + xor 5,5,0 + ld 0,104(7) + add 11,11,3 + add 11,11,5 + + lwz 5,104(31) + lwz 29,108(31) + insrdi 29,5,32,0 + rotrdi 3,15,14 + rotrdi 4,15,18 + and 5,8,15 + xor 3,3,4 + add 10,10,0 + andc 0,9,15 + rotrdi 4,4,23 + or 5,5,0 + add 10,10,29 + xor 3,3,4 + add 10,10,5 + add 10,10,3 + + 
rotrdi 3,11,28 + rotrdi 4,11,34 + and 5,11,12 + and 0,11,6 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,12,6 + xor 3,3,4 + add 14,14,10 + xor 5,5,0 + ld 0,112(7) + add 10,10,3 + add 10,10,5 + + lwz 5,112(31) + lwz 30,116(31) + insrdi 30,5,32,0 + rotrdi 3,14,14 + rotrdi 4,14,18 + and 5,15,14 + xor 3,3,4 + add 9,9,0 + andc 0,8,14 + rotrdi 4,4,23 + or 5,5,0 + add 9,9,30 + xor 3,3,4 + add 9,9,5 + add 9,9,3 + + rotrdi 3,10,28 + rotrdi 4,10,34 + and 5,10,11 + and 0,10,12 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,11,12 + xor 3,3,4 + add 6,6,9 + xor 5,5,0 + ld 0,120(7) + add 9,9,3 + add 9,9,5 + + lwz 5,120(31) + lwz 31,124(31) + insrdi 31,5,32,0 + rotrdi 3,6,14 + rotrdi 4,6,18 + and 5,14,6 + xor 3,3,4 + add 8,8,0 + andc 0,15,6 + rotrdi 4,4,23 + or 5,5,0 + add 8,8,31 + xor 3,3,4 + add 8,8,5 + add 8,8,3 + + rotrdi 3,9,28 + rotrdi 4,9,34 + and 5,9,10 + and 0,9,11 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,10,11 + xor 3,3,4 + add 12,12,8 + xor 5,5,0 + add 8,8,3 + add 8,8,5 + + li 5,4 + mtctr 5 +.align 4 +.Lrounds: + addi 7,7,128 + rotrdi 3,17,1 + rotrdi 4,17,8 + rotrdi 5,30,19 + rotrdi 0,30,61 + xor 3,3,4 + srdi 4,17,7 + xor 5,5,0 + srdi 0,30,6 + add 16,16,25 + xor 3,3,4 + xor 5,5,0 + ld 0,0(7) + add 16,16,3 + add 16,16,5 + rotrdi 3,12,14 + rotrdi 4,12,18 + and 5,6,12 + xor 3,3,4 + add 15,15,0 + andc 0,14,12 + rotrdi 4,4,23 + or 5,5,0 + add 15,15,16 + xor 3,3,4 + add 15,15,5 + add 15,15,3 + + rotrdi 3,8,28 + rotrdi 4,8,34 + and 5,8,9 + and 0,8,10 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,9,10 + xor 3,3,4 + add 11,11,15 + xor 5,5,0 + add 15,15,3 + add 15,15,5 + + rotrdi 3,18,1 + rotrdi 4,18,8 + rotrdi 5,31,19 + rotrdi 0,31,61 + xor 3,3,4 + srdi 4,18,7 + xor 5,5,0 + srdi 0,31,6 + add 17,17,26 + xor 3,3,4 + xor 5,5,0 + ld 0,8(7) + add 17,17,3 + add 17,17,5 + rotrdi 3,11,14 + rotrdi 4,11,18 + and 5,12,11 + xor 3,3,4 + add 14,14,0 + andc 0,6,11 + rotrdi 4,4,23 + or 5,5,0 + add 14,14,17 + xor 3,3,4 + add 14,14,5 + add 14,14,3 + + rotrdi 3,15,28 + rotrdi 4,15,34 + and 
5,15,8 + and 0,15,9 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,8,9 + xor 3,3,4 + add 10,10,14 + xor 5,5,0 + add 14,14,3 + add 14,14,5 + + rotrdi 3,19,1 + rotrdi 4,19,8 + rotrdi 5,16,19 + rotrdi 0,16,61 + xor 3,3,4 + srdi 4,19,7 + xor 5,5,0 + srdi 0,16,6 + add 18,18,27 + xor 3,3,4 + xor 5,5,0 + ld 0,16(7) + add 18,18,3 + add 18,18,5 + rotrdi 3,10,14 + rotrdi 4,10,18 + and 5,11,10 + xor 3,3,4 + add 6,6,0 + andc 0,12,10 + rotrdi 4,4,23 + or 5,5,0 + add 6,6,18 + xor 3,3,4 + add 6,6,5 + add 6,6,3 + + rotrdi 3,14,28 + rotrdi 4,14,34 + and 5,14,15 + and 0,14,8 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,15,8 + xor 3,3,4 + add 9,9,6 + xor 5,5,0 + add 6,6,3 + add 6,6,5 + + rotrdi 3,20,1 + rotrdi 4,20,8 + rotrdi 5,17,19 + rotrdi 0,17,61 + xor 3,3,4 + srdi 4,20,7 + xor 5,5,0 + srdi 0,17,6 + add 19,19,28 + xor 3,3,4 + xor 5,5,0 + ld 0,24(7) + add 19,19,3 + add 19,19,5 + rotrdi 3,9,14 + rotrdi 4,9,18 + and 5,10,9 + xor 3,3,4 + add 12,12,0 + andc 0,11,9 + rotrdi 4,4,23 + or 5,5,0 + add 12,12,19 + xor 3,3,4 + add 12,12,5 + add 12,12,3 + + rotrdi 3,6,28 + rotrdi 4,6,34 + and 5,6,14 + and 0,6,15 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,14,15 + xor 3,3,4 + add 8,8,12 + xor 5,5,0 + add 12,12,3 + add 12,12,5 + + rotrdi 3,21,1 + rotrdi 4,21,8 + rotrdi 5,18,19 + rotrdi 0,18,61 + xor 3,3,4 + srdi 4,21,7 + xor 5,5,0 + srdi 0,18,6 + add 20,20,29 + xor 3,3,4 + xor 5,5,0 + ld 0,32(7) + add 20,20,3 + add 20,20,5 + rotrdi 3,8,14 + rotrdi 4,8,18 + and 5,9,8 + xor 3,3,4 + add 11,11,0 + andc 0,10,8 + rotrdi 4,4,23 + or 5,5,0 + add 11,11,20 + xor 3,3,4 + add 11,11,5 + add 11,11,3 + + rotrdi 3,12,28 + rotrdi 4,12,34 + and 5,12,6 + and 0,12,14 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,6,14 + xor 3,3,4 + add 15,15,11 + xor 5,5,0 + add 11,11,3 + add 11,11,5 + + rotrdi 3,22,1 + rotrdi 4,22,8 + rotrdi 5,19,19 + rotrdi 0,19,61 + xor 3,3,4 + srdi 4,22,7 + xor 5,5,0 + srdi 0,19,6 + add 21,21,30 + xor 3,3,4 + xor 5,5,0 + ld 0,40(7) + add 21,21,3 + add 21,21,5 + rotrdi 3,15,14 + rotrdi 
4,15,18 + and 5,8,15 + xor 3,3,4 + add 10,10,0 + andc 0,9,15 + rotrdi 4,4,23 + or 5,5,0 + add 10,10,21 + xor 3,3,4 + add 10,10,5 + add 10,10,3 + + rotrdi 3,11,28 + rotrdi 4,11,34 + and 5,11,12 + and 0,11,6 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,12,6 + xor 3,3,4 + add 14,14,10 + xor 5,5,0 + add 10,10,3 + add 10,10,5 + + rotrdi 3,23,1 + rotrdi 4,23,8 + rotrdi 5,20,19 + rotrdi 0,20,61 + xor 3,3,4 + srdi 4,23,7 + xor 5,5,0 + srdi 0,20,6 + add 22,22,31 + xor 3,3,4 + xor 5,5,0 + ld 0,48(7) + add 22,22,3 + add 22,22,5 + rotrdi 3,14,14 + rotrdi 4,14,18 + and 5,15,14 + xor 3,3,4 + add 9,9,0 + andc 0,8,14 + rotrdi 4,4,23 + or 5,5,0 + add 9,9,22 + xor 3,3,4 + add 9,9,5 + add 9,9,3 + + rotrdi 3,10,28 + rotrdi 4,10,34 + and 5,10,11 + and 0,10,12 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,11,12 + xor 3,3,4 + add 6,6,9 + xor 5,5,0 + add 9,9,3 + add 9,9,5 + + rotrdi 3,24,1 + rotrdi 4,24,8 + rotrdi 5,21,19 + rotrdi 0,21,61 + xor 3,3,4 + srdi 4,24,7 + xor 5,5,0 + srdi 0,21,6 + add 23,23,16 + xor 3,3,4 + xor 5,5,0 + ld 0,56(7) + add 23,23,3 + add 23,23,5 + rotrdi 3,6,14 + rotrdi 4,6,18 + and 5,14,6 + xor 3,3,4 + add 8,8,0 + andc 0,15,6 + rotrdi 4,4,23 + or 5,5,0 + add 8,8,23 + xor 3,3,4 + add 8,8,5 + add 8,8,3 + + rotrdi 3,9,28 + rotrdi 4,9,34 + and 5,9,10 + and 0,9,11 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,10,11 + xor 3,3,4 + add 12,12,8 + xor 5,5,0 + add 8,8,3 + add 8,8,5 + + rotrdi 3,25,1 + rotrdi 4,25,8 + rotrdi 5,22,19 + rotrdi 0,22,61 + xor 3,3,4 + srdi 4,25,7 + xor 5,5,0 + srdi 0,22,6 + add 24,24,17 + xor 3,3,4 + xor 5,5,0 + ld 0,64(7) + add 24,24,3 + add 24,24,5 + rotrdi 3,12,14 + rotrdi 4,12,18 + and 5,6,12 + xor 3,3,4 + add 15,15,0 + andc 0,14,12 + rotrdi 4,4,23 + or 5,5,0 + add 15,15,24 + xor 3,3,4 + add 15,15,5 + add 15,15,3 + + rotrdi 3,8,28 + rotrdi 4,8,34 + and 5,8,9 + and 0,8,10 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,9,10 + xor 3,3,4 + add 11,11,15 + xor 5,5,0 + add 15,15,3 + add 15,15,5 + + rotrdi 3,26,1 + rotrdi 4,26,8 + rotrdi 5,23,19 + 
rotrdi 0,23,61 + xor 3,3,4 + srdi 4,26,7 + xor 5,5,0 + srdi 0,23,6 + add 25,25,18 + xor 3,3,4 + xor 5,5,0 + ld 0,72(7) + add 25,25,3 + add 25,25,5 + rotrdi 3,11,14 + rotrdi 4,11,18 + and 5,12,11 + xor 3,3,4 + add 14,14,0 + andc 0,6,11 + rotrdi 4,4,23 + or 5,5,0 + add 14,14,25 + xor 3,3,4 + add 14,14,5 + add 14,14,3 + + rotrdi 3,15,28 + rotrdi 4,15,34 + and 5,15,8 + and 0,15,9 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,8,9 + xor 3,3,4 + add 10,10,14 + xor 5,5,0 + add 14,14,3 + add 14,14,5 + + rotrdi 3,27,1 + rotrdi 4,27,8 + rotrdi 5,24,19 + rotrdi 0,24,61 + xor 3,3,4 + srdi 4,27,7 + xor 5,5,0 + srdi 0,24,6 + add 26,26,19 + xor 3,3,4 + xor 5,5,0 + ld 0,80(7) + add 26,26,3 + add 26,26,5 + rotrdi 3,10,14 + rotrdi 4,10,18 + and 5,11,10 + xor 3,3,4 + add 6,6,0 + andc 0,12,10 + rotrdi 4,4,23 + or 5,5,0 + add 6,6,26 + xor 3,3,4 + add 6,6,5 + add 6,6,3 + + rotrdi 3,14,28 + rotrdi 4,14,34 + and 5,14,15 + and 0,14,8 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,15,8 + xor 3,3,4 + add 9,9,6 + xor 5,5,0 + add 6,6,3 + add 6,6,5 + + rotrdi 3,28,1 + rotrdi 4,28,8 + rotrdi 5,25,19 + rotrdi 0,25,61 + xor 3,3,4 + srdi 4,28,7 + xor 5,5,0 + srdi 0,25,6 + add 27,27,20 + xor 3,3,4 + xor 5,5,0 + ld 0,88(7) + add 27,27,3 + add 27,27,5 + rotrdi 3,9,14 + rotrdi 4,9,18 + and 5,10,9 + xor 3,3,4 + add 12,12,0 + andc 0,11,9 + rotrdi 4,4,23 + or 5,5,0 + add 12,12,27 + xor 3,3,4 + add 12,12,5 + add 12,12,3 + + rotrdi 3,6,28 + rotrdi 4,6,34 + and 5,6,14 + and 0,6,15 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,14,15 + xor 3,3,4 + add 8,8,12 + xor 5,5,0 + add 12,12,3 + add 12,12,5 + + rotrdi 3,29,1 + rotrdi 4,29,8 + rotrdi 5,26,19 + rotrdi 0,26,61 + xor 3,3,4 + srdi 4,29,7 + xor 5,5,0 + srdi 0,26,6 + add 28,28,21 + xor 3,3,4 + xor 5,5,0 + ld 0,96(7) + add 28,28,3 + add 28,28,5 + rotrdi 3,8,14 + rotrdi 4,8,18 + and 5,9,8 + xor 3,3,4 + add 11,11,0 + andc 0,10,8 + rotrdi 4,4,23 + or 5,5,0 + add 11,11,28 + xor 3,3,4 + add 11,11,5 + add 11,11,3 + + rotrdi 3,12,28 + rotrdi 4,12,34 + and 5,12,6 + 
and 0,12,14 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,6,14 + xor 3,3,4 + add 15,15,11 + xor 5,5,0 + add 11,11,3 + add 11,11,5 + + rotrdi 3,30,1 + rotrdi 4,30,8 + rotrdi 5,27,19 + rotrdi 0,27,61 + xor 3,3,4 + srdi 4,30,7 + xor 5,5,0 + srdi 0,27,6 + add 29,29,22 + xor 3,3,4 + xor 5,5,0 + ld 0,104(7) + add 29,29,3 + add 29,29,5 + rotrdi 3,15,14 + rotrdi 4,15,18 + and 5,8,15 + xor 3,3,4 + add 10,10,0 + andc 0,9,15 + rotrdi 4,4,23 + or 5,5,0 + add 10,10,29 + xor 3,3,4 + add 10,10,5 + add 10,10,3 + + rotrdi 3,11,28 + rotrdi 4,11,34 + and 5,11,12 + and 0,11,6 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,12,6 + xor 3,3,4 + add 14,14,10 + xor 5,5,0 + add 10,10,3 + add 10,10,5 + + rotrdi 3,31,1 + rotrdi 4,31,8 + rotrdi 5,28,19 + rotrdi 0,28,61 + xor 3,3,4 + srdi 4,31,7 + xor 5,5,0 + srdi 0,28,6 + add 30,30,23 + xor 3,3,4 + xor 5,5,0 + ld 0,112(7) + add 30,30,3 + add 30,30,5 + rotrdi 3,14,14 + rotrdi 4,14,18 + and 5,15,14 + xor 3,3,4 + add 9,9,0 + andc 0,8,14 + rotrdi 4,4,23 + or 5,5,0 + add 9,9,30 + xor 3,3,4 + add 9,9,5 + add 9,9,3 + + rotrdi 3,10,28 + rotrdi 4,10,34 + and 5,10,11 + and 0,10,12 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,11,12 + xor 3,3,4 + add 6,6,9 + xor 5,5,0 + add 9,9,3 + add 9,9,5 + + rotrdi 3,16,1 + rotrdi 4,16,8 + rotrdi 5,29,19 + rotrdi 0,29,61 + xor 3,3,4 + srdi 4,16,7 + xor 5,5,0 + srdi 0,29,6 + add 31,31,24 + xor 3,3,4 + xor 5,5,0 + ld 0,120(7) + add 31,31,3 + add 31,31,5 + rotrdi 3,6,14 + rotrdi 4,6,18 + and 5,14,6 + xor 3,3,4 + add 8,8,0 + andc 0,15,6 + rotrdi 4,4,23 + or 5,5,0 + add 8,8,31 + xor 3,3,4 + add 8,8,5 + add 8,8,3 + + rotrdi 3,9,28 + rotrdi 4,9,34 + and 5,9,10 + and 0,9,11 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,10,11 + xor 3,3,4 + add 12,12,8 + xor 5,5,0 + add 8,8,3 + add 8,8,5 + + bdnz .Lrounds + + ld 3,208(1) + ld 31,200(1) + ld 5,192(1) + subi 7,7,512 + + ld 16,0(3) + ld 17,8(3) + ld 18,16(3) + ld 19,24(3) + ld 20,32(3) + ld 21,40(3) + ld 22,48(3) + addi 31,31,128 + ld 23,56(3) + add 8,8,16 + add 9,9,17 + std 
31,200(1) + add 10,10,18 + std 8,0(3) + add 11,11,19 + std 9,8(3) + add 12,12,20 + std 10,16(3) + add 6,6,21 + std 11,24(3) + add 14,14,22 + std 12,32(3) + add 15,15,23 + std 6,40(3) + std 14,48(3) + cmpld 31,5 + std 15,56(3) + bne .Lsha2_block_private + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +#if (!defined(_CALL_ELF) || _CALL_ELF == 1) +.size .zfs_sha512_ppc,.-.zfs_sha512_ppc +.size zfs_sha512_ppc,.-.zfs_sha512_ppc +#else +.size zfs_sha512_ppc,.-zfs_sha512_ppc +#endif +.align 6 +.LPICmeup: + mflr 0 + bcl 20,31,$+4 + mflr 7 + addi 7,7,56 + mtlr 0 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +.space 28 +.long 0x428a2f98,0xd728ae22 +.long 0x71374491,0x23ef65cd +.long 0xb5c0fbcf,0xec4d3b2f +.long 0xe9b5dba5,0x8189dbbc +.long 0x3956c25b,0xf348b538 +.long 0x59f111f1,0xb605d019 +.long 0x923f82a4,0xaf194f9b +.long 0xab1c5ed5,0xda6d8118 +.long 0xd807aa98,0xa3030242 +.long 0x12835b01,0x45706fbe +.long 0x243185be,0x4ee4b28c +.long 0x550c7dc3,0xd5ffb4e2 +.long 0x72be5d74,0xf27b896f +.long 0x80deb1fe,0x3b1696b1 +.long 0x9bdc06a7,0x25c71235 +.long 0xc19bf174,0xcf692694 +.long 0xe49b69c1,0x9ef14ad2 +.long 0xefbe4786,0x384f25e3 +.long 0x0fc19dc6,0x8b8cd5b5 +.long 0x240ca1cc,0x77ac9c65 +.long 0x2de92c6f,0x592b0275 +.long 0x4a7484aa,0x6ea6e483 +.long 0x5cb0a9dc,0xbd41fbd4 +.long 0x76f988da,0x831153b5 +.long 0x983e5152,0xee66dfab +.long 0xa831c66d,0x2db43210 +.long 0xb00327c8,0x98fb213f +.long 0xbf597fc7,0xbeef0ee4 +.long 0xc6e00bf3,0x3da88fc2 +.long 0xd5a79147,0x930aa725 +.long 0x06ca6351,0xe003826f +.long 0x14292967,0x0a0e6e70 +.long 0x27b70a85,0x46d22ffc +.long 0x2e1b2138,0x5c26c926 +.long 0x4d2c6dfc,0x5ac42aed +.long 0x53380d13,0x9d95b3df +.long 0x650a7354,0x8baf63de +.long 0x766a0abb,0x3c77b2a8 +.long 0x81c2c92e,0x47edaee6 +.long 0x92722c85,0x1482353b +.long 0xa2bfe8a1,0x4cf10364 +.long 0xa81a664b,0xbc423001 +.long 0xc24b8b70,0xd0f89791 +.long 0xc76c51a3,0x0654be30 +.long 0xd192e819,0xd6ef5218 +.long 0xd6990624,0x5565a910 +.long 0xf40e3585,0x5771202a +.long 0x106aa070,0x32bbd1b8 
+.long 0x19a4c116,0xb8d2d0c8 +.long 0x1e376c08,0x5141ab53 +.long 0x2748774c,0xdf8eeb99 +.long 0x34b0bcb5,0xe19b48a8 +.long 0x391c0cb3,0xc5c95a63 +.long 0x4ed8aa4a,0xe3418acb +.long 0x5b9cca4f,0x7763e373 +.long 0x682e6ff3,0xd6b2b8a3 +.long 0x748f82ee,0x5defb2fc +.long 0x78a5636f,0x43172f60 +.long 0x84c87814,0xa1f0ab72 +.long 0x8cc70208,0x1a6439ec +.long 0x90befffa,0x23631e28 +.long 0xa4506ceb,0xde82bde9 +.long 0xbef9a3f7,0xb2c67915 +.long 0xc67178f2,0xe372532b +.long 0xca273ece,0xea26619c +.long 0xd186b8c7,0x21c0c207 +.long 0xeada7dd6,0xcde0eb1e +.long 0xf57d4f7f,0xee6ed178 +.long 0x06f067aa,0x72176fba +.long 0x0a637dc5,0xa2c898a6 +.long 0x113f9804,0xbef90dae +.long 0x1b710b35,0x131c471b +.long 0x28db77f5,0x23047d84 +.long 0x32caab7b,0x40c72493 +.long 0x3c9ebe0a,0x15c9bebc +.long 0x431d67c4,0x9c100d4c +.long 0x4cc5d4be,0xcb3e42b6 +.long 0x597f299c,0xfc657e2a +.long 0x5fcb6fab,0x3ad6faec +.long 0x6c44198c,0x4a475817 + +#elif (defined(__PPC64__) && defined(__LITTLE_ENDIAN__)) + +.abiversion 2 +.text + +.globl zfs_sha512_ppc +.type zfs_sha512_ppc,@function +.align 6 +zfs_sha512_ppc: +.localentry zfs_sha512_ppc,0 + + stdu 1,-384(1) + mflr 0 + sldi 5,5,7 + + std 3,208(1) + + std 14,240(1) + std 15,248(1) + std 16,256(1) + std 17,264(1) + std 18,272(1) + std 19,280(1) + std 20,288(1) + std 21,296(1) + std 22,304(1) + std 23,312(1) + std 24,320(1) + std 25,328(1) + std 26,336(1) + std 27,344(1) + std 28,352(1) + std 29,360(1) + std 30,368(1) + std 31,376(1) + std 0,400(1) + ld 8,0(3) + mr 31,4 + ld 9,8(3) + ld 10,16(3) + ld 11,24(3) + ld 12,32(3) + ld 6,40(3) + ld 14,48(3) + ld 15,56(3) + bl .LPICmeup +.LPICedup: + andi. 0,31,3 + bne .Lunaligned +.Laligned: + add 5,31,5 + std 5,192(1) + std 31,200(1) + bl .Lsha2_block_private + b .Ldone + +.align 4 +.Lunaligned: + subfic 0,31,4096 + andi. 
0,0,3968 + beq .Lcross_page + cmpld 5,0 + ble .Laligned + subfc 5,0,5 + add 0,31,0 + std 5,184(1) + std 0,192(1) + std 31,200(1) + bl .Lsha2_block_private + + ld 5,184(1) +.Lcross_page: + li 0,32 + mtctr 0 + addi 20,1,48 +.Lmemcpy: + lbz 16,0(31) + lbz 17,1(31) + lbz 18,2(31) + lbz 19,3(31) + addi 31,31,4 + stb 16,0(20) + stb 17,1(20) + stb 18,2(20) + stb 19,3(20) + addi 20,20,4 + bdnz .Lmemcpy + std 31,176(1) + addi 0,1,176 + addi 31,1,48 + std 5,184(1) + std 0,192(1) + std 31,200(1) + bl .Lsha2_block_private + ld 31,176(1) + ld 5,184(1) + addic. 5,5,-128 + bne .Lunaligned + +.Ldone: + ld 0,400(1) + ld 14,240(1) + ld 15,248(1) + ld 16,256(1) + ld 17,264(1) + ld 18,272(1) + ld 19,280(1) + ld 20,288(1) + ld 21,296(1) + ld 22,304(1) + ld 23,312(1) + ld 24,320(1) + ld 25,328(1) + ld 26,336(1) + ld 27,344(1) + ld 28,352(1) + ld 29,360(1) + ld 30,368(1) + ld 31,376(1) + mtlr 0 + addi 1,1,384 + blr +.long 0 +.byte 0,12,4,1,0x80,18,3,0 +.long 0 +.align 4 +.Lsha2_block_private: + ld 0,0(7) + lwz 3,0(31) + lwz 4,4(31) + rotlwi 5,3,8 + rotlwi 16,4,8 + rlwimi 5,3,24,0,7 + rlwimi 16,4,24,0,7 + rlwimi 5,3,24,16,23 + rlwimi 16,4,24,16,23 + insrdi 16,5,32,0 + rotrdi 3,12,14 + rotrdi 4,12,18 + and 5,6,12 + xor 3,3,4 + add 15,15,0 + andc 0,14,12 + rotrdi 4,4,23 + or 5,5,0 + add 15,15,16 + xor 3,3,4 + add 15,15,5 + add 15,15,3 + + rotrdi 3,8,28 + rotrdi 4,8,34 + and 5,8,9 + and 0,8,10 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,9,10 + xor 3,3,4 + add 11,11,15 + xor 5,5,0 + ld 0,8(7) + add 15,15,3 + add 15,15,5 + + lwz 3,8(31) + lwz 4,12(31) + rotlwi 5,3,8 + rotlwi 17,4,8 + rlwimi 5,3,24,0,7 + rlwimi 17,4,24,0,7 + rlwimi 5,3,24,16,23 + rlwimi 17,4,24,16,23 + insrdi 17,5,32,0 + rotrdi 3,11,14 + rotrdi 4,11,18 + and 5,12,11 + xor 3,3,4 + add 14,14,0 + andc 0,6,11 + rotrdi 4,4,23 + or 5,5,0 + add 14,14,17 + xor 3,3,4 + add 14,14,5 + add 14,14,3 + + rotrdi 3,15,28 + rotrdi 4,15,34 + and 5,15,8 + and 0,15,9 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,8,9 + xor 3,3,4 + add 10,10,14 + 
xor 5,5,0 + ld 0,16(7) + add 14,14,3 + add 14,14,5 + + lwz 3,16(31) + lwz 4,20(31) + rotlwi 5,3,8 + rotlwi 18,4,8 + rlwimi 5,3,24,0,7 + rlwimi 18,4,24,0,7 + rlwimi 5,3,24,16,23 + rlwimi 18,4,24,16,23 + insrdi 18,5,32,0 + rotrdi 3,10,14 + rotrdi 4,10,18 + and 5,11,10 + xor 3,3,4 + add 6,6,0 + andc 0,12,10 + rotrdi 4,4,23 + or 5,5,0 + add 6,6,18 + xor 3,3,4 + add 6,6,5 + add 6,6,3 + + rotrdi 3,14,28 + rotrdi 4,14,34 + and 5,14,15 + and 0,14,8 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,15,8 + xor 3,3,4 + add 9,9,6 + xor 5,5,0 + ld 0,24(7) + add 6,6,3 + add 6,6,5 + + lwz 3,24(31) + lwz 4,28(31) + rotlwi 5,3,8 + rotlwi 19,4,8 + rlwimi 5,3,24,0,7 + rlwimi 19,4,24,0,7 + rlwimi 5,3,24,16,23 + rlwimi 19,4,24,16,23 + insrdi 19,5,32,0 + rotrdi 3,9,14 + rotrdi 4,9,18 + and 5,10,9 + xor 3,3,4 + add 12,12,0 + andc 0,11,9 + rotrdi 4,4,23 + or 5,5,0 + add 12,12,19 + xor 3,3,4 + add 12,12,5 + add 12,12,3 + + rotrdi 3,6,28 + rotrdi 4,6,34 + and 5,6,14 + and 0,6,15 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,14,15 + xor 3,3,4 + add 8,8,12 + xor 5,5,0 + ld 0,32(7) + add 12,12,3 + add 12,12,5 + + lwz 3,32(31) + lwz 4,36(31) + rotlwi 5,3,8 + rotlwi 20,4,8 + rlwimi 5,3,24,0,7 + rlwimi 20,4,24,0,7 + rlwimi 5,3,24,16,23 + rlwimi 20,4,24,16,23 + insrdi 20,5,32,0 + rotrdi 3,8,14 + rotrdi 4,8,18 + and 5,9,8 + xor 3,3,4 + add 11,11,0 + andc 0,10,8 + rotrdi 4,4,23 + or 5,5,0 + add 11,11,20 + xor 3,3,4 + add 11,11,5 + add 11,11,3 + + rotrdi 3,12,28 + rotrdi 4,12,34 + and 5,12,6 + and 0,12,14 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,6,14 + xor 3,3,4 + add 15,15,11 + xor 5,5,0 + ld 0,40(7) + add 11,11,3 + add 11,11,5 + + lwz 3,40(31) + lwz 4,44(31) + rotlwi 5,3,8 + rotlwi 21,4,8 + rlwimi 5,3,24,0,7 + rlwimi 21,4,24,0,7 + rlwimi 5,3,24,16,23 + rlwimi 21,4,24,16,23 + insrdi 21,5,32,0 + rotrdi 3,15,14 + rotrdi 4,15,18 + and 5,8,15 + xor 3,3,4 + add 10,10,0 + andc 0,9,15 + rotrdi 4,4,23 + or 5,5,0 + add 10,10,21 + xor 3,3,4 + add 10,10,5 + add 10,10,3 + + rotrdi 3,11,28 + rotrdi 4,11,34 + 
and 5,11,12 + and 0,11,6 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,12,6 + xor 3,3,4 + add 14,14,10 + xor 5,5,0 + ld 0,48(7) + add 10,10,3 + add 10,10,5 + + lwz 3,48(31) + lwz 4,52(31) + rotlwi 5,3,8 + rotlwi 22,4,8 + rlwimi 5,3,24,0,7 + rlwimi 22,4,24,0,7 + rlwimi 5,3,24,16,23 + rlwimi 22,4,24,16,23 + insrdi 22,5,32,0 + rotrdi 3,14,14 + rotrdi 4,14,18 + and 5,15,14 + xor 3,3,4 + add 9,9,0 + andc 0,8,14 + rotrdi 4,4,23 + or 5,5,0 + add 9,9,22 + xor 3,3,4 + add 9,9,5 + add 9,9,3 + + rotrdi 3,10,28 + rotrdi 4,10,34 + and 5,10,11 + and 0,10,12 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,11,12 + xor 3,3,4 + add 6,6,9 + xor 5,5,0 + ld 0,56(7) + add 9,9,3 + add 9,9,5 + + lwz 3,56(31) + lwz 4,60(31) + rotlwi 5,3,8 + rotlwi 23,4,8 + rlwimi 5,3,24,0,7 + rlwimi 23,4,24,0,7 + rlwimi 5,3,24,16,23 + rlwimi 23,4,24,16,23 + insrdi 23,5,32,0 + rotrdi 3,6,14 + rotrdi 4,6,18 + and 5,14,6 + xor 3,3,4 + add 8,8,0 + andc 0,15,6 + rotrdi 4,4,23 + or 5,5,0 + add 8,8,23 + xor 3,3,4 + add 8,8,5 + add 8,8,3 + + rotrdi 3,9,28 + rotrdi 4,9,34 + and 5,9,10 + and 0,9,11 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,10,11 + xor 3,3,4 + add 12,12,8 + xor 5,5,0 + ld 0,64(7) + add 8,8,3 + add 8,8,5 + + lwz 3,64(31) + lwz 4,68(31) + rotlwi 5,3,8 + rotlwi 24,4,8 + rlwimi 5,3,24,0,7 + rlwimi 24,4,24,0,7 + rlwimi 5,3,24,16,23 + rlwimi 24,4,24,16,23 + insrdi 24,5,32,0 + rotrdi 3,12,14 + rotrdi 4,12,18 + and 5,6,12 + xor 3,3,4 + add 15,15,0 + andc 0,14,12 + rotrdi 4,4,23 + or 5,5,0 + add 15,15,24 + xor 3,3,4 + add 15,15,5 + add 15,15,3 + + rotrdi 3,8,28 + rotrdi 4,8,34 + and 5,8,9 + and 0,8,10 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,9,10 + xor 3,3,4 + add 11,11,15 + xor 5,5,0 + ld 0,72(7) + add 15,15,3 + add 15,15,5 + + lwz 3,72(31) + lwz 4,76(31) + rotlwi 5,3,8 + rotlwi 25,4,8 + rlwimi 5,3,24,0,7 + rlwimi 25,4,24,0,7 + rlwimi 5,3,24,16,23 + rlwimi 25,4,24,16,23 + insrdi 25,5,32,0 + rotrdi 3,11,14 + rotrdi 4,11,18 + and 5,12,11 + xor 3,3,4 + add 14,14,0 + andc 0,6,11 + rotrdi 4,4,23 + or 
5,5,0 + add 14,14,25 + xor 3,3,4 + add 14,14,5 + add 14,14,3 + + rotrdi 3,15,28 + rotrdi 4,15,34 + and 5,15,8 + and 0,15,9 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,8,9 + xor 3,3,4 + add 10,10,14 + xor 5,5,0 + ld 0,80(7) + add 14,14,3 + add 14,14,5 + + lwz 3,80(31) + lwz 4,84(31) + rotlwi 5,3,8 + rotlwi 26,4,8 + rlwimi 5,3,24,0,7 + rlwimi 26,4,24,0,7 + rlwimi 5,3,24,16,23 + rlwimi 26,4,24,16,23 + insrdi 26,5,32,0 + rotrdi 3,10,14 + rotrdi 4,10,18 + and 5,11,10 + xor 3,3,4 + add 6,6,0 + andc 0,12,10 + rotrdi 4,4,23 + or 5,5,0 + add 6,6,26 + xor 3,3,4 + add 6,6,5 + add 6,6,3 + + rotrdi 3,14,28 + rotrdi 4,14,34 + and 5,14,15 + and 0,14,8 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,15,8 + xor 3,3,4 + add 9,9,6 + xor 5,5,0 + ld 0,88(7) + add 6,6,3 + add 6,6,5 + + lwz 3,88(31) + lwz 4,92(31) + rotlwi 5,3,8 + rotlwi 27,4,8 + rlwimi 5,3,24,0,7 + rlwimi 27,4,24,0,7 + rlwimi 5,3,24,16,23 + rlwimi 27,4,24,16,23 + insrdi 27,5,32,0 + rotrdi 3,9,14 + rotrdi 4,9,18 + and 5,10,9 + xor 3,3,4 + add 12,12,0 + andc 0,11,9 + rotrdi 4,4,23 + or 5,5,0 + add 12,12,27 + xor 3,3,4 + add 12,12,5 + add 12,12,3 + + rotrdi 3,6,28 + rotrdi 4,6,34 + and 5,6,14 + and 0,6,15 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,14,15 + xor 3,3,4 + add 8,8,12 + xor 5,5,0 + ld 0,96(7) + add 12,12,3 + add 12,12,5 + + lwz 3,96(31) + lwz 4,100(31) + rotlwi 5,3,8 + rotlwi 28,4,8 + rlwimi 5,3,24,0,7 + rlwimi 28,4,24,0,7 + rlwimi 5,3,24,16,23 + rlwimi 28,4,24,16,23 + insrdi 28,5,32,0 + rotrdi 3,8,14 + rotrdi 4,8,18 + and 5,9,8 + xor 3,3,4 + add 11,11,0 + andc 0,10,8 + rotrdi 4,4,23 + or 5,5,0 + add 11,11,28 + xor 3,3,4 + add 11,11,5 + add 11,11,3 + + rotrdi 3,12,28 + rotrdi 4,12,34 + and 5,12,6 + and 0,12,14 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,6,14 + xor 3,3,4 + add 15,15,11 + xor 5,5,0 + ld 0,104(7) + add 11,11,3 + add 11,11,5 + + lwz 3,104(31) + lwz 4,108(31) + rotlwi 5,3,8 + rotlwi 29,4,8 + rlwimi 5,3,24,0,7 + rlwimi 29,4,24,0,7 + rlwimi 5,3,24,16,23 + rlwimi 29,4,24,16,23 + insrdi 29,5,32,0 
+ rotrdi 3,15,14 + rotrdi 4,15,18 + and 5,8,15 + xor 3,3,4 + add 10,10,0 + andc 0,9,15 + rotrdi 4,4,23 + or 5,5,0 + add 10,10,29 + xor 3,3,4 + add 10,10,5 + add 10,10,3 + + rotrdi 3,11,28 + rotrdi 4,11,34 + and 5,11,12 + and 0,11,6 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,12,6 + xor 3,3,4 + add 14,14,10 + xor 5,5,0 + ld 0,112(7) + add 10,10,3 + add 10,10,5 + + lwz 3,112(31) + lwz 4,116(31) + rotlwi 5,3,8 + rotlwi 30,4,8 + rlwimi 5,3,24,0,7 + rlwimi 30,4,24,0,7 + rlwimi 5,3,24,16,23 + rlwimi 30,4,24,16,23 + insrdi 30,5,32,0 + rotrdi 3,14,14 + rotrdi 4,14,18 + and 5,15,14 + xor 3,3,4 + add 9,9,0 + andc 0,8,14 + rotrdi 4,4,23 + or 5,5,0 + add 9,9,30 + xor 3,3,4 + add 9,9,5 + add 9,9,3 + + rotrdi 3,10,28 + rotrdi 4,10,34 + and 5,10,11 + and 0,10,12 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,11,12 + xor 3,3,4 + add 6,6,9 + xor 5,5,0 + ld 0,120(7) + add 9,9,3 + add 9,9,5 + + lwz 3,120(31) + lwz 4,124(31) + rotlwi 5,3,8 + rotlwi 31,4,8 + rlwimi 5,3,24,0,7 + rlwimi 31,4,24,0,7 + rlwimi 5,3,24,16,23 + rlwimi 31,4,24,16,23 + insrdi 31,5,32,0 + rotrdi 3,6,14 + rotrdi 4,6,18 + and 5,14,6 + xor 3,3,4 + add 8,8,0 + andc 0,15,6 + rotrdi 4,4,23 + or 5,5,0 + add 8,8,31 + xor 3,3,4 + add 8,8,5 + add 8,8,3 + + rotrdi 3,9,28 + rotrdi 4,9,34 + and 5,9,10 + and 0,9,11 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,10,11 + xor 3,3,4 + add 12,12,8 + xor 5,5,0 + add 8,8,3 + add 8,8,5 + + li 5,4 + mtctr 5 +.align 4 +.Lrounds: + addi 7,7,128 + rotrdi 3,17,1 + rotrdi 4,17,8 + rotrdi 5,30,19 + rotrdi 0,30,61 + xor 3,3,4 + srdi 4,17,7 + xor 5,5,0 + srdi 0,30,6 + add 16,16,25 + xor 3,3,4 + xor 5,5,0 + ld 0,0(7) + add 16,16,3 + add 16,16,5 + rotrdi 3,12,14 + rotrdi 4,12,18 + and 5,6,12 + xor 3,3,4 + add 15,15,0 + andc 0,14,12 + rotrdi 4,4,23 + or 5,5,0 + add 15,15,16 + xor 3,3,4 + add 15,15,5 + add 15,15,3 + + rotrdi 3,8,28 + rotrdi 4,8,34 + and 5,8,9 + and 0,8,10 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,9,10 + xor 3,3,4 + add 11,11,15 + xor 5,5,0 + add 15,15,3 + add 15,15,5 + + 
rotrdi 3,18,1 + rotrdi 4,18,8 + rotrdi 5,31,19 + rotrdi 0,31,61 + xor 3,3,4 + srdi 4,18,7 + xor 5,5,0 + srdi 0,31,6 + add 17,17,26 + xor 3,3,4 + xor 5,5,0 + ld 0,8(7) + add 17,17,3 + add 17,17,5 + rotrdi 3,11,14 + rotrdi 4,11,18 + and 5,12,11 + xor 3,3,4 + add 14,14,0 + andc 0,6,11 + rotrdi 4,4,23 + or 5,5,0 + add 14,14,17 + xor 3,3,4 + add 14,14,5 + add 14,14,3 + + rotrdi 3,15,28 + rotrdi 4,15,34 + and 5,15,8 + and 0,15,9 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,8,9 + xor 3,3,4 + add 10,10,14 + xor 5,5,0 + add 14,14,3 + add 14,14,5 + + rotrdi 3,19,1 + rotrdi 4,19,8 + rotrdi 5,16,19 + rotrdi 0,16,61 + xor 3,3,4 + srdi 4,19,7 + xor 5,5,0 + srdi 0,16,6 + add 18,18,27 + xor 3,3,4 + xor 5,5,0 + ld 0,16(7) + add 18,18,3 + add 18,18,5 + rotrdi 3,10,14 + rotrdi 4,10,18 + and 5,11,10 + xor 3,3,4 + add 6,6,0 + andc 0,12,10 + rotrdi 4,4,23 + or 5,5,0 + add 6,6,18 + xor 3,3,4 + add 6,6,5 + add 6,6,3 + + rotrdi 3,14,28 + rotrdi 4,14,34 + and 5,14,15 + and 0,14,8 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,15,8 + xor 3,3,4 + add 9,9,6 + xor 5,5,0 + add 6,6,3 + add 6,6,5 + + rotrdi 3,20,1 + rotrdi 4,20,8 + rotrdi 5,17,19 + rotrdi 0,17,61 + xor 3,3,4 + srdi 4,20,7 + xor 5,5,0 + srdi 0,17,6 + add 19,19,28 + xor 3,3,4 + xor 5,5,0 + ld 0,24(7) + add 19,19,3 + add 19,19,5 + rotrdi 3,9,14 + rotrdi 4,9,18 + and 5,10,9 + xor 3,3,4 + add 12,12,0 + andc 0,11,9 + rotrdi 4,4,23 + or 5,5,0 + add 12,12,19 + xor 3,3,4 + add 12,12,5 + add 12,12,3 + + rotrdi 3,6,28 + rotrdi 4,6,34 + and 5,6,14 + and 0,6,15 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,14,15 + xor 3,3,4 + add 8,8,12 + xor 5,5,0 + add 12,12,3 + add 12,12,5 + + rotrdi 3,21,1 + rotrdi 4,21,8 + rotrdi 5,18,19 + rotrdi 0,18,61 + xor 3,3,4 + srdi 4,21,7 + xor 5,5,0 + srdi 0,18,6 + add 20,20,29 + xor 3,3,4 + xor 5,5,0 + ld 0,32(7) + add 20,20,3 + add 20,20,5 + rotrdi 3,8,14 + rotrdi 4,8,18 + and 5,9,8 + xor 3,3,4 + add 11,11,0 + andc 0,10,8 + rotrdi 4,4,23 + or 5,5,0 + add 11,11,20 + xor 3,3,4 + add 11,11,5 + add 11,11,3 + + 
rotrdi 3,12,28 + rotrdi 4,12,34 + and 5,12,6 + and 0,12,14 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,6,14 + xor 3,3,4 + add 15,15,11 + xor 5,5,0 + add 11,11,3 + add 11,11,5 + + rotrdi 3,22,1 + rotrdi 4,22,8 + rotrdi 5,19,19 + rotrdi 0,19,61 + xor 3,3,4 + srdi 4,22,7 + xor 5,5,0 + srdi 0,19,6 + add 21,21,30 + xor 3,3,4 + xor 5,5,0 + ld 0,40(7) + add 21,21,3 + add 21,21,5 + rotrdi 3,15,14 + rotrdi 4,15,18 + and 5,8,15 + xor 3,3,4 + add 10,10,0 + andc 0,9,15 + rotrdi 4,4,23 + or 5,5,0 + add 10,10,21 + xor 3,3,4 + add 10,10,5 + add 10,10,3 + + rotrdi 3,11,28 + rotrdi 4,11,34 + and 5,11,12 + and 0,11,6 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,12,6 + xor 3,3,4 + add 14,14,10 + xor 5,5,0 + add 10,10,3 + add 10,10,5 + + rotrdi 3,23,1 + rotrdi 4,23,8 + rotrdi 5,20,19 + rotrdi 0,20,61 + xor 3,3,4 + srdi 4,23,7 + xor 5,5,0 + srdi 0,20,6 + add 22,22,31 + xor 3,3,4 + xor 5,5,0 + ld 0,48(7) + add 22,22,3 + add 22,22,5 + rotrdi 3,14,14 + rotrdi 4,14,18 + and 5,15,14 + xor 3,3,4 + add 9,9,0 + andc 0,8,14 + rotrdi 4,4,23 + or 5,5,0 + add 9,9,22 + xor 3,3,4 + add 9,9,5 + add 9,9,3 + + rotrdi 3,10,28 + rotrdi 4,10,34 + and 5,10,11 + and 0,10,12 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,11,12 + xor 3,3,4 + add 6,6,9 + xor 5,5,0 + add 9,9,3 + add 9,9,5 + + rotrdi 3,24,1 + rotrdi 4,24,8 + rotrdi 5,21,19 + rotrdi 0,21,61 + xor 3,3,4 + srdi 4,24,7 + xor 5,5,0 + srdi 0,21,6 + add 23,23,16 + xor 3,3,4 + xor 5,5,0 + ld 0,56(7) + add 23,23,3 + add 23,23,5 + rotrdi 3,6,14 + rotrdi 4,6,18 + and 5,14,6 + xor 3,3,4 + add 8,8,0 + andc 0,15,6 + rotrdi 4,4,23 + or 5,5,0 + add 8,8,23 + xor 3,3,4 + add 8,8,5 + add 8,8,3 + + rotrdi 3,9,28 + rotrdi 4,9,34 + and 5,9,10 + and 0,9,11 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,10,11 + xor 3,3,4 + add 12,12,8 + xor 5,5,0 + add 8,8,3 + add 8,8,5 + + rotrdi 3,25,1 + rotrdi 4,25,8 + rotrdi 5,22,19 + rotrdi 0,22,61 + xor 3,3,4 + srdi 4,25,7 + xor 5,5,0 + srdi 0,22,6 + add 24,24,17 + xor 3,3,4 + xor 5,5,0 + ld 0,64(7) + add 24,24,3 + add 
24,24,5 + rotrdi 3,12,14 + rotrdi 4,12,18 + and 5,6,12 + xor 3,3,4 + add 15,15,0 + andc 0,14,12 + rotrdi 4,4,23 + or 5,5,0 + add 15,15,24 + xor 3,3,4 + add 15,15,5 + add 15,15,3 + + rotrdi 3,8,28 + rotrdi 4,8,34 + and 5,8,9 + and 0,8,10 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,9,10 + xor 3,3,4 + add 11,11,15 + xor 5,5,0 + add 15,15,3 + add 15,15,5 + + rotrdi 3,26,1 + rotrdi 4,26,8 + rotrdi 5,23,19 + rotrdi 0,23,61 + xor 3,3,4 + srdi 4,26,7 + xor 5,5,0 + srdi 0,23,6 + add 25,25,18 + xor 3,3,4 + xor 5,5,0 + ld 0,72(7) + add 25,25,3 + add 25,25,5 + rotrdi 3,11,14 + rotrdi 4,11,18 + and 5,12,11 + xor 3,3,4 + add 14,14,0 + andc 0,6,11 + rotrdi 4,4,23 + or 5,5,0 + add 14,14,25 + xor 3,3,4 + add 14,14,5 + add 14,14,3 + + rotrdi 3,15,28 + rotrdi 4,15,34 + and 5,15,8 + and 0,15,9 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,8,9 + xor 3,3,4 + add 10,10,14 + xor 5,5,0 + add 14,14,3 + add 14,14,5 + + rotrdi 3,27,1 + rotrdi 4,27,8 + rotrdi 5,24,19 + rotrdi 0,24,61 + xor 3,3,4 + srdi 4,27,7 + xor 5,5,0 + srdi 0,24,6 + add 26,26,19 + xor 3,3,4 + xor 5,5,0 + ld 0,80(7) + add 26,26,3 + add 26,26,5 + rotrdi 3,10,14 + rotrdi 4,10,18 + and 5,11,10 + xor 3,3,4 + add 6,6,0 + andc 0,12,10 + rotrdi 4,4,23 + or 5,5,0 + add 6,6,26 + xor 3,3,4 + add 6,6,5 + add 6,6,3 + + rotrdi 3,14,28 + rotrdi 4,14,34 + and 5,14,15 + and 0,14,8 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,15,8 + xor 3,3,4 + add 9,9,6 + xor 5,5,0 + add 6,6,3 + add 6,6,5 + + rotrdi 3,28,1 + rotrdi 4,28,8 + rotrdi 5,25,19 + rotrdi 0,25,61 + xor 3,3,4 + srdi 4,28,7 + xor 5,5,0 + srdi 0,25,6 + add 27,27,20 + xor 3,3,4 + xor 5,5,0 + ld 0,88(7) + add 27,27,3 + add 27,27,5 + rotrdi 3,9,14 + rotrdi 4,9,18 + and 5,10,9 + xor 3,3,4 + add 12,12,0 + andc 0,11,9 + rotrdi 4,4,23 + or 5,5,0 + add 12,12,27 + xor 3,3,4 + add 12,12,5 + add 12,12,3 + + rotrdi 3,6,28 + rotrdi 4,6,34 + and 5,6,14 + and 0,6,15 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,14,15 + xor 3,3,4 + add 8,8,12 + xor 5,5,0 + add 12,12,3 + add 12,12,5 + + rotrdi 
3,29,1 + rotrdi 4,29,8 + rotrdi 5,26,19 + rotrdi 0,26,61 + xor 3,3,4 + srdi 4,29,7 + xor 5,5,0 + srdi 0,26,6 + add 28,28,21 + xor 3,3,4 + xor 5,5,0 + ld 0,96(7) + add 28,28,3 + add 28,28,5 + rotrdi 3,8,14 + rotrdi 4,8,18 + and 5,9,8 + xor 3,3,4 + add 11,11,0 + andc 0,10,8 + rotrdi 4,4,23 + or 5,5,0 + add 11,11,28 + xor 3,3,4 + add 11,11,5 + add 11,11,3 + + rotrdi 3,12,28 + rotrdi 4,12,34 + and 5,12,6 + and 0,12,14 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,6,14 + xor 3,3,4 + add 15,15,11 + xor 5,5,0 + add 11,11,3 + add 11,11,5 + + rotrdi 3,30,1 + rotrdi 4,30,8 + rotrdi 5,27,19 + rotrdi 0,27,61 + xor 3,3,4 + srdi 4,30,7 + xor 5,5,0 + srdi 0,27,6 + add 29,29,22 + xor 3,3,4 + xor 5,5,0 + ld 0,104(7) + add 29,29,3 + add 29,29,5 + rotrdi 3,15,14 + rotrdi 4,15,18 + and 5,8,15 + xor 3,3,4 + add 10,10,0 + andc 0,9,15 + rotrdi 4,4,23 + or 5,5,0 + add 10,10,29 + xor 3,3,4 + add 10,10,5 + add 10,10,3 + + rotrdi 3,11,28 + rotrdi 4,11,34 + and 5,11,12 + and 0,11,6 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,12,6 + xor 3,3,4 + add 14,14,10 + xor 5,5,0 + add 10,10,3 + add 10,10,5 + + rotrdi 3,31,1 + rotrdi 4,31,8 + rotrdi 5,28,19 + rotrdi 0,28,61 + xor 3,3,4 + srdi 4,31,7 + xor 5,5,0 + srdi 0,28,6 + add 30,30,23 + xor 3,3,4 + xor 5,5,0 + ld 0,112(7) + add 30,30,3 + add 30,30,5 + rotrdi 3,14,14 + rotrdi 4,14,18 + and 5,15,14 + xor 3,3,4 + add 9,9,0 + andc 0,8,14 + rotrdi 4,4,23 + or 5,5,0 + add 9,9,30 + xor 3,3,4 + add 9,9,5 + add 9,9,3 + + rotrdi 3,10,28 + rotrdi 4,10,34 + and 5,10,11 + and 0,10,12 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,11,12 + xor 3,3,4 + add 6,6,9 + xor 5,5,0 + add 9,9,3 + add 9,9,5 + + rotrdi 3,16,1 + rotrdi 4,16,8 + rotrdi 5,29,19 + rotrdi 0,29,61 + xor 3,3,4 + srdi 4,16,7 + xor 5,5,0 + srdi 0,29,6 + add 31,31,24 + xor 3,3,4 + xor 5,5,0 + ld 0,120(7) + add 31,31,3 + add 31,31,5 + rotrdi 3,6,14 + rotrdi 4,6,18 + and 5,14,6 + xor 3,3,4 + add 8,8,0 + andc 0,15,6 + rotrdi 4,4,23 + or 5,5,0 + add 8,8,31 + xor 3,3,4 + add 8,8,5 + add 8,8,3 + + rotrdi 
3,9,28 + rotrdi 4,9,34 + and 5,9,10 + and 0,9,11 + xor 3,3,4 + rotrdi 4,4,5 + xor 5,5,0 + and 0,10,11 + xor 3,3,4 + add 12,12,8 + xor 5,5,0 + add 8,8,3 + add 8,8,5 + + bdnz .Lrounds + + ld 3,208(1) + ld 31,200(1) + ld 5,192(1) + subi 7,7,512 + + ld 16,0(3) + ld 17,8(3) + ld 18,16(3) + ld 19,24(3) + ld 20,32(3) + ld 21,40(3) + ld 22,48(3) + addi 31,31,128 + ld 23,56(3) + add 8,8,16 + add 9,9,17 + std 31,200(1) + add 10,10,18 + std 8,0(3) + add 11,11,19 + std 9,8(3) + add 12,12,20 + std 10,16(3) + add 6,6,21 + std 11,24(3) + add 14,14,22 + std 12,32(3) + add 15,15,23 + std 6,40(3) + std 14,48(3) + cmpld 31,5 + std 15,56(3) + bne .Lsha2_block_private + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +.size zfs_sha512_ppc,.-zfs_sha512_ppc +.align 6 +.LPICmeup: + mflr 0 + bcl 20,31,$+4 + mflr 7 + addi 7,7,56 + mtlr 0 + blr +.long 0 +.byte 0,12,0x14,0,0,0,0,0 +.space 28 +.long 0xd728ae22,0x428a2f98 +.long 0x23ef65cd,0x71374491 +.long 0xec4d3b2f,0xb5c0fbcf +.long 0x8189dbbc,0xe9b5dba5 +.long 0xf348b538,0x3956c25b +.long 0xb605d019,0x59f111f1 +.long 0xaf194f9b,0x923f82a4 +.long 0xda6d8118,0xab1c5ed5 +.long 0xa3030242,0xd807aa98 +.long 0x45706fbe,0x12835b01 +.long 0x4ee4b28c,0x243185be +.long 0xd5ffb4e2,0x550c7dc3 +.long 0xf27b896f,0x72be5d74 +.long 0x3b1696b1,0x80deb1fe +.long 0x25c71235,0x9bdc06a7 +.long 0xcf692694,0xc19bf174 +.long 0x9ef14ad2,0xe49b69c1 +.long 0x384f25e3,0xefbe4786 +.long 0x8b8cd5b5,0x0fc19dc6 +.long 0x77ac9c65,0x240ca1cc +.long 0x592b0275,0x2de92c6f +.long 0x6ea6e483,0x4a7484aa +.long 0xbd41fbd4,0x5cb0a9dc +.long 0x831153b5,0x76f988da +.long 0xee66dfab,0x983e5152 +.long 0x2db43210,0xa831c66d +.long 0x98fb213f,0xb00327c8 +.long 0xbeef0ee4,0xbf597fc7 +.long 0x3da88fc2,0xc6e00bf3 +.long 0x930aa725,0xd5a79147 +.long 0xe003826f,0x06ca6351 +.long 0x0a0e6e70,0x14292967 +.long 0x46d22ffc,0x27b70a85 +.long 0x5c26c926,0x2e1b2138 +.long 0x5ac42aed,0x4d2c6dfc +.long 0x9d95b3df,0x53380d13 +.long 0x8baf63de,0x650a7354 +.long 0x3c77b2a8,0x766a0abb +.long 0x47edaee6,0x81c2c92e 
+.long 0x1482353b,0x92722c85 +.long 0x4cf10364,0xa2bfe8a1 +.long 0xbc423001,0xa81a664b +.long 0xd0f89791,0xc24b8b70 +.long 0x0654be30,0xc76c51a3 +.long 0xd6ef5218,0xd192e819 +.long 0x5565a910,0xd6990624 +.long 0x5771202a,0xf40e3585 +.long 0x32bbd1b8,0x106aa070 +.long 0xb8d2d0c8,0x19a4c116 +.long 0x5141ab53,0x1e376c08 +.long 0xdf8eeb99,0x2748774c +.long 0xe19b48a8,0x34b0bcb5 +.long 0xc5c95a63,0x391c0cb3 +.long 0xe3418acb,0x4ed8aa4a +.long 0x7763e373,0x5b9cca4f +.long 0xd6b2b8a3,0x682e6ff3 +.long 0x5defb2fc,0x748f82ee +.long 0x43172f60,0x78a5636f +.long 0xa1f0ab72,0x84c87814 +.long 0x1a6439ec,0x8cc70208 +.long 0x23631e28,0x90befffa +.long 0xde82bde9,0xa4506ceb +.long 0xb2c67915,0xbef9a3f7 +.long 0xe372532b,0xc67178f2 +.long 0xea26619c,0xca273ece +.long 0x21c0c207,0xd186b8c7 +.long 0xcde0eb1e,0xeada7dd6 +.long 0xee6ed178,0xf57d4f7f +.long 0x72176fba,0x06f067aa +.long 0xa2c898a6,0x0a637dc5 +.long 0xbef90dae,0x113f9804 +.long 0x131c471b,0x1b710b35 +.long 0x23047d84,0x28db77f5 +.long 0x40c72493,0x32caab7b +.long 0x15c9bebc,0x3c9ebe0a +.long 0x9c100d4c,0x431d67c4 +.long 0xcb3e42b6,0x4cc5d4be +.long 0xfc657e2a,0x597f299c +.long 0x3ad6faec,0x5fcb6fab +.long 0x4a475817,0x6c44198c + +#endif diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_aesni.S b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_aesni.S index 4a80c62097ae..4f3fe3ec65d6 100644 --- a/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_aesni.S +++ b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_aesni.S @@ -154,26 +154,26 @@ #include <sys/types.h> -/* ARGSUSED */ void aes_encrypt_intel(const uint32_t rk[], int Nr, const uint32_t pt[4], uint32_t ct[4]) { + (void) rk, (void) Nr, (void) pt, (void) ct; } -/* ARGSUSED */ void aes_decrypt_intel(const uint32_t rk[], int Nr, const uint32_t ct[4], uint32_t pt[4]) { + (void) rk, (void) Nr, (void) ct, (void) pt; } -/* ARGSUSED */ int rijndael_key_setup_enc_intel(uint32_t rk[], const uint32_t cipherKey[], uint64_t keyBits) { + (void) rk, (void) cipherKey, 
(void) keyBits; return (0); } -/* ARGSUSED */ int rijndael_key_setup_dec_intel(uint32_t rk[], const uint32_t cipherKey[], uint64_t keyBits) { + (void) rk, (void) cipherKey, (void) keyBits; return (0); } @@ -208,7 +208,7 @@ _key_expansion_256a_local: pxor %xmm1, %xmm0 movups %xmm0, (%rcx) add $0x10, %rcx - ret + RET nop SET_SIZE(_key_expansion_128) SET_SIZE(_key_expansion_256a) @@ -236,7 +236,7 @@ _key_expansion_192a_local: shufps $0b01001110, %xmm2, %xmm1 movups %xmm1, 0x10(%rcx) add $0x20, %rcx - ret + RET SET_SIZE(_key_expansion_192a) @@ -257,7 +257,7 @@ _key_expansion_192b_local: movups %xmm0, (%rcx) add $0x10, %rcx - ret + RET SET_SIZE(_key_expansion_192b) @@ -271,7 +271,7 @@ _key_expansion_256b_local: pxor %xmm1, %xmm2 movups %xmm2, (%rcx) add $0x10, %rcx - ret + RET SET_SIZE(_key_expansion_256b) @@ -376,9 +376,9 @@ rijndael_key_setup_enc_intel_local: mov $14, %rax // return # rounds = 14 #endif FRAME_END - ret + RET -.align 4 +.balign 4 .Lenc_key192: cmp $192, %KEYSIZE32 jnz .Lenc_key128 @@ -413,9 +413,9 @@ rijndael_key_setup_enc_intel_local: mov $12, %rax // return # rounds = 12 #endif FRAME_END - ret + RET -.align 4 +.balign 4 .Lenc_key128: cmp $128, %KEYSIZE32 jnz .Lenc_key_invalid_key_bits @@ -453,13 +453,13 @@ rijndael_key_setup_enc_intel_local: mov $10, %rax // return # rounds = 10 #endif FRAME_END - ret + RET .Lenc_key_invalid_param: #ifdef OPENSSL_INTERFACE mov $-1, %rax // user key or AES key pointer is NULL FRAME_END - ret + RET #else /* FALLTHROUGH */ #endif /* OPENSSL_INTERFACE */ @@ -471,7 +471,7 @@ rijndael_key_setup_enc_intel_local: xor %rax, %rax // a key pointer is NULL or invalid keysize #endif /* OPENSSL_INTERFACE */ FRAME_END - ret + RET SET_SIZE(rijndael_key_setup_enc_intel) @@ -522,7 +522,7 @@ FRAME_BEGIN add %AESKEY, %ROUNDS64 mov %ROUNDS64, %ENDAESKEY -.align 4 +.balign 4 .Ldec_key_reorder_loop: movups (%AESKEY), %xmm0 movups (%ROUNDS64), %xmm1 @@ -533,7 +533,7 @@ FRAME_BEGIN cmp %AESKEY, %ROUNDS64 ja .Ldec_key_reorder_loop -.align 4 
+.balign 4 .Ldec_key_inv_loop: movups (%rcx), %xmm0 // Convert an encryption round key to a form usable for decryption @@ -548,7 +548,7 @@ FRAME_BEGIN // OpenSolaris: rax = # rounds (10, 12, or 14) or 0 for error // OpenSSL: rax = 0 for OK, or non-zero for error FRAME_END - ret + RET SET_SIZE(rijndael_key_setup_dec_intel) @@ -622,7 +622,7 @@ ENTRY_NP(aes_encrypt_intel) movups -0x50(%KEYP), %KEY aesenc %KEY, %STATE -.align 4 +.balign 4 .Lenc192: // AES 192 and 256 movups -0x40(%KEYP), %KEY @@ -630,7 +630,7 @@ ENTRY_NP(aes_encrypt_intel) movups -0x30(%KEYP), %KEY aesenc %KEY, %STATE -.align 4 +.balign 4 .Lenc128: // AES 128, 192, and 256 movups -0x20(%KEYP), %KEY @@ -655,7 +655,7 @@ ENTRY_NP(aes_encrypt_intel) aesenclast %KEY, %STATE // last round movups %STATE, (%OUTP) // output - ret + RET SET_SIZE(aes_encrypt_intel) @@ -705,7 +705,7 @@ ENTRY_NP(aes_decrypt_intel) movups -0x50(%KEYP), %KEY aesdec %KEY, %STATE -.align 4 +.balign 4 .Ldec192: // AES 192 and 256 movups -0x40(%KEYP), %KEY @@ -713,7 +713,7 @@ ENTRY_NP(aes_decrypt_intel) movups -0x30(%KEYP), %KEY aesdec %KEY, %STATE -.align 4 +.balign 4 .Ldec128: // AES 128, 192, and 256 movups -0x20(%KEYP), %KEY @@ -738,7 +738,7 @@ ENTRY_NP(aes_decrypt_intel) aesdeclast %KEY, %STATE // last round movups %STATE, (%OUTP) // output - ret + RET SET_SIZE(aes_decrypt_intel) #endif /* lint || __lint */ diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_amd64.S b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_amd64.S index 9db3a3179230..c4870a28ead6 100644 --- a/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_amd64.S +++ b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aes_amd64.S @@ -186,15 +186,15 @@ #if defined(lint) || defined(__lint) #include <sys/types.h> -/* ARGSUSED */ void aes_encrypt_amd64(const uint32_t rk[], int Nr, const uint32_t pt[4], - uint32_t ct[4]) { + uint32_t ct[4]) { + (void) rk, (void) Nr, (void) pt, (void) ct; } -/* ARGSUSED */ void aes_decrypt_amd64(const uint32_t rk[], int Nr, const 
uint32_t ct[4], - uint32_t pt[4]) { + uint32_t pt[4]) { + (void) rk, (void) Nr, (void) pt, (void) ct; } @@ -221,23 +221,23 @@ aes_decrypt_amd64(const uint32_t rk[], int Nr, const uint32_t ct[4], // finite field multiplies by {02}, {04} and {08} -#define f2(x) [[x<<1]^[[[x>>7]&1]*0x11b]] -#define f4(x) [[x<<2]^[[[x>>6]&1]*0x11b]^[[[x>>6]&2]*0x11b]] -#define f8(x) [[x<<3]^[[[x>>5]&1]*0x11b]^[[[x>>5]&2]*0x11b]^[[[x>>5]&4]*0x11b]] +#define f2(x) ((x<<1)^(((x>>7)&1)*0x11b)) +#define f4(x) ((x<<2)^(((x>>6)&1)*0x11b)^(((x>>6)&2)*0x11b)) +#define f8(x) ((x<<3)^(((x>>5)&1)*0x11b)^(((x>>5)&2)*0x11b)^(((x>>5)&4)*0x11b)) // finite field multiplies required in table generation -#define f3(x) [[f2(x)] ^ [x]] -#define f9(x) [[f8(x)] ^ [x]] -#define fb(x) [[f8(x)] ^ [f2(x)] ^ [x]] -#define fd(x) [[f8(x)] ^ [f4(x)] ^ [x]] -#define fe(x) [[f8(x)] ^ [f4(x)] ^ [f2(x)]] +#define f3(x) ((f2(x)) ^ (x)) +#define f9(x) ((f8(x)) ^ (x)) +#define fb(x) ((f8(x)) ^ (f2(x)) ^ (x)) +#define fd(x) ((f8(x)) ^ (f4(x)) ^ (x)) +#define fe(x) ((f8(x)) ^ (f4(x)) ^ (f2(x))) // macros for expanding S-box data -#define u8(x) [f2(x)], [x], [x], [f3(x)], [f2(x)], [x], [x], [f3(x)] -#define v8(x) [fe(x)], [f9(x)], [fd(x)], [fb(x)], [fe(x)], [f9(x)], [fd(x)], [x] -#define w8(x) [x], 0, 0, 0, [x], 0, 0, 0 +#define u8(x) (f2(x)), (x), (x), (f3(x)), (f2(x)), (x), (x), (f3(x)) +#define v8(x) (fe(x)), (f9(x)), (fd(x)), (fb(x)), (fe(x)), (f9(x)), (fd(x)), (x) +#define w8(x) (x), 0, 0, 0, (x), 0, 0, 0 #define enc_vals(x) \ .byte x(0x63),x(0x7c),x(0x77),x(0x7b),x(0xf2),x(0x6b),x(0x6f),x(0xc5); \ @@ -693,8 +693,8 @@ aes_decrypt_amd64(const uint32_t rk[], int Nr, const uint32_t ct[4], * int aes_encrypt(const unsigned char *in, * unsigned char *out, const aes_encrypt_ctx cx[1])/ */ -.data -.align 64 +SECTION_STATIC +.balign 64 enc_tab: enc_vals(u8) #ifdef LAST_ROUND_TABLES @@ -704,6 +704,7 @@ enc_tab: ENTRY_NP(aes_encrypt_amd64) + ENDBR #ifdef GLADMAN_INTERFACE // Original interface sub $[4*8], %rsp // 
gnu/linux/opensolaris binary interface @@ -717,7 +718,7 @@ ENTRY_NP(aes_encrypt_amd64) #else // OpenSolaris OS interface - sub $[4*8], %rsp // Make room on stack to save registers + sub $(4*8), %rsp // Make room on stack to save registers mov %rcx, (%rsp) // Save output pointer (P4) on stack mov %rdi, %r8 // context (P1) mov %rdx, %rdi // P3: save input pointer @@ -748,11 +749,11 @@ ENTRY_NP(aes_encrypt_amd64) lea (kptr,%rsi), kptr // Jump based on byte key length * 16: - cmp $[10*16], %esi + cmp $(10*16), %esi je 3f - cmp $[12*16], %esi + cmp $(12*16), %esi je 2f - cmp $[14*16], %esi + cmp $(14*16), %esi je 1f mov $-1, %rax // error jmp 4f @@ -784,8 +785,8 @@ ENTRY_NP(aes_encrypt_amd64) mov 1*8(%rsp), %rbx mov 2*8(%rsp), %rbp mov 3*8(%rsp), %r12 - add $[4*8], %rsp - ret + add $(4*8), %rsp + RET SET_SIZE(aes_encrypt_amd64) @@ -798,8 +799,8 @@ ENTRY_NP(aes_encrypt_amd64) * int aes_decrypt(const unsigned char *in, * unsigned char *out, const aes_encrypt_ctx cx[1])/ */ -.data -.align 64 +SECTION_STATIC +.balign 64 dec_tab: dec_vals(v8) #ifdef LAST_ROUND_TABLES @@ -809,6 +810,7 @@ dec_tab: ENTRY_NP(aes_decrypt_amd64) + ENDBR #ifdef GLADMAN_INTERFACE // Original interface sub $[4*8], %rsp // gnu/linux/opensolaris binary interface @@ -822,7 +824,7 @@ ENTRY_NP(aes_decrypt_amd64) #else // OpenSolaris OS interface - sub $[4*8], %rsp // Make room on stack to save registers + sub $(4*8), %rsp // Make room on stack to save registers mov %rcx, (%rsp) // Save output pointer (P4) on stack mov %rdi, %r8 // context (P1) mov %rdx, %rdi // P3: save input pointer @@ -859,11 +861,11 @@ ENTRY_NP(aes_decrypt_amd64) xor rofs+12(%rdi), %edx // Jump based on byte key length * 16: - cmp $[10*16], %esi + cmp $(10*16), %esi je 3f - cmp $[12*16], %esi + cmp $(12*16), %esi je 2f - cmp $[14*16], %esi + cmp $(14*16), %esi je 1f mov $-1, %rax // error jmp 4f @@ -895,11 +897,11 @@ ENTRY_NP(aes_decrypt_amd64) mov 1*8(%rsp), %rbx mov 2*8(%rsp), %rbp mov 3*8(%rsp), %r12 - add $[4*8], %rsp - ret + add 
$(4*8), %rsp + RET SET_SIZE(aes_decrypt_amd64) -#endif /* lint || __lint */ +#endif /* lint || __lint */ #ifdef __ELF__ .section .note.GNU-stack,"",%progbits diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aestab2.h b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aestab2.h index eb13f72b10d8..003534e0fa50 100644 --- a/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aestab2.h +++ b/sys/contrib/openzfs/module/icp/asm-x86_64/aes/aestab2.h @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx2.S b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx2.S new file mode 100644 index 000000000000..0ebec5c1095e --- /dev/null +++ b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx2.S @@ -0,0 +1,1828 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3 + * Copyright (c) 2019-2020 Samuel Neves + * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de> + */ + +#if defined(HAVE_AVX2) + +#define _ASM +#include <sys/asm_linkage.h> + +.intel_syntax noprefix +.text + +ENTRY_ALIGN(zfs_blake3_hash_many_avx2, 64) + ENDBR + push r15 + push r14 + push r13 + push r12 + push rbx + push rbp + mov rbp, rsp + sub rsp, 680 + and rsp, 0xFFFFFFFFFFFFFFC0 + neg r9d + vmovd xmm0, r9d + vpbroadcastd ymm0, xmm0 + vmovdqa ymmword ptr [rsp+0x280], ymm0 + vpand ymm1, ymm0, ymmword ptr [ADD0+rip] + vpand ymm2, ymm0, ymmword ptr [ADD1+rip] + vmovdqa ymmword ptr [rsp+0x220], ymm2 + vmovd xmm2, r8d + vpbroadcastd ymm2, xmm2 + vpaddd ymm2, ymm2, ymm1 + vmovdqa ymmword ptr [rsp+0x240], ymm2 + vpxor ymm1, ymm1, ymmword ptr [CMP_MSB_MASK+rip] + vpxor ymm2, ymm2, ymmword ptr [CMP_MSB_MASK+rip] + vpcmpgtd ymm2, ymm1, ymm2 + shr r8, 32 + vmovd xmm3, r8d + vpbroadcastd ymm3, xmm3 + vpsubd ymm3, ymm3, ymm2 + vmovdqa ymmword ptr [rsp+0x260], ymm3 + shl rdx, 6 + mov qword ptr [rsp+0x2A0], rdx + cmp rsi, 8 + jc 3f +2: + vpbroadcastd ymm0, dword ptr [rcx] + vpbroadcastd ymm1, dword ptr [rcx+0x4] + vpbroadcastd ymm2, dword ptr [rcx+0x8] + vpbroadcastd ymm3, dword ptr [rcx+0xC] + vpbroadcastd ymm4, dword ptr [rcx+0x10] + vpbroadcastd ymm5, dword ptr [rcx+0x14] + vpbroadcastd ymm6, dword ptr [rcx+0x18] + vpbroadcastd ymm7, dword ptr [rcx+0x1C] + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + mov r10, qword ptr [rdi+0x10] + mov r11, qword ptr [rdi+0x18] + mov r12, qword ptr [rdi+0x20] + mov r13, qword ptr [rdi+0x28] + mov r14, qword ptr [rdi+0x30] + mov r15, qword ptr [rdi+0x38] + movzx eax, byte ptr [rbp+0x38] + movzx ebx, byte ptr [rbp+0x40] 
+ or eax, ebx + xor edx, edx +.p2align 5 +9: + movzx ebx, byte ptr [rbp+0x48] + or ebx, eax + add rdx, 64 + cmp rdx, qword ptr [rsp+0x2A0] + cmove eax, ebx + mov dword ptr [rsp+0x200], eax + vmovups xmm8, xmmword ptr [r8+rdx-0x40] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x40], 0x01 + vmovups xmm9, xmmword ptr [r9+rdx-0x40] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x40], 0x01 + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-0x40] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x40], 0x01 + vmovups xmm11, xmmword ptr [r11+rdx-0x40] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x40], 0x01 + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm8, ymm12, ymm14, 136 + vmovaps ymmword ptr [rsp], ymm8 + vshufps ymm9, ymm12, ymm14, 221 + vmovaps ymmword ptr [rsp+0x20], ymm9 + vshufps ymm10, ymm13, ymm15, 136 + vmovaps ymmword ptr [rsp+0x40], ymm10 + vshufps ymm11, ymm13, ymm15, 221 + vmovaps ymmword ptr [rsp+0x60], ymm11 + vmovups xmm8, xmmword ptr [r8+rdx-0x30] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x30], 0x01 + vmovups xmm9, xmmword ptr [r9+rdx-0x30] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x30], 0x01 + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-0x30] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x30], 0x01 + vmovups xmm11, xmmword ptr [r11+rdx-0x30] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x30], 0x01 + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm8, ymm12, ymm14, 136 + vmovaps ymmword ptr [rsp+0x80], ymm8 + vshufps ymm9, ymm12, ymm14, 221 + vmovaps ymmword ptr [rsp+0xA0], ymm9 + vshufps ymm10, ymm13, ymm15, 136 + vmovaps ymmword ptr [rsp+0xC0], ymm10 + vshufps ymm11, ymm13, ymm15, 221 + vmovaps ymmword ptr [rsp+0xE0], ymm11 + vmovups xmm8, xmmword ptr [r8+rdx-0x20] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x20], 0x01 + vmovups xmm9, xmmword ptr [r9+rdx-0x20] + 
vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x20], 0x01 + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-0x20] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x20], 0x01 + vmovups xmm11, xmmword ptr [r11+rdx-0x20] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x20], 0x01 + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm8, ymm12, ymm14, 136 + vmovaps ymmword ptr [rsp+0x100], ymm8 + vshufps ymm9, ymm12, ymm14, 221 + vmovaps ymmword ptr [rsp+0x120], ymm9 + vshufps ymm10, ymm13, ymm15, 136 + vmovaps ymmword ptr [rsp+0x140], ymm10 + vshufps ymm11, ymm13, ymm15, 221 + vmovaps ymmword ptr [rsp+0x160], ymm11 + vmovups xmm8, xmmword ptr [r8+rdx-0x10] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x10], 0x01 + vmovups xmm9, xmmword ptr [r9+rdx-0x10] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x10], 0x01 + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-0x10] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x10], 0x01 + vmovups xmm11, xmmword ptr [r11+rdx-0x10] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x10], 0x01 + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm8, ymm12, ymm14, 136 + vmovaps ymmword ptr [rsp+0x180], ymm8 + vshufps ymm9, ymm12, ymm14, 221 + vmovaps ymmword ptr [rsp+0x1A0], ymm9 + vshufps ymm10, ymm13, ymm15, 136 + vmovaps ymmword ptr [rsp+0x1C0], ymm10 + vshufps ymm11, ymm13, ymm15, 221 + vmovaps ymmword ptr [rsp+0x1E0], ymm11 + vpbroadcastd ymm15, dword ptr [rsp+0x200] + prefetcht0 [r8+rdx+0x80] + prefetcht0 [r12+rdx+0x80] + prefetcht0 [r9+rdx+0x80] + prefetcht0 [r13+rdx+0x80] + prefetcht0 [r10+rdx+0x80] + prefetcht0 [r14+rdx+0x80] + prefetcht0 [r11+rdx+0x80] + prefetcht0 [r15+rdx+0x80] + vpaddd ymm0, ymm0, ymmword ptr [rsp] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x40] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x80] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0xC0] + vpaddd ymm0, ymm0, ymm4 + vpaddd 
ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm0, ymmword ptr [rsp+0x240] + vpxor ymm13, ymm1, ymmword ptr [rsp+0x260] + vpxor ymm14, ymm2, ymmword ptr [BLAKE3_BLOCK_LEN+rip] + vpxor ymm15, ymm3, ymm15 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [BLAKE3_IV_0+rip] + vpaddd ymm9, ymm13, ymmword ptr [BLAKE3_IV_1+rip] + vpaddd ymm10, ymm14, ymmword ptr [BLAKE3_IV_2+rip] + vpaddd ymm11, ymm15, ymmword ptr [BLAKE3_IV_3+rip] + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x20] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x60] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0xA0] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0xE0] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, 
ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x100] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x140] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x180] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1C0] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x120] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x160] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1A0] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1E0] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, 
ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x40] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x60] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0xE0] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x80] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0xC0] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x140] + vpaddd ymm2, ymm2, ymmword ptr [rsp] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1A0] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, 
ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x20] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x180] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x120] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1E0] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x160] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0xA0] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1C0] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x100] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + 
vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x60] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x140] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1A0] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0xE0] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x80] + vpaddd ymm1, 
ymm1, ymmword ptr [rsp+0x180] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x40] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1C0] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0xC0] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x120] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x160] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x100] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + 
vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0xA0] + vpaddd ymm1, ymm1, ymmword ptr [rsp] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1E0] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x20] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x140] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x180] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1C0] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1A0] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, 
ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0xE0] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x120] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x60] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1E0] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x80] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x160] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0xA0] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x20] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb 
ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x40] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x100] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0xC0] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x180] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x120] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x1E0] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1C0] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + 
vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1A0] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x160] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x140] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x100] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0xE0] + vpaddd ymm1, 
ymm1, ymmword ptr [rsp+0xA0] + vpaddd ymm2, ymm2, ymmword ptr [rsp] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0xC0] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x40] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x60] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x20] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x80] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor 
ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x120] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x160] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x100] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1E0] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1C0] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0xA0] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x180] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x20] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, 
ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1A0] + vpaddd ymm1, ymm1, ymmword ptr [rsp] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x40] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x80] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x60] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x140] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0xC0] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0xE0] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb 
ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x160] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0xA0] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x20] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x100] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1E0] + vpaddd ymm1, ymm1, ymmword ptr [rsp] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x120] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0xC0] + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + 
vpxor ymm12, ymm12, ymm0 + vpxor ymm13, ymm13, ymm1 + vpxor ymm14, ymm14, ymm2 + vpxor ymm15, ymm15, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpshufb ymm15, ymm15, ymm8 + vpaddd ymm8, ymm12, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxor ymm4, ymm4, ymm8 + vpxor ymm5, ymm5, ymm9 + vpxor ymm6, ymm6, ymm10 + vpxor ymm7, ymm7, ymm11 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x1C0] + vpaddd ymm1, ymm1, ymmword ptr [rsp+0x40] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x60] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0xE0] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT16+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vmovdqa ymmword ptr [rsp+0x200], ymm8 + vpsrld ymm8, ymm5, 12 + vpslld ymm5, ymm5, 20 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 12 + vpslld ymm6, ymm6, 20 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 12 + vpslld ymm7, ymm7, 20 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 12 + vpslld ymm4, ymm4, 20 + vpor ymm4, ymm4, ymm8 + vpaddd ymm0, ymm0, ymmword ptr [rsp+0x140] + vpaddd ymm1, 
ymm1, ymmword ptr [rsp+0x180] + vpaddd ymm2, ymm2, ymmword ptr [rsp+0x80] + vpaddd ymm3, ymm3, ymmword ptr [rsp+0x1A0] + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxor ymm15, ymm15, ymm0 + vpxor ymm12, ymm12, ymm1 + vpxor ymm13, ymm13, ymm2 + vpxor ymm14, ymm14, ymm3 + vbroadcasti128 ymm8, xmmword ptr [ROT8+rip] + vpshufb ymm15, ymm15, ymm8 + vpshufb ymm12, ymm12, ymm8 + vpshufb ymm13, ymm13, ymm8 + vpshufb ymm14, ymm14, ymm8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm13, ymmword ptr [rsp+0x200] + vpaddd ymm9, ymm9, ymm14 + vpxor ymm5, ymm5, ymm10 + vpxor ymm6, ymm6, ymm11 + vpxor ymm7, ymm7, ymm8 + vpxor ymm4, ymm4, ymm9 + vpxor ymm0, ymm0, ymm8 + vpxor ymm1, ymm1, ymm9 + vpxor ymm2, ymm2, ymm10 + vpxor ymm3, ymm3, ymm11 + vpsrld ymm8, ymm5, 7 + vpslld ymm5, ymm5, 25 + vpor ymm5, ymm5, ymm8 + vpsrld ymm8, ymm6, 7 + vpslld ymm6, ymm6, 25 + vpor ymm6, ymm6, ymm8 + vpsrld ymm8, ymm7, 7 + vpslld ymm7, ymm7, 25 + vpor ymm7, ymm7, ymm8 + vpsrld ymm8, ymm4, 7 + vpslld ymm4, ymm4, 25 + vpor ymm4, ymm4, ymm8 + vpxor ymm4, ymm4, ymm12 + vpxor ymm5, ymm5, ymm13 + vpxor ymm6, ymm6, ymm14 + vpxor ymm7, ymm7, ymm15 + movzx eax, byte ptr [rbp+0x38] + jne 9b + mov rbx, qword ptr [rbp+0x50] + vunpcklps ymm8, ymm0, ymm1 + vunpcklps ymm9, ymm2, ymm3 + vunpckhps ymm10, ymm0, ymm1 + vunpcklps ymm11, ymm4, ymm5 + vunpcklps ymm0, ymm6, ymm7 + vshufps ymm12, ymm8, ymm9, 78 + vblendps ymm1, ymm8, ymm12, 0xCC + vshufps ymm8, ymm11, ymm0, 78 + vunpckhps ymm13, ymm2, ymm3 + vblendps ymm2, ymm11, ymm8, 0xCC + vblendps ymm3, ymm12, ymm9, 0xCC + vperm2f128 ymm12, ymm1, ymm2, 0x20 + vmovups ymmword ptr [rbx], ymm12 + vunpckhps ymm14, ymm4, ymm5 + vblendps ymm4, ymm8, ymm0, 0xCC + vunpckhps ymm15, ymm6, ymm7 + vperm2f128 ymm7, ymm3, ymm4, 0x20 + vmovups ymmword ptr [rbx+0x20], ymm7 + vshufps ymm5, ymm10, ymm13, 78 + vblendps ymm6, ymm5, ymm13, 0xCC + vshufps ymm13, ymm14, ymm15, 78 + vblendps ymm10, 
ymm10, ymm5, 0xCC + vblendps ymm14, ymm14, ymm13, 0xCC + vperm2f128 ymm8, ymm10, ymm14, 0x20 + vmovups ymmword ptr [rbx+0x40], ymm8 + vblendps ymm15, ymm13, ymm15, 0xCC + vperm2f128 ymm13, ymm6, ymm15, 0x20 + vmovups ymmword ptr [rbx+0x60], ymm13 + vperm2f128 ymm9, ymm1, ymm2, 0x31 + vperm2f128 ymm11, ymm3, ymm4, 0x31 + vmovups ymmword ptr [rbx+0x80], ymm9 + vperm2f128 ymm14, ymm10, ymm14, 0x31 + vperm2f128 ymm15, ymm6, ymm15, 0x31 + vmovups ymmword ptr [rbx+0xA0], ymm11 + vmovups ymmword ptr [rbx+0xC0], ymm14 + vmovups ymmword ptr [rbx+0xE0], ymm15 + vmovdqa ymm0, ymmword ptr [rsp+0x220] + vpaddd ymm1, ymm0, ymmword ptr [rsp+0x240] + vmovdqa ymmword ptr [rsp+0x240], ymm1 + vpxor ymm0, ymm0, ymmword ptr [CMP_MSB_MASK+rip] + vpxor ymm2, ymm1, ymmword ptr [CMP_MSB_MASK+rip] + vpcmpgtd ymm2, ymm0, ymm2 + vmovdqa ymm0, ymmword ptr [rsp+0x260] + vpsubd ymm2, ymm0, ymm2 + vmovdqa ymmword ptr [rsp+0x260], ymm2 + add rdi, 64 + add rbx, 256 + mov qword ptr [rbp+0x50], rbx + sub rsi, 8 + cmp rsi, 8 + jnc 2b + test rsi, rsi + jnz 3f +4: + vzeroupper + mov rsp, rbp + pop rbp + pop rbx + pop r12 + pop r13 + pop r14 + pop r15 + RET +.p2align 5 +3: + mov rbx, qword ptr [rbp+0x50] + mov r15, qword ptr [rsp+0x2A0] + movzx r13d, byte ptr [rbp+0x38] + movzx r12d, byte ptr [rbp+0x48] + test rsi, 0x4 + je 3f + vbroadcasti128 ymm0, xmmword ptr [rcx] + vbroadcasti128 ymm1, xmmword ptr [rcx+0x10] + vmovdqa ymm8, ymm0 + vmovdqa ymm9, ymm1 + vbroadcasti128 ymm12, xmmword ptr [rsp+0x240] + vbroadcasti128 ymm13, xmmword ptr [rsp+0x260] + vpunpckldq ymm14, ymm12, ymm13 + vpunpckhdq ymm15, ymm12, ymm13 + vpermq ymm14, ymm14, 0x50 + vpermq ymm15, ymm15, 0x50 + vbroadcasti128 ymm12, xmmword ptr [BLAKE3_BLOCK_LEN+rip] + vpblendd ymm14, ymm14, ymm12, 0x44 + vpblendd ymm15, ymm15, ymm12, 0x44 + vmovdqa ymmword ptr [rsp], ymm14 + vmovdqa ymmword ptr [rsp+0x20], ymm15 + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + mov r10, qword ptr [rdi+0x10] + mov r11, qword ptr [rdi+0x18] + movzx eax, 
byte ptr [rbp+0x40] + or eax, r13d + xor edx, edx +.p2align 5 +2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + mov dword ptr [rsp+0x200], eax + vmovups ymm2, ymmword ptr [r8+rdx-0x40] + vinsertf128 ymm2, ymm2, xmmword ptr [r9+rdx-0x40], 0x01 + vmovups ymm3, ymmword ptr [r8+rdx-0x30] + vinsertf128 ymm3, ymm3, xmmword ptr [r9+rdx-0x30], 0x01 + vshufps ymm4, ymm2, ymm3, 136 + vshufps ymm5, ymm2, ymm3, 221 + vmovups ymm2, ymmword ptr [r8+rdx-0x20] + vinsertf128 ymm2, ymm2, xmmword ptr [r9+rdx-0x20], 0x01 + vmovups ymm3, ymmword ptr [r8+rdx-0x10] + vinsertf128 ymm3, ymm3, xmmword ptr [r9+rdx-0x10], 0x01 + vshufps ymm6, ymm2, ymm3, 136 + vshufps ymm7, ymm2, ymm3, 221 + vpshufd ymm6, ymm6, 0x93 + vpshufd ymm7, ymm7, 0x93 + vmovups ymm10, ymmword ptr [r10+rdx-0x40] + vinsertf128 ymm10, ymm10, xmmword ptr [r11+rdx-0x40], 0x01 + vmovups ymm11, ymmword ptr [r10+rdx-0x30] + vinsertf128 ymm11, ymm11, xmmword ptr [r11+rdx-0x30], 0x01 + vshufps ymm12, ymm10, ymm11, 136 + vshufps ymm13, ymm10, ymm11, 221 + vmovups ymm10, ymmword ptr [r10+rdx-0x20] + vinsertf128 ymm10, ymm10, xmmword ptr [r11+rdx-0x20], 0x01 + vmovups ymm11, ymmword ptr [r10+rdx-0x10] + vinsertf128 ymm11, ymm11, xmmword ptr [r11+rdx-0x10], 0x01 + vshufps ymm14, ymm10, ymm11, 136 + vshufps ymm15, ymm10, ymm11, 221 + vpshufd ymm14, ymm14, 0x93 + vpshufd ymm15, ymm15, 0x93 + prefetcht0 [r8+rdx+0x80] + prefetcht0 [r9+rdx+0x80] + prefetcht0 [r10+rdx+0x80] + prefetcht0 [r11+rdx+0x80] + vpbroadcastd ymm2, dword ptr [rsp+0x200] + vmovdqa ymm3, ymmword ptr [rsp] + vmovdqa ymm11, ymmword ptr [rsp+0x20] + vpblendd ymm3, ymm3, ymm2, 0x88 + vpblendd ymm11, ymm11, ymm2, 0x88 + vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV+rip] + vmovdqa ymm10, ymm2 + mov al, 7 +9: + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm8, ymm8, ymm12 + vmovdqa ymmword ptr [rsp+0x40], ymm4 + nop + vmovdqa ymmword ptr [rsp+0x60], ymm12 + nop + vpaddd ymm0, ymm0, ymm1 + vpaddd ymm8, ymm8, ymm9 + vpxor ymm3, ymm3, ymm0 + vpxor ymm11, 
ymm11, ymm8 + vbroadcasti128 ymm4, xmmword ptr [ROT16+rip] + vpshufb ymm3, ymm3, ymm4 + vpshufb ymm11, ymm11, ymm4 + vpaddd ymm2, ymm2, ymm3 + vpaddd ymm10, ymm10, ymm11 + vpxor ymm1, ymm1, ymm2 + vpxor ymm9, ymm9, ymm10 + vpsrld ymm4, ymm1, 12 + vpslld ymm1, ymm1, 20 + vpor ymm1, ymm1, ymm4 + vpsrld ymm4, ymm9, 12 + vpslld ymm9, ymm9, 20 + vpor ymm9, ymm9, ymm4 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm0, ymm0, ymm1 + vpaddd ymm8, ymm8, ymm9 + vmovdqa ymmword ptr [rsp+0x80], ymm5 + vmovdqa ymmword ptr [rsp+0xA0], ymm13 + vpxor ymm3, ymm3, ymm0 + vpxor ymm11, ymm11, ymm8 + vbroadcasti128 ymm4, xmmword ptr [ROT8+rip] + vpshufb ymm3, ymm3, ymm4 + vpshufb ymm11, ymm11, ymm4 + vpaddd ymm2, ymm2, ymm3 + vpaddd ymm10, ymm10, ymm11 + vpxor ymm1, ymm1, ymm2 + vpxor ymm9, ymm9, ymm10 + vpsrld ymm4, ymm1, 7 + vpslld ymm1, ymm1, 25 + vpor ymm1, ymm1, ymm4 + vpsrld ymm4, ymm9, 7 + vpslld ymm9, ymm9, 25 + vpor ymm9, ymm9, ymm4 + vpshufd ymm0, ymm0, 0x93 + vpshufd ymm8, ymm8, 0x93 + vpshufd ymm3, ymm3, 0x4E + vpshufd ymm11, ymm11, 0x4E + vpshufd ymm2, ymm2, 0x39 + vpshufd ymm10, ymm10, 0x39 + vpaddd ymm0, ymm0, ymm6 + vpaddd ymm8, ymm8, ymm14 + vpaddd ymm0, ymm0, ymm1 + vpaddd ymm8, ymm8, ymm9 + vpxor ymm3, ymm3, ymm0 + vpxor ymm11, ymm11, ymm8 + vbroadcasti128 ymm4, xmmword ptr [ROT16+rip] + vpshufb ymm3, ymm3, ymm4 + vpshufb ymm11, ymm11, ymm4 + vpaddd ymm2, ymm2, ymm3 + vpaddd ymm10, ymm10, ymm11 + vpxor ymm1, ymm1, ymm2 + vpxor ymm9, ymm9, ymm10 + vpsrld ymm4, ymm1, 12 + vpslld ymm1, ymm1, 20 + vpor ymm1, ymm1, ymm4 + vpsrld ymm4, ymm9, 12 + vpslld ymm9, ymm9, 20 + vpor ymm9, ymm9, ymm4 + vpaddd ymm0, ymm0, ymm7 + vpaddd ymm8, ymm8, ymm15 + vpaddd ymm0, ymm0, ymm1 + vpaddd ymm8, ymm8, ymm9 + vpxor ymm3, ymm3, ymm0 + vpxor ymm11, ymm11, ymm8 + vbroadcasti128 ymm4, xmmword ptr [ROT8+rip] + vpshufb ymm3, ymm3, ymm4 + vpshufb ymm11, ymm11, ymm4 + vpaddd ymm2, ymm2, ymm3 + vpaddd ymm10, ymm10, ymm11 + vpxor ymm1, ymm1, ymm2 + vpxor ymm9, ymm9, ymm10 + 
vpsrld ymm4, ymm1, 7 + vpslld ymm1, ymm1, 25 + vpor ymm1, ymm1, ymm4 + vpsrld ymm4, ymm9, 7 + vpslld ymm9, ymm9, 25 + vpor ymm9, ymm9, ymm4 + vpshufd ymm0, ymm0, 0x39 + vpshufd ymm8, ymm8, 0x39 + vpshufd ymm3, ymm3, 0x4E + vpshufd ymm11, ymm11, 0x4E + vpshufd ymm2, ymm2, 0x93 + vpshufd ymm10, ymm10, 0x93 + dec al + je 9f + vmovdqa ymm4, ymmword ptr [rsp+0x40] + vmovdqa ymm5, ymmword ptr [rsp+0x80] + vshufps ymm12, ymm4, ymm5, 214 + vpshufd ymm13, ymm4, 0x0F + vpshufd ymm4, ymm12, 0x39 + vshufps ymm12, ymm6, ymm7, 250 + vpblendd ymm13, ymm13, ymm12, 0xAA + vpunpcklqdq ymm12, ymm7, ymm5 + vpblendd ymm12, ymm12, ymm6, 0x88 + vpshufd ymm12, ymm12, 0x78 + vpunpckhdq ymm5, ymm5, ymm7 + vpunpckldq ymm6, ymm6, ymm5 + vpshufd ymm7, ymm6, 0x1E + vmovdqa ymmword ptr [rsp+0x40], ymm13 + vmovdqa ymmword ptr [rsp+0x80], ymm12 + vmovdqa ymm12, ymmword ptr [rsp+0x60] + vmovdqa ymm13, ymmword ptr [rsp+0xA0] + vshufps ymm5, ymm12, ymm13, 214 + vpshufd ymm6, ymm12, 0x0F + vpshufd ymm12, ymm5, 0x39 + vshufps ymm5, ymm14, ymm15, 250 + vpblendd ymm6, ymm6, ymm5, 0xAA + vpunpcklqdq ymm5, ymm15, ymm13 + vpblendd ymm5, ymm5, ymm14, 0x88 + vpshufd ymm5, ymm5, 0x78 + vpunpckhdq ymm13, ymm13, ymm15 + vpunpckldq ymm14, ymm14, ymm13 + vpshufd ymm15, ymm14, 0x1E + vmovdqa ymm13, ymm6 + vmovdqa ymm14, ymm5 + vmovdqa ymm5, ymmword ptr [rsp+0x40] + vmovdqa ymm6, ymmword ptr [rsp+0x80] + jmp 9b +9: + vpxor ymm0, ymm0, ymm2 + vpxor ymm1, ymm1, ymm3 + vpxor ymm8, ymm8, ymm10 + vpxor ymm9, ymm9, ymm11 + mov eax, r13d + cmp rdx, r15 + jne 2b + vmovdqu xmmword ptr [rbx], xmm0 + vmovdqu xmmword ptr [rbx+0x10], xmm1 + vextracti128 xmmword ptr [rbx+0x20], ymm0, 0x01 + vextracti128 xmmword ptr [rbx+0x30], ymm1, 0x01 + vmovdqu xmmword ptr [rbx+0x40], xmm8 + vmovdqu xmmword ptr [rbx+0x50], xmm9 + vextracti128 xmmword ptr [rbx+0x60], ymm8, 0x01 + vextracti128 xmmword ptr [rbx+0x70], ymm9, 0x01 + vmovaps xmm8, xmmword ptr [rsp+0x280] + vmovaps xmm0, xmmword ptr [rsp+0x240] + vmovaps xmm1, xmmword ptr [rsp+0x250] 
+ vmovaps xmm2, xmmword ptr [rsp+0x260] + vmovaps xmm3, xmmword ptr [rsp+0x270] + vblendvps xmm0, xmm0, xmm1, xmm8 + vblendvps xmm2, xmm2, xmm3, xmm8 + vmovaps xmmword ptr [rsp+0x240], xmm0 + vmovaps xmmword ptr [rsp+0x260], xmm2 + add rbx, 128 + add rdi, 32 + sub rsi, 4 +3: + test rsi, 0x2 + je 3f + vbroadcasti128 ymm0, xmmword ptr [rcx] + vbroadcasti128 ymm1, xmmword ptr [rcx+0x10] + vmovd xmm13, dword ptr [rsp+0x240] + vpinsrd xmm13, xmm13, dword ptr [rsp+0x260], 1 + vpinsrd xmm13, xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 + vmovd xmm14, dword ptr [rsp+0x244] + vpinsrd xmm14, xmm14, dword ptr [rsp+0x264], 1 + vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 + vinserti128 ymm13, ymm13, xmm14, 0x01 + vbroadcasti128 ymm14, xmmword ptr [ROT16+rip] + vbroadcasti128 ymm15, xmmword ptr [ROT8+rip] + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + movzx eax, byte ptr [rbp+0x40] + or eax, r13d + xor edx, edx +.p2align 5 +2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + mov dword ptr [rsp+0x200], eax + vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV+rip] + vpbroadcastd ymm8, dword ptr [rsp+0x200] + vpblendd ymm3, ymm13, ymm8, 0x88 + vmovups ymm8, ymmword ptr [r8+rdx-0x40] + vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-0x40], 0x01 + vmovups ymm9, ymmword ptr [r8+rdx-0x30] + vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-0x30], 0x01 + vshufps ymm4, ymm8, ymm9, 136 + vshufps ymm5, ymm8, ymm9, 221 + vmovups ymm8, ymmword ptr [r8+rdx-0x20] + vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-0x20], 0x01 + vmovups ymm9, ymmword ptr [r8+rdx-0x10] + vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-0x10], 0x01 + vshufps ymm6, ymm8, ymm9, 136 + vshufps ymm7, ymm8, ymm9, 221 + vpshufd ymm6, ymm6, 0x93 + vpshufd ymm7, ymm7, 0x93 + mov al, 7 +9: + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm0, ymm0, ymm1 + vpxor ymm3, ymm3, ymm0 + vpshufb ymm3, ymm3, ymm14 + vpaddd ymm2, ymm2, ymm3 + vpxor ymm1, ymm1, ymm2 + vpsrld ymm8, ymm1, 12 + vpslld ymm1, ymm1, 20 + vpor 
ymm1, ymm1, ymm8 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm0, ymm0, ymm1 + vpxor ymm3, ymm3, ymm0 + vpshufb ymm3, ymm3, ymm15 + vpaddd ymm2, ymm2, ymm3 + vpxor ymm1, ymm1, ymm2 + vpsrld ymm8, ymm1, 7 + vpslld ymm1, ymm1, 25 + vpor ymm1, ymm1, ymm8 + vpshufd ymm0, ymm0, 0x93 + vpshufd ymm3, ymm3, 0x4E + vpshufd ymm2, ymm2, 0x39 + vpaddd ymm0, ymm0, ymm6 + vpaddd ymm0, ymm0, ymm1 + vpxor ymm3, ymm3, ymm0 + vpshufb ymm3, ymm3, ymm14 + vpaddd ymm2, ymm2, ymm3 + vpxor ymm1, ymm1, ymm2 + vpsrld ymm8, ymm1, 12 + vpslld ymm1, ymm1, 20 + vpor ymm1, ymm1, ymm8 + vpaddd ymm0, ymm0, ymm7 + vpaddd ymm0, ymm0, ymm1 + vpxor ymm3, ymm3, ymm0 + vpshufb ymm3, ymm3, ymm15 + vpaddd ymm2, ymm2, ymm3 + vpxor ymm1, ymm1, ymm2 + vpsrld ymm8, ymm1, 7 + vpslld ymm1, ymm1, 25 + vpor ymm1, ymm1, ymm8 + vpshufd ymm0, ymm0, 0x39 + vpshufd ymm3, ymm3, 0x4E + vpshufd ymm2, ymm2, 0x93 + dec al + jz 9f + vshufps ymm8, ymm4, ymm5, 214 + vpshufd ymm9, ymm4, 0x0F + vpshufd ymm4, ymm8, 0x39 + vshufps ymm8, ymm6, ymm7, 250 + vpblendd ymm9, ymm9, ymm8, 0xAA + vpunpcklqdq ymm8, ymm7, ymm5 + vpblendd ymm8, ymm8, ymm6, 0x88 + vpshufd ymm8, ymm8, 0x78 + vpunpckhdq ymm5, ymm5, ymm7 + vpunpckldq ymm6, ymm6, ymm5 + vpshufd ymm7, ymm6, 0x1E + vmovdqa ymm5, ymm9 + vmovdqa ymm6, ymm8 + jmp 9b +9: + vpxor ymm0, ymm0, ymm2 + vpxor ymm1, ymm1, ymm3 + mov eax, r13d + cmp rdx, r15 + jne 2b + vmovdqu xmmword ptr [rbx], xmm0 + vmovdqu xmmword ptr [rbx+0x10], xmm1 + vextracti128 xmmword ptr [rbx+0x20], ymm0, 0x01 + vextracti128 xmmword ptr [rbx+0x30], ymm1, 0x01 + vmovaps ymm8, ymmword ptr [rsp+0x280] + vmovaps ymm0, ymmword ptr [rsp+0x240] + vmovups ymm1, ymmword ptr [rsp+0x248] + vmovaps ymm2, ymmword ptr [rsp+0x260] + vmovups ymm3, ymmword ptr [rsp+0x268] + vblendvps ymm0, ymm0, ymm1, ymm8 + vblendvps ymm2, ymm2, ymm3, ymm8 + vmovaps ymmword ptr [rsp+0x240], ymm0 + vmovaps ymmword ptr [rsp+0x260], ymm2 + add rbx, 64 + add rdi, 16 + sub rsi, 2 +3: + test rsi, 0x1 + je 4b + vmovdqu xmm0, xmmword ptr [rcx] + vmovdqu xmm1, 
xmmword ptr [rcx+0x10] + vmovd xmm3, dword ptr [rsp+0x240] + vpinsrd xmm3, xmm3, dword ptr [rsp+0x260], 1 + vpinsrd xmm13, xmm3, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 + vmovdqa xmm14, xmmword ptr [ROT16+rip] + vmovdqa xmm15, xmmword ptr [ROT8+rip] + mov r8, qword ptr [rdi] + movzx eax, byte ptr [rbp+0x40] + or eax, r13d + xor edx, edx +.p2align 5 +2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + vmovdqa xmm2, xmmword ptr [BLAKE3_IV+rip] + vmovdqa xmm3, xmm13 + vpinsrd xmm3, xmm3, eax, 3 + vmovups xmm8, xmmword ptr [r8+rdx-0x40] + vmovups xmm9, xmmword ptr [r8+rdx-0x30] + vshufps xmm4, xmm8, xmm9, 136 + vshufps xmm5, xmm8, xmm9, 221 + vmovups xmm8, xmmword ptr [r8+rdx-0x20] + vmovups xmm9, xmmword ptr [r8+rdx-0x10] + vshufps xmm6, xmm8, xmm9, 136 + vshufps xmm7, xmm8, xmm9, 221 + vpshufd xmm6, xmm6, 0x93 + vpshufd xmm7, xmm7, 0x93 + mov al, 7 +9: + vpaddd xmm0, xmm0, xmm4 + vpaddd xmm0, xmm0, xmm1 + vpxor xmm3, xmm3, xmm0 + vpshufb xmm3, xmm3, xmm14 + vpaddd xmm2, xmm2, xmm3 + vpxor xmm1, xmm1, xmm2 + vpsrld xmm8, xmm1, 12 + vpslld xmm1, xmm1, 20 + vpor xmm1, xmm1, xmm8 + vpaddd xmm0, xmm0, xmm5 + vpaddd xmm0, xmm0, xmm1 + vpxor xmm3, xmm3, xmm0 + vpshufb xmm3, xmm3, xmm15 + vpaddd xmm2, xmm2, xmm3 + vpxor xmm1, xmm1, xmm2 + vpsrld xmm8, xmm1, 7 + vpslld xmm1, xmm1, 25 + vpor xmm1, xmm1, xmm8 + vpshufd xmm0, xmm0, 0x93 + vpshufd xmm3, xmm3, 0x4E + vpshufd xmm2, xmm2, 0x39 + vpaddd xmm0, xmm0, xmm6 + vpaddd xmm0, xmm0, xmm1 + vpxor xmm3, xmm3, xmm0 + vpshufb xmm3, xmm3, xmm14 + vpaddd xmm2, xmm2, xmm3 + vpxor xmm1, xmm1, xmm2 + vpsrld xmm8, xmm1, 12 + vpslld xmm1, xmm1, 20 + vpor xmm1, xmm1, xmm8 + vpaddd xmm0, xmm0, xmm7 + vpaddd xmm0, xmm0, xmm1 + vpxor xmm3, xmm3, xmm0 + vpshufb xmm3, xmm3, xmm15 + vpaddd xmm2, xmm2, xmm3 + vpxor xmm1, xmm1, xmm2 + vpsrld xmm8, xmm1, 7 + vpslld xmm1, xmm1, 25 + vpor xmm1, xmm1, xmm8 + vpshufd xmm0, xmm0, 0x39 + vpshufd xmm3, xmm3, 0x4E + vpshufd xmm2, xmm2, 0x93 + dec al + jz 9f + vshufps xmm8, 
xmm4, xmm5, 214 + vpshufd xmm9, xmm4, 0x0F + vpshufd xmm4, xmm8, 0x39 + vshufps xmm8, xmm6, xmm7, 250 + vpblendd xmm9, xmm9, xmm8, 0xAA + vpunpcklqdq xmm8, xmm7, xmm5 + vpblendd xmm8, xmm8, xmm6, 0x88 + vpshufd xmm8, xmm8, 0x78 + vpunpckhdq xmm5, xmm5, xmm7 + vpunpckldq xmm6, xmm6, xmm5 + vpshufd xmm7, xmm6, 0x1E + vmovdqa xmm5, xmm9 + vmovdqa xmm6, xmm8 + jmp 9b +9: + vpxor xmm0, xmm0, xmm2 + vpxor xmm1, xmm1, xmm3 + mov eax, r13d + cmp rdx, r15 + jne 2b + vmovdqu xmmword ptr [rbx], xmm0 + vmovdqu xmmword ptr [rbx+0x10], xmm1 + jmp 4b + +SET_SIZE(zfs_blake3_hash_many_avx2) + +SECTION_STATIC + +.p2align 6 +ADD0: + .long 0, 1, 2, 3, 4, 5, 6, 7 +ADD1: + .long 8, 8, 8, 8, 8, 8, 8, 8 +BLAKE3_IV_0: + .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667 + .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667 +BLAKE3_IV_1: + .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85 + .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85 +BLAKE3_IV_2: + .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372 + .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372 +BLAKE3_IV_3: + .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A + .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A +BLAKE3_BLOCK_LEN: + .long 0x00000040, 0x00000040, 0x00000040, 0x00000040 + .long 0x00000040, 0x00000040, 0x00000040, 0x00000040 +ROT16: + .byte 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13 +ROT8: + .byte 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12 +CMP_MSB_MASK: + .long 0x80000000, 0x80000000, 0x80000000, 0x80000000 + .long 0x80000000, 0x80000000, 0x80000000, 0x80000000 +BLAKE3_IV: + .long 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A +#endif /* HAVE_AVX2 */ + +#ifdef __ELF__ +.section .note.GNU-stack,"",%progbits +#endif diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx512.S b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx512.S new file mode 100644 index 000000000000..39830f1556bb --- /dev/null +++ 
b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx512.S @@ -0,0 +1,2594 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3 + * Copyright (c) 2019-2020 Samuel Neves + * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de> + */ + +#if defined(HAVE_AVX512F) && defined(HAVE_AVX512VL) + +#define _ASM +#include <sys/asm_linkage.h> + +.intel_syntax noprefix +.text + +ENTRY_ALIGN(zfs_blake3_hash_many_avx512, 64) + ENDBR + push r15 + push r14 + push r13 + push r12 + push rbx + push rbp + mov rbp, rsp + sub rsp, 144 + and rsp, 0xFFFFFFFFFFFFFFC0 + neg r9 + kmovw k1, r9d + vmovd xmm0, r8d + vpbroadcastd ymm0, xmm0 + shr r8, 32 + vmovd xmm1, r8d + vpbroadcastd ymm1, xmm1 + vmovdqa ymm4, ymm1 + vmovdqa ymm5, ymm1 + vpaddd ymm2, ymm0, ymmword ptr [ADD0+rip] + vpaddd ymm3, ymm0, ymmword ptr [ADD0+32+rip] + vpcmpltud k2, ymm2, ymm0 + vpcmpltud k3, ymm3, ymm0 + vpaddd ymm4 {k2}, ymm4, dword ptr [ADD1+rip] {1to8} + vpaddd ymm5 {k3}, ymm5, dword ptr [ADD1+rip] {1to8} + knotw k2, k1 + vmovdqa32 ymm2 {k2}, ymm0 + vmovdqa32 ymm3 {k2}, ymm0 + vmovdqa32 ymm4 {k2}, ymm1 + vmovdqa32 ymm5 {k2}, ymm1 + vmovdqa ymmword ptr [rsp], 
ymm2 + vmovdqa ymmword ptr [rsp+0x1*0x20], ymm3 + vmovdqa ymmword ptr [rsp+0x2*0x20], ymm4 + vmovdqa ymmword ptr [rsp+0x3*0x20], ymm5 + shl rdx, 6 + mov qword ptr [rsp+0x80], rdx + cmp rsi, 16 + jc 3f +2: + vpbroadcastd zmm0, dword ptr [rcx] + vpbroadcastd zmm1, dword ptr [rcx+0x1*0x4] + vpbroadcastd zmm2, dword ptr [rcx+0x2*0x4] + vpbroadcastd zmm3, dword ptr [rcx+0x3*0x4] + vpbroadcastd zmm4, dword ptr [rcx+0x4*0x4] + vpbroadcastd zmm5, dword ptr [rcx+0x5*0x4] + vpbroadcastd zmm6, dword ptr [rcx+0x6*0x4] + vpbroadcastd zmm7, dword ptr [rcx+0x7*0x4] + movzx eax, byte ptr [rbp+0x38] + movzx ebx, byte ptr [rbp+0x40] + or eax, ebx + xor edx, edx +.p2align 5 +9: + movzx ebx, byte ptr [rbp+0x48] + or ebx, eax + add rdx, 64 + cmp rdx, qword ptr [rsp+0x80] + cmove eax, ebx + mov dword ptr [rsp+0x88], eax + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + mov r10, qword ptr [rdi+0x10] + mov r11, qword ptr [rdi+0x18] + mov r12, qword ptr [rdi+0x40] + mov r13, qword ptr [rdi+0x48] + mov r14, qword ptr [rdi+0x50] + mov r15, qword ptr [rdi+0x58] + vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20] + vinserti32x8 zmm16, zmm16, ymmword ptr [rdx+r12-0x2*0x20], 0x01 + vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20] + vinserti32x8 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01 + vpunpcklqdq zmm8, zmm16, zmm17 + vpunpckhqdq zmm9, zmm16, zmm17 + vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20] + vinserti32x8 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01 + vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20] + vinserti32x8 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01 + vpunpcklqdq zmm10, zmm18, zmm19 + vpunpckhqdq zmm11, zmm18, zmm19 + mov r8, qword ptr [rdi+0x20] + mov r9, qword ptr [rdi+0x28] + mov r10, qword ptr [rdi+0x30] + mov r11, qword ptr [rdi+0x38] + mov r12, qword ptr [rdi+0x60] + mov r13, qword ptr [rdi+0x68] + mov r14, qword ptr [rdi+0x70] + mov r15, qword ptr [rdi+0x78] + vmovdqu32 ymm16, ymmword ptr [rdx+r8-0x2*0x20] + vinserti32x8 zmm16, zmm16, ymmword ptr 
[rdx+r12-0x2*0x20], 0x01 + vmovdqu32 ymm17, ymmword ptr [rdx+r9-0x2*0x20] + vinserti32x8 zmm17, zmm17, ymmword ptr [rdx+r13-0x2*0x20], 0x01 + vpunpcklqdq zmm12, zmm16, zmm17 + vpunpckhqdq zmm13, zmm16, zmm17 + vmovdqu32 ymm18, ymmword ptr [rdx+r10-0x2*0x20] + vinserti32x8 zmm18, zmm18, ymmword ptr [rdx+r14-0x2*0x20], 0x01 + vmovdqu32 ymm19, ymmword ptr [rdx+r11-0x2*0x20] + vinserti32x8 zmm19, zmm19, ymmword ptr [rdx+r15-0x2*0x20], 0x01 + vpunpcklqdq zmm14, zmm18, zmm19 + vpunpckhqdq zmm15, zmm18, zmm19 + vmovdqa32 zmm27, zmmword ptr [INDEX0+rip] + vmovdqa32 zmm31, zmmword ptr [INDEX1+rip] + vshufps zmm16, zmm8, zmm10, 136 + vshufps zmm17, zmm12, zmm14, 136 + vmovdqa32 zmm20, zmm16 + vpermt2d zmm16, zmm27, zmm17 + vpermt2d zmm20, zmm31, zmm17 + vshufps zmm17, zmm8, zmm10, 221 + vshufps zmm30, zmm12, zmm14, 221 + vmovdqa32 zmm21, zmm17 + vpermt2d zmm17, zmm27, zmm30 + vpermt2d zmm21, zmm31, zmm30 + vshufps zmm18, zmm9, zmm11, 136 + vshufps zmm8, zmm13, zmm15, 136 + vmovdqa32 zmm22, zmm18 + vpermt2d zmm18, zmm27, zmm8 + vpermt2d zmm22, zmm31, zmm8 + vshufps zmm19, zmm9, zmm11, 221 + vshufps zmm8, zmm13, zmm15, 221 + vmovdqa32 zmm23, zmm19 + vpermt2d zmm19, zmm27, zmm8 + vpermt2d zmm23, zmm31, zmm8 + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + mov r10, qword ptr [rdi+0x10] + mov r11, qword ptr [rdi+0x18] + mov r12, qword ptr [rdi+0x40] + mov r13, qword ptr [rdi+0x48] + mov r14, qword ptr [rdi+0x50] + mov r15, qword ptr [rdi+0x58] + vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20] + vinserti32x8 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01 + vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20] + vinserti32x8 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01 + vpunpcklqdq zmm8, zmm24, zmm25 + vpunpckhqdq zmm9, zmm24, zmm25 + vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20] + vinserti32x8 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01 + vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20] + vinserti32x8 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01 + 
vpunpcklqdq zmm10, zmm24, zmm25 + vpunpckhqdq zmm11, zmm24, zmm25 + prefetcht0 [r8+rdx+0x80] + prefetcht0 [r12+rdx+0x80] + prefetcht0 [r9+rdx+0x80] + prefetcht0 [r13+rdx+0x80] + prefetcht0 [r10+rdx+0x80] + prefetcht0 [r14+rdx+0x80] + prefetcht0 [r11+rdx+0x80] + prefetcht0 [r15+rdx+0x80] + mov r8, qword ptr [rdi+0x20] + mov r9, qword ptr [rdi+0x28] + mov r10, qword ptr [rdi+0x30] + mov r11, qword ptr [rdi+0x38] + mov r12, qword ptr [rdi+0x60] + mov r13, qword ptr [rdi+0x68] + mov r14, qword ptr [rdi+0x70] + mov r15, qword ptr [rdi+0x78] + vmovdqu32 ymm24, ymmword ptr [r8+rdx-0x1*0x20] + vinserti32x8 zmm24, zmm24, ymmword ptr [r12+rdx-0x1*0x20], 0x01 + vmovdqu32 ymm25, ymmword ptr [r9+rdx-0x1*0x20] + vinserti32x8 zmm25, zmm25, ymmword ptr [r13+rdx-0x1*0x20], 0x01 + vpunpcklqdq zmm12, zmm24, zmm25 + vpunpckhqdq zmm13, zmm24, zmm25 + vmovdqu32 ymm24, ymmword ptr [r10+rdx-0x1*0x20] + vinserti32x8 zmm24, zmm24, ymmword ptr [r14+rdx-0x1*0x20], 0x01 + vmovdqu32 ymm25, ymmword ptr [r11+rdx-0x1*0x20] + vinserti32x8 zmm25, zmm25, ymmword ptr [r15+rdx-0x1*0x20], 0x01 + vpunpcklqdq zmm14, zmm24, zmm25 + vpunpckhqdq zmm15, zmm24, zmm25 + prefetcht0 [r8+rdx+0x80] + prefetcht0 [r12+rdx+0x80] + prefetcht0 [r9+rdx+0x80] + prefetcht0 [r13+rdx+0x80] + prefetcht0 [r10+rdx+0x80] + prefetcht0 [r14+rdx+0x80] + prefetcht0 [r11+rdx+0x80] + prefetcht0 [r15+rdx+0x80] + vshufps zmm24, zmm8, zmm10, 136 + vshufps zmm30, zmm12, zmm14, 136 + vmovdqa32 zmm28, zmm24 + vpermt2d zmm24, zmm27, zmm30 + vpermt2d zmm28, zmm31, zmm30 + vshufps zmm25, zmm8, zmm10, 221 + vshufps zmm30, zmm12, zmm14, 221 + vmovdqa32 zmm29, zmm25 + vpermt2d zmm25, zmm27, zmm30 + vpermt2d zmm29, zmm31, zmm30 + vshufps zmm26, zmm9, zmm11, 136 + vshufps zmm8, zmm13, zmm15, 136 + vmovdqa32 zmm30, zmm26 + vpermt2d zmm26, zmm27, zmm8 + vpermt2d zmm30, zmm31, zmm8 + vshufps zmm8, zmm9, zmm11, 221 + vshufps zmm10, zmm13, zmm15, 221 + vpermi2d zmm27, zmm8, zmm10 + vpermi2d zmm31, zmm8, zmm10 + vpbroadcastd zmm8, dword ptr 
[BLAKE3_IV_0+rip] + vpbroadcastd zmm9, dword ptr [BLAKE3_IV_1+rip] + vpbroadcastd zmm10, dword ptr [BLAKE3_IV_2+rip] + vpbroadcastd zmm11, dword ptr [BLAKE3_IV_3+rip] + vmovdqa32 zmm12, zmmword ptr [rsp] + vmovdqa32 zmm13, zmmword ptr [rsp+0x1*0x40] + vpbroadcastd zmm14, dword ptr [BLAKE3_BLOCK_LEN+rip] + vpbroadcastd zmm15, dword ptr [rsp+0x22*0x4] + vpaddd zmm0, zmm0, zmm16 + vpaddd zmm1, zmm1, zmm18 + vpaddd zmm2, zmm2, zmm20 + vpaddd zmm3, zmm3, zmm22 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vprord zmm15, zmm15, 16 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vpaddd zmm0, zmm0, zmm17 + vpaddd zmm1, zmm1, zmm19 + vpaddd zmm2, zmm2, zmm21 + vpaddd zmm3, zmm3, zmm23 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm24 + vpaddd zmm1, zmm1, zmm26 + vpaddd zmm2, zmm2, zmm28 + vpaddd zmm3, zmm3, zmm30 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd 
zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, zmm25 + vpaddd zmm1, zmm1, zmm27 + vpaddd zmm2, zmm2, zmm29 + vpaddd zmm3, zmm3, zmm31 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vprord zmm4, zmm4, 7 + vpaddd zmm0, zmm0, zmm18 + vpaddd zmm1, zmm1, zmm19 + vpaddd zmm2, zmm2, zmm23 + vpaddd zmm3, zmm3, zmm20 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vprord zmm15, zmm15, 16 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 
12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vpaddd zmm0, zmm0, zmm22 + vpaddd zmm1, zmm1, zmm26 + vpaddd zmm2, zmm2, zmm16 + vpaddd zmm3, zmm3, zmm29 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm17 + vpaddd zmm1, zmm1, zmm28 + vpaddd zmm2, zmm2, zmm25 + vpaddd zmm3, zmm3, zmm31 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, zmm27 + vpaddd zmm1, zmm1, zmm21 + vpaddd zmm2, zmm2, zmm30 + vpaddd zmm3, zmm3, zmm24 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, 
zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vprord zmm4, zmm4, 7 + vpaddd zmm0, zmm0, zmm19 + vpaddd zmm1, zmm1, zmm26 + vpaddd zmm2, zmm2, zmm29 + vpaddd zmm3, zmm3, zmm23 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vprord zmm15, zmm15, 16 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vpaddd zmm0, zmm0, zmm20 + vpaddd zmm1, zmm1, zmm28 + vpaddd zmm2, zmm2, zmm18 + vpaddd zmm3, zmm3, zmm30 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm22 + vpaddd zmm1, zmm1, zmm25 + vpaddd zmm2, zmm2, zmm27 + vpaddd zmm3, zmm3, zmm24 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, 
zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, zmm21 + vpaddd zmm1, zmm1, zmm16 + vpaddd zmm2, zmm2, zmm31 + vpaddd zmm3, zmm3, zmm17 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vprord zmm4, zmm4, 7 + vpaddd zmm0, zmm0, zmm26 + vpaddd zmm1, zmm1, zmm28 + vpaddd zmm2, zmm2, zmm30 + vpaddd zmm3, zmm3, zmm29 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vprord zmm15, zmm15, 16 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vpaddd zmm0, zmm0, zmm23 + vpaddd 
zmm1, zmm1, zmm25 + vpaddd zmm2, zmm2, zmm19 + vpaddd zmm3, zmm3, zmm31 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm20 + vpaddd zmm1, zmm1, zmm27 + vpaddd zmm2, zmm2, zmm21 + vpaddd zmm3, zmm3, zmm17 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, zmm16 + vpaddd zmm1, zmm1, zmm18 + vpaddd zmm2, zmm2, zmm24 + vpaddd zmm3, zmm3, zmm22 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, 
zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vprord zmm4, zmm4, 7 + vpaddd zmm0, zmm0, zmm28 + vpaddd zmm1, zmm1, zmm25 + vpaddd zmm2, zmm2, zmm31 + vpaddd zmm3, zmm3, zmm30 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vprord zmm15, zmm15, 16 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vpaddd zmm0, zmm0, zmm29 + vpaddd zmm1, zmm1, zmm27 + vpaddd zmm2, zmm2, zmm26 + vpaddd zmm3, zmm3, zmm24 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm23 + vpaddd zmm1, zmm1, zmm21 + vpaddd zmm2, zmm2, zmm16 + vpaddd zmm3, zmm3, zmm22 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 
16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, zmm18 + vpaddd zmm1, zmm1, zmm19 + vpaddd zmm2, zmm2, zmm17 + vpaddd zmm3, zmm3, zmm20 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vprord zmm4, zmm4, 7 + vpaddd zmm0, zmm0, zmm25 + vpaddd zmm1, zmm1, zmm27 + vpaddd zmm2, zmm2, zmm24 + vpaddd zmm3, zmm3, zmm31 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vprord zmm15, zmm15, 16 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vpaddd zmm0, zmm0, zmm30 + vpaddd zmm1, zmm1, zmm21 + vpaddd zmm2, zmm2, zmm28 + vpaddd zmm3, zmm3, zmm17 + vpaddd zmm0, zmm0, 
zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm29 + vpaddd zmm1, zmm1, zmm16 + vpaddd zmm2, zmm2, zmm18 + vpaddd zmm3, zmm3, zmm20 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, zmm19 + vpaddd zmm1, zmm1, zmm26 + vpaddd zmm2, zmm2, zmm22 + vpaddd zmm3, zmm3, zmm23 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord 
zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vprord zmm4, zmm4, 7 + vpaddd zmm0, zmm0, zmm27 + vpaddd zmm1, zmm1, zmm21 + vpaddd zmm2, zmm2, zmm17 + vpaddd zmm3, zmm3, zmm24 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vprord zmm15, zmm15, 16 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 12 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vpaddd zmm0, zmm0, zmm31 + vpaddd zmm1, zmm1, zmm16 + vpaddd zmm2, zmm2, zmm25 + vpaddd zmm3, zmm3, zmm22 + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm1, zmm1, zmm5 + vpaddd zmm2, zmm2, zmm6 + vpaddd zmm3, zmm3, zmm7 + vpxord zmm12, zmm12, zmm0 + vpxord zmm13, zmm13, zmm1 + vpxord zmm14, zmm14, zmm2 + vpxord zmm15, zmm15, zmm3 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vprord zmm15, zmm15, 8 + vpaddd zmm8, zmm8, zmm12 + vpaddd zmm9, zmm9, zmm13 + vpaddd zmm10, zmm10, zmm14 + vpaddd zmm11, zmm11, zmm15 + vpxord zmm4, zmm4, zmm8 + vpxord zmm5, zmm5, zmm9 + vpxord zmm6, zmm6, zmm10 + vpxord zmm7, zmm7, zmm11 + vprord zmm4, zmm4, 7 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vpaddd zmm0, zmm0, zmm30 + vpaddd zmm1, zmm1, zmm18 + vpaddd zmm2, zmm2, zmm19 + vpaddd zmm3, zmm3, zmm23 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 16 + vprord zmm12, zmm12, 16 + vprord zmm13, zmm13, 16 + vprord zmm14, zmm14, 16 + vpaddd 
zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 12 + vprord zmm6, zmm6, 12 + vprord zmm7, zmm7, 12 + vprord zmm4, zmm4, 12 + vpaddd zmm0, zmm0, zmm26 + vpaddd zmm1, zmm1, zmm28 + vpaddd zmm2, zmm2, zmm20 + vpaddd zmm3, zmm3, zmm29 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm1, zmm1, zmm6 + vpaddd zmm2, zmm2, zmm7 + vpaddd zmm3, zmm3, zmm4 + vpxord zmm15, zmm15, zmm0 + vpxord zmm12, zmm12, zmm1 + vpxord zmm13, zmm13, zmm2 + vpxord zmm14, zmm14, zmm3 + vprord zmm15, zmm15, 8 + vprord zmm12, zmm12, 8 + vprord zmm13, zmm13, 8 + vprord zmm14, zmm14, 8 + vpaddd zmm10, zmm10, zmm15 + vpaddd zmm11, zmm11, zmm12 + vpaddd zmm8, zmm8, zmm13 + vpaddd zmm9, zmm9, zmm14 + vpxord zmm5, zmm5, zmm10 + vpxord zmm6, zmm6, zmm11 + vpxord zmm7, zmm7, zmm8 + vpxord zmm4, zmm4, zmm9 + vprord zmm5, zmm5, 7 + vprord zmm6, zmm6, 7 + vprord zmm7, zmm7, 7 + vprord zmm4, zmm4, 7 + vpxord zmm0, zmm0, zmm8 + vpxord zmm1, zmm1, zmm9 + vpxord zmm2, zmm2, zmm10 + vpxord zmm3, zmm3, zmm11 + vpxord zmm4, zmm4, zmm12 + vpxord zmm5, zmm5, zmm13 + vpxord zmm6, zmm6, zmm14 + vpxord zmm7, zmm7, zmm15 + movzx eax, byte ptr [rbp+0x38] + jne 9b + mov rbx, qword ptr [rbp+0x50] + vpunpckldq zmm16, zmm0, zmm1 + vpunpckhdq zmm17, zmm0, zmm1 + vpunpckldq zmm18, zmm2, zmm3 + vpunpckhdq zmm19, zmm2, zmm3 + vpunpckldq zmm20, zmm4, zmm5 + vpunpckhdq zmm21, zmm4, zmm5 + vpunpckldq zmm22, zmm6, zmm7 + vpunpckhdq zmm23, zmm6, zmm7 + vpunpcklqdq zmm0, zmm16, zmm18 + vpunpckhqdq zmm1, zmm16, zmm18 + vpunpcklqdq zmm2, zmm17, zmm19 + vpunpckhqdq zmm3, zmm17, zmm19 + vpunpcklqdq zmm4, zmm20, zmm22 + vpunpckhqdq zmm5, zmm20, zmm22 + vpunpcklqdq zmm6, zmm21, zmm23 + vpunpckhqdq zmm7, zmm21, zmm23 + vshufi32x4 zmm16, zmm0, zmm4, 0x88 + vshufi32x4 zmm17, zmm1, zmm5, 0x88 + vshufi32x4 zmm18, zmm2, zmm6, 0x88 + vshufi32x4 zmm19, zmm3, zmm7, 0x88 + vshufi32x4 
zmm20, zmm0, zmm4, 0xDD + vshufi32x4 zmm21, zmm1, zmm5, 0xDD + vshufi32x4 zmm22, zmm2, zmm6, 0xDD + vshufi32x4 zmm23, zmm3, zmm7, 0xDD + vshufi32x4 zmm0, zmm16, zmm17, 0x88 + vshufi32x4 zmm1, zmm18, zmm19, 0x88 + vshufi32x4 zmm2, zmm20, zmm21, 0x88 + vshufi32x4 zmm3, zmm22, zmm23, 0x88 + vshufi32x4 zmm4, zmm16, zmm17, 0xDD + vshufi32x4 zmm5, zmm18, zmm19, 0xDD + vshufi32x4 zmm6, zmm20, zmm21, 0xDD + vshufi32x4 zmm7, zmm22, zmm23, 0xDD + vmovdqu32 zmmword ptr [rbx], zmm0 + vmovdqu32 zmmword ptr [rbx+0x1*0x40], zmm1 + vmovdqu32 zmmword ptr [rbx+0x2*0x40], zmm2 + vmovdqu32 zmmword ptr [rbx+0x3*0x40], zmm3 + vmovdqu32 zmmword ptr [rbx+0x4*0x40], zmm4 + vmovdqu32 zmmword ptr [rbx+0x5*0x40], zmm5 + vmovdqu32 zmmword ptr [rbx+0x6*0x40], zmm6 + vmovdqu32 zmmword ptr [rbx+0x7*0x40], zmm7 + vmovdqa32 zmm0, zmmword ptr [rsp] + vmovdqa32 zmm1, zmmword ptr [rsp+0x1*0x40] + vmovdqa32 zmm2, zmm0 + vpaddd zmm2{k1}, zmm0, dword ptr [ADD16+rip] {1to16} + vpcmpltud k2, zmm2, zmm0 + vpaddd zmm1 {k2}, zmm1, dword ptr [ADD1+rip] {1to16} + vmovdqa32 zmmword ptr [rsp], zmm2 + vmovdqa32 zmmword ptr [rsp+0x1*0x40], zmm1 + add rdi, 128 + add rbx, 512 + mov qword ptr [rbp+0x50], rbx + sub rsi, 16 + cmp rsi, 16 + jnc 2b + test rsi, rsi + jnz 3f +4: + vzeroupper + mov rsp, rbp + pop rbp + pop rbx + pop r12 + pop r13 + pop r14 + pop r15 + RET +.p2align 6 +3: + test esi, 0x8 + je 3f + vpbroadcastd ymm0, dword ptr [rcx] + vpbroadcastd ymm1, dword ptr [rcx+0x4] + vpbroadcastd ymm2, dword ptr [rcx+0x8] + vpbroadcastd ymm3, dword ptr [rcx+0xC] + vpbroadcastd ymm4, dword ptr [rcx+0x10] + vpbroadcastd ymm5, dword ptr [rcx+0x14] + vpbroadcastd ymm6, dword ptr [rcx+0x18] + vpbroadcastd ymm7, dword ptr [rcx+0x1C] + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + mov r10, qword ptr [rdi+0x10] + mov r11, qword ptr [rdi+0x18] + mov r12, qword ptr [rdi+0x20] + mov r13, qword ptr [rdi+0x28] + mov r14, qword ptr [rdi+0x30] + mov r15, qword ptr [rdi+0x38] + movzx eax, byte ptr [rbp+0x38] + movzx ebx, 
byte ptr [rbp+0x40] + or eax, ebx + xor edx, edx +2: + movzx ebx, byte ptr [rbp+0x48] + or ebx, eax + add rdx, 64 + cmp rdx, qword ptr [rsp+0x80] + cmove eax, ebx + mov dword ptr [rsp+0x88], eax + vmovups xmm8, xmmword ptr [r8+rdx-0x40] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x40], 0x01 + vmovups xmm9, xmmword ptr [r9+rdx-0x40] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x40], 0x01 + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-0x40] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x40], 0x01 + vmovups xmm11, xmmword ptr [r11+rdx-0x40] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x40], 0x01 + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm16, ymm12, ymm14, 136 + vshufps ymm17, ymm12, ymm14, 221 + vshufps ymm18, ymm13, ymm15, 136 + vshufps ymm19, ymm13, ymm15, 221 + vmovups xmm8, xmmword ptr [r8+rdx-0x30] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x30], 0x01 + vmovups xmm9, xmmword ptr [r9+rdx-0x30] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x30], 0x01 + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-0x30] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x30], 0x01 + vmovups xmm11, xmmword ptr [r11+rdx-0x30] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x30], 0x01 + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm20, ymm12, ymm14, 136 + vshufps ymm21, ymm12, ymm14, 221 + vshufps ymm22, ymm13, ymm15, 136 + vshufps ymm23, ymm13, ymm15, 221 + vmovups xmm8, xmmword ptr [r8+rdx-0x20] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x20], 0x01 + vmovups xmm9, xmmword ptr [r9+rdx-0x20] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x20], 0x01 + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-0x20] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x20], 0x01 + vmovups xmm11, xmmword ptr [r11+rdx-0x20] + vinsertf128 ymm11, ymm11, xmmword ptr 
[r15+rdx-0x20], 0x01 + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm24, ymm12, ymm14, 136 + vshufps ymm25, ymm12, ymm14, 221 + vshufps ymm26, ymm13, ymm15, 136 + vshufps ymm27, ymm13, ymm15, 221 + vmovups xmm8, xmmword ptr [r8+rdx-0x10] + vinsertf128 ymm8, ymm8, xmmword ptr [r12+rdx-0x10], 0x01 + vmovups xmm9, xmmword ptr [r9+rdx-0x10] + vinsertf128 ymm9, ymm9, xmmword ptr [r13+rdx-0x10], 0x01 + vunpcklpd ymm12, ymm8, ymm9 + vunpckhpd ymm13, ymm8, ymm9 + vmovups xmm10, xmmword ptr [r10+rdx-0x10] + vinsertf128 ymm10, ymm10, xmmword ptr [r14+rdx-0x10], 0x01 + vmovups xmm11, xmmword ptr [r11+rdx-0x10] + vinsertf128 ymm11, ymm11, xmmword ptr [r15+rdx-0x10], 0x01 + vunpcklpd ymm14, ymm10, ymm11 + vunpckhpd ymm15, ymm10, ymm11 + vshufps ymm28, ymm12, ymm14, 136 + vshufps ymm29, ymm12, ymm14, 221 + vshufps ymm30, ymm13, ymm15, 136 + vshufps ymm31, ymm13, ymm15, 221 + vpbroadcastd ymm8, dword ptr [BLAKE3_IV_0+rip] + vpbroadcastd ymm9, dword ptr [BLAKE3_IV_1+rip] + vpbroadcastd ymm10, dword ptr [BLAKE3_IV_2+rip] + vpbroadcastd ymm11, dword ptr [BLAKE3_IV_3+rip] + vmovdqa ymm12, ymmword ptr [rsp] + vmovdqa ymm13, ymmword ptr [rsp+0x40] + vpbroadcastd ymm14, dword ptr [BLAKE3_BLOCK_LEN+rip] + vpbroadcastd ymm15, dword ptr [rsp+0x88] + vpaddd ymm0, ymm0, ymm16 + vpaddd ymm1, ymm1, ymm18 + vpaddd ymm2, ymm2, ymm20 + vpaddd ymm3, ymm3, ymm22 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord 
ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm17 + vpaddd ymm1, ymm1, ymm19 + vpaddd ymm2, ymm2, ymm21 + vpaddd ymm3, ymm3, ymm23 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, ymm24 + vpaddd ymm1, ymm1, ymm26 + vpaddd ymm2, ymm2, ymm28 + vpaddd ymm3, ymm3, ymm30 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm25 + vpaddd ymm1, ymm1, ymm27 + vpaddd ymm2, ymm2, ymm29 + vpaddd ymm3, ymm3, ymm31 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 
+ vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + vpaddd ymm0, ymm0, ymm18 + vpaddd ymm1, ymm1, ymm19 + vpaddd ymm2, ymm2, ymm23 + vpaddd ymm3, ymm3, ymm20 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm22 + vpaddd ymm1, ymm1, ymm26 + vpaddd ymm2, ymm2, ymm16 + vpaddd ymm3, ymm3, ymm29 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, ymm17 + vpaddd ymm1, ymm1, ymm28 + vpaddd ymm2, ymm2, ymm25 + vpaddd ymm3, ymm3, ymm31 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + 
vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm27 + vpaddd ymm1, ymm1, ymm21 + vpaddd ymm2, ymm2, ymm30 + vpaddd ymm3, ymm3, ymm24 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + vpaddd ymm0, ymm0, ymm19 + vpaddd ymm1, ymm1, ymm26 + vpaddd ymm2, ymm2, ymm29 + vpaddd ymm3, ymm3, ymm23 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm20 + vpaddd ymm1, ymm1, ymm28 + vpaddd ymm2, ymm2, 
ymm18 + vpaddd ymm3, ymm3, ymm30 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, ymm22 + vpaddd ymm1, ymm1, ymm25 + vpaddd ymm2, ymm2, ymm27 + vpaddd ymm3, ymm3, ymm24 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm21 + vpaddd ymm1, ymm1, ymm16 + vpaddd ymm2, ymm2, ymm31 + vpaddd ymm3, ymm3, ymm17 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord 
ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + vpaddd ymm0, ymm0, ymm26 + vpaddd ymm1, ymm1, ymm28 + vpaddd ymm2, ymm2, ymm30 + vpaddd ymm3, ymm3, ymm29 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm23 + vpaddd ymm1, ymm1, ymm25 + vpaddd ymm2, ymm2, ymm19 + vpaddd ymm3, ymm3, ymm31 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, ymm20 + vpaddd ymm1, ymm1, ymm27 + vpaddd ymm2, ymm2, ymm21 + vpaddd ymm3, ymm3, ymm17 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord 
ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm16 + vpaddd ymm1, ymm1, ymm18 + vpaddd ymm2, ymm2, ymm24 + vpaddd ymm3, ymm3, ymm22 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + vpaddd ymm0, ymm0, ymm28 + vpaddd ymm1, ymm1, ymm25 + vpaddd ymm2, ymm2, ymm31 + vpaddd ymm3, ymm3, ymm30 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm29 + vpaddd ymm1, ymm1, ymm27 + vpaddd ymm2, ymm2, ymm26 + vpaddd ymm3, ymm3, ymm24 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + 
vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, ymm23 + vpaddd ymm1, ymm1, ymm21 + vpaddd ymm2, ymm2, ymm16 + vpaddd ymm3, ymm3, ymm22 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm18 + vpaddd ymm1, ymm1, ymm19 + vpaddd ymm2, ymm2, ymm17 + vpaddd ymm3, ymm3, ymm20 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 
7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + vpaddd ymm0, ymm0, ymm25 + vpaddd ymm1, ymm1, ymm27 + vpaddd ymm2, ymm2, ymm24 + vpaddd ymm3, ymm3, ymm31 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm30 + vpaddd ymm1, ymm1, ymm21 + vpaddd ymm2, ymm2, ymm28 + vpaddd ymm3, ymm3, ymm17 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, ymm29 + vpaddd ymm1, ymm1, ymm16 + vpaddd ymm2, ymm2, ymm18 + vpaddd ymm3, ymm3, ymm20 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, 
ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm19 + vpaddd ymm1, ymm1, ymm26 + vpaddd ymm2, ymm2, ymm22 + vpaddd ymm3, ymm3, ymm23 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + vpaddd ymm0, ymm0, ymm27 + vpaddd ymm1, ymm1, ymm21 + vpaddd ymm2, ymm2, ymm17 + vpaddd ymm3, ymm3, ymm24 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vprord ymm15, ymm15, 16 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 12 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vpaddd ymm0, ymm0, ymm31 + vpaddd ymm1, ymm1, ymm16 + vpaddd ymm2, ymm2, ymm25 + vpaddd ymm3, ymm3, ymm22 + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm1, ymm1, ymm5 + vpaddd ymm2, ymm2, ymm6 + vpaddd ymm3, ymm3, ymm7 + vpxord ymm12, ymm12, ymm0 + vpxord 
ymm13, ymm13, ymm1 + vpxord ymm14, ymm14, ymm2 + vpxord ymm15, ymm15, ymm3 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vprord ymm15, ymm15, 8 + vpaddd ymm8, ymm8, ymm12 + vpaddd ymm9, ymm9, ymm13 + vpaddd ymm10, ymm10, ymm14 + vpaddd ymm11, ymm11, ymm15 + vpxord ymm4, ymm4, ymm8 + vpxord ymm5, ymm5, ymm9 + vpxord ymm6, ymm6, ymm10 + vpxord ymm7, ymm7, ymm11 + vprord ymm4, ymm4, 7 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vpaddd ymm0, ymm0, ymm30 + vpaddd ymm1, ymm1, ymm18 + vpaddd ymm2, ymm2, ymm19 + vpaddd ymm3, ymm3, ymm23 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 16 + vprord ymm12, ymm12, 16 + vprord ymm13, ymm13, 16 + vprord ymm14, ymm14, 16 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 12 + vprord ymm6, ymm6, 12 + vprord ymm7, ymm7, 12 + vprord ymm4, ymm4, 12 + vpaddd ymm0, ymm0, ymm26 + vpaddd ymm1, ymm1, ymm28 + vpaddd ymm2, ymm2, ymm20 + vpaddd ymm3, ymm3, ymm29 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm1, ymm1, ymm6 + vpaddd ymm2, ymm2, ymm7 + vpaddd ymm3, ymm3, ymm4 + vpxord ymm15, ymm15, ymm0 + vpxord ymm12, ymm12, ymm1 + vpxord ymm13, ymm13, ymm2 + vpxord ymm14, ymm14, ymm3 + vprord ymm15, ymm15, 8 + vprord ymm12, ymm12, 8 + vprord ymm13, ymm13, 8 + vprord ymm14, ymm14, 8 + vpaddd ymm10, ymm10, ymm15 + vpaddd ymm11, ymm11, ymm12 + vpaddd ymm8, ymm8, ymm13 + vpaddd ymm9, ymm9, ymm14 + vpxord ymm5, ymm5, ymm10 + vpxord ymm6, ymm6, ymm11 + vpxord ymm7, ymm7, ymm8 + vpxord ymm4, ymm4, ymm9 + vprord ymm5, ymm5, 7 + vprord ymm6, ymm6, 7 + vprord ymm7, ymm7, 7 + vprord ymm4, ymm4, 7 + vpxor ymm0, ymm0, ymm8 + vpxor ymm1, 
ymm1, ymm9 + vpxor ymm2, ymm2, ymm10 + vpxor ymm3, ymm3, ymm11 + vpxor ymm4, ymm4, ymm12 + vpxor ymm5, ymm5, ymm13 + vpxor ymm6, ymm6, ymm14 + vpxor ymm7, ymm7, ymm15 + movzx eax, byte ptr [rbp+0x38] + jne 2b + mov rbx, qword ptr [rbp+0x50] + vunpcklps ymm8, ymm0, ymm1 + vunpcklps ymm9, ymm2, ymm3 + vunpckhps ymm10, ymm0, ymm1 + vunpcklps ymm11, ymm4, ymm5 + vunpcklps ymm0, ymm6, ymm7 + vshufps ymm12, ymm8, ymm9, 78 + vblendps ymm1, ymm8, ymm12, 0xCC + vshufps ymm8, ymm11, ymm0, 78 + vunpckhps ymm13, ymm2, ymm3 + vblendps ymm2, ymm11, ymm8, 0xCC + vblendps ymm3, ymm12, ymm9, 0xCC + vperm2f128 ymm12, ymm1, ymm2, 0x20 + vmovups ymmword ptr [rbx], ymm12 + vunpckhps ymm14, ymm4, ymm5 + vblendps ymm4, ymm8, ymm0, 0xCC + vunpckhps ymm15, ymm6, ymm7 + vperm2f128 ymm7, ymm3, ymm4, 0x20 + vmovups ymmword ptr [rbx+0x20], ymm7 + vshufps ymm5, ymm10, ymm13, 78 + vblendps ymm6, ymm5, ymm13, 0xCC + vshufps ymm13, ymm14, ymm15, 78 + vblendps ymm10, ymm10, ymm5, 0xCC + vblendps ymm14, ymm14, ymm13, 0xCC + vperm2f128 ymm8, ymm10, ymm14, 0x20 + vmovups ymmword ptr [rbx+0x40], ymm8 + vblendps ymm15, ymm13, ymm15, 0xCC + vperm2f128 ymm13, ymm6, ymm15, 0x20 + vmovups ymmword ptr [rbx+0x60], ymm13 + vperm2f128 ymm9, ymm1, ymm2, 0x31 + vperm2f128 ymm11, ymm3, ymm4, 0x31 + vmovups ymmword ptr [rbx+0x80], ymm9 + vperm2f128 ymm14, ymm10, ymm14, 0x31 + vperm2f128 ymm15, ymm6, ymm15, 0x31 + vmovups ymmword ptr [rbx+0xA0], ymm11 + vmovups ymmword ptr [rbx+0xC0], ymm14 + vmovups ymmword ptr [rbx+0xE0], ymm15 + vmovdqa ymm0, ymmword ptr [rsp] + vmovdqa ymm2, ymmword ptr [rsp+0x2*0x20] + vmovdqa32 ymm0 {k1}, ymmword ptr [rsp+0x1*0x20] + vmovdqa32 ymm2 {k1}, ymmword ptr [rsp+0x3*0x20] + vmovdqa ymmword ptr [rsp], ymm0 + vmovdqa ymmword ptr [rsp+0x2*0x20], ymm2 + add rbx, 256 + mov qword ptr [rbp+0x50], rbx + add rdi, 64 + sub rsi, 8 +3: + mov rbx, qword ptr [rbp+0x50] + mov r15, qword ptr [rsp+0x80] + movzx r13, byte ptr [rbp+0x38] + movzx r12, byte ptr [rbp+0x48] + test esi, 0x4 + je 3f + 
vbroadcasti32x4 zmm0, xmmword ptr [rcx] + vbroadcasti32x4 zmm1, xmmword ptr [rcx+0x1*0x10] + vmovdqa xmm12, xmmword ptr [rsp] + vmovdqa xmm13, xmmword ptr [rsp+0x4*0x10] + vpunpckldq xmm14, xmm12, xmm13 + vpunpckhdq xmm15, xmm12, xmm13 + vpermq ymm14, ymm14, 0xDC + vpermq ymm15, ymm15, 0xDC + vpbroadcastd zmm12, dword ptr [BLAKE3_BLOCK_LEN+rip] + vinserti32x8 zmm13, zmm14, ymm15, 0x01 + mov eax, 17476 + kmovw k2, eax + vpblendmd zmm13 {k2}, zmm13, zmm12 + vbroadcasti32x4 zmm15, xmmword ptr [BLAKE3_IV+rip] + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + mov r10, qword ptr [rdi+0x10] + mov r11, qword ptr [rdi+0x18] + mov eax, 43690 + kmovw k3, eax + mov eax, 34952 + kmovw k4, eax + movzx eax, byte ptr [rbp+0x40] + or eax, r13d + xor edx, edx +.p2align 5 +2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + mov dword ptr [rsp+0x88], eax + vmovdqa32 zmm2, zmm15 + vpbroadcastd zmm8, dword ptr [rsp+0x22*0x4] + vpblendmd zmm3 {k4}, zmm13, zmm8 + vmovups zmm8, zmmword ptr [r8+rdx-0x1*0x40] + vinserti32x4 zmm8, zmm8, xmmword ptr [r9+rdx-0x4*0x10], 0x01 + vinserti32x4 zmm8, zmm8, xmmword ptr [r10+rdx-0x4*0x10], 0x02 + vinserti32x4 zmm8, zmm8, xmmword ptr [r11+rdx-0x4*0x10], 0x03 + vmovups zmm9, zmmword ptr [r8+rdx-0x30] + vinserti32x4 zmm9, zmm9, xmmword ptr [r9+rdx-0x3*0x10], 0x01 + vinserti32x4 zmm9, zmm9, xmmword ptr [r10+rdx-0x3*0x10], 0x02 + vinserti32x4 zmm9, zmm9, xmmword ptr [r11+rdx-0x3*0x10], 0x03 + vshufps zmm4, zmm8, zmm9, 136 + vshufps zmm5, zmm8, zmm9, 221 + vmovups zmm8, zmmword ptr [r8+rdx-0x20] + vinserti32x4 zmm8, zmm8, xmmword ptr [r9+rdx-0x2*0x10], 0x01 + vinserti32x4 zmm8, zmm8, xmmword ptr [r10+rdx-0x2*0x10], 0x02 + vinserti32x4 zmm8, zmm8, xmmword ptr [r11+rdx-0x2*0x10], 0x03 + vmovups zmm9, zmmword ptr [r8+rdx-0x10] + vinserti32x4 zmm9, zmm9, xmmword ptr [r9+rdx-0x1*0x10], 0x01 + vinserti32x4 zmm9, zmm9, xmmword ptr [r10+rdx-0x1*0x10], 0x02 + vinserti32x4 zmm9, zmm9, xmmword ptr [r11+rdx-0x1*0x10], 0x03 + 
vshufps zmm6, zmm8, zmm9, 136 + vshufps zmm7, zmm8, zmm9, 221 + vpshufd zmm6, zmm6, 0x93 + vpshufd zmm7, zmm7, 0x93 + mov al, 7 +9: + vpaddd zmm0, zmm0, zmm4 + vpaddd zmm0, zmm0, zmm1 + vpxord zmm3, zmm3, zmm0 + vprord zmm3, zmm3, 16 + vpaddd zmm2, zmm2, zmm3 + vpxord zmm1, zmm1, zmm2 + vprord zmm1, zmm1, 12 + vpaddd zmm0, zmm0, zmm5 + vpaddd zmm0, zmm0, zmm1 + vpxord zmm3, zmm3, zmm0 + vprord zmm3, zmm3, 8 + vpaddd zmm2, zmm2, zmm3 + vpxord zmm1, zmm1, zmm2 + vprord zmm1, zmm1, 7 + vpshufd zmm0, zmm0, 0x93 + vpshufd zmm3, zmm3, 0x4E + vpshufd zmm2, zmm2, 0x39 + vpaddd zmm0, zmm0, zmm6 + vpaddd zmm0, zmm0, zmm1 + vpxord zmm3, zmm3, zmm0 + vprord zmm3, zmm3, 16 + vpaddd zmm2, zmm2, zmm3 + vpxord zmm1, zmm1, zmm2 + vprord zmm1, zmm1, 12 + vpaddd zmm0, zmm0, zmm7 + vpaddd zmm0, zmm0, zmm1 + vpxord zmm3, zmm3, zmm0 + vprord zmm3, zmm3, 8 + vpaddd zmm2, zmm2, zmm3 + vpxord zmm1, zmm1, zmm2 + vprord zmm1, zmm1, 7 + vpshufd zmm0, zmm0, 0x39 + vpshufd zmm3, zmm3, 0x4E + vpshufd zmm2, zmm2, 0x93 + dec al + jz 9f + vshufps zmm8, zmm4, zmm5, 214 + vpshufd zmm9, zmm4, 0x0F + vpshufd zmm4, zmm8, 0x39 + vshufps zmm8, zmm6, zmm7, 250 + vpblendmd zmm9 {k3}, zmm9, zmm8 + vpunpcklqdq zmm8, zmm7, zmm5 + vpblendmd zmm8 {k4}, zmm8, zmm6 + vpshufd zmm8, zmm8, 0x78 + vpunpckhdq zmm5, zmm5, zmm7 + vpunpckldq zmm6, zmm6, zmm5 + vpshufd zmm7, zmm6, 0x1E + vmovdqa32 zmm5, zmm9 + vmovdqa32 zmm6, zmm8 + jmp 9b +9: + vpxord zmm0, zmm0, zmm2 + vpxord zmm1, zmm1, zmm3 + mov eax, r13d + cmp rdx, r15 + jne 2b + vmovdqu xmmword ptr [rbx], xmm0 + vmovdqu xmmword ptr [rbx+0x10], xmm1 + vextracti128 xmmword ptr [rbx+0x20], ymm0, 0x01 + vextracti128 xmmword ptr [rbx+0x30], ymm1, 0x01 + vextracti32x4 xmmword ptr [rbx+0x4*0x10], zmm0, 0x02 + vextracti32x4 xmmword ptr [rbx+0x5*0x10], zmm1, 0x02 + vextracti32x4 xmmword ptr [rbx+0x6*0x10], zmm0, 0x03 + vextracti32x4 xmmword ptr [rbx+0x7*0x10], zmm1, 0x03 + vmovdqa xmm0, xmmword ptr [rsp] + vmovdqa xmm2, xmmword ptr [rsp+0x40] + vmovdqa32 xmm0 {k1}, xmmword 
ptr [rsp+0x1*0x10] + vmovdqa32 xmm2 {k1}, xmmword ptr [rsp+0x5*0x10] + vmovdqa xmmword ptr [rsp], xmm0 + vmovdqa xmmword ptr [rsp+0x40], xmm2 + add rbx, 128 + add rdi, 32 + sub rsi, 4 +3: + test esi, 0x2 + je 3f + vbroadcasti128 ymm0, xmmword ptr [rcx] + vbroadcasti128 ymm1, xmmword ptr [rcx+0x10] + vmovd xmm13, dword ptr [rsp] + vpinsrd xmm13, xmm13, dword ptr [rsp+0x40], 1 + vpinsrd xmm13, xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 + vmovd xmm14, dword ptr [rsp+0x4] + vpinsrd xmm14, xmm14, dword ptr [rsp+0x44], 1 + vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 + vinserti128 ymm13, ymm13, xmm14, 0x01 + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + movzx eax, byte ptr [rbp+0x40] + or eax, r13d + xor edx, edx +.p2align 5 +2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + mov dword ptr [rsp+0x88], eax + vbroadcasti128 ymm2, xmmword ptr [BLAKE3_IV+rip] + vpbroadcastd ymm8, dword ptr [rsp+0x88] + vpblendd ymm3, ymm13, ymm8, 0x88 + vmovups ymm8, ymmword ptr [r8+rdx-0x40] + vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-0x40], 0x01 + vmovups ymm9, ymmword ptr [r8+rdx-0x30] + vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-0x30], 0x01 + vshufps ymm4, ymm8, ymm9, 136 + vshufps ymm5, ymm8, ymm9, 221 + vmovups ymm8, ymmword ptr [r8+rdx-0x20] + vinsertf128 ymm8, ymm8, xmmword ptr [r9+rdx-0x20], 0x01 + vmovups ymm9, ymmword ptr [r8+rdx-0x10] + vinsertf128 ymm9, ymm9, xmmword ptr [r9+rdx-0x10], 0x01 + vshufps ymm6, ymm8, ymm9, 136 + vshufps ymm7, ymm8, ymm9, 221 + vpshufd ymm6, ymm6, 0x93 + vpshufd ymm7, ymm7, 0x93 + mov al, 7 +9: + vpaddd ymm0, ymm0, ymm4 + vpaddd ymm0, ymm0, ymm1 + vpxord ymm3, ymm3, ymm0 + vprord ymm3, ymm3, 16 + vpaddd ymm2, ymm2, ymm3 + vpxord ymm1, ymm1, ymm2 + vprord ymm1, ymm1, 12 + vpaddd ymm0, ymm0, ymm5 + vpaddd ymm0, ymm0, ymm1 + vpxord ymm3, ymm3, ymm0 + vprord ymm3, ymm3, 8 + vpaddd ymm2, ymm2, ymm3 + vpxord ymm1, ymm1, ymm2 + vprord ymm1, ymm1, 7 + vpshufd ymm0, ymm0, 0x93 + vpshufd ymm3, ymm3, 
0x4E + vpshufd ymm2, ymm2, 0x39 + vpaddd ymm0, ymm0, ymm6 + vpaddd ymm0, ymm0, ymm1 + vpxord ymm3, ymm3, ymm0 + vprord ymm3, ymm3, 16 + vpaddd ymm2, ymm2, ymm3 + vpxord ymm1, ymm1, ymm2 + vprord ymm1, ymm1, 12 + vpaddd ymm0, ymm0, ymm7 + vpaddd ymm0, ymm0, ymm1 + vpxord ymm3, ymm3, ymm0 + vprord ymm3, ymm3, 8 + vpaddd ymm2, ymm2, ymm3 + vpxord ymm1, ymm1, ymm2 + vprord ymm1, ymm1, 7 + vpshufd ymm0, ymm0, 0x39 + vpshufd ymm3, ymm3, 0x4E + vpshufd ymm2, ymm2, 0x93 + dec al + jz 9f + vshufps ymm8, ymm4, ymm5, 214 + vpshufd ymm9, ymm4, 0x0F + vpshufd ymm4, ymm8, 0x39 + vshufps ymm8, ymm6, ymm7, 250 + vpblendd ymm9, ymm9, ymm8, 0xAA + vpunpcklqdq ymm8, ymm7, ymm5 + vpblendd ymm8, ymm8, ymm6, 0x88 + vpshufd ymm8, ymm8, 0x78 + vpunpckhdq ymm5, ymm5, ymm7 + vpunpckldq ymm6, ymm6, ymm5 + vpshufd ymm7, ymm6, 0x1E + vmovdqa ymm5, ymm9 + vmovdqa ymm6, ymm8 + jmp 9b +9: + vpxor ymm0, ymm0, ymm2 + vpxor ymm1, ymm1, ymm3 + mov eax, r13d + cmp rdx, r15 + jne 2b + vmovdqu xmmword ptr [rbx], xmm0 + vmovdqu xmmword ptr [rbx+0x10], xmm1 + vextracti128 xmmword ptr [rbx+0x20], ymm0, 0x01 + vextracti128 xmmword ptr [rbx+0x30], ymm1, 0x01 + vmovdqa xmm0, xmmword ptr [rsp] + vmovdqa xmm2, xmmword ptr [rsp+0x4*0x10] + vmovdqu32 xmm0 {k1}, xmmword ptr [rsp+0x8] + vmovdqu32 xmm2 {k1}, xmmword ptr [rsp+0x48] + vmovdqa xmmword ptr [rsp], xmm0 + vmovdqa xmmword ptr [rsp+0x4*0x10], xmm2 + add rbx, 64 + add rdi, 16 + sub rsi, 2 +3: + test esi, 0x1 + je 4b + vmovdqu xmm0, xmmword ptr [rcx] + vmovdqu xmm1, xmmword ptr [rcx+0x10] + vmovd xmm14, dword ptr [rsp] + vpinsrd xmm14, xmm14, dword ptr [rsp+0x40], 1 + vpinsrd xmm14, xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 + vmovdqa xmm15, xmmword ptr [BLAKE3_IV+rip] + mov r8, qword ptr [rdi] + movzx eax, byte ptr [rbp+0x40] + or eax, r13d + xor edx, edx +.p2align 5 +2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + vpinsrd xmm3, xmm14, eax, 3 + vmovdqa xmm2, xmm15 + vmovups xmm8, xmmword ptr [r8+rdx-0x40] + vmovups xmm9, 
xmmword ptr [r8+rdx-0x30]
/*
 * NOTE(review): everything from here down to
 * SET_SIZE(zfs_blake3_hash_many_avx512) is the tail of a routine whose
 * entry point lies before this span. It is reproduced unchanged and is
 * deliberately left undocumented here.
 */
+ vshufps xmm4, xmm8, xmm9, 136
+ vshufps xmm5, xmm8, xmm9, 221
+ vmovups xmm8, xmmword ptr [r8+rdx-0x20]
+ vmovups xmm9, xmmword ptr [r8+rdx-0x10]
+ vshufps xmm6, xmm8, xmm9, 136
+ vshufps xmm7, xmm8, xmm9, 221
+ vpshufd xmm6, xmm6, 0x93
+ vpshufd xmm7, xmm7, 0x93
+ mov al, 7
+9:
+ vpaddd xmm0, xmm0, xmm4
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm5
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 0x93
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x39
+ vpaddd xmm0, xmm0, xmm6
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
+ vpaddd xmm0, xmm0, xmm7
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
+ vpshufd xmm0, xmm0, 0x39
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ vshufps xmm8, xmm4, xmm5, 214
+ vpshufd xmm9, xmm4, 0x0F
+ vpshufd xmm4, xmm8, 0x39
+ vshufps xmm8, xmm6, xmm7, 250
+ vpblendd xmm9, xmm9, xmm8, 0xAA
+ vpunpcklqdq xmm8, xmm7, xmm5
+ vpblendd xmm8, xmm8, xmm6, 0x88
+ vpshufd xmm8, xmm8, 0x78
+ vpunpckhdq xmm5, xmm5, xmm7
+ vpunpckldq xmm6, xmm6, xmm5
+ vpshufd xmm7, xmm6, 0x1E
+ vmovdqa xmm5, xmm9
+ vmovdqa xmm6, xmm8
+ jmp 9b
+9:
+ vpxor xmm0, xmm0, xmm2
+ vpxor xmm1, xmm1, xmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ vmovdqu xmmword ptr [rbx], xmm0
+ vmovdqu xmmword ptr [rbx+0x10], xmm1
+ jmp 4b
+SET_SIZE(zfs_blake3_hash_many_avx512)
+
/*
 * zfs_blake3_compress_in_place_avx512
 *
 * Mixes one 64-byte message block into a 32-byte state and writes the
 * 32-byte result back over that state.
 *
 * Inputs, as read by the code below:
 *   rdi  pointer to the 32-byte state; loaded into xmm0 (row 0) and
 *        xmm1 (row 1), and overwritten with the result on exit
 *   rsi  pointer to the 64-byte message block (unaligned loads)
 *   rcx  64-bit value that becomes dwords 0 and 1 of row 3
 *   dl   byte that becomes dword 2 of row 3
 *   r8b  byte that becomes dword 3 of row 3
 * NOTE(review): these presumably map to the (cv, block, block_len,
 * counter, flags) arguments of the upstream BLAKE3 C prototype. Confirm
 * against the header that declares this symbol.
 *
 * Working registers: xmm0-xmm3 hold state rows 0-3, xmm4-xmm7 hold the
 * message words, xmm8-xmm9 are scratch. rax and rdx are overwritten.
 */
+ENTRY_ALIGN(zfs_blake3_compress_in_place_avx512, 64)
+ ENDBR
+ vmovdqu xmm0, xmmword ptr [rdi]
+ vmovdqu xmm1, xmmword ptr [rdi+0x10]
/* Build rdx = dl | (r8b << 32) so that one vmovq yields two dwords. */
+ movzx eax, r8b
+ movzx edx, dl
+ shl rax, 32
+ add rdx, rax
/* Row 3 = { rcx low dword, rcx high dword, dl, r8b }. */
+ vmovq xmm3, rcx
+ vmovq xmm4, rdx
+ vpunpcklqdq xmm3, xmm3, xmm4
/* Row 2 = the four dwords stored at BLAKE3_IV (aligned load). */
+ vmovaps xmm2, xmmword ptr [BLAKE3_IV+rip]
/*
 * Load message dwords 0-7 and split them: selector 136 (0x88) keeps the
 * even-indexed dwords in xmm4, selector 221 (0xDD) keeps the odd-indexed
 * dwords in xmm5.
 */
+ vmovups xmm8, xmmword ptr [rsi]
+ vmovups xmm9, xmmword ptr [rsi+0x10]
+ vshufps xmm4, xmm8, xmm9, 136
+ vshufps xmm5, xmm8, xmm9, 221
/*
 * Same even/odd split for message dwords 8-15 into xmm6 and xmm7, then
 * rotate each result by one dword position (0x93).
 */
+ vmovups xmm8, xmmword ptr [rsi+0x20]
+ vmovups xmm9, xmmword ptr [rsi+0x30]
+ vshufps xmm6, xmm8, xmm9, 136
+ vshufps xmm7, xmm8, xmm9, 221
+ vpshufd xmm6, xmm6, 0x93
+ vpshufd xmm7, xmm7, 0x93
/* al counts the remaining rounds; the loop body below runs 7 times. */
+ mov al, 7
+9:
/*
 * First half of the round, step 1:
 *   row0 += xmm4 + row1;  row3 = ror32(row3 ^ row0, 16)
 *   row2 += row3;         row1 = ror32(row1 ^ row2, 12)
 */
+ vpaddd xmm0, xmm0, xmm4
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
/* Step 2: same pattern with xmm5 and rotate amounts 8 and 7. */
+ vpaddd xmm0, xmm0, xmm5
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
/*
 * Rotate the dwords of rows 0, 3 and 2 by one, two and three positions
 * so that the second half of the round combines different lanes.
 */
+ vpshufd xmm0, xmm0, 0x93
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x39
/* Second half, step 1: as above with xmm6, rotates 16 and 12. */
+ vpaddd xmm0, xmm0, xmm6
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
/* Second half, step 2: as above with xmm7, rotates 8 and 7. */
+ vpaddd xmm0, xmm0, xmm7
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
/* Undo the lane rotation applied before the second half. */
+ vpshufd xmm0, xmm0, 0x39
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x93
/* Leave after the seventh round; otherwise reorder the message words. */
+ dec al
+ jz 9f
/*
 * Rebuild xmm4-xmm7 from their current contents for the next round.
 * xmm8 and xmm9 carry the new xmm6 and xmm5 until the old values have
 * been consumed.
 */
+ vshufps xmm8, xmm4, xmm5, 214
+ vpshufd xmm9, xmm4, 0x0F
+ vpshufd xmm4, xmm8, 0x39
+ vshufps xmm8, xmm6, xmm7, 250
+ vpblendd xmm9, xmm9, xmm8, 0xAA
+ vpunpcklqdq xmm8, xmm7, xmm5
+ vpblendd xmm8, xmm8, xmm6, 0x88
+ vpshufd xmm8, xmm8, 0x78
+ vpunpckhdq xmm5, xmm5, xmm7
+ vpunpckldq xmm6, xmm6, xmm5
+ vpshufd xmm7, xmm6, 0x1E
+ vmovdqa xmm5, xmm9
+ vmovdqa xmm6, xmm8
+ jmp 9b
+9:
/* Result = (row0 ^ row2, row1 ^ row3), stored back over the state. */
+ vpxor xmm0, xmm0, xmm2
+ vpxor xmm1, xmm1, xmm3
+ vmovdqu xmmword ptr [rdi], xmm0
+ vmovdqu xmmword ptr [rdi+0x10], xmm1
+ RET
+SET_SIZE(zfs_blake3_compress_in_place_avx512)
+
/*
 * zfs_blake3_compress_xof_avx512
 *
 * Performs the same seven-round mixing as
 * zfs_blake3_compress_in_place_avx512, but leaves the 32-byte state at
 * rdi untouched and writes a 64-byte result to the buffer at r9.
 *
 * Inputs, as read by the code below:
 *   rdi  pointer to the 32-byte state (read only in this routine)
 *   rsi  pointer to the 64-byte message block (unaligned loads)
 *   rcx  64-bit value that becomes dwords 0 and 1 of row 3
 *   dl   byte that becomes dword 2 of row 3
 *   r8b  byte that becomes dword 3 of row 3
 *   r9   pointer to the 64-byte output buffer (unaligned stores)
 * NOTE(review): presumably (cv, block, block_len, counter, flags, out)
 * of the upstream BLAKE3 C prototype. Confirm against the declaration.
 *
 * Output layout at r9:
 *   +0x00  row0 ^ row2
 *   +0x10  row1 ^ row3
 *   +0x20  row2 ^ the 16 bytes at rdi
 *   +0x30  row3 ^ the 16 bytes at rdi+0x10
 *
 * Working registers: xmm0-xmm3 hold state rows 0-3, xmm4-xmm7 hold the
 * message words, xmm8-xmm9 are scratch. rax and rdx are overwritten.
 */
+ENTRY_ALIGN(zfs_blake3_compress_xof_avx512, 64)
+ ENDBR
+ vmovdqu xmm0, xmmword ptr [rdi]
+ vmovdqu xmm1, xmmword ptr [rdi+0x10]
/* Build rdx = dl | (r8b << 32) so that one vmovq yields two dwords. */
+ movzx eax, r8b
+ movzx edx, dl
+ shl rax, 32
+ add rdx, rax
/* Row 3 = { rcx low dword, rcx high dword, dl, r8b }. */
+ vmovq xmm3, rcx
+ vmovq xmm4, rdx
+ vpunpcklqdq xmm3, xmm3, xmm4
/* Row 2 = the four dwords stored at BLAKE3_IV (aligned load). */
+ vmovaps xmm2, xmmword ptr [BLAKE3_IV+rip]
/* Even-indexed message dwords 0-7 go to xmm4, odd-indexed to xmm5. */
+ vmovups xmm8, xmmword ptr [rsi]
+ vmovups xmm9, xmmword ptr [rsi+0x10]
+ vshufps xmm4, xmm8, xmm9, 136
+ vshufps xmm5, xmm8, xmm9, 221
/* Same split for dwords 8-15 into xmm6 and xmm7, each rotated by one. */
+ vmovups xmm8, xmmword ptr [rsi+0x20]
+ vmovups xmm9, xmmword ptr [rsi+0x30]
+ vshufps xmm6, xmm8, xmm9, 136
+ vshufps xmm7, xmm8, xmm9, 221
+ vpshufd xmm6, xmm6, 0x93
+ vpshufd xmm7, xmm7, 0x93
/* al counts the remaining rounds; the loop body below runs 7 times. */
+ mov al, 7
+9:
/*
 * First half of the round, step 1:
 *   row0 += xmm4 + row1;  row3 = ror32(row3 ^ row0, 16)
 *   row2 += row3;         row1 = ror32(row1 ^ row2, 12)
 */
+ vpaddd xmm0, xmm0, xmm4
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
/* Step 2: same pattern with xmm5 and rotate amounts 8 and 7. */
+ vpaddd xmm0, xmm0, xmm5
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
/* Rotate the dwords of rows 0, 3 and 2 before the second half. */
+ vpshufd xmm0, xmm0, 0x93
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x39
/* Second half, step 1: as above with xmm6, rotates 16 and 12. */
+ vpaddd xmm0, xmm0, xmm6
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 16
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 12
/* Second half, step 2: as above with xmm7, rotates 8 and 7. */
+ vpaddd xmm0, xmm0, xmm7
+ vpaddd xmm0, xmm0, xmm1
+ vpxord xmm3, xmm3, xmm0
+ vprord xmm3, xmm3, 8
+ vpaddd xmm2, xmm2, xmm3
+ vpxord xmm1, xmm1, xmm2
+ vprord xmm1, xmm1, 7
/* Undo the lane rotation applied before the second half. */
+ vpshufd xmm0, xmm0, 0x39
+ vpshufd xmm3, xmm3, 0x4E
+ vpshufd xmm2, xmm2, 0x93
/* Leave after the seventh round; otherwise reorder the message words. */
+ dec al
+ jz 9f
/* Rebuild xmm4-xmm7 from their current contents for the next round. */
+ vshufps xmm8, xmm4, xmm5, 214
+ vpshufd xmm9, xmm4, 0x0F
+ vpshufd xmm4, xmm8, 0x39
+ vshufps xmm8, xmm6, xmm7, 250
+ vpblendd xmm9, xmm9, xmm8, 0xAA
+ vpunpcklqdq xmm8, xmm7, xmm5
+ vpblendd xmm8, xmm8, xmm6, 0x88
+ vpshufd xmm8, xmm8, 0x78
+ vpunpckhdq xmm5, xmm5, xmm7
+ vpunpckldq xmm6, xmm6, xmm5
+ vpshufd xmm7, xmm6, 0x1E
+ vmovdqa xmm5, xmm9
+ vmovdqa xmm6, xmm8
+ jmp 9b
+9:
/* First 32 output bytes: row0 ^ row2 and row1 ^ row3. */
+ vpxor xmm0, xmm0, xmm2
+ vpxor xmm1, xmm1, xmm3
/*
 * Last 32 output bytes: rows 2 and 3 xored with the state still stored
 * at rdi. That memory has not been written by this routine.
 */
+ vpxor xmm2, xmm2, [rdi]
+ vpxor xmm3, xmm3, [rdi+0x10]
+ vmovdqu xmmword ptr [r9], xmm0
+ vmovdqu xmmword ptr [r9+0x10], xmm1
+ vmovdqu xmmword ptr [r9+0x20], xmm2
+ vmovdqu xmmword ptr [r9+0x30], xmm3
+ RET
+SET_SIZE(zfs_blake3_compress_xof_avx512)
+
/*
 * Constant tables. The first group starts on a 64-byte boundary
 * (.p2align 6).
 */
+SECTION_STATIC
+
+.p2align 6
/*
 * INDEX0 and INDEX1: two tables of sixteen dword indices each.
 * NOTE(review): no reference to them is visible in this part of the
 * file; presumably permute selectors for the wide hash_many path.
 * Confirm before changing.
 */
+INDEX0:
+ .long 0, 1, 2, 3, 16, 17, 18, 19
+ .long 8, 9, 10, 11, 24, 25, 26, 27
+INDEX1:
+ .long 4, 5, 6, 7, 20, 21, 22, 23
+ .long 12, 13, 14, 15, 28, 29, 30, 31
/*
 * ADD0 holds the dwords 0 through 15; ADD1 and ADD16 are single dwords.
 * NOTE(review): presumably per-lane counter offsets and increments used
 * by hash_many; the uses are outside this span, so confirm.
 */
+ADD0:
+ .long 0, 1, 2, 3, 4, 5, 6, 7
+ .long 8, 9, 10, 11, 12, 13, 14, 15
+ADD1: .long 1
+
+ADD16: .long 16
/* Single dword with value 64, read through a rip-relative dword load. */
+BLAKE3_BLOCK_LEN:
+ .long 64
/*
 * BLAKE3_IV is read with aligned 16-byte loads (vmovaps above), so it
 * must stay on at least a 16-byte boundary; .p2align 6 gives 64.
 * BLAKE3_IV_0 through BLAKE3_IV_3 label the individual dwords.
 */
+.p2align 6
+BLAKE3_IV:
+BLAKE3_IV_0:
+ .long 0x6A09E667
+BLAKE3_IV_1:
+ .long 0xBB67AE85
+BLAKE3_IV_2:
+ .long 0x3C6EF372
+BLAKE3_IV_3:
+ .long 0xA54FF53A
+
+#endif /* HAVE_AVX512 */
+
/* On ELF targets, emit an empty .note.GNU-stack section. */
+#ifdef __ELF__
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse2.S b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse2.S
new file mode 100644
index 000000000000..78c4ffac53a8
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse2.S
@@ -0,0 +1,2299 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or https://opensource.org/licenses/CDDL-1.0.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3 + * Copyright (c) 2019-2020 Samuel Neves and Matthew Krupcale + * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de> + */ + +#if defined(HAVE_SSE2) + +#define _ASM +#include <sys/asm_linkage.h> + +.intel_syntax noprefix + +SECTION_TEXT + +ENTRY_ALIGN(zfs_blake3_hash_many_sse2, 64) + ENDBR + push r15 + push r14 + push r13 + push r12 + push rbx + push rbp + mov rbp, rsp + sub rsp, 360 + and rsp, 0xFFFFFFFFFFFFFFC0 + neg r9d + movd xmm0, r9d + pshufd xmm0, xmm0, 0x00 + movdqa xmmword ptr [rsp+0x130], xmm0 + movdqa xmm1, xmm0 + pand xmm1, xmmword ptr [ADD0+rip] + pand xmm0, xmmword ptr [ADD1+rip] + movdqa xmmword ptr [rsp+0x150], xmm0 + movd xmm0, r8d + pshufd xmm0, xmm0, 0x00 + paddd xmm0, xmm1 + movdqa xmmword ptr [rsp+0x110], xmm0 + pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] + pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] + pcmpgtd xmm1, xmm0 + shr r8, 32 + movd xmm2, r8d + pshufd xmm2, xmm2, 0x00 + psubd xmm2, xmm1 + movdqa xmmword ptr [rsp+0x120], xmm2 + mov rbx, qword ptr [rbp+0x50] + mov r15, rdx + shl r15, 6 + movzx r13d, byte ptr [rbp+0x38] + movzx r12d, byte ptr [rbp+0x48] + cmp rsi, 4 + jc 3f +2: + movdqu xmm3, xmmword ptr [rcx] + pshufd xmm0, xmm3, 0x00 + pshufd xmm1, xmm3, 0x55 + pshufd xmm2, xmm3, 0xAA + pshufd xmm3, xmm3, 0xFF + movdqu xmm7, xmmword ptr [rcx+0x10] + pshufd xmm4, xmm7, 0x00 + pshufd xmm5, xmm7, 0x55 + pshufd xmm6, xmm7, 0xAA + pshufd xmm7, xmm7, 0xFF + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + mov r10, qword ptr [rdi+0x10] + mov r11, qword ptr [rdi+0x18] + movzx eax, byte ptr [rbp+0x40] + or eax, r13d + xor edx, edx +9: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + movdqu xmm8, 
xmmword ptr [r8+rdx-0x40] + movdqu xmm9, xmmword ptr [r9+rdx-0x40] + movdqu xmm10, xmmword ptr [r10+rdx-0x40] + movdqu xmm11, xmmword ptr [r11+rdx-0x40] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr [rsp], xmm8 + movdqa xmmword ptr [rsp+0x10], xmm9 + movdqa xmmword ptr [rsp+0x20], xmm12 + movdqa xmmword ptr [rsp+0x30], xmm13 + movdqu xmm8, xmmword ptr [r8+rdx-0x30] + movdqu xmm9, xmmword ptr [r9+rdx-0x30] + movdqu xmm10, xmmword ptr [r10+rdx-0x30] + movdqu xmm11, xmmword ptr [r11+rdx-0x30] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr [rsp+0x40], xmm8 + movdqa xmmword ptr [rsp+0x50], xmm9 + movdqa xmmword ptr [rsp+0x60], xmm12 + movdqa xmmword ptr [rsp+0x70], xmm13 + movdqu xmm8, xmmword ptr [r8+rdx-0x20] + movdqu xmm9, xmmword ptr [r9+rdx-0x20] + movdqu xmm10, xmmword ptr [r10+rdx-0x20] + movdqu xmm11, xmmword ptr [r11+rdx-0x20] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr [rsp+0x80], xmm8 + movdqa xmmword ptr [rsp+0x90], xmm9 + movdqa xmmword ptr [rsp+0xA0], xmm12 + movdqa xmmword ptr [rsp+0xB0], xmm13 + movdqu xmm8, xmmword ptr [r8+rdx-0x10] + movdqu xmm9, xmmword ptr [r9+rdx-0x10] + movdqu xmm10, xmmword ptr [r10+rdx-0x10] + movdqu xmm11, xmmword ptr [r11+rdx-0x10] + movdqa xmm12, xmm8 + punpckldq 
xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr [rsp+0xC0], xmm8 + movdqa xmmword ptr [rsp+0xD0], xmm9 + movdqa xmmword ptr [rsp+0xE0], xmm12 + movdqa xmmword ptr [rsp+0xF0], xmm13 + movdqa xmm9, xmmword ptr [BLAKE3_IV_1+rip] + movdqa xmm10, xmmword ptr [BLAKE3_IV_2+rip] + movdqa xmm11, xmmword ptr [BLAKE3_IV_3+rip] + movdqa xmm12, xmmword ptr [rsp+0x110] + movdqa xmm13, xmmword ptr [rsp+0x120] + movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip] + movd xmm15, eax + pshufd xmm15, xmm15, 0x00 + prefetcht0 [r8+rdx+0x80] + prefetcht0 [r9+rdx+0x80] + prefetcht0 [r10+rdx+0x80] + prefetcht0 [r11+rdx+0x80] + paddd xmm0, xmmword ptr [rsp] + paddd xmm1, xmmword ptr [rsp+0x20] + paddd xmm2, xmmword ptr [rsp+0x40] + paddd xmm3, xmmword ptr [rsp+0x60] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + movdqa xmm8, xmmword ptr [BLAKE3_IV_0+rip] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x10] + paddd xmm1, xmmword ptr [rsp+0x30] + paddd xmm2, xmmword ptr [rsp+0x50] + paddd 
xmm3, xmmword ptr [rsp+0x70] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x80] + paddd xmm1, xmmword ptr [rsp+0xA0] + paddd xmm2, xmmword ptr [rsp+0xC0] + paddd xmm3, xmmword ptr [rsp+0xE0] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld 
xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x90] + paddd xmm1, xmmword ptr [rsp+0xB0] + paddd xmm2, xmmword ptr [rsp+0xD0] + paddd xmm3, xmmword ptr [rsp+0xF0] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x20] + paddd xmm1, xmmword ptr [rsp+0x30] + paddd xmm2, xmmword ptr [rsp+0x70] + paddd xmm3, xmmword ptr [rsp+0x40] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa 
xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x60] + paddd xmm1, xmmword ptr [rsp+0xA0] + paddd xmm2, xmmword ptr [rsp] + paddd xmm3, xmmword ptr [rsp+0xD0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x10] + paddd xmm1, xmmword ptr [rsp+0xC0] + paddd xmm2, xmmword ptr [rsp+0x90] + paddd xmm3, xmmword ptr [rsp+0xF0] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 
+ pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0xB0] + paddd xmm1, xmmword ptr [rsp+0x50] + paddd xmm2, xmmword ptr [rsp+0xE0] + paddd xmm3, xmmword ptr [rsp+0x80] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x30] + paddd xmm1, xmmword ptr [rsp+0xA0] + paddd xmm2, xmmword ptr [rsp+0xD0] + paddd xmm3, xmmword ptr [rsp+0x70] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 
0xB1 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x40] + paddd xmm1, xmmword ptr [rsp+0xC0] + paddd xmm2, xmmword ptr [rsp+0x20] + paddd xmm3, xmmword ptr [rsp+0xE0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x60] + paddd xmm1, xmmword ptr [rsp+0x90] + paddd xmm2, xmmword ptr [rsp+0xB0] + paddd xmm3, xmmword ptr [rsp+0x80] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + 
pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x50] + paddd xmm1, xmmword ptr [rsp] + paddd xmm2, xmmword ptr [rsp+0xF0] + paddd xmm3, xmmword ptr [rsp+0x10] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0xA0] + paddd xmm1, xmmword ptr [rsp+0xC0] + paddd xmm2, xmmword ptr [rsp+0xE0] + paddd xmm3, xmmword ptr [rsp+0xD0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 
+ paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x70] + paddd xmm1, xmmword ptr [rsp+0x90] + paddd xmm2, xmmword ptr [rsp+0x30] + paddd xmm3, xmmword ptr [rsp+0xF0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x40] + paddd 
xmm1, xmmword ptr [rsp+0xB0] + paddd xmm2, xmmword ptr [rsp+0x50] + paddd xmm3, xmmword ptr [rsp+0x10] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp] + paddd xmm1, xmmword ptr [rsp+0x20] + paddd xmm2, xmmword ptr [rsp+0x80] + paddd xmm3, xmmword ptr [rsp+0x60] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld 
xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0xC0] + paddd xmm1, xmmword ptr [rsp+0x90] + paddd xmm2, xmmword ptr [rsp+0xF0] + paddd xmm3, xmmword ptr [rsp+0xE0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0xD0] + paddd xmm1, xmmword ptr [rsp+0xB0] + paddd xmm2, xmmword ptr [rsp+0xA0] + paddd xmm3, xmmword ptr [rsp+0x80] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + 
movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x70] + paddd xmm1, xmmword ptr [rsp+0x50] + paddd xmm2, xmmword ptr [rsp] + paddd xmm3, xmmword ptr [rsp+0x60] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x20] + paddd xmm1, xmmword ptr [rsp+0x30] + paddd xmm2, xmmword ptr [rsp+0x10] + paddd xmm3, xmmword ptr [rsp+0x40] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] 
+ paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x90] + paddd xmm1, xmmword ptr [rsp+0xB0] + paddd xmm2, xmmword ptr [rsp+0x80] + paddd xmm3, xmmword ptr [rsp+0xF0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0xE0] + paddd xmm1, xmmword ptr [rsp+0x50] + paddd xmm2, xmmword ptr [rsp+0xC0] + paddd xmm3, xmmword ptr [rsp+0x10] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, 
xmm8 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0xD0] + paddd xmm1, xmmword ptr [rsp] + paddd xmm2, xmmword ptr [rsp+0x20] + paddd xmm3, xmmword ptr [rsp+0x40] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x30] + paddd xmm1, xmmword ptr [rsp+0xA0] + paddd xmm2, xmmword ptr [rsp+0x60] + paddd xmm3, xmmword ptr [rsp+0x70] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor 
xmm15, xmm8 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0xB0] + paddd xmm1, xmmword ptr [rsp+0x50] + paddd xmm2, xmmword ptr [rsp+0x10] + paddd xmm3, xmmword ptr [rsp+0x80] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0xF0] + paddd xmm1, xmmword ptr [rsp] + paddd xmm2, xmmword ptr [rsp+0x90] + paddd xmm3, xmmword ptr [rsp+0x60] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + 
paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0xE0] + paddd xmm1, xmmword ptr [rsp+0x20] + paddd xmm2, xmmword ptr [rsp+0x30] + paddd xmm3, xmmword ptr [rsp+0x70] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + pshuflw xmm15, xmm15, 0xB1 + pshufhw xmm15, xmm15, 0xB1 + pshuflw xmm12, xmm12, 0xB1 + pshufhw xmm12, xmm12, 0xB1 + pshuflw xmm13, xmm13, 0xB1 + pshufhw xmm13, xmm13, 0xB1 + pshuflw xmm14, xmm14, 0xB1 + pshufhw xmm14, xmm14, 0xB1 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0xA0] + paddd 
xmm1, xmmword ptr [rsp+0xC0] + paddd xmm2, xmmword ptr [rsp+0x40] + paddd xmm3, xmmword ptr [rsp+0xD0] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmm15 + psrld xmm15, 8 + pslld xmm8, 24 + pxor xmm15, xmm8 + movdqa xmm8, xmm12 + psrld xmm12, 8 + pslld xmm8, 24 + pxor xmm12, xmm8 + movdqa xmm8, xmm13 + psrld xmm13, 8 + pslld xmm8, 24 + pxor xmm13, xmm8 + movdqa xmm8, xmm14 + psrld xmm14, 8 + pslld xmm8, 24 + pxor xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + pxor xmm0, xmm8 + pxor xmm1, xmm9 + pxor xmm2, xmm10 + pxor xmm3, xmm11 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + pxor xmm4, xmm12 + pxor xmm5, xmm13 + pxor xmm6, xmm14 + pxor xmm7, xmm15 + mov eax, r13d + jne 9b + movdqa xmm9, xmm0 + punpckldq xmm0, xmm1 + punpckhdq xmm9, xmm1 + movdqa xmm11, xmm2 + punpckldq xmm2, xmm3 + punpckhdq xmm11, xmm3 + movdqa xmm1, xmm0 + punpcklqdq xmm0, xmm2 + punpckhqdq xmm1, xmm2 + movdqa xmm3, xmm9 + punpcklqdq xmm9, xmm11 + punpckhqdq xmm3, xmm11 + movdqu xmmword ptr [rbx], xmm0 + movdqu xmmword ptr [rbx+0x20], xmm1 + movdqu xmmword ptr [rbx+0x40], xmm9 + movdqu xmmword ptr [rbx+0x60], xmm3 + movdqa xmm9, xmm4 + punpckldq xmm4, xmm5 + punpckhdq xmm9, xmm5 + movdqa xmm11, xmm6 + punpckldq xmm6, xmm7 + punpckhdq xmm11, xmm7 + movdqa xmm5, xmm4 + punpcklqdq xmm4, xmm6 + punpckhqdq xmm5, xmm6 + movdqa xmm7, xmm9 + punpcklqdq xmm9, xmm11 + punpckhqdq xmm7, xmm11 + movdqu xmmword ptr [rbx+0x10], xmm4 + movdqu xmmword ptr [rbx+0x30], xmm5 + movdqu xmmword ptr 
[rbx+0x50], xmm9 + movdqu xmmword ptr [rbx+0x70], xmm7 + movdqa xmm1, xmmword ptr [rsp+0x110] + movdqa xmm0, xmm1 + paddd xmm1, xmmword ptr [rsp+0x150] + movdqa xmmword ptr [rsp+0x110], xmm1 + pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] + pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] + pcmpgtd xmm0, xmm1 + movdqa xmm1, xmmword ptr [rsp+0x120] + psubd xmm1, xmm0 + movdqa xmmword ptr [rsp+0x120], xmm1 + add rbx, 128 + add rdi, 32 + sub rsi, 4 + cmp rsi, 4 + jnc 2b + test rsi, rsi + jnz 3f +4: + mov rsp, rbp + pop rbp + pop rbx + pop r12 + pop r13 + pop r14 + pop r15 + RET +.p2align 5 +3: + test esi, 0x2 + je 3f + movups xmm0, xmmword ptr [rcx] + movups xmm1, xmmword ptr [rcx+0x10] + movaps xmm8, xmm0 + movaps xmm9, xmm1 + movd xmm13, dword ptr [rsp+0x110] + movd xmm14, dword ptr [rsp+0x120] + punpckldq xmm13, xmm14 + movaps xmmword ptr [rsp], xmm13 + movd xmm14, dword ptr [rsp+0x114] + movd xmm13, dword ptr [rsp+0x124] + punpckldq xmm14, xmm13 + movaps xmmword ptr [rsp+0x10], xmm14 + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + movzx eax, byte ptr [rbp+0x40] + or eax, r13d + xor edx, edx +2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + movaps xmm2, xmmword ptr [BLAKE3_IV+rip] + movaps xmm10, xmm2 + movups xmm4, xmmword ptr [r8+rdx-0x40] + movups xmm5, xmmword ptr [r8+rdx-0x30] + movaps xmm3, xmm4 + shufps xmm4, xmm5, 136 + shufps xmm3, xmm5, 221 + movaps xmm5, xmm3 + movups xmm6, xmmword ptr [r8+rdx-0x20] + movups xmm7, xmmword ptr [r8+rdx-0x10] + movaps xmm3, xmm6 + shufps xmm6, xmm7, 136 + pshufd xmm6, xmm6, 0x93 + shufps xmm3, xmm7, 221 + pshufd xmm7, xmm3, 0x93 + movups xmm12, xmmword ptr [r9+rdx-0x40] + movups xmm13, xmmword ptr [r9+rdx-0x30] + movaps xmm11, xmm12 + shufps xmm12, xmm13, 136 + shufps xmm11, xmm13, 221 + movaps xmm13, xmm11 + movups xmm14, xmmword ptr [r9+rdx-0x20] + movups xmm15, xmmword ptr [r9+rdx-0x10] + movaps xmm11, xmm14 + shufps xmm14, xmm15, 136 + pshufd xmm14, xmm14, 0x93 + shufps xmm11, xmm15, 
221 + pshufd xmm15, xmm11, 0x93 + shl rax, 0x20 + or rax, 0x40 + movq xmm3, rax + movdqa xmmword ptr [rsp+0x20], xmm3 + movaps xmm3, xmmword ptr [rsp] + movaps xmm11, xmmword ptr [rsp+0x10] + punpcklqdq xmm3, xmmword ptr [rsp+0x20] + punpcklqdq xmm11, xmmword ptr [rsp+0x20] + mov al, 7 +9: + paddd xmm0, xmm4 + paddd xmm8, xmm12 + movaps xmmword ptr [rsp+0x20], xmm4 + movaps xmmword ptr [rsp+0x30], xmm12 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + pshuflw xmm3, xmm3, 0xB1 + pshufhw xmm3, xmm3, 0xB1 + pshuflw xmm11, xmm11, 0xB1 + pshufhw xmm11, xmm11, 0xB1 + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 20 + psrld xmm4, 12 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 20 + psrld xmm4, 12 + por xmm9, xmm4 + paddd xmm0, xmm5 + paddd xmm8, xmm13 + movaps xmmword ptr [rsp+0x40], xmm5 + movaps xmmword ptr [rsp+0x50], xmm13 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + movdqa xmm13, xmm3 + psrld xmm3, 8 + pslld xmm13, 24 + pxor xmm3, xmm13 + movdqa xmm13, xmm11 + psrld xmm11, 8 + pslld xmm13, 24 + pxor xmm11, xmm13 + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 25 + psrld xmm4, 7 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 25 + psrld xmm4, 7 + por xmm9, xmm4 + pshufd xmm0, xmm0, 0x93 + pshufd xmm8, xmm8, 0x93 + pshufd xmm3, xmm3, 0x4E + pshufd xmm11, xmm11, 0x4E + pshufd xmm2, xmm2, 0x39 + pshufd xmm10, xmm10, 0x39 + paddd xmm0, xmm6 + paddd xmm8, xmm14 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + pshuflw xmm3, xmm3, 0xB1 + pshufhw xmm3, xmm3, 0xB1 + pshuflw xmm11, xmm11, 0xB1 + pshufhw xmm11, xmm11, 0xB1 + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 20 + psrld xmm4, 12 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 20 + psrld xmm4, 12 + por xmm9, xmm4 + paddd xmm0, xmm7 + 
paddd xmm8, xmm15 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + movdqa xmm13, xmm3 + psrld xmm3, 8 + pslld xmm13, 24 + pxor xmm3, xmm13 + movdqa xmm13, xmm11 + psrld xmm11, 8 + pslld xmm13, 24 + pxor xmm11, xmm13 + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 25 + psrld xmm4, 7 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 25 + psrld xmm4, 7 + por xmm9, xmm4 + pshufd xmm0, xmm0, 0x39 + pshufd xmm8, xmm8, 0x39 + pshufd xmm3, xmm3, 0x4E + pshufd xmm11, xmm11, 0x4E + pshufd xmm2, xmm2, 0x93 + pshufd xmm10, xmm10, 0x93 + dec al + je 9f + movdqa xmm12, xmmword ptr [rsp+0x20] + movdqa xmm5, xmmword ptr [rsp+0x40] + pshufd xmm13, xmm12, 0x0F + shufps xmm12, xmm5, 214 + pshufd xmm4, xmm12, 0x39 + movdqa xmm12, xmm6 + shufps xmm12, xmm7, 250 + pand xmm13, xmmword ptr [PBLENDW_0x33_MASK+rip] + pand xmm12, xmmword ptr [PBLENDW_0xCC_MASK+rip] + por xmm13, xmm12 + movdqa xmmword ptr [rsp+0x20], xmm13 + movdqa xmm12, xmm7 + punpcklqdq xmm12, xmm5 + movdqa xmm13, xmm6 + pand xmm12, xmmword ptr [PBLENDW_0x3F_MASK+rip] + pand xmm13, xmmword ptr [PBLENDW_0xC0_MASK+rip] + por xmm12, xmm13 + pshufd xmm12, xmm12, 0x78 + punpckhdq xmm5, xmm7 + punpckldq xmm6, xmm5 + pshufd xmm7, xmm6, 0x1E + movdqa xmmword ptr [rsp+0x40], xmm12 + movdqa xmm5, xmmword ptr [rsp+0x30] + movdqa xmm13, xmmword ptr [rsp+0x50] + pshufd xmm6, xmm5, 0x0F + shufps xmm5, xmm13, 214 + pshufd xmm12, xmm5, 0x39 + movdqa xmm5, xmm14 + shufps xmm5, xmm15, 250 + pand xmm6, xmmword ptr [PBLENDW_0x33_MASK+rip] + pand xmm5, xmmword ptr [PBLENDW_0xCC_MASK+rip] + por xmm6, xmm5 + movdqa xmm5, xmm15 + punpcklqdq xmm5, xmm13 + movdqa xmmword ptr [rsp+0x30], xmm2 + movdqa xmm2, xmm14 + pand xmm5, xmmword ptr [PBLENDW_0x3F_MASK+rip] + pand xmm2, xmmword ptr [PBLENDW_0xC0_MASK+rip] + por xmm5, xmm2 + movdqa xmm2, xmmword ptr [rsp+0x30] + pshufd xmm5, xmm5, 0x78 + punpckhdq xmm13, xmm15 + punpckldq xmm14, xmm13 + pshufd xmm15, 
xmm14, 0x1E + movdqa xmm13, xmm6 + movdqa xmm14, xmm5 + movdqa xmm5, xmmword ptr [rsp+0x20] + movdqa xmm6, xmmword ptr [rsp+0x40] + jmp 9b +9: + pxor xmm0, xmm2 + pxor xmm1, xmm3 + pxor xmm8, xmm10 + pxor xmm9, xmm11 + mov eax, r13d + cmp rdx, r15 + jne 2b + movups xmmword ptr [rbx], xmm0 + movups xmmword ptr [rbx+0x10], xmm1 + movups xmmword ptr [rbx+0x20], xmm8 + movups xmmword ptr [rbx+0x30], xmm9 + mov eax, dword ptr [rsp+0x130] + neg eax + mov r10d, dword ptr [rsp+0x110+8*rax] + mov r11d, dword ptr [rsp+0x120+8*rax] + mov dword ptr [rsp+0x110], r10d + mov dword ptr [rsp+0x120], r11d + add rdi, 16 + add rbx, 64 + sub rsi, 2 +3: + test esi, 0x1 + je 4b + movups xmm0, xmmword ptr [rcx] + movups xmm1, xmmword ptr [rcx+0x10] + movd xmm13, dword ptr [rsp+0x110] + movd xmm14, dword ptr [rsp+0x120] + punpckldq xmm13, xmm14 + mov r8, qword ptr [rdi] + movzx eax, byte ptr [rbp+0x40] + or eax, r13d + xor edx, edx +2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + movaps xmm2, xmmword ptr [BLAKE3_IV+rip] + shl rax, 32 + or rax, 64 + movq xmm12, rax + movdqa xmm3, xmm13 + punpcklqdq xmm3, xmm12 + movups xmm4, xmmword ptr [r8+rdx-0x40] + movups xmm5, xmmword ptr [r8+rdx-0x30] + movaps xmm8, xmm4 + shufps xmm4, xmm5, 136 + shufps xmm8, xmm5, 221 + movaps xmm5, xmm8 + movups xmm6, xmmword ptr [r8+rdx-0x20] + movups xmm7, xmmword ptr [r8+rdx-0x10] + movaps xmm8, xmm6 + shufps xmm6, xmm7, 136 + pshufd xmm6, xmm6, 0x93 + shufps xmm8, xmm7, 221 + pshufd xmm7, xmm8, 0x93 + mov al, 7 +9: + paddd xmm0, xmm4 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshuflw xmm3, xmm3, 0xB1 + pshufhw xmm3, xmm3, 0xB1 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm5 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + movdqa xmm14, xmm3 + psrld xmm3, 8 + pslld xmm14, 24 + pxor xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd 
xmm0, xmm0, 0x93
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x39
+ paddd xmm0, xmm6
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x39
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f
+ movdqa xmm8, xmm4
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0x0F
+ pshufd xmm4, xmm8, 0x39
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pand xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip]
+ pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip]
+ por xmm9, xmm8
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ movdqa xmm10, xmm6
+ pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip]
+ pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip]
+ por xmm8, xmm10
+ pshufd xmm8, xmm8, 0x78
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp 9b
+9:
+ pxor xmm0, xmm2
+ pxor xmm1, xmm3
+ mov eax, r13d
+ cmp rdx, r15
+ jne 2b
+ movups xmmword ptr [rbx], xmm0
+ movups xmmword ptr [rbx+0x10], xmm1
+ jmp 4b
+SET_SIZE(zfs_blake3_hash_many_sse2)
+/*
+ * zfs_blake3_compress_in_place_sse2
+ *
+ * Runs the seven-pass round loop below over one 64-byte block and
+ * stores the 32-byte result back over the state it was loaded from.
+ *
+ * Registers as read by the code:
+ *   rdi  32-byte state, loaded into xmm0 and xmm1, overwritten on exit
+ *   rsi  64-byte input block (read with unaligned loads)
+ *   rcx  64-bit value that becomes dwords 0 and 1 of row xmm3
+ *   rdx  low half becomes dword 2 of row xmm3
+ *   r8   shifted left by 32 and added to rdx, so it lands in dword 3
+ *
+ * Presumably these are the cv, block, counter, block_len and flags
+ * arguments of the BLAKE3 compress interface; confirm against the C
+ * prototype, which is not part of this file.
+ *
+ * Scratch: rax (al is the round counter), xmm0-xmm11 and xmm14.
+ *
+ * NOTE(review): rdx and r8 are consumed at full 64-bit width here, while
+ * zfs_blake3_compress_xof_sse2 zero-extends dl and r8b before combining
+ * them. This entry point therefore relies on the caller having cleared
+ * the upper bits of both registers; confirm that this is guaranteed.
+ */
+ENTRY_ALIGN(zfs_blake3_compress_in_place_sse2, 64)
+ ENDBR
+ movups xmm0, xmmword ptr [rdi] /* row 0 = state bytes 0-15 */
+ movups xmm1, xmmword ptr [rdi+0x10] /* row 1 = state bytes 16-31 */
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip] /* row 2 = the four IV words */
+ shl r8, 32
+ add rdx, r8 /* rdx = rdx plus r8 moved into the upper dword */
+ movq xmm3, rcx
+ movq xmm4, rdx
+ punpcklqdq xmm3, xmm4 /* row 3 = rcx in the low qword, rdx in the high qword */
+ movups xmm4, xmmword ptr [rsi]
+ movups xmm5, xmmword ptr [rsi+0x10]
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136 /* xmm4 = dwords 0,2,4,6 of the block */
+ shufps xmm8, xmm5, 221 /* xmm8 = dwords 1,3,5,7 of the block */
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [rsi+0x20]
+ movups xmm7, xmmword ptr [rsi+0x30]
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136 /* xmm6 = dwords 8,10,12,14 of the block */
+ pshufd xmm6, xmm6, 0x93 /* ... with the lanes rotated by one position */
+ shufps xmm8, xmm7, 221 /* xmm8 = dwords 9,11,13,15 of the block */
+ pshufd xmm7, xmm8, 0x93 /* xmm7 = the same with the lanes rotated by one position */
+ mov al, 7 /* loop counter: the body below runs seven times */
+9: /* top of the round loop */
+ paddd xmm0, xmm4 /* row 0 += message words in xmm4 */
+ paddd xmm0, xmm1 /* row 0 += row 1 */
+ pxor xmm3, xmm0 /* row 3 ^= row 0 */
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1 /* the two shuffles swap the 16-bit halves of every dword: rotate by 16 */
+ paddd xmm2, xmm3 /* row 2 += row 3 */
+ pxor xmm1, xmm2 /* row 1 ^= row 2 */
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11 /* row 1 rotated right by 12 in every dword */
+ paddd xmm0, xmm5 /* row 0 += message words in xmm5 */
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14 /* row 3 rotated right by 8; the shifted halves do not overlap, so pxor acts as por */
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11 /* row 1 rotated right by 7 in every dword */
+ pshufd xmm0, xmm0, 0x93 /* rotate the lanes of rows 0, 3 and 2 before the second half */
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x39
+ paddd xmm0, xmm6 /* second half: same sequence with message words in xmm6 and xmm7 */
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x39 /* undo the lane rotation applied before the second half */
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f /* seventh pass done: skip the message shuffle and leave the loop */
+ movdqa xmm8, xmm4 /* from here to the jmp: rearrange xmm4-xmm7 for the next pass */
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0x0F
+ pshufd xmm4, xmm8, 0x39
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pand xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip] /* keep dwords 0 and 2 of xmm9 */
+ pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip] /* keep dwords 1 and 3 of xmm8 */
+ por xmm9, xmm8 /* blend of the two */
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ movdqa xmm10, xmm6
+ pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip] /* keep dwords 0-2 of xmm8 */
+ pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip] /* keep dword 3 of xmm10 */
+ por xmm8, xmm10 /* blend of the two */
+ pshufd xmm8, xmm8, 0x78
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp 9b
+9: /* loop exit */
+ pxor xmm0, xmm2 /* row 0 ^= row 2 */
+ pxor xmm1, xmm3 /* row 1 ^= row 3 */
+ movups xmmword ptr [rdi], xmm0 /* write the 32-byte result back to [rdi] */
+ movups xmmword ptr [rdi+0x10], xmm1
+ RET
+SET_SIZE(zfs_blake3_compress_in_place_sse2)
+/*
+ * zfs_blake3_compress_xof_sse2
+ *
+ * Same block setup and seven-pass round loop as the in-place variant
+ * above, except that the state at [rdi] is only read and a 64-byte
+ * result is stored through r9.
+ */
+ENTRY_ALIGN(zfs_blake3_compress_xof_sse2, 64)
+ ENDBR
+ movups xmm0, xmmword ptr [rdi] /* row 0 = state bytes 0-15 */
+ movups xmm1, 
xmmword ptr [rdi+0x10] /* operand of the movups begun on the previous line: row 1 = state bytes 16-31 */
+ movaps xmm2, xmmword ptr [BLAKE3_IV+rip] /* row 2 = the four IV words */
+ movzx eax, r8b /* keep only the low byte of r8 */
+ movzx edx, dl /* keep only the low byte of rdx */
+ shl rax, 32
+ add rdx, rax /* rdx = dl in the low dword, r8b in the high dword */
+ movq xmm3, rcx
+ movq xmm4, rdx
+ punpcklqdq xmm3, xmm4 /* row 3 = rcx in the low qword, rdx in the high qword */
+ movups xmm4, xmmword ptr [rsi]
+ movups xmm5, xmmword ptr [rsi+0x10]
+ movaps xmm8, xmm4
+ shufps xmm4, xmm5, 136 /* xmm4 = dwords 0,2,4,6 of the block at [rsi] */
+ shufps xmm8, xmm5, 221 /* xmm8 = dwords 1,3,5,7 of the block */
+ movaps xmm5, xmm8
+ movups xmm6, xmmword ptr [rsi+0x20]
+ movups xmm7, xmmword ptr [rsi+0x30]
+ movaps xmm8, xmm6
+ shufps xmm6, xmm7, 136 /* xmm6 = dwords 8,10,12,14 of the block */
+ pshufd xmm6, xmm6, 0x93 /* ... with the lanes rotated by one position */
+ shufps xmm8, xmm7, 221 /* xmm8 = dwords 9,11,13,15 of the block */
+ pshufd xmm7, xmm8, 0x93 /* xmm7 = the same with the lanes rotated by one position */
+ mov al, 7 /* loop counter: the body below runs seven times */
+9: /* top of the round loop */
+ paddd xmm0, xmm4 /* row 0 += message words in xmm4 */
+ paddd xmm0, xmm1 /* row 0 += row 1 */
+ pxor xmm3, xmm0 /* row 3 ^= row 0 */
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1 /* the two shuffles swap the 16-bit halves of every dword: rotate by 16 */
+ paddd xmm2, xmm3 /* row 2 += row 3 */
+ pxor xmm1, xmm2 /* row 1 ^= row 2 */
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11 /* row 1 rotated right by 12 in every dword */
+ paddd xmm0, xmm5 /* row 0 += message words in xmm5 */
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14 /* row 3 rotated right by 8; the shifted halves do not overlap, so pxor acts as por */
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11 /* row 1 rotated right by 7 in every dword */
+ pshufd xmm0, xmm0, 0x93 /* rotate the lanes of rows 0, 3 and 2 before the second half */
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x39
+ paddd xmm0, xmm6 /* second half: same sequence with message words in xmm6 and xmm7 */
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ pshuflw xmm3, xmm3, 0xB1
+ pshufhw xmm3, xmm3, 0xB1
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 20
+ psrld xmm11, 12
+ por xmm1, xmm11
+ paddd xmm0, xmm7
+ paddd xmm0, xmm1
+ pxor xmm3, xmm0
+ movdqa xmm14, xmm3
+ psrld xmm3, 8
+ pslld xmm14, 24
+ pxor xmm3, xmm14
+ paddd xmm2, xmm3
+ pxor xmm1, xmm2
+ movdqa xmm11, xmm1
+ pslld xmm1, 25
+ psrld xmm11, 7
+ por xmm1, xmm11
+ pshufd xmm0, xmm0, 0x39 /* undo the lane rotation applied before the second half */
+ pshufd xmm3, xmm3, 0x4E
+ pshufd xmm2, xmm2, 0x93
+ dec al
+ jz 9f /* seventh pass done: skip the message shuffle and leave the loop */
+ movdqa xmm8, xmm4 /* from here to the jmp: rearrange xmm4-xmm7 for the next pass */
+ shufps xmm8, xmm5, 214
+ pshufd xmm9, xmm4, 0x0F
+ pshufd xmm4, xmm8, 0x39
+ movdqa xmm8, xmm6
+ shufps xmm8, xmm7, 250
+ pand xmm9, xmmword ptr [PBLENDW_0x33_MASK+rip] /* keep dwords 0 and 2 of xmm9 */
+ pand xmm8, xmmword ptr [PBLENDW_0xCC_MASK+rip] /* keep dwords 1 and 3 of xmm8 */
+ por xmm9, xmm8 /* blend of the two */
+ movdqa xmm8, xmm7
+ punpcklqdq xmm8, xmm5
+ movdqa xmm10, xmm6
+ pand xmm8, xmmword ptr [PBLENDW_0x3F_MASK+rip] /* keep dwords 0-2 of xmm8 */
+ pand xmm10, xmmword ptr [PBLENDW_0xC0_MASK+rip] /* keep dword 3 of xmm10 */
+ por xmm8, xmm10 /* blend of the two */
+ pshufd xmm8, xmm8, 0x78
+ punpckhdq xmm5, xmm7
+ punpckldq xmm6, xmm5
+ pshufd xmm7, xmm6, 0x1E
+ movdqa xmm5, xmm9
+ movdqa xmm6, xmm8
+ jmp 9b
+9: /* loop exit: assemble the 64-byte output */
+ movdqu xmm4, xmmword ptr [rdi] /* reload the untouched input state, bytes 0-15 */
+ movdqu xmm5, xmmword ptr [rdi+0x10] /* and bytes 16-31 */
+ pxor xmm0, xmm2 /* output bytes 0-15 = row 0 ^ row 2 */
+ pxor xmm1, xmm3 /* output bytes 16-31 = row 1 ^ row 3 */
+ pxor xmm2, xmm4 /* output bytes 32-47 = row 2 ^ input state bytes 0-15 */
+ pxor xmm3, xmm5 /* output bytes 48-63 = row 3 ^ input state bytes 16-31 */
+ movups xmmword ptr [r9], xmm0 /* r9 is the 64-byte destination (unaligned stores) */
+ movups xmmword ptr [r9+0x10], xmm1
+ movups xmmword ptr [r9+0x20], xmm2
+ movups xmmword ptr [r9+0x30], xmm3
+ RET
+SET_SIZE(zfs_blake3_compress_xof_sse2)
+/*
+ * Constant tables, addressed rip-relative by the routines of this file.
+ * The .p2align 6 below places the first table on a 64-byte boundary.
+ */
+SECTION_STATIC
+.p2align 6
+BLAKE3_IV: /* the four IV words as one vector; loaded into row 2 by the compress routines */
+ .long 0x6A09E667, 0xBB67AE85
+ .long 0x3C6EF372, 0xA54FF53A
+ADD0: /* lane values 0, 1, 2, 3 */
+ .long 0, 1, 2, 3
+ADD1: /* 4 in every lane */
+ .long 4, 4, 4, 4
+BLAKE3_IV_0: /* first IV word replicated in all four lanes */
+ .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667
+BLAKE3_IV_1: /* second IV word replicated in all four lanes */
+ .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85
+BLAKE3_IV_2: /* third IV word replicated in all four lanes */
+ .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372
+BLAKE3_IV_3: /* fourth IV word replicated in all four lanes */
+ .long 0xA54FF53A, 0xA54FF53A, 0xA54FF53A, 0xA54FF53A
+BLAKE3_BLOCK_LEN: /* 64 in every lane */
+ .long 64, 64, 64, 64
+CMP_MSB_MASK: /* sign bit of every lane; the hash_many code xors it into both operands before the signed pcmpgtd */
+ .long 0x80000000, 0x80000000, 0x80000000, 0x80000000
+PBLENDW_0x33_MASK: /* all-ones in dwords 0 and 2; the PBLENDW masks are applied with pand and por, presumably standing in for pblendw */
+ .long 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF, 0x00000000
+PBLENDW_0xCC_MASK: /* all-ones in dwords 1 and 3 */
+ .long 0x00000000, 0xFFFFFFFF, 0x00000000, 0xFFFFFFFF
+PBLENDW_0x3F_MASK: /* all-ones in dwords 0, 1 and 2 */
+ .long 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x00000000
+PBLENDW_0xC0_MASK: /* all-ones in dword 3 only */
+ .long 0x00000000, 0x00000000, 0x00000000, 0xFFFFFFFF
+
+#endif /* HAVE_SSE2 */
+
+#ifdef __ELF__
+.section .note.GNU-stack,"",%progbits
+#endif
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse41.S b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse41.S
new file mode 100644
index 000000000000..8ee7be75a0e1
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse41.S
@@ -0,0 +1,2037 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3 + * Copyright (c) 2019-2020 Samuel Neves + * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de> + */ + +#if defined(HAVE_SSE4_1) + +#define _ASM +#include <sys/asm_linkage.h> + +.intel_syntax noprefix + +.text + +ENTRY_ALIGN(zfs_blake3_hash_many_sse41, 64) + ENDBR + push r15 + push r14 + push r13 + push r12 + push rbx + push rbp + mov rbp, rsp + sub rsp, 360 + and rsp, 0xFFFFFFFFFFFFFFC0 + neg r9d + movd xmm0, r9d + pshufd xmm0, xmm0, 0x00 + movdqa xmmword ptr [rsp+0x130], xmm0 + movdqa xmm1, xmm0 + pand xmm1, xmmword ptr [ADD0+rip] + pand xmm0, xmmword ptr [ADD1+rip] + movdqa xmmword ptr [rsp+0x150], xmm0 + movd xmm0, r8d + pshufd xmm0, xmm0, 0x00 + paddd xmm0, xmm1 + movdqa xmmword ptr [rsp+0x110], xmm0 + pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] + pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] + pcmpgtd xmm1, xmm0 + shr r8, 32 + movd xmm2, r8d + pshufd xmm2, xmm2, 0x00 + psubd xmm2, xmm1 + movdqa xmmword ptr [rsp+0x120], xmm2 + mov rbx, qword ptr [rbp+0x50] + mov r15, rdx + shl r15, 6 + movzx r13d, byte ptr [rbp+0x38] + movzx r12d, byte ptr [rbp+0x48] + cmp rsi, 4 + jc 3f +2: + movdqu xmm3, xmmword ptr [rcx] + pshufd xmm0, xmm3, 0x00 + pshufd xmm1, xmm3, 0x55 + pshufd xmm2, xmm3, 0xAA + pshufd xmm3, xmm3, 0xFF + movdqu 
xmm7, xmmword ptr [rcx+0x10] + pshufd xmm4, xmm7, 0x00 + pshufd xmm5, xmm7, 0x55 + pshufd xmm6, xmm7, 0xAA + pshufd xmm7, xmm7, 0xFF + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + mov r10, qword ptr [rdi+0x10] + mov r11, qword ptr [rdi+0x18] + movzx eax, byte ptr [rbp+0x40] + or eax, r13d + xor edx, edx +9: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + movdqu xmm8, xmmword ptr [r8+rdx-0x40] + movdqu xmm9, xmmword ptr [r9+rdx-0x40] + movdqu xmm10, xmmword ptr [r10+rdx-0x40] + movdqu xmm11, xmmword ptr [r11+rdx-0x40] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr [rsp], xmm8 + movdqa xmmword ptr [rsp+0x10], xmm9 + movdqa xmmword ptr [rsp+0x20], xmm12 + movdqa xmmword ptr [rsp+0x30], xmm13 + movdqu xmm8, xmmword ptr [r8+rdx-0x30] + movdqu xmm9, xmmword ptr [r9+rdx-0x30] + movdqu xmm10, xmmword ptr [r10+rdx-0x30] + movdqu xmm11, xmmword ptr [r11+rdx-0x30] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr [rsp+0x40], xmm8 + movdqa xmmword ptr [rsp+0x50], xmm9 + movdqa xmmword ptr [rsp+0x60], xmm12 + movdqa xmmword ptr [rsp+0x70], xmm13 + movdqu xmm8, xmmword ptr [r8+rdx-0x20] + movdqu xmm9, xmmword ptr [r9+rdx-0x20] + movdqu xmm10, xmmword ptr [r10+rdx-0x20] + movdqu xmm11, xmmword ptr [r11+rdx-0x20] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, 
xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr [rsp+0x80], xmm8 + movdqa xmmword ptr [rsp+0x90], xmm9 + movdqa xmmword ptr [rsp+0xA0], xmm12 + movdqa xmmword ptr [rsp+0xB0], xmm13 + movdqu xmm8, xmmword ptr [r8+rdx-0x10] + movdqu xmm9, xmmword ptr [r9+rdx-0x10] + movdqu xmm10, xmmword ptr [r10+rdx-0x10] + movdqu xmm11, xmmword ptr [r11+rdx-0x10] + movdqa xmm12, xmm8 + punpckldq xmm8, xmm9 + punpckhdq xmm12, xmm9 + movdqa xmm14, xmm10 + punpckldq xmm10, xmm11 + punpckhdq xmm14, xmm11 + movdqa xmm9, xmm8 + punpcklqdq xmm8, xmm10 + punpckhqdq xmm9, xmm10 + movdqa xmm13, xmm12 + punpcklqdq xmm12, xmm14 + punpckhqdq xmm13, xmm14 + movdqa xmmword ptr [rsp+0xC0], xmm8 + movdqa xmmword ptr [rsp+0xD0], xmm9 + movdqa xmmword ptr [rsp+0xE0], xmm12 + movdqa xmmword ptr [rsp+0xF0], xmm13 + movdqa xmm9, xmmword ptr [BLAKE3_IV_1+rip] + movdqa xmm10, xmmword ptr [BLAKE3_IV_2+rip] + movdqa xmm11, xmmword ptr [BLAKE3_IV_3+rip] + movdqa xmm12, xmmword ptr [rsp+0x110] + movdqa xmm13, xmmword ptr [rsp+0x120] + movdqa xmm14, xmmword ptr [BLAKE3_BLOCK_LEN+rip] + movd xmm15, eax + pshufd xmm15, xmm15, 0x00 + prefetcht0 [r8+rdx+0x80] + prefetcht0 [r9+rdx+0x80] + prefetcht0 [r10+rdx+0x80] + prefetcht0 [r11+rdx+0x80] + paddd xmm0, xmmword ptr [rsp] + paddd xmm1, xmmword ptr [rsp+0x20] + paddd xmm2, xmmword ptr [rsp+0x40] + paddd xmm3, xmmword ptr [rsp+0x60] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [BLAKE3_IV_0+rip] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld 
xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x10] + paddd xmm1, xmmword ptr [rsp+0x30] + paddd xmm2, xmmword ptr [rsp+0x50] + paddd xmm3, xmmword ptr [rsp+0x70] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x80] + paddd xmm1, xmmword ptr [rsp+0xA0] + paddd xmm2, xmmword ptr [rsp+0xC0] + paddd xmm3, xmmword ptr [rsp+0xE0] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa 
xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x90] + paddd xmm1, xmmword ptr [rsp+0xB0] + paddd xmm2, xmmword ptr [rsp+0xD0] + paddd xmm3, xmmword ptr [rsp+0xF0] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x20] + paddd xmm1, xmmword ptr [rsp+0x30] + paddd xmm2, xmmword ptr [rsp+0x70] + paddd xmm3, xmmword ptr [rsp+0x40] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x60] + paddd xmm1, xmmword 
ptr [rsp+0xA0] + paddd xmm2, xmmword ptr [rsp] + paddd xmm3, xmmword ptr [rsp+0xD0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x10] + paddd xmm1, xmmword ptr [rsp+0xC0] + paddd xmm2, xmmword ptr [rsp+0x90] + paddd xmm3, xmmword ptr [rsp+0xF0] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0xB0] + paddd xmm1, xmmword ptr [rsp+0x50] + paddd xmm2, xmmword ptr [rsp+0xE0] + paddd xmm3, xmmword ptr [rsp+0x80] + paddd xmm0, xmm5 + paddd xmm1, xmm6 
+ paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x30] + paddd xmm1, xmmword ptr [rsp+0xA0] + paddd xmm2, xmmword ptr [rsp+0xD0] + paddd xmm3, xmmword ptr [rsp+0x70] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x40] + paddd xmm1, xmmword ptr [rsp+0xC0] + paddd xmm2, xmmword ptr [rsp+0x20] + paddd xmm3, xmmword ptr [rsp+0xE0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa 
xmm8, xmmword ptr [ROT8+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x60] + paddd xmm1, xmmword ptr [rsp+0x90] + paddd xmm2, xmmword ptr [rsp+0xB0] + paddd xmm3, xmmword ptr [rsp+0x80] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x50] + paddd xmm1, xmmword ptr [rsp] + paddd xmm2, xmmword ptr [rsp+0xF0] + paddd xmm3, xmmword ptr [rsp+0x10] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, 
xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0xA0] + paddd xmm1, xmmword ptr [rsp+0xC0] + paddd xmm2, xmmword ptr [rsp+0xE0] + paddd xmm3, xmmword ptr [rsp+0xD0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x70] + paddd xmm1, xmmword ptr [rsp+0x90] + paddd xmm2, xmmword ptr [rsp+0x30] + paddd xmm3, xmmword ptr [rsp+0xF0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, 
xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x40] + paddd xmm1, xmmword ptr [rsp+0xB0] + paddd xmm2, xmmword ptr [rsp+0x50] + paddd xmm3, xmmword ptr [rsp+0x10] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp] + paddd xmm1, xmmword ptr [rsp+0x20] + paddd xmm2, xmmword ptr [rsp+0x80] + paddd xmm3, xmmword ptr [rsp+0x60] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld 
xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0xC0] + paddd xmm1, xmmword ptr [rsp+0x90] + paddd xmm2, xmmword ptr [rsp+0xF0] + paddd xmm3, xmmword ptr [rsp+0xE0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0xD0] + paddd xmm1, xmmword ptr [rsp+0xB0] + paddd xmm2, xmmword ptr [rsp+0xA0] + paddd xmm3, xmmword ptr [rsp+0x80] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa 
xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0x70] + paddd xmm1, xmmword ptr [rsp+0x50] + paddd xmm2, xmmword ptr [rsp] + paddd xmm3, xmmword ptr [rsp+0x60] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x20] + paddd xmm1, xmmword ptr [rsp+0x30] + paddd xmm2, xmmword ptr [rsp+0x10] + paddd xmm3, xmmword ptr [rsp+0x40] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, 
xmm8 + paddd xmm0, xmmword ptr [rsp+0x90] + paddd xmm1, xmmword ptr [rsp+0xB0] + paddd xmm2, xmmword ptr [rsp+0x80] + paddd xmm3, xmmword ptr [rsp+0xF0] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0xE0] + paddd xmm1, xmmword ptr [rsp+0x50] + paddd xmm2, xmmword ptr [rsp+0xC0] + paddd xmm3, xmmword ptr [rsp+0x10] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0xD0] + paddd xmm1, xmmword ptr [rsp] + paddd xmm2, xmmword ptr [rsp+0x20] + paddd 
xmm3, xmmword ptr [rsp+0x40] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0x30] + paddd xmm1, xmmword ptr [rsp+0xA0] + paddd xmm2, xmmword ptr [rsp+0x60] + paddd xmm3, xmmword ptr [rsp+0x70] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0xB0] + paddd xmm1, xmmword ptr [rsp+0x50] + paddd xmm2, xmmword ptr [rsp+0x10] + paddd xmm3, xmmword ptr [rsp+0x80] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, 
xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0xF0] + paddd xmm1, xmmword ptr [rsp] + paddd xmm2, xmmword ptr [rsp+0x90] + paddd xmm3, xmmword ptr [rsp+0x60] + paddd xmm0, xmm4 + paddd xmm1, xmm5 + paddd xmm2, xmm6 + paddd xmm3, xmm7 + pxor xmm12, xmm0 + pxor xmm13, xmm1 + pxor xmm14, xmm2 + pxor xmm15, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + pshufb xmm15, xmm8 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm12 + paddd xmm9, xmm13 + paddd xmm10, xmm14 + paddd xmm11, xmm15 + pxor xmm4, xmm8 + pxor xmm5, xmm9 + pxor xmm6, xmm10 + pxor xmm7, xmm11 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + paddd xmm0, xmmword ptr [rsp+0xE0] + paddd xmm1, xmmword ptr [rsp+0x20] + paddd xmm2, xmmword ptr [rsp+0x30] + paddd xmm3, xmmword ptr [rsp+0x70] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT16+rip] + pshufb xmm15, xmm8 + 
pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + movdqa xmmword ptr [rsp+0x100], xmm8 + movdqa xmm8, xmm5 + psrld xmm8, 12 + pslld xmm5, 20 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 12 + pslld xmm6, 20 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 12 + pslld xmm7, 20 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 12 + pslld xmm4, 20 + por xmm4, xmm8 + paddd xmm0, xmmword ptr [rsp+0xA0] + paddd xmm1, xmmword ptr [rsp+0xC0] + paddd xmm2, xmmword ptr [rsp+0x40] + paddd xmm3, xmmword ptr [rsp+0xD0] + paddd xmm0, xmm5 + paddd xmm1, xmm6 + paddd xmm2, xmm7 + paddd xmm3, xmm4 + pxor xmm15, xmm0 + pxor xmm12, xmm1 + pxor xmm13, xmm2 + pxor xmm14, xmm3 + movdqa xmm8, xmmword ptr [ROT8+rip] + pshufb xmm15, xmm8 + pshufb xmm12, xmm8 + pshufb xmm13, xmm8 + pshufb xmm14, xmm8 + paddd xmm10, xmm15 + paddd xmm11, xmm12 + movdqa xmm8, xmmword ptr [rsp+0x100] + paddd xmm8, xmm13 + paddd xmm9, xmm14 + pxor xmm5, xmm10 + pxor xmm6, xmm11 + pxor xmm7, xmm8 + pxor xmm4, xmm9 + pxor xmm0, xmm8 + pxor xmm1, xmm9 + pxor xmm2, xmm10 + pxor xmm3, xmm11 + movdqa xmm8, xmm5 + psrld xmm8, 7 + pslld xmm5, 25 + por xmm5, xmm8 + movdqa xmm8, xmm6 + psrld xmm8, 7 + pslld xmm6, 25 + por xmm6, xmm8 + movdqa xmm8, xmm7 + psrld xmm8, 7 + pslld xmm7, 25 + por xmm7, xmm8 + movdqa xmm8, xmm4 + psrld xmm8, 7 + pslld xmm4, 25 + por xmm4, xmm8 + pxor xmm4, xmm12 + pxor xmm5, xmm13 + pxor xmm6, xmm14 + pxor xmm7, xmm15 + mov eax, r13d + jne 9b + movdqa xmm9, xmm0 + punpckldq xmm0, xmm1 + punpckhdq xmm9, xmm1 + movdqa xmm11, xmm2 + punpckldq xmm2, xmm3 + punpckhdq xmm11, xmm3 + movdqa xmm1, xmm0 + punpcklqdq xmm0, xmm2 + punpckhqdq xmm1, xmm2 + movdqa xmm3, xmm9 + punpcklqdq xmm9, xmm11 + punpckhqdq xmm3, xmm11 + movdqu xmmword ptr [rbx], xmm0 + movdqu xmmword ptr [rbx+0x20], xmm1 + 
movdqu xmmword ptr [rbx+0x40], xmm9 + movdqu xmmword ptr [rbx+0x60], xmm3 + movdqa xmm9, xmm4 + punpckldq xmm4, xmm5 + punpckhdq xmm9, xmm5 + movdqa xmm11, xmm6 + punpckldq xmm6, xmm7 + punpckhdq xmm11, xmm7 + movdqa xmm5, xmm4 + punpcklqdq xmm4, xmm6 + punpckhqdq xmm5, xmm6 + movdqa xmm7, xmm9 + punpcklqdq xmm9, xmm11 + punpckhqdq xmm7, xmm11 + movdqu xmmword ptr [rbx+0x10], xmm4 + movdqu xmmword ptr [rbx+0x30], xmm5 + movdqu xmmword ptr [rbx+0x50], xmm9 + movdqu xmmword ptr [rbx+0x70], xmm7 + movdqa xmm1, xmmword ptr [rsp+0x110] + movdqa xmm0, xmm1 + paddd xmm1, xmmword ptr [rsp+0x150] + movdqa xmmword ptr [rsp+0x110], xmm1 + pxor xmm0, xmmword ptr [CMP_MSB_MASK+rip] + pxor xmm1, xmmword ptr [CMP_MSB_MASK+rip] + pcmpgtd xmm0, xmm1 + movdqa xmm1, xmmword ptr [rsp+0x120] + psubd xmm1, xmm0 + movdqa xmmword ptr [rsp+0x120], xmm1 + add rbx, 128 + add rdi, 32 + sub rsi, 4 + cmp rsi, 4 + jnc 2b + test rsi, rsi + jnz 3f +4: + mov rsp, rbp + pop rbp + pop rbx + pop r12 + pop r13 + pop r14 + pop r15 + RET +.p2align 5 +3: + test esi, 0x2 + je 3f + movups xmm0, xmmword ptr [rcx] + movups xmm1, xmmword ptr [rcx+0x10] + movaps xmm8, xmm0 + movaps xmm9, xmm1 + movd xmm13, dword ptr [rsp+0x110] + pinsrd xmm13, dword ptr [rsp+0x120], 1 + pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 + movaps xmmword ptr [rsp], xmm13 + movd xmm14, dword ptr [rsp+0x114] + pinsrd xmm14, dword ptr [rsp+0x124], 1 + pinsrd xmm14, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 + movaps xmmword ptr [rsp+0x10], xmm14 + mov r8, qword ptr [rdi] + mov r9, qword ptr [rdi+0x8] + movzx eax, byte ptr [rbp+0x40] + or eax, r13d + xor edx, edx +2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + movaps xmm2, xmmword ptr [BLAKE3_IV+rip] + movaps xmm10, xmm2 + movups xmm4, xmmword ptr [r8+rdx-0x40] + movups xmm5, xmmword ptr [r8+rdx-0x30] + movaps xmm3, xmm4 + shufps xmm4, xmm5, 136 + shufps xmm3, xmm5, 221 + movaps xmm5, xmm3 + movups xmm6, xmmword ptr [r8+rdx-0x20] + movups xmm7, xmmword 
ptr [r8+rdx-0x10] + movaps xmm3, xmm6 + shufps xmm6, xmm7, 136 + pshufd xmm6, xmm6, 0x93 + shufps xmm3, xmm7, 221 + pshufd xmm7, xmm3, 0x93 + movups xmm12, xmmword ptr [r9+rdx-0x40] + movups xmm13, xmmword ptr [r9+rdx-0x30] + movaps xmm11, xmm12 + shufps xmm12, xmm13, 136 + shufps xmm11, xmm13, 221 + movaps xmm13, xmm11 + movups xmm14, xmmword ptr [r9+rdx-0x20] + movups xmm15, xmmword ptr [r9+rdx-0x10] + movaps xmm11, xmm14 + shufps xmm14, xmm15, 136 + pshufd xmm14, xmm14, 0x93 + shufps xmm11, xmm15, 221 + pshufd xmm15, xmm11, 0x93 + movaps xmm3, xmmword ptr [rsp] + movaps xmm11, xmmword ptr [rsp+0x10] + pinsrd xmm3, eax, 3 + pinsrd xmm11, eax, 3 + mov al, 7 +9: + paddd xmm0, xmm4 + paddd xmm8, xmm12 + movaps xmmword ptr [rsp+0x20], xmm4 + movaps xmmword ptr [rsp+0x30], xmm12 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + movaps xmm12, xmmword ptr [ROT16+rip] + pshufb xmm3, xmm12 + pshufb xmm11, xmm12 + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 20 + psrld xmm4, 12 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 20 + psrld xmm4, 12 + por xmm9, xmm4 + paddd xmm0, xmm5 + paddd xmm8, xmm13 + movaps xmmword ptr [rsp+0x40], xmm5 + movaps xmmword ptr [rsp+0x50], xmm13 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + movaps xmm13, xmmword ptr [ROT8+rip] + pshufb xmm3, xmm13 + pshufb xmm11, xmm13 + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 25 + psrld xmm4, 7 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 25 + psrld xmm4, 7 + por xmm9, xmm4 + pshufd xmm0, xmm0, 0x93 + pshufd xmm8, xmm8, 0x93 + pshufd xmm3, xmm3, 0x4E + pshufd xmm11, xmm11, 0x4E + pshufd xmm2, xmm2, 0x39 + pshufd xmm10, xmm10, 0x39 + paddd xmm0, xmm6 + paddd xmm8, xmm14 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + pshufb xmm3, xmm12 + pshufb xmm11, xmm12 + paddd xmm2, xmm3 + paddd 
xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 20 + psrld xmm4, 12 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 20 + psrld xmm4, 12 + por xmm9, xmm4 + paddd xmm0, xmm7 + paddd xmm8, xmm15 + paddd xmm0, xmm1 + paddd xmm8, xmm9 + pxor xmm3, xmm0 + pxor xmm11, xmm8 + pshufb xmm3, xmm13 + pshufb xmm11, xmm13 + paddd xmm2, xmm3 + paddd xmm10, xmm11 + pxor xmm1, xmm2 + pxor xmm9, xmm10 + movdqa xmm4, xmm1 + pslld xmm1, 25 + psrld xmm4, 7 + por xmm1, xmm4 + movdqa xmm4, xmm9 + pslld xmm9, 25 + psrld xmm4, 7 + por xmm9, xmm4 + pshufd xmm0, xmm0, 0x39 + pshufd xmm8, xmm8, 0x39 + pshufd xmm3, xmm3, 0x4E + pshufd xmm11, xmm11, 0x4E + pshufd xmm2, xmm2, 0x93 + pshufd xmm10, xmm10, 0x93 + dec al + je 9f + movdqa xmm12, xmmword ptr [rsp+0x20] + movdqa xmm5, xmmword ptr [rsp+0x40] + pshufd xmm13, xmm12, 0x0F + shufps xmm12, xmm5, 214 + pshufd xmm4, xmm12, 0x39 + movdqa xmm12, xmm6 + shufps xmm12, xmm7, 250 + pblendw xmm13, xmm12, 0xCC + movdqa xmm12, xmm7 + punpcklqdq xmm12, xmm5 + pblendw xmm12, xmm6, 0xC0 + pshufd xmm12, xmm12, 0x78 + punpckhdq xmm5, xmm7 + punpckldq xmm6, xmm5 + pshufd xmm7, xmm6, 0x1E + movdqa xmmword ptr [rsp+0x20], xmm13 + movdqa xmmword ptr [rsp+0x40], xmm12 + movdqa xmm5, xmmword ptr [rsp+0x30] + movdqa xmm13, xmmword ptr [rsp+0x50] + pshufd xmm6, xmm5, 0x0F + shufps xmm5, xmm13, 214 + pshufd xmm12, xmm5, 0x39 + movdqa xmm5, xmm14 + shufps xmm5, xmm15, 250 + pblendw xmm6, xmm5, 0xCC + movdqa xmm5, xmm15 + punpcklqdq xmm5, xmm13 + pblendw xmm5, xmm14, 0xC0 + pshufd xmm5, xmm5, 0x78 + punpckhdq xmm13, xmm15 + punpckldq xmm14, xmm13 + pshufd xmm15, xmm14, 0x1E + movdqa xmm13, xmm6 + movdqa xmm14, xmm5 + movdqa xmm5, xmmword ptr [rsp+0x20] + movdqa xmm6, xmmword ptr [rsp+0x40] + jmp 9b +9: + pxor xmm0, xmm2 + pxor xmm1, xmm3 + pxor xmm8, xmm10 + pxor xmm9, xmm11 + mov eax, r13d + cmp rdx, r15 + jne 2b + movups xmmword ptr [rbx], xmm0 + movups xmmword ptr [rbx+0x10], xmm1 + movups xmmword ptr [rbx+0x20], xmm8 + movups 
xmmword ptr [rbx+0x30], xmm9 + movdqa xmm0, xmmword ptr [rsp+0x130] + movdqa xmm1, xmmword ptr [rsp+0x110] + movdqa xmm2, xmmword ptr [rsp+0x120] + movdqu xmm3, xmmword ptr [rsp+0x118] + movdqu xmm4, xmmword ptr [rsp+0x128] + blendvps xmm1, xmm3, xmm0 + blendvps xmm2, xmm4, xmm0 + movdqa xmmword ptr [rsp+0x110], xmm1 + movdqa xmmword ptr [rsp+0x120], xmm2 + add rdi, 16 + add rbx, 64 + sub rsi, 2 +3: + test esi, 0x1 + je 4b + movups xmm0, xmmword ptr [rcx] + movups xmm1, xmmword ptr [rcx+0x10] + movd xmm13, dword ptr [rsp+0x110] + pinsrd xmm13, dword ptr [rsp+0x120], 1 + pinsrd xmm13, dword ptr [BLAKE3_BLOCK_LEN+rip], 2 + movaps xmm14, xmmword ptr [ROT8+rip] + movaps xmm15, xmmword ptr [ROT16+rip] + mov r8, qword ptr [rdi] + movzx eax, byte ptr [rbp+0x40] + or eax, r13d + xor edx, edx +2: + mov r14d, eax + or eax, r12d + add rdx, 64 + cmp rdx, r15 + cmovne eax, r14d + movaps xmm2, xmmword ptr [BLAKE3_IV+rip] + movaps xmm3, xmm13 + pinsrd xmm3, eax, 3 + movups xmm4, xmmword ptr [r8+rdx-0x40] + movups xmm5, xmmword ptr [r8+rdx-0x30] + movaps xmm8, xmm4 + shufps xmm4, xmm5, 136 + shufps xmm8, xmm5, 221 + movaps xmm5, xmm8 + movups xmm6, xmmword ptr [r8+rdx-0x20] + movups xmm7, xmmword ptr [r8+rdx-0x10] + movaps xmm8, xmm6 + shufps xmm6, xmm7, 136 + pshufd xmm6, xmm6, 0x93 + shufps xmm8, xmm7, 221 + pshufd xmm7, xmm8, 0x93 + mov al, 7 +9: + paddd xmm0, xmm4 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm15 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm5 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 0x93 + pshufd xmm3, xmm3, 0x4E + pshufd xmm2, xmm2, 0x39 + paddd xmm0, xmm6 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm15 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + 
paddd xmm0, xmm7 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + pshufd xmm0, xmm0, 0x39 + pshufd xmm3, xmm3, 0x4E + pshufd xmm2, xmm2, 0x93 + dec al + jz 9f + movdqa xmm8, xmm4 + shufps xmm8, xmm5, 214 + pshufd xmm9, xmm4, 0x0F + pshufd xmm4, xmm8, 0x39 + movdqa xmm8, xmm6 + shufps xmm8, xmm7, 250 + pblendw xmm9, xmm8, 0xCC + movdqa xmm8, xmm7 + punpcklqdq xmm8, xmm5 + pblendw xmm8, xmm6, 0xC0 + pshufd xmm8, xmm8, 0x78 + punpckhdq xmm5, xmm7 + punpckldq xmm6, xmm5 + pshufd xmm7, xmm6, 0x1E + movdqa xmm5, xmm9 + movdqa xmm6, xmm8 + jmp 9b +9: + pxor xmm0, xmm2 + pxor xmm1, xmm3 + mov eax, r13d + cmp rdx, r15 + jne 2b + movups xmmword ptr [rbx], xmm0 + movups xmmword ptr [rbx+0x10], xmm1 + jmp 4b +SET_SIZE(zfs_blake3_hash_many_sse41) + /* zfs_blake3_compress_in_place_sse41: mixes the 32-byte state at [rdi] with the 64-byte block at [rsi] in a 7-pass loop and writes 32 bytes back to [rdi]. Reads rdi, rsi, rdx, rcx, r8; modifies rdx, r8, al and xmm0-xmm9, xmm11, xmm14, xmm15. NOTE(review): presumably the System V AMD64 argument registers; the C prototype is not visible here, confirm against the header. */ +ENTRY_ALIGN(zfs_blake3_compress_in_place_sse41, 64) + ENDBR + /* xmm0, xmm1 = the 32 bytes at [rdi] (unaligned loads) */ movups xmm0, xmmword ptr [rdi] + movups xmm1, xmmword ptr [rdi+0x10] + /* xmm2 = the 16-byte BLAKE3_IV constant */ movaps xmm2, xmmword ptr [BLAKE3_IV+rip] + /* pack two arguments into one qword: rdx = rdx plus (r8 shifted left by 32). NOTE(review): the full 64-bit registers are used here, while zfs_blake3_compress_xof_sse41 zero-extends dl and r8b first; this assumes the upper bits of rdx and r8 are already clear, confirm with the callers. */ shl r8, 32 + add rdx, r8 + /* xmm3 = { rcx in the low qword, packed rdx in the high qword } */ movq xmm3, rcx + movq xmm4, rdx + punpcklqdq xmm3, xmm4 + /* load the first 32 bytes of the block at [rsi]; shufps 136 keeps the even-indexed dwords (xmm4), shufps 221 the odd-indexed dwords (xmm5) */ movups xmm4, xmmword ptr [rsi] + movups xmm5, xmmword ptr [rsi+0x10] + movaps xmm8, xmm4 + shufps xmm4, xmm5, 136 + shufps xmm8, xmm5, 221 + movaps xmm5, xmm8 + /* same even/odd split for the second 32 bytes (xmm6, xmm7), each then rotated by one lane with pshufd 0x93 */ movups xmm6, xmmword ptr [rsi+0x20] + movups xmm7, xmmword ptr [rsi+0x30] + movaps xmm8, xmm6 + shufps xmm6, xmm7, 136 + pshufd xmm6, xmm6, 0x93 + shufps xmm8, xmm7, 221 + pshufd xmm7, xmm8, 0x93 + /* xmm14, xmm15 = the ROT8 and ROT16 pshufb masks, kept in registers for the whole loop */ movaps xmm14, xmmword ptr [ROT8+rip] + movaps xmm15, xmmword ptr [ROT16+rip] + /* al = pass counter: the loop body at label 9 runs 7 times */ mov al, 7 +9: + /* first half of the pass: add xmm4 then xmm5 into the state rows xmm0..xmm3 */ paddd xmm0, xmm4 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + /* byte shuffle with the ROT16 mask */ pshufb xmm3, xmm15 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + /* rotate each dword of xmm1 right by 12 bits (shl 20 OR shr 12); xmm11 is scratch */ movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm5 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + /* byte shuffle with the ROT8 mask */ pshufb xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + /* rotate each dword of xmm1 right by 7 bits (shl 25 OR shr 7) */ movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + /* rotate the lanes of xmm0, xmm3, xmm2 by different amounts so the second half lines up different dwords with xmm1 */ pshufd xmm0, xmm0, 0x93 + pshufd xmm3, 
xmm3, 0x4E + pshufd xmm2, xmm2, 0x39 + /* second half of the pass: same add / xor / rotate sequence, now adding xmm6 then xmm7 */ paddd xmm0, xmm6 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm15 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm7 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + /* inverse lane rotations: restore the lane order used by the first half */ pshufd xmm0, xmm0, 0x39 + pshufd xmm3, xmm3, 0x4E + pshufd xmm2, xmm2, 0x93 + /* count down and leave the loop after the 7th pass; the SSE instructions in between do not touch the flags */ dec al + jz 9f + /* otherwise rebuild all four message vectors xmm4..xmm7 for the next pass; xmm8 and xmm9 are scratch */ movdqa xmm8, xmm4 + shufps xmm8, xmm5, 214 + pshufd xmm9, xmm4, 0x0F + pshufd xmm4, xmm8, 0x39 + movdqa xmm8, xmm6 + shufps xmm8, xmm7, 250 + pblendw xmm9, xmm8, 0xCC + movdqa xmm8, xmm7 + punpcklqdq xmm8, xmm5 + pblendw xmm8, xmm6, 0xC0 + pshufd xmm8, xmm8, 0x78 + punpckhdq xmm5, xmm7 + punpckldq xmm6, xmm5 + pshufd xmm7, xmm6, 0x1E + movdqa xmm5, xmm9 + movdqa xmm6, xmm8 + jmp 9b +9: + /* fold the state halves together (xmm0 ^= xmm2, xmm1 ^= xmm3) and store the 32-byte result back to [rdi] */ pxor xmm0, xmm2 + pxor xmm1, xmm3 + movups xmmword ptr [rdi], xmm0 + movups xmmword ptr [rdi+0x10], xmm1 + RET +SET_SIZE(zfs_blake3_compress_in_place_sse41) + /* zfs_blake3_compress_xof_sse41: same setup and 7-pass loop as zfs_blake3_compress_in_place_sse41, except that dl and r8b are zero-extended before being packed, and the result is written as 64 bytes to [r9] instead of back to [rdi]. Reads rdi, rsi, rdx, rcx, r8, r9. */ +ENTRY_ALIGN(zfs_blake3_compress_xof_sse41, 64) + ENDBR + /* xmm0, xmm1 = the 32 bytes at [rdi]; xmm2 = BLAKE3_IV */ movups xmm0, xmmword ptr [rdi] + movups xmm1, xmmword ptr [rdi+0x10] + movaps xmm2, xmmword ptr [BLAKE3_IV+rip] + /* zero-extend the low bytes of r8 and rdx, then rdx = dl plus (r8b shifted left by 32) */ movzx eax, r8b + movzx edx, dl + shl rax, 32 + add rdx, rax + /* xmm3 = { rcx in the low qword, packed rdx in the high qword } */ movq xmm3, rcx + movq xmm4, rdx + punpcklqdq xmm3, xmm4 + /* split the 64-byte block at [rsi] into even-indexed (shufps 136) and odd-indexed (shufps 221) dwords; the second 32 bytes are also lane-rotated with pshufd 0x93 */ movups xmm4, xmmword ptr [rsi] + movups xmm5, xmmword ptr [rsi+0x10] + movaps xmm8, xmm4 + shufps xmm4, xmm5, 136 + shufps xmm8, xmm5, 221 + movaps xmm5, xmm8 + movups xmm6, xmmword ptr [rsi+0x20] + movups xmm7, xmmword ptr [rsi+0x30] + movaps xmm8, xmm6 + shufps xmm6, xmm7, 136 + pshufd xmm6, xmm6, 0x93 + shufps xmm8, xmm7, 221 + pshufd xmm7, xmm8, 0x93 + movaps xmm14, xmmword ptr [ROT8+rip] + movaps xmm15, xmmword ptr [ROT16+rip] + /* al = pass counter: the loop body at label 9 runs 7 times */ mov al, 7 +9: + paddd xmm0, xmm4 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm15 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + /* rotate each dword of xmm1 right by 12 bits; xmm11 is scratch */ movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd 
xmm0, xmm5 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + /* byte shuffle with the ROT8 mask held in xmm14 */ pshufb xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + /* rotate each dword of xmm1 right by 7 bits (shl 25 OR shr 7); xmm11 is scratch */ movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + /* rotate the lanes of xmm0, xmm3, xmm2 by different amounts so the second half lines up different dwords with xmm1 */ pshufd xmm0, xmm0, 0x93 + pshufd xmm3, xmm3, 0x4E + pshufd xmm2, xmm2, 0x39 + /* second half of the pass: same add / xor / rotate sequence, now adding xmm6 then xmm7 */ paddd xmm0, xmm6 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm15 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 20 + psrld xmm11, 12 + por xmm1, xmm11 + paddd xmm0, xmm7 + paddd xmm0, xmm1 + pxor xmm3, xmm0 + pshufb xmm3, xmm14 + paddd xmm2, xmm3 + pxor xmm1, xmm2 + movdqa xmm11, xmm1 + pslld xmm1, 25 + psrld xmm11, 7 + por xmm1, xmm11 + /* inverse lane rotations: restore the lane order used by the first half */ pshufd xmm0, xmm0, 0x39 + pshufd xmm3, xmm3, 0x4E + pshufd xmm2, xmm2, 0x93 + /* count down and leave the loop after the 7th pass */ dec al + jz 9f + /* otherwise rebuild all four message vectors xmm4..xmm7 for the next pass; xmm8 and xmm9 are scratch */ movdqa xmm8, xmm4 + shufps xmm8, xmm5, 214 + pshufd xmm9, xmm4, 0x0F + pshufd xmm4, xmm8, 0x39 + movdqa xmm8, xmm6 + shufps xmm8, xmm7, 250 + pblendw xmm9, xmm8, 0xCC + movdqa xmm8, xmm7 + punpcklqdq xmm8, xmm5 + pblendw xmm8, xmm6, 0xC0 + pshufd xmm8, xmm8, 0x78 + punpckhdq xmm5, xmm7 + punpckldq xmm6, xmm5 + pshufd xmm7, xmm6, 0x1E + movdqa xmm5, xmm9 + movdqa xmm6, xmm8 + jmp 9b +9: + /* reload the original 32 bytes at [rdi] into xmm4, xmm5 (the message vectors are no longer needed) */ movdqu xmm4, xmmword ptr [rdi] + movdqu xmm5, xmmword ptr [rdi+0x10] + /* first 32 output bytes: xmm0 ^ xmm2 and xmm1 ^ xmm3, computed before xmm2 and xmm3 are overwritten */ pxor xmm0, xmm2 + pxor xmm1, xmm3 + /* last 32 output bytes: xmm2 and xmm3 xored with the original contents of [rdi] and [rdi+0x10] */ pxor xmm2, xmm4 + pxor xmm3, xmm5 + /* store all 64 bytes to [r9]; [rdi] itself is left unmodified by this routine */ movups xmmword ptr [r9], xmm0 + movups xmmword ptr [r9+0x10], xmm1 + movups xmmword ptr [r9+0x20], xmm2 + movups xmmword ptr [r9+0x30], xmm3 + RET +SET_SIZE(zfs_blake3_compress_xof_sse41) + +SECTION_STATIC + /* constant tables; .p2align 6 aligns the first one to 64 bytes, and every table below is 16 bytes long */ +.p2align 6 +BLAKE3_IV: /* four 32-bit constants, loaded as a single 16-byte vector */ + .long 0x6A09E667, 0xBB67AE85 + .long 0x3C6EF372, 0xA54FF53A +ROT16: /* pshufb mask: swaps the two 16-bit halves of every dword, i.e. rotates each dword by 16 bits */ + .byte 2, 3, 0, 1, 6, 7, 4, 5, 10, 11, 8, 9, 14, 15, 12, 13 +ROT8: /* pshufb mask: each result byte comes from the next higher byte of the same dword (wrapping), i.e. rotates each dword right by 8 bits */ + .byte 1, 2, 3, 0, 5, 6, 7, 4, 9, 10, 11, 8, 13, 14, 15, 12 +ADD0: + .long 0, 1, 2, 3 +ADD1: + .long 4, 4, 4, 4 +BLAKE3_IV_0: /* BLAKE3_IV_0 .. BLAKE3_IV_3: each of the four BLAKE3_IV words repeated in all four lanes */ + .long 0x6A09E667, 0x6A09E667, 0x6A09E667, 0x6A09E667 +BLAKE3_IV_1: + .long 0xBB67AE85, 0xBB67AE85, 0xBB67AE85, 0xBB67AE85 +BLAKE3_IV_2: + .long 0x3C6EF372, 0x3C6EF372, 0x3C6EF372, 0x3C6EF372 +BLAKE3_IV_3: + .long 0xA54FF53A, 0xA54FF53A, 
0xA54FF53A, 0xA54FF53A +BLAKE3_BLOCK_LEN: + .long 64, 64, 64, 64 +CMP_MSB_MASK: + .long 0x80000000, 0x80000000, 0x80000000, 0x80000000 + +#endif /* HAVE_SSE4_1 */ + +#ifdef __ELF__ +.section .note.GNU-stack,"",%progbits +#endif diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S index dc71ae2c1c89..909b2147dff9 100644 --- a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S +++ b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S @@ -47,15 +47,26 @@ #if defined(__x86_64__) && defined(HAVE_AVX) && \ defined(HAVE_AES) && defined(HAVE_PCLMULQDQ) +#define _ASM +#include <sys/asm_linkage.h> + +/* Windows userland links with OpenSSL */ +#if !defined (_WIN32) || defined (_KERNEL) + +/* Apple needs _ */ +#if defined (__APPLE__) +#define gcm_avx_can_use_movbe _gcm_avx_can_use_movbe +#endif + .extern gcm_avx_can_use_movbe .text #ifdef HAVE_MOVBE -.type _aesni_ctr32_ghash_6x,@function -.align 32 -_aesni_ctr32_ghash_6x: +.balign 32 +FUNCTION(_aesni_ctr32_ghash_6x) .cfi_startproc + ENDBR vmovdqu 32(%r11),%xmm2 subq $6,%rdx vpxor %xmm4,%xmm4,%xmm4 @@ -69,7 +80,7 @@ _aesni_ctr32_ghash_6x: vmovdqu %xmm4,16+8(%rsp) jmp .Loop6x -.align 32 +.balign 32 .Loop6x: addl $100663296,%ebx jc .Lhandle_ctr32 @@ -281,7 +292,7 @@ _aesni_ctr32_ghash_6x: vmovups 224-128(%rcx),%xmm1 jmp .Lenc_tail -.align 32 +.balign 32 .Lhandle_ctr32: vmovdqu (%r11),%xmm0 vpshufb %xmm0,%xmm1,%xmm6 @@ -303,7 +314,7 @@ _aesni_ctr32_ghash_6x: vpshufb %xmm0,%xmm1,%xmm1 jmp .Lresume_ctr32 -.align 32 +.balign 32 .Lenc_tail: vaesenc %xmm15,%xmm9,%xmm9 vmovdqu %xmm7,16+8(%rsp) @@ -363,15 +374,15 @@ _aesni_ctr32_ghash_6x: vpxor 16+8(%rsp),%xmm8,%xmm8 vpxor %xmm4,%xmm8,%xmm8 - .byte 0xf3,0xc3 + RET .cfi_endproc -.size _aesni_ctr32_ghash_6x,.-_aesni_ctr32_ghash_6x +SET_SIZE(_aesni_ctr32_ghash_6x) #endif /* ifdef HAVE_MOVBE */ -.type _aesni_ctr32_ghash_no_movbe_6x,@function -.align 32 
-_aesni_ctr32_ghash_no_movbe_6x: +.balign 32 +FUNCTION(_aesni_ctr32_ghash_no_movbe_6x) .cfi_startproc + ENDBR vmovdqu 32(%r11),%xmm2 subq $6,%rdx vpxor %xmm4,%xmm4,%xmm4 @@ -385,7 +396,7 @@ _aesni_ctr32_ghash_no_movbe_6x: vmovdqu %xmm4,16+8(%rsp) jmp .Loop6x_nmb -.align 32 +.balign 32 .Loop6x_nmb: addl $100663296,%ebx jc .Lhandle_ctr32_nmb @@ -609,7 +620,7 @@ _aesni_ctr32_ghash_no_movbe_6x: vmovups 224-128(%rcx),%xmm1 jmp .Lenc_tail_nmb -.align 32 +.balign 32 .Lhandle_ctr32_nmb: vmovdqu (%r11),%xmm0 vpshufb %xmm0,%xmm1,%xmm6 @@ -631,7 +642,7 @@ _aesni_ctr32_ghash_no_movbe_6x: vpshufb %xmm0,%xmm1,%xmm1 jmp .Lresume_ctr32_nmb -.align 32 +.balign 32 .Lenc_tail_nmb: vaesenc %xmm15,%xmm9,%xmm9 vmovdqu %xmm7,16+8(%rsp) @@ -691,15 +702,13 @@ _aesni_ctr32_ghash_no_movbe_6x: vpxor 16+8(%rsp),%xmm8,%xmm8 vpxor %xmm4,%xmm8,%xmm8 - .byte 0xf3,0xc3 + RET .cfi_endproc -.size _aesni_ctr32_ghash_no_movbe_6x,.-_aesni_ctr32_ghash_no_movbe_6x +SET_SIZE(_aesni_ctr32_ghash_no_movbe_6x) -.globl aesni_gcm_decrypt -.type aesni_gcm_decrypt,@function -.align 32 -aesni_gcm_decrypt: +ENTRY_ALIGN(aesni_gcm_decrypt, 32) .cfi_startproc + ENDBR xorq %r10,%r10 cmpq $0x60,%rdx jb .Lgcm_dec_abort @@ -810,13 +819,14 @@ aesni_gcm_decrypt: .cfi_def_cfa_register %rsp .Lgcm_dec_abort: movq %r10,%rax - .byte 0xf3,0xc3 + RET .cfi_endproc -.size aesni_gcm_decrypt,.-aesni_gcm_decrypt -.type _aesni_ctr32_6x,@function -.align 32 -_aesni_ctr32_6x: +SET_SIZE(aesni_gcm_decrypt) + +.balign 32 +FUNCTION(_aesni_ctr32_6x) .cfi_startproc + ENDBR vmovdqu 0-128(%rcx),%xmm4 vmovdqu 32(%r11),%xmm2 leaq -2(%rbp),%r13 // ICP uses 10,12,14 not 9,11,13 for rounds. 
@@ -838,7 +848,7 @@ _aesni_ctr32_6x: vpxor %xmm4,%xmm14,%xmm14 jmp .Loop_ctr32 -.align 16 +.balign 16 .Loop_ctr32: vaesenc %xmm15,%xmm9,%xmm9 vaesenc %xmm15,%xmm10,%xmm10 @@ -880,8 +890,8 @@ _aesni_ctr32_6x: vmovups %xmm14,80(%rsi) leaq 96(%rsi),%rsi - .byte 0xf3,0xc3 -.align 32 + RET +.balign 32 .Lhandle_ctr32_2: vpshufb %xmm0,%xmm1,%xmm6 vmovdqu 48(%r11),%xmm5 @@ -904,13 +914,11 @@ _aesni_ctr32_6x: vpxor %xmm4,%xmm14,%xmm14 jmp .Loop_ctr32 .cfi_endproc -.size _aesni_ctr32_6x,.-_aesni_ctr32_6x +SET_SIZE(_aesni_ctr32_6x) -.globl aesni_gcm_encrypt -.type aesni_gcm_encrypt,@function -.align 32 -aesni_gcm_encrypt: +ENTRY_ALIGN(aesni_gcm_encrypt, 32) .cfi_startproc + ENDBR xorq %r10,%r10 cmpq $288,%rdx jb .Lgcm_enc_abort @@ -1186,9 +1194,11 @@ aesni_gcm_encrypt: .cfi_def_cfa_register %rsp .Lgcm_enc_abort: movq %r10,%rax - .byte 0xf3,0xc3 + RET .cfi_endproc -.size aesni_gcm_encrypt,.-aesni_gcm_encrypt +SET_SIZE(aesni_gcm_encrypt) + +#endif /* !_WIN32 || _KERNEL */ /* Some utility routines */ @@ -1196,13 +1206,10 @@ aesni_gcm_encrypt: * clear all fpu registers * void clear_fpu_regs_avx(void); */ -.globl clear_fpu_regs_avx -.type clear_fpu_regs_avx,@function -.align 32 -clear_fpu_regs_avx: +ENTRY_ALIGN(clear_fpu_regs_avx, 32) vzeroall - ret -.size clear_fpu_regs_avx,.-clear_fpu_regs_avx + RET +SET_SIZE(clear_fpu_regs_avx) /* * void gcm_xor_avx(const uint8_t *src, uint8_t *dst); @@ -1211,35 +1218,31 @@ clear_fpu_regs_avx: * stores the result at `dst'. The XOR is performed using FPU registers, * so make sure FPU state is saved when running this in the kernel. */ -.globl gcm_xor_avx -.type gcm_xor_avx,@function -.align 32 -gcm_xor_avx: +ENTRY_ALIGN(gcm_xor_avx, 32) movdqu (%rdi), %xmm0 movdqu (%rsi), %xmm1 pxor %xmm1, %xmm0 movdqu %xmm0, (%rsi) - ret -.size gcm_xor_avx,.-gcm_xor_avx + RET +SET_SIZE(gcm_xor_avx) /* * Toggle a boolean_t value atomically and return the new value. 
* boolean_t atomic_toggle_boolean_nv(volatile boolean_t *); */ -.globl atomic_toggle_boolean_nv -.type atomic_toggle_boolean_nv,@function -.align 32 -atomic_toggle_boolean_nv: +ENTRY_ALIGN(atomic_toggle_boolean_nv, 32) xorl %eax, %eax lock xorl $1, (%rdi) jz 1f movl $1, %eax 1: - ret -.size atomic_toggle_boolean_nv,.-atomic_toggle_boolean_nv + RET +SET_SIZE(atomic_toggle_boolean_nv) + +SECTION_STATIC -.align 64 +.balign 64 .Lbswap_mask: .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 .Lpoly: @@ -1251,7 +1254,7 @@ atomic_toggle_boolean_nv: .Lone_lsb: .byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0 .byte 65,69,83,45,78,73,32,71,67,77,32,109,111,100,117,108,101,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 -.align 64 +.balign 64 /* Mark the stack non-executable. */ #if defined(__linux__) && defined(__ELF__) diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S index 59edc4c8d56c..dec782fda33e 100644 --- a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S +++ b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. 
* @@ -85,9 +85,9 @@ #include <sys/types.h> -/* ARGSUSED */ void gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res) { + (void) x_in, (void) y, (void) res; } #elif defined(HAVE_PCLMULQDQ) /* guard by instruction set */ @@ -101,8 +101,8 @@ gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res) { // static uint8_t byte_swap16_mask[] = { // 15, 14, 13, 12, 11, 10, 9, 8, 7, 6 ,5, 4, 3, 2, 1, 0 }; -.data -.align XMM_ALIGN +SECTION_STATIC +.balign XMM_ALIGN .Lbyte_swap16_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 @@ -244,7 +244,7 @@ ENTRY_NP(gcm_mul_pclmulqdq) // // Return // - ret + RET SET_SIZE(gcm_mul_pclmulqdq) #endif /* lint || __lint */ diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/ghash-x86_64.S b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/ghash-x86_64.S index 90cc36b43a78..f62e056d4b64 100644 --- a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/ghash-x86_64.S +++ b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/ghash-x86_64.S @@ -97,13 +97,18 @@ #if defined(__x86_64__) && defined(HAVE_AVX) && \ defined(HAVE_AES) && defined(HAVE_PCLMULQDQ) +#define _ASM +#include <sys/asm_linkage.h> + .text -.globl gcm_gmult_clmul -.type gcm_gmult_clmul,@function -.align 16 -gcm_gmult_clmul: +/* Windows userland links with OpenSSL */ +#if !defined (_WIN32) || defined (_KERNEL) +ENTRY_ALIGN(gcm_gmult_clmul, 16) + .cfi_startproc + ENDBR + .L_gmult_clmul: movdqu (%rdi),%xmm0 movdqa .Lbswap_mask(%rip),%xmm5 @@ -149,15 +154,14 @@ gcm_gmult_clmul: pxor %xmm1,%xmm0 .byte 102,15,56,0,197 movdqu %xmm0,(%rdi) - .byte 0xf3,0xc3 + RET .cfi_endproc -.size gcm_gmult_clmul,.-gcm_gmult_clmul +SET_SIZE(gcm_gmult_clmul) +#endif /* !_WIN32 || _KERNEL */ -.globl gcm_init_htab_avx -.type gcm_init_htab_avx,@function -.align 32 -gcm_init_htab_avx: +ENTRY_ALIGN(gcm_init_htab_avx, 32) .cfi_startproc + ENDBR vzeroupper vmovdqu (%rsi),%xmm2 @@ -184,7 +188,7 @@ gcm_init_htab_avx: vpxor %xmm2,%xmm6,%xmm6 movq $4,%r10 jmp .Linit_start_avx -.align 32 
+.balign 32 .Linit_loop_avx: vpalignr $8,%xmm3,%xmm4,%xmm5 vmovdqu %xmm5,-16(%rdi) @@ -262,23 +266,21 @@ gcm_init_htab_avx: vmovdqu %xmm5,-16(%rdi) vzeroupper - .byte 0xf3,0xc3 + RET .cfi_endproc -.size gcm_init_htab_avx,.-gcm_init_htab_avx +SET_SIZE(gcm_init_htab_avx) -.globl gcm_gmult_avx -.type gcm_gmult_avx,@function -.align 32 -gcm_gmult_avx: +#if !defined (_WIN32) || defined (_KERNEL) +ENTRY_ALIGN(gcm_gmult_avx, 32) .cfi_startproc + ENDBR jmp .L_gmult_clmul .cfi_endproc -.size gcm_gmult_avx,.-gcm_gmult_avx -.globl gcm_ghash_avx -.type gcm_ghash_avx,@function -.align 32 -gcm_ghash_avx: +SET_SIZE(gcm_gmult_avx) + +ENTRY_ALIGN(gcm_ghash_avx, 32) .cfi_startproc + ENDBR vzeroupper vmovdqu (%rdi),%xmm10 @@ -384,7 +386,7 @@ gcm_ghash_avx: subq $0x80,%rcx jmp .Loop8x_avx -.align 32 +.balign 32 .Loop8x_avx: vpunpckhqdq %xmm15,%xmm15,%xmm8 vmovdqu 112(%rdx),%xmm14 @@ -504,7 +506,7 @@ gcm_ghash_avx: addq $0x80,%rcx jmp .Ltail_no_xor_avx -.align 32 +.balign 32 .Lshort_avx: vmovdqu -16(%rdx,%rcx,1),%xmm14 leaq (%rdx,%rcx,1),%rdx @@ -608,7 +610,7 @@ gcm_ghash_avx: subq $0x10,%rcx jmp .Ltail_avx -.align 32 +.balign 32 .Ltail_avx: vpxor %xmm10,%xmm15,%xmm15 .Ltail_no_xor_avx: @@ -649,10 +651,14 @@ gcm_ghash_avx: vpshufb %xmm13,%xmm10,%xmm10 vmovdqu %xmm10,(%rdi) vzeroupper - .byte 0xf3,0xc3 + RET .cfi_endproc -.size gcm_ghash_avx,.-gcm_ghash_avx -.align 64 +SET_SIZE(gcm_ghash_avx) + +#endif /* !_WIN32 || _KERNEL */ + +SECTION_STATIC +.balign 64 .Lbswap_mask: .byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 .L0x1c2_polynomial: @@ -661,14 +667,14 @@ gcm_ghash_avx: .long 7,0,7,0 .L7_mask_poly: .long 7,0,450,0 -.align 64 -.type .Lrem_4bit,@object +.balign 64 +SET_OBJ(.Lrem_4bit) .Lrem_4bit: .long 0,0,0,471859200,0,943718400,0,610271232 .long 0,1887436800,0,1822425088,0,1220542464,0,1423966208 .long 0,3774873600,0,4246732800,0,3644850176,0,3311403008 .long 0,2441084928,0,2376073216,0,2847932416,0,3051356160 -.type .Lrem_8bit,@object +SET_OBJ(.Lrem_8bit) .Lrem_8bit: .value 
0x0000,0x01C2,0x0384,0x0246,0x0708,0x06CA,0x048C,0x054E .value 0x0E10,0x0FD2,0x0D94,0x0C56,0x0918,0x08DA,0x0A9C,0x0B5E @@ -704,7 +710,7 @@ gcm_ghash_avx: .value 0xBBF0,0xBA32,0xB874,0xB9B6,0xBCF8,0xBD3A,0xBF7C,0xBEBE .byte 71,72,65,83,72,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 -.align 64 +.balign 64 /* Mark the stack non-executable. */ #if defined(__linux__) && defined(__ELF__) diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/sha1/sha1-x86_64.S b/sys/contrib/openzfs/module/icp/asm-x86_64/sha1/sha1-x86_64.S deleted file mode 100644 index fc844cd8c74f..000000000000 --- a/sys/contrib/openzfs/module/icp/asm-x86_64/sha1/sha1-x86_64.S +++ /dev/null @@ -1,1369 +0,0 @@ -/* - * !/usr/bin/env perl - * - * ==================================================================== - * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL - * project. The module is, however, dual licensed under OpenSSL and - * CRYPTOGAMS licenses depending on where you obtain it. For further - * details see http://www.openssl.org/~appro/cryptogams/. - * ==================================================================== - * - * sha1_block procedure for x86_64. - * - * It was brought to my attention that on EM64T compiler-generated code - * was far behind 32-bit assembler implementation. This is unlike on - * Opteron where compiler-generated code was only 15% behind 32-bit - * assembler, which originally made it hard to motivate the effort. - * There was suggestion to mechanically translate 32-bit code, but I - * dismissed it, reasoning that x86_64 offers enough register bank - * capacity to fully utilize SHA-1 parallelism. Therefore this fresh - * implementation:-) However! While 64-bit code does performs better - * on Opteron, I failed to beat 32-bit assembler on EM64T core. 
Well, - * x86_64 does offer larger *addressable* bank, but out-of-order core - * reaches for even more registers through dynamic aliasing, and EM64T - * core must have managed to run-time optimize even 32-bit code just as - * good as 64-bit one. Performance improvement is summarized in the - * following table: - * - * gcc 3.4 32-bit asm cycles/byte - * Opteron +45% +20% 6.8 - * Xeon P4 +65% +0% 9.9 - * Core2 +60% +10% 7.0 - * - * - * OpenSolaris OS modifications - * - * Sun elects to use this software under the BSD license. - * - * This source originates from OpenSSL file sha1-x86_64.pl at - * ftp://ftp.openssl.org/snapshot/openssl-0.9.8-stable-SNAP-20080131.tar.gz - * (presumably for future OpenSSL release 0.9.8h), with these changes: - * - * 1. Added perl "use strict" and declared variables. - * - * 2. Added OpenSolaris ENTRY_NP/SET_SIZE macros from - * /usr/include/sys/asm_linkage.h, .ident keywords, and lint(1B) guards. - * - * 3. Removed x86_64-xlate.pl script (not needed for as(1) or gas(1) - * assemblers). - * - */ - -/* - * This file was generated by a perl script (sha1-x86_64.pl). The comments from - * the original file have been pasted above. 
- */ - -#if defined(lint) || defined(__lint) -#include <sys/stdint.h> -#include <sys/sha1.h> - - -/* ARGSUSED */ -void -sha1_block_data_order(SHA1_CTX *ctx, const void *inpp, size_t blocks) -{ -} - -#else -#define _ASM -#include <sys/asm_linkage.h> -ENTRY_NP(sha1_block_data_order) -.cfi_startproc - mov %rsp,%rax -.cfi_def_cfa_register %rax - push %rbx -.cfi_offset %rbx,-16 - push %rbp -.cfi_offset %rbp,-24 - push %r12 -.cfi_offset %r12,-32 - mov %rdi,%r8 # reassigned argument -.cfi_register %rdi, %r8 - sub $72,%rsp - mov %rsi,%r9 # reassigned argument -.cfi_register %rsi, %r9 - and $-64,%rsp - mov %rdx,%r10 # reassigned argument -.cfi_register %rdx, %r10 - mov %rax,64(%rsp) -# echo ".cfi_cfa_expression %rsp+64,deref,+8" | -# openssl/crypto/perlasm/x86_64-xlate.pl -.cfi_escape 0x0f,0x06,0x77,0xc0,0x00,0x06,0x23,0x08 - - mov 0(%r8),%edx - mov 4(%r8),%esi - mov 8(%r8),%edi - mov 12(%r8),%ebp - mov 16(%r8),%r11d -.align 4 -.Lloop: - mov 0(%r9),%eax - bswap %eax - mov %eax,0(%rsp) - lea 0x5a827999(%eax,%r11d),%r12d - mov %edi,%ebx - mov 4(%r9),%eax - mov %edx,%r11d - xor %ebp,%ebx - bswap %eax - rol $5,%r11d - and %esi,%ebx - mov %eax,4(%rsp) - add %r11d,%r12d - xor %ebp,%ebx - rol $30,%esi - add %ebx,%r12d - lea 0x5a827999(%eax,%ebp),%r11d - mov %esi,%ebx - mov 8(%r9),%eax - mov %r12d,%ebp - xor %edi,%ebx - bswap %eax - rol $5,%ebp - and %edx,%ebx - mov %eax,8(%rsp) - add %ebp,%r11d - xor %edi,%ebx - rol $30,%edx - add %ebx,%r11d - lea 0x5a827999(%eax,%edi),%ebp - mov %edx,%ebx - mov 12(%r9),%eax - mov %r11d,%edi - xor %esi,%ebx - bswap %eax - rol $5,%edi - and %r12d,%ebx - mov %eax,12(%rsp) - add %edi,%ebp - xor %esi,%ebx - rol $30,%r12d - add %ebx,%ebp - lea 0x5a827999(%eax,%esi),%edi - mov %r12d,%ebx - mov 16(%r9),%eax - mov %ebp,%esi - xor %edx,%ebx - bswap %eax - rol $5,%esi - and %r11d,%ebx - mov %eax,16(%rsp) - add %esi,%edi - xor %edx,%ebx - rol $30,%r11d - add %ebx,%edi - lea 0x5a827999(%eax,%edx),%esi - mov %r11d,%ebx - mov 20(%r9),%eax - mov %edi,%edx - xor 
%r12d,%ebx - bswap %eax - rol $5,%edx - and %ebp,%ebx - mov %eax,20(%rsp) - add %edx,%esi - xor %r12d,%ebx - rol $30,%ebp - add %ebx,%esi - lea 0x5a827999(%eax,%r12d),%edx - mov %ebp,%ebx - mov 24(%r9),%eax - mov %esi,%r12d - xor %r11d,%ebx - bswap %eax - rol $5,%r12d - and %edi,%ebx - mov %eax,24(%rsp) - add %r12d,%edx - xor %r11d,%ebx - rol $30,%edi - add %ebx,%edx - lea 0x5a827999(%eax,%r11d),%r12d - mov %edi,%ebx - mov 28(%r9),%eax - mov %edx,%r11d - xor %ebp,%ebx - bswap %eax - rol $5,%r11d - and %esi,%ebx - mov %eax,28(%rsp) - add %r11d,%r12d - xor %ebp,%ebx - rol $30,%esi - add %ebx,%r12d - lea 0x5a827999(%eax,%ebp),%r11d - mov %esi,%ebx - mov 32(%r9),%eax - mov %r12d,%ebp - xor %edi,%ebx - bswap %eax - rol $5,%ebp - and %edx,%ebx - mov %eax,32(%rsp) - add %ebp,%r11d - xor %edi,%ebx - rol $30,%edx - add %ebx,%r11d - lea 0x5a827999(%eax,%edi),%ebp - mov %edx,%ebx - mov 36(%r9),%eax - mov %r11d,%edi - xor %esi,%ebx - bswap %eax - rol $5,%edi - and %r12d,%ebx - mov %eax,36(%rsp) - add %edi,%ebp - xor %esi,%ebx - rol $30,%r12d - add %ebx,%ebp - lea 0x5a827999(%eax,%esi),%edi - mov %r12d,%ebx - mov 40(%r9),%eax - mov %ebp,%esi - xor %edx,%ebx - bswap %eax - rol $5,%esi - and %r11d,%ebx - mov %eax,40(%rsp) - add %esi,%edi - xor %edx,%ebx - rol $30,%r11d - add %ebx,%edi - lea 0x5a827999(%eax,%edx),%esi - mov %r11d,%ebx - mov 44(%r9),%eax - mov %edi,%edx - xor %r12d,%ebx - bswap %eax - rol $5,%edx - and %ebp,%ebx - mov %eax,44(%rsp) - add %edx,%esi - xor %r12d,%ebx - rol $30,%ebp - add %ebx,%esi - lea 0x5a827999(%eax,%r12d),%edx - mov %ebp,%ebx - mov 48(%r9),%eax - mov %esi,%r12d - xor %r11d,%ebx - bswap %eax - rol $5,%r12d - and %edi,%ebx - mov %eax,48(%rsp) - add %r12d,%edx - xor %r11d,%ebx - rol $30,%edi - add %ebx,%edx - lea 0x5a827999(%eax,%r11d),%r12d - mov %edi,%ebx - mov 52(%r9),%eax - mov %edx,%r11d - xor %ebp,%ebx - bswap %eax - rol $5,%r11d - and %esi,%ebx - mov %eax,52(%rsp) - add %r11d,%r12d - xor %ebp,%ebx - rol $30,%esi - add %ebx,%r12d - lea 
0x5a827999(%eax,%ebp),%r11d - mov %esi,%ebx - mov 56(%r9),%eax - mov %r12d,%ebp - xor %edi,%ebx - bswap %eax - rol $5,%ebp - and %edx,%ebx - mov %eax,56(%rsp) - add %ebp,%r11d - xor %edi,%ebx - rol $30,%edx - add %ebx,%r11d - lea 0x5a827999(%eax,%edi),%ebp - mov %edx,%ebx - mov 60(%r9),%eax - mov %r11d,%edi - xor %esi,%ebx - bswap %eax - rol $5,%edi - and %r12d,%ebx - mov %eax,60(%rsp) - add %edi,%ebp - xor %esi,%ebx - rol $30,%r12d - add %ebx,%ebp - lea 0x5a827999(%eax,%esi),%edi - mov 0(%rsp),%eax - mov %r12d,%ebx - mov %ebp,%esi - xor 8(%rsp),%eax - xor %edx,%ebx - rol $5,%esi - xor 32(%rsp),%eax - and %r11d,%ebx - add %esi,%edi - xor 52(%rsp),%eax - xor %edx,%ebx - rol $30,%r11d - add %ebx,%edi - rol $1,%eax - mov %eax,0(%rsp) - lea 0x5a827999(%eax,%edx),%esi - mov 4(%rsp),%eax - mov %r11d,%ebx - mov %edi,%edx - xor 12(%rsp),%eax - xor %r12d,%ebx - rol $5,%edx - xor 36(%rsp),%eax - and %ebp,%ebx - add %edx,%esi - xor 56(%rsp),%eax - xor %r12d,%ebx - rol $30,%ebp - add %ebx,%esi - rol $1,%eax - mov %eax,4(%rsp) - lea 0x5a827999(%eax,%r12d),%edx - mov 8(%rsp),%eax - mov %ebp,%ebx - mov %esi,%r12d - xor 16(%rsp),%eax - xor %r11d,%ebx - rol $5,%r12d - xor 40(%rsp),%eax - and %edi,%ebx - add %r12d,%edx - xor 60(%rsp),%eax - xor %r11d,%ebx - rol $30,%edi - add %ebx,%edx - rol $1,%eax - mov %eax,8(%rsp) - lea 0x5a827999(%eax,%r11d),%r12d - mov 12(%rsp),%eax - mov %edi,%ebx - mov %edx,%r11d - xor 20(%rsp),%eax - xor %ebp,%ebx - rol $5,%r11d - xor 44(%rsp),%eax - and %esi,%ebx - add %r11d,%r12d - xor 0(%rsp),%eax - xor %ebp,%ebx - rol $30,%esi - add %ebx,%r12d - rol $1,%eax - mov %eax,12(%rsp) - lea 0x5a827999(%eax,%ebp),%r11d - mov 16(%rsp),%eax - mov %esi,%ebx - mov %r12d,%ebp - xor 24(%rsp),%eax - xor %edi,%ebx - rol $5,%ebp - xor 48(%rsp),%eax - and %edx,%ebx - add %ebp,%r11d - xor 4(%rsp),%eax - xor %edi,%ebx - rol $30,%edx - add %ebx,%r11d - rol $1,%eax - mov %eax,16(%rsp) - lea 0x6ed9eba1(%eax,%edi),%ebp - mov 20(%rsp),%eax - mov %edx,%ebx - mov %r11d,%edi - xor 
28(%rsp),%eax - xor %r12d,%ebx - rol $5,%edi - xor 52(%rsp),%eax - xor %esi,%ebx - add %edi,%ebp - xor 8(%rsp),%eax - rol $30,%r12d - add %ebx,%ebp - rol $1,%eax - mov %eax,20(%rsp) - lea 0x6ed9eba1(%eax,%esi),%edi - mov 24(%rsp),%eax - mov %r12d,%ebx - mov %ebp,%esi - xor 32(%rsp),%eax - xor %r11d,%ebx - rol $5,%esi - xor 56(%rsp),%eax - xor %edx,%ebx - add %esi,%edi - xor 12(%rsp),%eax - rol $30,%r11d - add %ebx,%edi - rol $1,%eax - mov %eax,24(%rsp) - lea 0x6ed9eba1(%eax,%edx),%esi - mov 28(%rsp),%eax - mov %r11d,%ebx - mov %edi,%edx - xor 36(%rsp),%eax - xor %ebp,%ebx - rol $5,%edx - xor 60(%rsp),%eax - xor %r12d,%ebx - add %edx,%esi - xor 16(%rsp),%eax - rol $30,%ebp - add %ebx,%esi - rol $1,%eax - mov %eax,28(%rsp) - lea 0x6ed9eba1(%eax,%r12d),%edx - mov 32(%rsp),%eax - mov %ebp,%ebx - mov %esi,%r12d - xor 40(%rsp),%eax - xor %edi,%ebx - rol $5,%r12d - xor 0(%rsp),%eax - xor %r11d,%ebx - add %r12d,%edx - xor 20(%rsp),%eax - rol $30,%edi - add %ebx,%edx - rol $1,%eax - mov %eax,32(%rsp) - lea 0x6ed9eba1(%eax,%r11d),%r12d - mov 36(%rsp),%eax - mov %edi,%ebx - mov %edx,%r11d - xor 44(%rsp),%eax - xor %esi,%ebx - rol $5,%r11d - xor 4(%rsp),%eax - xor %ebp,%ebx - add %r11d,%r12d - xor 24(%rsp),%eax - rol $30,%esi - add %ebx,%r12d - rol $1,%eax - mov %eax,36(%rsp) - lea 0x6ed9eba1(%eax,%ebp),%r11d - mov 40(%rsp),%eax - mov %esi,%ebx - mov %r12d,%ebp - xor 48(%rsp),%eax - xor %edx,%ebx - rol $5,%ebp - xor 8(%rsp),%eax - xor %edi,%ebx - add %ebp,%r11d - xor 28(%rsp),%eax - rol $30,%edx - add %ebx,%r11d - rol $1,%eax - mov %eax,40(%rsp) - lea 0x6ed9eba1(%eax,%edi),%ebp - mov 44(%rsp),%eax - mov %edx,%ebx - mov %r11d,%edi - xor 52(%rsp),%eax - xor %r12d,%ebx - rol $5,%edi - xor 12(%rsp),%eax - xor %esi,%ebx - add %edi,%ebp - xor 32(%rsp),%eax - rol $30,%r12d - add %ebx,%ebp - rol $1,%eax - mov %eax,44(%rsp) - lea 0x6ed9eba1(%eax,%esi),%edi - mov 48(%rsp),%eax - mov %r12d,%ebx - mov %ebp,%esi - xor 56(%rsp),%eax - xor %r11d,%ebx - rol $5,%esi - xor 16(%rsp),%eax - xor 
%edx,%ebx - add %esi,%edi - xor 36(%rsp),%eax - rol $30,%r11d - add %ebx,%edi - rol $1,%eax - mov %eax,48(%rsp) - lea 0x6ed9eba1(%eax,%edx),%esi - mov 52(%rsp),%eax - mov %r11d,%ebx - mov %edi,%edx - xor 60(%rsp),%eax - xor %ebp,%ebx - rol $5,%edx - xor 20(%rsp),%eax - xor %r12d,%ebx - add %edx,%esi - xor 40(%rsp),%eax - rol $30,%ebp - add %ebx,%esi - rol $1,%eax - mov %eax,52(%rsp) - lea 0x6ed9eba1(%eax,%r12d),%edx - mov 56(%rsp),%eax - mov %ebp,%ebx - mov %esi,%r12d - xor 0(%rsp),%eax - xor %edi,%ebx - rol $5,%r12d - xor 24(%rsp),%eax - xor %r11d,%ebx - add %r12d,%edx - xor 44(%rsp),%eax - rol $30,%edi - add %ebx,%edx - rol $1,%eax - mov %eax,56(%rsp) - lea 0x6ed9eba1(%eax,%r11d),%r12d - mov 60(%rsp),%eax - mov %edi,%ebx - mov %edx,%r11d - xor 4(%rsp),%eax - xor %esi,%ebx - rol $5,%r11d - xor 28(%rsp),%eax - xor %ebp,%ebx - add %r11d,%r12d - xor 48(%rsp),%eax - rol $30,%esi - add %ebx,%r12d - rol $1,%eax - mov %eax,60(%rsp) - lea 0x6ed9eba1(%eax,%ebp),%r11d - mov 0(%rsp),%eax - mov %esi,%ebx - mov %r12d,%ebp - xor 8(%rsp),%eax - xor %edx,%ebx - rol $5,%ebp - xor 32(%rsp),%eax - xor %edi,%ebx - add %ebp,%r11d - xor 52(%rsp),%eax - rol $30,%edx - add %ebx,%r11d - rol $1,%eax - mov %eax,0(%rsp) - lea 0x6ed9eba1(%eax,%edi),%ebp - mov 4(%rsp),%eax - mov %edx,%ebx - mov %r11d,%edi - xor 12(%rsp),%eax - xor %r12d,%ebx - rol $5,%edi - xor 36(%rsp),%eax - xor %esi,%ebx - add %edi,%ebp - xor 56(%rsp),%eax - rol $30,%r12d - add %ebx,%ebp - rol $1,%eax - mov %eax,4(%rsp) - lea 0x6ed9eba1(%eax,%esi),%edi - mov 8(%rsp),%eax - mov %r12d,%ebx - mov %ebp,%esi - xor 16(%rsp),%eax - xor %r11d,%ebx - rol $5,%esi - xor 40(%rsp),%eax - xor %edx,%ebx - add %esi,%edi - xor 60(%rsp),%eax - rol $30,%r11d - add %ebx,%edi - rol $1,%eax - mov %eax,8(%rsp) - lea 0x6ed9eba1(%eax,%edx),%esi - mov 12(%rsp),%eax - mov %r11d,%ebx - mov %edi,%edx - xor 20(%rsp),%eax - xor %ebp,%ebx - rol $5,%edx - xor 44(%rsp),%eax - xor %r12d,%ebx - add %edx,%esi - xor 0(%rsp),%eax - rol $30,%ebp - add %ebx,%esi - 
rol $1,%eax - mov %eax,12(%rsp) - lea 0x6ed9eba1(%eax,%r12d),%edx - mov 16(%rsp),%eax - mov %ebp,%ebx - mov %esi,%r12d - xor 24(%rsp),%eax - xor %edi,%ebx - rol $5,%r12d - xor 48(%rsp),%eax - xor %r11d,%ebx - add %r12d,%edx - xor 4(%rsp),%eax - rol $30,%edi - add %ebx,%edx - rol $1,%eax - mov %eax,16(%rsp) - lea 0x6ed9eba1(%eax,%r11d),%r12d - mov 20(%rsp),%eax - mov %edi,%ebx - mov %edx,%r11d - xor 28(%rsp),%eax - xor %esi,%ebx - rol $5,%r11d - xor 52(%rsp),%eax - xor %ebp,%ebx - add %r11d,%r12d - xor 8(%rsp),%eax - rol $30,%esi - add %ebx,%r12d - rol $1,%eax - mov %eax,20(%rsp) - lea 0x6ed9eba1(%eax,%ebp),%r11d - mov 24(%rsp),%eax - mov %esi,%ebx - mov %r12d,%ebp - xor 32(%rsp),%eax - xor %edx,%ebx - rol $5,%ebp - xor 56(%rsp),%eax - xor %edi,%ebx - add %ebp,%r11d - xor 12(%rsp),%eax - rol $30,%edx - add %ebx,%r11d - rol $1,%eax - mov %eax,24(%rsp) - lea 0x6ed9eba1(%eax,%edi),%ebp - mov 28(%rsp),%eax - mov %edx,%ebx - mov %r11d,%edi - xor 36(%rsp),%eax - xor %r12d,%ebx - rol $5,%edi - xor 60(%rsp),%eax - xor %esi,%ebx - add %edi,%ebp - xor 16(%rsp),%eax - rol $30,%r12d - add %ebx,%ebp - rol $1,%eax - mov %eax,28(%rsp) - lea 0x6ed9eba1(%eax,%esi),%edi - mov 32(%rsp),%eax - mov %r12d,%ebx - mov %ebp,%esi - xor 40(%rsp),%eax - xor %r11d,%ebx - rol $5,%esi - xor 0(%rsp),%eax - xor %edx,%ebx - add %esi,%edi - xor 20(%rsp),%eax - rol $30,%r11d - add %ebx,%edi - rol $1,%eax - mov %eax,32(%rsp) - lea -0x70e44324(%eax,%edx),%esi - mov 36(%rsp),%eax - mov %ebp,%ebx - mov %ebp,%ecx - xor 44(%rsp),%eax - mov %edi,%edx - and %r11d,%ebx - xor 4(%rsp),%eax - or %r11d,%ecx - rol $5,%edx - xor 24(%rsp),%eax - and %r12d,%ecx - add %edx,%esi - rol $1,%eax - or %ecx,%ebx - rol $30,%ebp - mov %eax,36(%rsp) - add %ebx,%esi - lea -0x70e44324(%eax,%r12d),%edx - mov 40(%rsp),%eax - mov %edi,%ebx - mov %edi,%ecx - xor 48(%rsp),%eax - mov %esi,%r12d - and %ebp,%ebx - xor 8(%rsp),%eax - or %ebp,%ecx - rol $5,%r12d - xor 28(%rsp),%eax - and %r11d,%ecx - add %r12d,%edx - rol $1,%eax - or 
%ecx,%ebx - rol $30,%edi - mov %eax,40(%rsp) - add %ebx,%edx - lea -0x70e44324(%eax,%r11d),%r12d - mov 44(%rsp),%eax - mov %esi,%ebx - mov %esi,%ecx - xor 52(%rsp),%eax - mov %edx,%r11d - and %edi,%ebx - xor 12(%rsp),%eax - or %edi,%ecx - rol $5,%r11d - xor 32(%rsp),%eax - and %ebp,%ecx - add %r11d,%r12d - rol $1,%eax - or %ecx,%ebx - rol $30,%esi - mov %eax,44(%rsp) - add %ebx,%r12d - lea -0x70e44324(%eax,%ebp),%r11d - mov 48(%rsp),%eax - mov %edx,%ebx - mov %edx,%ecx - xor 56(%rsp),%eax - mov %r12d,%ebp - and %esi,%ebx - xor 16(%rsp),%eax - or %esi,%ecx - rol $5,%ebp - xor 36(%rsp),%eax - and %edi,%ecx - add %ebp,%r11d - rol $1,%eax - or %ecx,%ebx - rol $30,%edx - mov %eax,48(%rsp) - add %ebx,%r11d - lea -0x70e44324(%eax,%edi),%ebp - mov 52(%rsp),%eax - mov %r12d,%ebx - mov %r12d,%ecx - xor 60(%rsp),%eax - mov %r11d,%edi - and %edx,%ebx - xor 20(%rsp),%eax - or %edx,%ecx - rol $5,%edi - xor 40(%rsp),%eax - and %esi,%ecx - add %edi,%ebp - rol $1,%eax - or %ecx,%ebx - rol $30,%r12d - mov %eax,52(%rsp) - add %ebx,%ebp - lea -0x70e44324(%eax,%esi),%edi - mov 56(%rsp),%eax - mov %r11d,%ebx - mov %r11d,%ecx - xor 0(%rsp),%eax - mov %ebp,%esi - and %r12d,%ebx - xor 24(%rsp),%eax - or %r12d,%ecx - rol $5,%esi - xor 44(%rsp),%eax - and %edx,%ecx - add %esi,%edi - rol $1,%eax - or %ecx,%ebx - rol $30,%r11d - mov %eax,56(%rsp) - add %ebx,%edi - lea -0x70e44324(%eax,%edx),%esi - mov 60(%rsp),%eax - mov %ebp,%ebx - mov %ebp,%ecx - xor 4(%rsp),%eax - mov %edi,%edx - and %r11d,%ebx - xor 28(%rsp),%eax - or %r11d,%ecx - rol $5,%edx - xor 48(%rsp),%eax - and %r12d,%ecx - add %edx,%esi - rol $1,%eax - or %ecx,%ebx - rol $30,%ebp - mov %eax,60(%rsp) - add %ebx,%esi - lea -0x70e44324(%eax,%r12d),%edx - mov 0(%rsp),%eax - mov %edi,%ebx - mov %edi,%ecx - xor 8(%rsp),%eax - mov %esi,%r12d - and %ebp,%ebx - xor 32(%rsp),%eax - or %ebp,%ecx - rol $5,%r12d - xor 52(%rsp),%eax - and %r11d,%ecx - add %r12d,%edx - rol $1,%eax - or %ecx,%ebx - rol $30,%edi - mov %eax,0(%rsp) - add %ebx,%edx - 
lea -0x70e44324(%eax,%r11d),%r12d - mov 4(%rsp),%eax - mov %esi,%ebx - mov %esi,%ecx - xor 12(%rsp),%eax - mov %edx,%r11d - and %edi,%ebx - xor 36(%rsp),%eax - or %edi,%ecx - rol $5,%r11d - xor 56(%rsp),%eax - and %ebp,%ecx - add %r11d,%r12d - rol $1,%eax - or %ecx,%ebx - rol $30,%esi - mov %eax,4(%rsp) - add %ebx,%r12d - lea -0x70e44324(%eax,%ebp),%r11d - mov 8(%rsp),%eax - mov %edx,%ebx - mov %edx,%ecx - xor 16(%rsp),%eax - mov %r12d,%ebp - and %esi,%ebx - xor 40(%rsp),%eax - or %esi,%ecx - rol $5,%ebp - xor 60(%rsp),%eax - and %edi,%ecx - add %ebp,%r11d - rol $1,%eax - or %ecx,%ebx - rol $30,%edx - mov %eax,8(%rsp) - add %ebx,%r11d - lea -0x70e44324(%eax,%edi),%ebp - mov 12(%rsp),%eax - mov %r12d,%ebx - mov %r12d,%ecx - xor 20(%rsp),%eax - mov %r11d,%edi - and %edx,%ebx - xor 44(%rsp),%eax - or %edx,%ecx - rol $5,%edi - xor 0(%rsp),%eax - and %esi,%ecx - add %edi,%ebp - rol $1,%eax - or %ecx,%ebx - rol $30,%r12d - mov %eax,12(%rsp) - add %ebx,%ebp - lea -0x70e44324(%eax,%esi),%edi - mov 16(%rsp),%eax - mov %r11d,%ebx - mov %r11d,%ecx - xor 24(%rsp),%eax - mov %ebp,%esi - and %r12d,%ebx - xor 48(%rsp),%eax - or %r12d,%ecx - rol $5,%esi - xor 4(%rsp),%eax - and %edx,%ecx - add %esi,%edi - rol $1,%eax - or %ecx,%ebx - rol $30,%r11d - mov %eax,16(%rsp) - add %ebx,%edi - lea -0x70e44324(%eax,%edx),%esi - mov 20(%rsp),%eax - mov %ebp,%ebx - mov %ebp,%ecx - xor 28(%rsp),%eax - mov %edi,%edx - and %r11d,%ebx - xor 52(%rsp),%eax - or %r11d,%ecx - rol $5,%edx - xor 8(%rsp),%eax - and %r12d,%ecx - add %edx,%esi - rol $1,%eax - or %ecx,%ebx - rol $30,%ebp - mov %eax,20(%rsp) - add %ebx,%esi - lea -0x70e44324(%eax,%r12d),%edx - mov 24(%rsp),%eax - mov %edi,%ebx - mov %edi,%ecx - xor 32(%rsp),%eax - mov %esi,%r12d - and %ebp,%ebx - xor 56(%rsp),%eax - or %ebp,%ecx - rol $5,%r12d - xor 12(%rsp),%eax - and %r11d,%ecx - add %r12d,%edx - rol $1,%eax - or %ecx,%ebx - rol $30,%edi - mov %eax,24(%rsp) - add %ebx,%edx - lea -0x70e44324(%eax,%r11d),%r12d - mov 28(%rsp),%eax - mov 
%esi,%ebx - mov %esi,%ecx - xor 36(%rsp),%eax - mov %edx,%r11d - and %edi,%ebx - xor 60(%rsp),%eax - or %edi,%ecx - rol $5,%r11d - xor 16(%rsp),%eax - and %ebp,%ecx - add %r11d,%r12d - rol $1,%eax - or %ecx,%ebx - rol $30,%esi - mov %eax,28(%rsp) - add %ebx,%r12d - lea -0x70e44324(%eax,%ebp),%r11d - mov 32(%rsp),%eax - mov %edx,%ebx - mov %edx,%ecx - xor 40(%rsp),%eax - mov %r12d,%ebp - and %esi,%ebx - xor 0(%rsp),%eax - or %esi,%ecx - rol $5,%ebp - xor 20(%rsp),%eax - and %edi,%ecx - add %ebp,%r11d - rol $1,%eax - or %ecx,%ebx - rol $30,%edx - mov %eax,32(%rsp) - add %ebx,%r11d - lea -0x70e44324(%eax,%edi),%ebp - mov 36(%rsp),%eax - mov %r12d,%ebx - mov %r12d,%ecx - xor 44(%rsp),%eax - mov %r11d,%edi - and %edx,%ebx - xor 4(%rsp),%eax - or %edx,%ecx - rol $5,%edi - xor 24(%rsp),%eax - and %esi,%ecx - add %edi,%ebp - rol $1,%eax - or %ecx,%ebx - rol $30,%r12d - mov %eax,36(%rsp) - add %ebx,%ebp - lea -0x70e44324(%eax,%esi),%edi - mov 40(%rsp),%eax - mov %r11d,%ebx - mov %r11d,%ecx - xor 48(%rsp),%eax - mov %ebp,%esi - and %r12d,%ebx - xor 8(%rsp),%eax - or %r12d,%ecx - rol $5,%esi - xor 28(%rsp),%eax - and %edx,%ecx - add %esi,%edi - rol $1,%eax - or %ecx,%ebx - rol $30,%r11d - mov %eax,40(%rsp) - add %ebx,%edi - lea -0x70e44324(%eax,%edx),%esi - mov 44(%rsp),%eax - mov %ebp,%ebx - mov %ebp,%ecx - xor 52(%rsp),%eax - mov %edi,%edx - and %r11d,%ebx - xor 12(%rsp),%eax - or %r11d,%ecx - rol $5,%edx - xor 32(%rsp),%eax - and %r12d,%ecx - add %edx,%esi - rol $1,%eax - or %ecx,%ebx - rol $30,%ebp - mov %eax,44(%rsp) - add %ebx,%esi - lea -0x70e44324(%eax,%r12d),%edx - mov 48(%rsp),%eax - mov %edi,%ebx - mov %edi,%ecx - xor 56(%rsp),%eax - mov %esi,%r12d - and %ebp,%ebx - xor 16(%rsp),%eax - or %ebp,%ecx - rol $5,%r12d - xor 36(%rsp),%eax - and %r11d,%ecx - add %r12d,%edx - rol $1,%eax - or %ecx,%ebx - rol $30,%edi - mov %eax,48(%rsp) - add %ebx,%edx - lea -0x359d3e2a(%eax,%r11d),%r12d - mov 52(%rsp),%eax - mov %edi,%ebx - mov %edx,%r11d - xor 60(%rsp),%eax - xor 
%esi,%ebx - rol $5,%r11d - xor 20(%rsp),%eax - xor %ebp,%ebx - add %r11d,%r12d - xor 40(%rsp),%eax - rol $30,%esi - add %ebx,%r12d - rol $1,%eax - mov %eax,52(%rsp) - lea -0x359d3e2a(%eax,%ebp),%r11d - mov 56(%rsp),%eax - mov %esi,%ebx - mov %r12d,%ebp - xor 0(%rsp),%eax - xor %edx,%ebx - rol $5,%ebp - xor 24(%rsp),%eax - xor %edi,%ebx - add %ebp,%r11d - xor 44(%rsp),%eax - rol $30,%edx - add %ebx,%r11d - rol $1,%eax - mov %eax,56(%rsp) - lea -0x359d3e2a(%eax,%edi),%ebp - mov 60(%rsp),%eax - mov %edx,%ebx - mov %r11d,%edi - xor 4(%rsp),%eax - xor %r12d,%ebx - rol $5,%edi - xor 28(%rsp),%eax - xor %esi,%ebx - add %edi,%ebp - xor 48(%rsp),%eax - rol $30,%r12d - add %ebx,%ebp - rol $1,%eax - mov %eax,60(%rsp) - lea -0x359d3e2a(%eax,%esi),%edi - mov 0(%rsp),%eax - mov %r12d,%ebx - mov %ebp,%esi - xor 8(%rsp),%eax - xor %r11d,%ebx - rol $5,%esi - xor 32(%rsp),%eax - xor %edx,%ebx - add %esi,%edi - xor 52(%rsp),%eax - rol $30,%r11d - add %ebx,%edi - rol $1,%eax - mov %eax,0(%rsp) - lea -0x359d3e2a(%eax,%edx),%esi - mov 4(%rsp),%eax - mov %r11d,%ebx - mov %edi,%edx - xor 12(%rsp),%eax - xor %ebp,%ebx - rol $5,%edx - xor 36(%rsp),%eax - xor %r12d,%ebx - add %edx,%esi - xor 56(%rsp),%eax - rol $30,%ebp - add %ebx,%esi - rol $1,%eax - mov %eax,4(%rsp) - lea -0x359d3e2a(%eax,%r12d),%edx - mov 8(%rsp),%eax - mov %ebp,%ebx - mov %esi,%r12d - xor 16(%rsp),%eax - xor %edi,%ebx - rol $5,%r12d - xor 40(%rsp),%eax - xor %r11d,%ebx - add %r12d,%edx - xor 60(%rsp),%eax - rol $30,%edi - add %ebx,%edx - rol $1,%eax - mov %eax,8(%rsp) - lea -0x359d3e2a(%eax,%r11d),%r12d - mov 12(%rsp),%eax - mov %edi,%ebx - mov %edx,%r11d - xor 20(%rsp),%eax - xor %esi,%ebx - rol $5,%r11d - xor 44(%rsp),%eax - xor %ebp,%ebx - add %r11d,%r12d - xor 0(%rsp),%eax - rol $30,%esi - add %ebx,%r12d - rol $1,%eax - mov %eax,12(%rsp) - lea -0x359d3e2a(%eax,%ebp),%r11d - mov 16(%rsp),%eax - mov %esi,%ebx - mov %r12d,%ebp - xor 24(%rsp),%eax - xor %edx,%ebx - rol $5,%ebp - xor 48(%rsp),%eax - xor %edi,%ebx - add 
%ebp,%r11d - xor 4(%rsp),%eax - rol $30,%edx - add %ebx,%r11d - rol $1,%eax - mov %eax,16(%rsp) - lea -0x359d3e2a(%eax,%edi),%ebp - mov 20(%rsp),%eax - mov %edx,%ebx - mov %r11d,%edi - xor 28(%rsp),%eax - xor %r12d,%ebx - rol $5,%edi - xor 52(%rsp),%eax - xor %esi,%ebx - add %edi,%ebp - xor 8(%rsp),%eax - rol $30,%r12d - add %ebx,%ebp - rol $1,%eax - mov %eax,20(%rsp) - lea -0x359d3e2a(%eax,%esi),%edi - mov 24(%rsp),%eax - mov %r12d,%ebx - mov %ebp,%esi - xor 32(%rsp),%eax - xor %r11d,%ebx - rol $5,%esi - xor 56(%rsp),%eax - xor %edx,%ebx - add %esi,%edi - xor 12(%rsp),%eax - rol $30,%r11d - add %ebx,%edi - rol $1,%eax - mov %eax,24(%rsp) - lea -0x359d3e2a(%eax,%edx),%esi - mov 28(%rsp),%eax - mov %r11d,%ebx - mov %edi,%edx - xor 36(%rsp),%eax - xor %ebp,%ebx - rol $5,%edx - xor 60(%rsp),%eax - xor %r12d,%ebx - add %edx,%esi - xor 16(%rsp),%eax - rol $30,%ebp - add %ebx,%esi - rol $1,%eax - mov %eax,28(%rsp) - lea -0x359d3e2a(%eax,%r12d),%edx - mov 32(%rsp),%eax - mov %ebp,%ebx - mov %esi,%r12d - xor 40(%rsp),%eax - xor %edi,%ebx - rol $5,%r12d - xor 0(%rsp),%eax - xor %r11d,%ebx - add %r12d,%edx - xor 20(%rsp),%eax - rol $30,%edi - add %ebx,%edx - rol $1,%eax - mov %eax,32(%rsp) - lea -0x359d3e2a(%eax,%r11d),%r12d - mov 36(%rsp),%eax - mov %edi,%ebx - mov %edx,%r11d - xor 44(%rsp),%eax - xor %esi,%ebx - rol $5,%r11d - xor 4(%rsp),%eax - xor %ebp,%ebx - add %r11d,%r12d - xor 24(%rsp),%eax - rol $30,%esi - add %ebx,%r12d - rol $1,%eax - mov %eax,36(%rsp) - lea -0x359d3e2a(%eax,%ebp),%r11d - mov 40(%rsp),%eax - mov %esi,%ebx - mov %r12d,%ebp - xor 48(%rsp),%eax - xor %edx,%ebx - rol $5,%ebp - xor 8(%rsp),%eax - xor %edi,%ebx - add %ebp,%r11d - xor 28(%rsp),%eax - rol $30,%edx - add %ebx,%r11d - rol $1,%eax - mov %eax,40(%rsp) - lea -0x359d3e2a(%eax,%edi),%ebp - mov 44(%rsp),%eax - mov %edx,%ebx - mov %r11d,%edi - xor 52(%rsp),%eax - xor %r12d,%ebx - rol $5,%edi - xor 12(%rsp),%eax - xor %esi,%ebx - add %edi,%ebp - xor 32(%rsp),%eax - rol $30,%r12d - add %ebx,%ebp - 
rol $1,%eax - mov %eax,44(%rsp) - lea -0x359d3e2a(%eax,%esi),%edi - mov 48(%rsp),%eax - mov %r12d,%ebx - mov %ebp,%esi - xor 56(%rsp),%eax - xor %r11d,%ebx - rol $5,%esi - xor 16(%rsp),%eax - xor %edx,%ebx - add %esi,%edi - xor 36(%rsp),%eax - rol $30,%r11d - add %ebx,%edi - rol $1,%eax - mov %eax,48(%rsp) - lea -0x359d3e2a(%eax,%edx),%esi - mov 52(%rsp),%eax - mov %r11d,%ebx - mov %edi,%edx - xor 60(%rsp),%eax - xor %ebp,%ebx - rol $5,%edx - xor 20(%rsp),%eax - xor %r12d,%ebx - add %edx,%esi - xor 40(%rsp),%eax - rol $30,%ebp - add %ebx,%esi - rol $1,%eax - lea -0x359d3e2a(%eax,%r12d),%edx - mov 56(%rsp),%eax - mov %ebp,%ebx - mov %esi,%r12d - xor 0(%rsp),%eax - xor %edi,%ebx - rol $5,%r12d - xor 24(%rsp),%eax - xor %r11d,%ebx - add %r12d,%edx - xor 44(%rsp),%eax - rol $30,%edi - add %ebx,%edx - rol $1,%eax - lea -0x359d3e2a(%eax,%r11d),%r12d - mov 60(%rsp),%eax - mov %edi,%ebx - mov %edx,%r11d - xor 4(%rsp),%eax - xor %esi,%ebx - rol $5,%r11d - xor 28(%rsp),%eax - xor %ebp,%ebx - add %r11d,%r12d - xor 48(%rsp),%eax - rol $30,%esi - add %ebx,%r12d - rol $1,%eax - lea -0x359d3e2a(%eax,%ebp),%r11d - mov %esi,%ebx - mov %r12d,%ebp - xor %edx,%ebx - rol $5,%ebp - xor %edi,%ebx - add %ebp,%r11d - rol $30,%edx - add %ebx,%r11d - // Update and save state information in SHA-1 context - add 0(%r8),%r11d - add 4(%r8),%r12d - add 8(%r8),%edx - add 12(%r8),%esi - add 16(%r8),%edi - mov %r11d,0(%r8) - mov %r12d,4(%r8) - mov %edx,8(%r8) - mov %esi,12(%r8) - mov %edi,16(%r8) - - xchg %r11d,%edx # mov %r11d,%edx - xchg %r12d,%esi # mov %r12d,%esi - xchg %r11d,%edi # mov %edx,%edi - xchg %r12d,%ebp # mov %esi,%ebp - # mov %edi,%r11d - lea 64(%r9),%r9 - sub $1,%r10 - jnz .Lloop - mov 64(%rsp),%rsp -.cfi_def_cfa %rsp,8 - movq -24(%rsp),%r12 -.cfi_restore %r12 - movq -16(%rsp),%rbp -.cfi_restore %rbp - movq -8(%rsp),%rbx -.cfi_restore %rbx - ret -.cfi_endproc -SET_SIZE(sha1_block_data_order) - -.data -.asciz "SHA1 block transform for x86_64, CRYPTOGAMS by <appro@openssl.org>" - 
-#endif /* lint || __lint */ - -#ifdef __ELF__ -.section .note.GNU-stack,"",%progbits -#endif diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256-x86_64.S b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256-x86_64.S new file mode 100644 index 000000000000..d3e5e3f0d080 --- /dev/null +++ b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256-x86_64.S @@ -0,0 +1,5104 @@ +/* + * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de> + * - modified assembly to fit into OpenZFS + */ + +#if defined(__x86_64) + +#define _ASM +#include <sys/asm_linkage.h> + +SECTION_STATIC + +.balign 64 +SET_OBJ(K256) +K256: +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 + +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 0x00010203,0x04050607,0x08090a0b,0x0c0d0e0f +.long 
0x03020100,0x0b0a0908,0xffffffff,0xffffffff +.long 0x03020100,0x0b0a0908,0xffffffff,0xffffffff +.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 +.long 0xffffffff,0xffffffff,0x03020100,0x0b0a0908 + +ENTRY_ALIGN(zfs_sha256_transform_x64, 16) +.cfi_startproc + ENDBR + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + shlq $4,%rdx + subq $64+32,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %rax,88(%rsp) +.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08 +.Lprologue: + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + jmp .Lloop +.balign 16 +.Lloop: + movl %ebx,%edi + leaq K256(%rip),%rbp + xorl %ecx,%edi + movl 0(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + movl %r12d,0(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + leaq 4(%rbp),%rbp + addl %r14d,%r11d + movl 4(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%edi + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + movl %r12d,4(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + xorl %eax,%edi + rorl 
$6,%r13d + movl %eax,%r10d + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + leaq 4(%rbp),%rbp + addl %r14d,%r10d + movl 8(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + movl %r12d,8(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + leaq 4(%rbp),%rbp + addl %r14d,%r9d + movl 12(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%edi + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + movl %r12d,12(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + leaq 20(%rbp),%rbp + addl %r14d,%r8d + movl 16(%rsi),%r12d + movl %eax,%r13d + movl %r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + movl %r12d,16(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + leaq 4(%rbp),%rbp + addl %r14d,%edx + movl 20(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl 
%eax,%edi + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + movl %r12d,20(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + leaq 4(%rbp),%rbp + addl %r14d,%ecx + movl 24(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + movl %r12d,24(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + leaq 4(%rbp),%rbp + addl %r14d,%ebx + movl 28(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%edi + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + movl %r12d,28(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + leaq 20(%rbp),%rbp + addl %r14d,%eax + movl 32(%rsi),%r12d + movl %r8d,%r13d + movl %eax,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r9d,%r15d + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + movl %r12d,32(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + movl %eax,%r15d + addl (%rbp),%r12d + 
xorl %eax,%r14d + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + leaq 4(%rbp),%rbp + addl %r14d,%r11d + movl 36(%rsi),%r12d + movl %edx,%r13d + movl %r11d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r8d,%edi + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + movl %r12d,36(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + leaq 4(%rbp),%rbp + addl %r14d,%r10d + movl 40(%rsi),%r12d + movl %ecx,%r13d + movl %r10d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %edx,%r15d + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + movl %r12d,40(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + leaq 4(%rbp),%rbp + addl %r14d,%r9d + movl 44(%rsi),%r12d + movl %ebx,%r13d + movl %r9d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ecx,%edi + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + movl %r12d,44(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + leaq 20(%rbp),%rbp + addl %r14d,%r8d + movl 48(%rsi),%r12d + movl %eax,%r13d + movl 
%r8d,%r14d + bswapl %r12d + rorl $14,%r13d + movl %ebx,%r15d + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + movl %r12d,48(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + leaq 4(%rbp),%rbp + addl %r14d,%edx + movl 52(%rsi),%r12d + movl %r11d,%r13d + movl %edx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %eax,%edi + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + movl %r12d,52(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + leaq 4(%rbp),%rbp + addl %r14d,%ecx + movl 56(%rsi),%r12d + movl %r10d,%r13d + movl %ecx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r11d,%r15d + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + movl %r12d,56(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + leaq 4(%rbp),%rbp + addl %r14d,%ebx + movl 60(%rsi),%r12d + movl %r9d,%r13d + movl %ebx,%r14d + bswapl %r12d + rorl $14,%r13d + movl %r10d,%edi + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + movl %r12d,60(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + rorl $11,%r14d + xorl %r9d,%r13d + addl 
%edi,%r12d + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + leaq 20(%rbp),%rbp + jmp .Lrounds_16_xx +.balign 16 +.Lrounds_16_xx: + movl 4(%rsp),%r13d + movl 56(%rsp),%r15d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%eax + movl %r15d,%r14d + rorl $2,%r15d + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 36(%rsp),%r12d + addl 0(%rsp),%r12d + movl %r8d,%r13d + addl %r15d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + movl %r12d,0(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + leaq 4(%rbp),%rbp + movl 8(%rsp),%r13d + movl 60(%rsp),%edi + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r11d + movl %edi,%r14d + rorl $2,%edi + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 40(%rsp),%r12d + addl 4(%rsp),%r12d + movl %edx,%r13d + addl %edi,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%edi + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + movl %r12d,4(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + leaq 
4(%rbp),%rbp + movl 12(%rsp),%r13d + movl 0(%rsp),%r15d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r10d + movl %r15d,%r14d + rorl $2,%r15d + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 44(%rsp),%r12d + addl 8(%rsp),%r12d + movl %ecx,%r13d + addl %r15d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + movl %r12d,8(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + leaq 4(%rbp),%rbp + movl 16(%rsp),%r13d + movl 4(%rsp),%edi + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r9d + movl %edi,%r14d + rorl $2,%edi + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 48(%rsp),%r12d + addl 12(%rsp),%r12d + movl %ebx,%r13d + addl %edi,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%edi + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + movl %r12d,12(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + leaq 20(%rbp),%rbp + movl 20(%rsp),%r13d + movl 8(%rsp),%r15d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r8d + movl %r15d,%r14d + rorl $2,%r15d + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + 
addl 52(%rsp),%r12d + addl 16(%rsp),%r12d + movl %eax,%r13d + addl %r15d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + movl %r12d,16(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + leaq 4(%rbp),%rbp + movl 24(%rsp),%r13d + movl 12(%rsp),%edi + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%edx + movl %edi,%r14d + rorl $2,%edi + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 56(%rsp),%r12d + addl 20(%rsp),%r12d + movl %r11d,%r13d + addl %edi,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%edi + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + movl %r12d,20(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + rorl $5,%r13d + addl %ecx,%r12d + xorl %ebx,%edi + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + leaq 4(%rbp),%rbp + movl 28(%rsp),%r13d + movl 16(%rsp),%r15d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ecx + movl %r15d,%r14d + rorl $2,%r15d + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 60(%rsp),%r12d + addl 24(%rsp),%r12d + movl %r10d,%r13d + addl %r15d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + movl %r12d,24(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + rorl $5,%r13d + addl %ebx,%r12d + xorl 
%eax,%r15d + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + leaq 4(%rbp),%rbp + movl 32(%rsp),%r13d + movl 20(%rsp),%edi + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ebx + movl %edi,%r14d + rorl $2,%edi + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 0(%rsp),%r12d + addl 28(%rsp),%r12d + movl %r9d,%r13d + addl %edi,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%edi + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + movl %r12d,28(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + leaq 20(%rbp),%rbp + movl 36(%rsp),%r13d + movl 24(%rsp),%r15d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%eax + movl %r15d,%r14d + rorl $2,%r15d + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 4(%rsp),%r12d + addl 32(%rsp),%r12d + movl %r8d,%r13d + addl %r15d,%r12d + movl %eax,%r14d + rorl $14,%r13d + movl %r9d,%r15d + xorl %r8d,%r13d + rorl $9,%r14d + xorl %r10d,%r15d + movl %r12d,32(%rsp) + xorl %eax,%r14d + andl %r8d,%r15d + rorl $5,%r13d + addl %r11d,%r12d + xorl %r10d,%r15d + rorl $11,%r14d + xorl %r8d,%r13d + addl %r15d,%r12d + movl %eax,%r15d + addl (%rbp),%r12d + xorl %eax,%r14d + xorl %ebx,%r15d + rorl $6,%r13d + movl %ebx,%r11d + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + xorl %edi,%r11d + addl %r12d,%edx + addl %r12d,%r11d + leaq 
4(%rbp),%rbp + movl 40(%rsp),%r13d + movl 28(%rsp),%edi + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r11d + movl %edi,%r14d + rorl $2,%edi + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 8(%rsp),%r12d + addl 36(%rsp),%r12d + movl %edx,%r13d + addl %edi,%r12d + movl %r11d,%r14d + rorl $14,%r13d + movl %r8d,%edi + xorl %edx,%r13d + rorl $9,%r14d + xorl %r9d,%edi + movl %r12d,36(%rsp) + xorl %r11d,%r14d + andl %edx,%edi + rorl $5,%r13d + addl %r10d,%r12d + xorl %r9d,%edi + rorl $11,%r14d + xorl %edx,%r13d + addl %edi,%r12d + movl %r11d,%edi + addl (%rbp),%r12d + xorl %r11d,%r14d + xorl %eax,%edi + rorl $6,%r13d + movl %eax,%r10d + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + xorl %r15d,%r10d + addl %r12d,%ecx + addl %r12d,%r10d + leaq 4(%rbp),%rbp + movl 44(%rsp),%r13d + movl 32(%rsp),%r15d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r10d + movl %r15d,%r14d + rorl $2,%r15d + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 12(%rsp),%r12d + addl 40(%rsp),%r12d + movl %ecx,%r13d + addl %r15d,%r12d + movl %r10d,%r14d + rorl $14,%r13d + movl %edx,%r15d + xorl %ecx,%r13d + rorl $9,%r14d + xorl %r8d,%r15d + movl %r12d,40(%rsp) + xorl %r10d,%r14d + andl %ecx,%r15d + rorl $5,%r13d + addl %r9d,%r12d + xorl %r8d,%r15d + rorl $11,%r14d + xorl %ecx,%r13d + addl %r15d,%r12d + movl %r10d,%r15d + addl (%rbp),%r12d + xorl %r10d,%r14d + xorl %r11d,%r15d + rorl $6,%r13d + movl %r11d,%r9d + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + xorl %edi,%r9d + addl %r12d,%ebx + addl %r12d,%r9d + leaq 4(%rbp),%rbp + movl 48(%rsp),%r13d + movl 36(%rsp),%edi + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r9d + movl %edi,%r14d + rorl $2,%edi + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi 
+ addl 16(%rsp),%r12d + addl 44(%rsp),%r12d + movl %ebx,%r13d + addl %edi,%r12d + movl %r9d,%r14d + rorl $14,%r13d + movl %ecx,%edi + xorl %ebx,%r13d + rorl $9,%r14d + xorl %edx,%edi + movl %r12d,44(%rsp) + xorl %r9d,%r14d + andl %ebx,%edi + rorl $5,%r13d + addl %r8d,%r12d + xorl %edx,%edi + rorl $11,%r14d + xorl %ebx,%r13d + addl %edi,%r12d + movl %r9d,%edi + addl (%rbp),%r12d + xorl %r9d,%r14d + xorl %r10d,%edi + rorl $6,%r13d + movl %r10d,%r8d + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + xorl %r15d,%r8d + addl %r12d,%eax + addl %r12d,%r8d + leaq 20(%rbp),%rbp + movl 52(%rsp),%r13d + movl 40(%rsp),%r15d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%r8d + movl %r15d,%r14d + rorl $2,%r15d + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 20(%rsp),%r12d + addl 48(%rsp),%r12d + movl %eax,%r13d + addl %r15d,%r12d + movl %r8d,%r14d + rorl $14,%r13d + movl %ebx,%r15d + xorl %eax,%r13d + rorl $9,%r14d + xorl %ecx,%r15d + movl %r12d,48(%rsp) + xorl %r8d,%r14d + andl %eax,%r15d + rorl $5,%r13d + addl %edx,%r12d + xorl %ecx,%r15d + rorl $11,%r14d + xorl %eax,%r13d + addl %r15d,%r12d + movl %r8d,%r15d + addl (%rbp),%r12d + xorl %r8d,%r14d + xorl %r9d,%r15d + rorl $6,%r13d + movl %r9d,%edx + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + xorl %edi,%edx + addl %r12d,%r11d + addl %r12d,%edx + leaq 4(%rbp),%rbp + movl 56(%rsp),%r13d + movl 44(%rsp),%edi + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%edx + movl %edi,%r14d + rorl $2,%edi + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 24(%rsp),%r12d + addl 52(%rsp),%r12d + movl %r11d,%r13d + addl %edi,%r12d + movl %edx,%r14d + rorl $14,%r13d + movl %eax,%edi + xorl %r11d,%r13d + rorl $9,%r14d + xorl %ebx,%edi + movl %r12d,52(%rsp) + xorl %edx,%r14d + andl %r11d,%edi + rorl $5,%r13d + addl %ecx,%r12d + xorl 
%ebx,%edi + rorl $11,%r14d + xorl %r11d,%r13d + addl %edi,%r12d + movl %edx,%edi + addl (%rbp),%r12d + xorl %edx,%r14d + xorl %r8d,%edi + rorl $6,%r13d + movl %r8d,%ecx + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + xorl %r15d,%ecx + addl %r12d,%r10d + addl %r12d,%ecx + leaq 4(%rbp),%rbp + movl 60(%rsp),%r13d + movl 48(%rsp),%r15d + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ecx + movl %r15d,%r14d + rorl $2,%r15d + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%r15d + shrl $10,%r14d + rorl $17,%r15d + xorl %r13d,%r12d + xorl %r14d,%r15d + addl 28(%rsp),%r12d + addl 56(%rsp),%r12d + movl %r10d,%r13d + addl %r15d,%r12d + movl %ecx,%r14d + rorl $14,%r13d + movl %r11d,%r15d + xorl %r10d,%r13d + rorl $9,%r14d + xorl %eax,%r15d + movl %r12d,56(%rsp) + xorl %ecx,%r14d + andl %r10d,%r15d + rorl $5,%r13d + addl %ebx,%r12d + xorl %eax,%r15d + rorl $11,%r14d + xorl %r10d,%r13d + addl %r15d,%r12d + movl %ecx,%r15d + addl (%rbp),%r12d + xorl %ecx,%r14d + xorl %edx,%r15d + rorl $6,%r13d + movl %edx,%ebx + andl %r15d,%edi + rorl $2,%r14d + addl %r13d,%r12d + xorl %edi,%ebx + addl %r12d,%r9d + addl %r12d,%ebx + leaq 4(%rbp),%rbp + movl 0(%rsp),%r13d + movl 52(%rsp),%edi + movl %r13d,%r12d + rorl $11,%r13d + addl %r14d,%ebx + movl %edi,%r14d + rorl $2,%edi + xorl %r12d,%r13d + shrl $3,%r12d + rorl $7,%r13d + xorl %r14d,%edi + shrl $10,%r14d + rorl $17,%edi + xorl %r13d,%r12d + xorl %r14d,%edi + addl 32(%rsp),%r12d + addl 60(%rsp),%r12d + movl %r9d,%r13d + addl %edi,%r12d + movl %ebx,%r14d + rorl $14,%r13d + movl %r10d,%edi + xorl %r9d,%r13d + rorl $9,%r14d + xorl %r11d,%edi + movl %r12d,60(%rsp) + xorl %ebx,%r14d + andl %r9d,%edi + rorl $5,%r13d + addl %eax,%r12d + xorl %r11d,%edi + rorl $11,%r14d + xorl %r9d,%r13d + addl %edi,%r12d + movl %ebx,%edi + addl (%rbp),%r12d + xorl %ebx,%r14d + xorl %ecx,%edi + rorl $6,%r13d + movl %ecx,%eax + andl %edi,%r15d + rorl $2,%r14d + addl %r13d,%r12d + xorl %r15d,%eax + addl %r12d,%r8d + addl %r12d,%eax + leaq 
20(%rbp),%rbp + cmpb $0,3(%rbp) + jnz .Lrounds_16_xx + movq 64+0(%rsp),%rdi + addl %r14d,%eax + leaq 64(%rsi),%rsi + addl 0(%rdi),%eax + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + cmpq 64+16(%rsp),%rsi + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb .Lloop + movq 88(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue: + RET +.cfi_endproc +SET_SIZE(zfs_sha256_transform_x64) + +ENTRY_ALIGN(zfs_sha256_transform_shani, 64) +.cfi_startproc + ENDBR + leaq K256+128(%rip),%rcx + movdqu (%rdi),%xmm1 + movdqu 16(%rdi),%xmm2 + movdqa 512-128(%rcx),%xmm7 + + pshufd $0x1b,%xmm1,%xmm0 + pshufd $0xb1,%xmm1,%xmm1 + pshufd $0x1b,%xmm2,%xmm2 + movdqa %xmm7,%xmm8 +.byte 102,15,58,15,202,8 + punpcklqdq %xmm0,%xmm2 + jmp .Loop_shani + +.balign 16 +.Loop_shani: + movdqu (%rsi),%xmm3 + movdqu 16(%rsi),%xmm4 + movdqu 32(%rsi),%xmm5 +.byte 102,15,56,0,223 + movdqu 48(%rsi),%xmm6 + + movdqa 0-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 102,15,56,0,231 + movdqa %xmm2,%xmm10 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + nop + movdqa %xmm1,%xmm9 +.byte 15,56,203,202 + + movdqa 32-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 102,15,56,0,239 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + leaq 64(%rsi),%rsi +.byte 15,56,204,220 +.byte 15,56,203,202 + + movdqa 64-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 102,15,56,0,247 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + + movdqa 
96-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 128-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 160-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 + nop + paddd %xmm7,%xmm6 +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa 192-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,205,245 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa 224-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 256-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 288-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 + nop + paddd %xmm7,%xmm6 +.byte 15,56,204,220 +.byte 15,56,203,202 + movdqa 320-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,205,245 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm6,%xmm7 +.byte 102,15,58,15,253,4 + nop + paddd %xmm7,%xmm3 +.byte 15,56,204,229 +.byte 15,56,203,202 + movdqa 352-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 +.byte 15,56,205,222 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm3,%xmm7 +.byte 
102,15,58,15,254,4 + nop + paddd %xmm7,%xmm4 +.byte 15,56,204,238 +.byte 15,56,203,202 + movdqa 384-128(%rcx),%xmm0 + paddd %xmm3,%xmm0 +.byte 15,56,205,227 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm4,%xmm7 +.byte 102,15,58,15,251,4 + nop + paddd %xmm7,%xmm5 +.byte 15,56,204,243 +.byte 15,56,203,202 + movdqa 416-128(%rcx),%xmm0 + paddd %xmm4,%xmm0 +.byte 15,56,205,236 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + movdqa %xmm5,%xmm7 +.byte 102,15,58,15,252,4 +.byte 15,56,203,202 + paddd %xmm7,%xmm6 + + movdqa 448-128(%rcx),%xmm0 + paddd %xmm5,%xmm0 +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 +.byte 15,56,205,245 + movdqa %xmm8,%xmm7 +.byte 15,56,203,202 + + movdqa 480-128(%rcx),%xmm0 + paddd %xmm6,%xmm0 + nop +.byte 15,56,203,209 + pshufd $0x0e,%xmm0,%xmm0 + decq %rdx + nop +.byte 15,56,203,202 + + paddd %xmm10,%xmm2 + paddd %xmm9,%xmm1 + jnz .Loop_shani + + pshufd $0xb1,%xmm2,%xmm2 + pshufd $0x1b,%xmm1,%xmm7 + pshufd $0xb1,%xmm1,%xmm1 + punpckhqdq %xmm2,%xmm1 +.byte 102,15,58,15,215,8 + + movdqu %xmm1,(%rdi) + movdqu %xmm2,16(%rdi) + RET +.cfi_endproc +SET_SIZE(zfs_sha256_transform_shani) + +ENTRY_ALIGN(zfs_sha256_transform_ssse3, 64) +.cfi_startproc + ENDBR + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + shlq $4,%rdx + subq $96,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %rax,88(%rsp) +.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08 +.Lprologue_ssse3: + + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + + jmp .Lloop_ssse3 +.balign 16 +.Lloop_ssse3: + movdqa K256+512(%rip),%xmm7 + movdqu 0(%rsi),%xmm0 + movdqu 16(%rsi),%xmm1 + movdqu 
32(%rsi),%xmm2 +.byte 102,15,56,0,199 + movdqu 48(%rsi),%xmm3 + leaq K256(%rip),%rbp +.byte 102,15,56,0,207 + movdqa 0(%rbp),%xmm4 + movdqa 32(%rbp),%xmm5 +.byte 102,15,56,0,215 + paddd %xmm0,%xmm4 + movdqa 64(%rbp),%xmm6 +.byte 102,15,56,0,223 + movdqa 96(%rbp),%xmm7 + paddd %xmm1,%xmm5 + paddd %xmm2,%xmm6 + paddd %xmm3,%xmm7 + movdqa %xmm4,0(%rsp) + movl %eax,%r14d + movdqa %xmm5,16(%rsp) + movl %ebx,%edi + movdqa %xmm6,32(%rsp) + xorl %ecx,%edi + movdqa %xmm7,48(%rsp) + movl %r8d,%r13d + jmp .Lssse3_00_47 + +.balign 16 +.Lssse3_00_47: + subq $-128,%rbp + rorl $14,%r13d + movdqa %xmm1,%xmm4 + movl %r14d,%eax + movl %r9d,%r12d + movdqa %xmm3,%xmm7 + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d +.byte 102,15,58,15,224,4 + andl %r8d,%r12d + xorl %r8d,%r13d +.byte 102,15,58,15,250,4 + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %ebx,%r15d + addl %r12d,%r11d + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + paddd %xmm7,%xmm0 + rorl $2,%r14d + addl %r11d,%edx + psrld $7,%xmm6 + addl %edi,%r11d + movl %edx,%r13d + pshufd $250,%xmm3,%xmm7 + addl %r11d,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%r11d + movl %r8d,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %r11d,%r14d + pxor %xmm5,%xmm4 + andl %edx,%r12d + xorl %edx,%r13d + pslld $11,%xmm5 + addl 4(%rsp),%r10d + movl %r11d,%edi + pxor %xmm6,%xmm4 + xorl %r9d,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %eax,%edi + addl %r12d,%r10d + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + psrld $10,%xmm7 + addl %r13d,%r10d + xorl %eax,%r15d + paddd %xmm4,%xmm0 + rorl $2,%r14d + addl %r10d,%ecx + psrlq $17,%xmm6 + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl 
$9,%r14d + psrlq $2,%xmm6 + xorl %ecx,%r13d + xorl %r8d,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + psrldq $8,%xmm7 + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + paddd %xmm7,%xmm0 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + pshufd $80,%xmm0,%xmm7 + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + movdqa %xmm7,%xmm6 + addl %edi,%r9d + movl %ebx,%r13d + psrld $10,%xmm7 + addl %r9d,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%r9d + movl %ecx,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + psrlq $2,%xmm6 + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + pxor %xmm6,%xmm7 + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %r10d,%edi + addl %r12d,%r8d + movdqa 0(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + paddd %xmm7,%xmm0 + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + paddd %xmm0,%xmm6 + movl %eax,%r13d + addl %r8d,%r14d + movdqa %xmm6,0(%rsp) + rorl $14,%r13d + movdqa %xmm2,%xmm4 + movl %r14d,%r8d + movl %ebx,%r12d + movdqa %xmm0,%xmm7 + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d +.byte 102,15,58,15,225,4 + andl %eax,%r12d + xorl %eax,%r13d +.byte 102,15,58,15,251,4 + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %r9d,%r15d + addl %r12d,%edx + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + paddd %xmm7,%xmm1 + rorl $2,%r14d + addl %edx,%r11d + psrld $7,%xmm6 + addl %edi,%edx + movl %r11d,%r13d + pshufd $250,%xmm0,%xmm7 + addl %edx,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%edx + movl %eax,%r12d + pxor %xmm6,%xmm4 + rorl 
$9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %edx,%r14d + pxor %xmm5,%xmm4 + andl %r11d,%r12d + xorl %r11d,%r13d + pslld $11,%xmm5 + addl 20(%rsp),%ecx + movl %edx,%edi + pxor %xmm6,%xmm4 + xorl %ebx,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %r8d,%edi + addl %r12d,%ecx + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + psrld $10,%xmm7 + addl %r13d,%ecx + xorl %r8d,%r15d + paddd %xmm4,%xmm1 + rorl $2,%r14d + addl %ecx,%r10d + psrlq $17,%xmm6 + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %r10d,%r13d + xorl %eax,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + psrldq $8,%xmm7 + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + paddd %xmm7,%xmm1 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + pshufd $80,%xmm1,%xmm7 + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + movdqa %xmm7,%xmm6 + addl %edi,%ebx + movl %r9d,%r13d + psrld $10,%xmm7 + addl %ebx,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%ebx + movl %r10d,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + psrlq $2,%xmm6 + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + pxor %xmm6,%xmm7 + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %ecx,%edi + addl %r12d,%eax + movdqa 32(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + paddd %xmm7,%xmm1 + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + paddd %xmm1,%xmm6 + movl %r8d,%r13d + addl %eax,%r14d + movdqa %xmm6,16(%rsp) + rorl $14,%r13d + movdqa %xmm3,%xmm4 + movl %r14d,%eax + movl %r9d,%r12d + movdqa %xmm1,%xmm7 + rorl $9,%r14d + xorl 
%r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d +.byte 102,15,58,15,226,4 + andl %r8d,%r12d + xorl %r8d,%r13d +.byte 102,15,58,15,248,4 + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %ebx,%r15d + addl %r12d,%r11d + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + paddd %xmm7,%xmm2 + rorl $2,%r14d + addl %r11d,%edx + psrld $7,%xmm6 + addl %edi,%r11d + movl %edx,%r13d + pshufd $250,%xmm1,%xmm7 + addl %r11d,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%r11d + movl %r8d,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %r11d,%r14d + pxor %xmm5,%xmm4 + andl %edx,%r12d + xorl %edx,%r13d + pslld $11,%xmm5 + addl 36(%rsp),%r10d + movl %r11d,%edi + pxor %xmm6,%xmm4 + xorl %r9d,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %eax,%edi + addl %r12d,%r10d + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + psrld $10,%xmm7 + addl %r13d,%r10d + xorl %eax,%r15d + paddd %xmm4,%xmm2 + rorl $2,%r14d + addl %r10d,%ecx + psrlq $17,%xmm6 + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + psrlq $2,%xmm6 + xorl %ecx,%r13d + xorl %r8d,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + psrldq $8,%xmm7 + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + paddd %xmm7,%xmm2 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + pshufd $80,%xmm2,%xmm7 + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + movdqa %xmm7,%xmm6 + addl %edi,%r9d + movl %ebx,%r13d + psrld $10,%xmm7 + addl %r9d,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%r9d + movl %ecx,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl 
%ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + psrlq $2,%xmm6 + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + pxor %xmm6,%xmm7 + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %r10d,%edi + addl %r12d,%r8d + movdqa 64(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + paddd %xmm7,%xmm2 + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + paddd %xmm2,%xmm6 + movl %eax,%r13d + addl %r8d,%r14d + movdqa %xmm6,32(%rsp) + rorl $14,%r13d + movdqa %xmm0,%xmm4 + movl %r14d,%r8d + movl %ebx,%r12d + movdqa %xmm2,%xmm7 + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d +.byte 102,15,58,15,227,4 + andl %eax,%r12d + xorl %eax,%r13d +.byte 102,15,58,15,249,4 + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + movdqa %xmm4,%xmm5 + xorl %r9d,%r15d + addl %r12d,%edx + movdqa %xmm4,%xmm6 + rorl $6,%r13d + andl %r15d,%edi + psrld $3,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + paddd %xmm7,%xmm3 + rorl $2,%r14d + addl %edx,%r11d + psrld $7,%xmm6 + addl %edi,%edx + movl %r11d,%r13d + pshufd $250,%xmm2,%xmm7 + addl %edx,%r14d + rorl $14,%r13d + pslld $14,%xmm5 + movl %r14d,%edx + movl %eax,%r12d + pxor %xmm6,%xmm4 + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + psrld $11,%xmm6 + xorl %edx,%r14d + pxor %xmm5,%xmm4 + andl %r11d,%r12d + xorl %r11d,%r13d + pslld $11,%xmm5 + addl 52(%rsp),%ecx + movl %edx,%edi + pxor %xmm6,%xmm4 + xorl %ebx,%r12d + rorl $11,%r14d + movdqa %xmm7,%xmm6 + xorl %r8d,%edi + addl %r12d,%ecx + pxor %xmm5,%xmm4 + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + psrld $10,%xmm7 + addl %r13d,%ecx + xorl %r8d,%r15d + paddd %xmm4,%xmm3 + rorl $2,%r14d + addl %ecx,%r10d + psrlq $17,%xmm6 + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + pxor %xmm6,%xmm7 + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + psrlq 
$2,%xmm6 + xorl %r10d,%r13d + xorl %eax,%r12d + pxor %xmm6,%xmm7 + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + pshufd $128,%xmm7,%xmm7 + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + psrldq $8,%xmm7 + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + paddd %xmm7,%xmm3 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + pshufd $80,%xmm3,%xmm7 + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + movdqa %xmm7,%xmm6 + addl %edi,%ebx + movl %r9d,%r13d + psrld $10,%xmm7 + addl %ebx,%r14d + rorl $14,%r13d + psrlq $17,%xmm6 + movl %r14d,%ebx + movl %r10d,%r12d + pxor %xmm6,%xmm7 + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + psrlq $2,%xmm6 + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + pxor %xmm6,%xmm7 + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + pshufd $8,%xmm7,%xmm7 + xorl %ecx,%edi + addl %r12d,%eax + movdqa 96(%rbp),%xmm6 + rorl $6,%r13d + andl %edi,%r15d + pslldq $8,%xmm7 + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + paddd %xmm7,%xmm3 + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + paddd %xmm3,%xmm6 + movl %r8d,%r13d + addl %eax,%r14d + movdqa %xmm6,48(%rsp) + cmpb $0,131(%rbp) + jne .Lssse3_00_47 + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + rorl $6,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + rorl $2,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 4(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + rorl $11,%r14d + xorl %eax,%edi + addl 
%r12d,%r10d + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + rorl $2,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + rorl $6,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + rorl $6,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + rorl $2,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 20(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + rorl $11,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + rorl $2,%r14d + addl %ecx,%r10d + addl 
%r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + rorl $11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + xorl %ecx,%edi + addl %r12d,%eax + rorl $6,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + rorl $14,%r13d + movl %r14d,%eax + movl %r9d,%r12d + rorl $9,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + rorl $5,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + rorl $11,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + rorl $6,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + rorl $2,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + rorl $14,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + rorl $9,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + rorl $5,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 36(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + rorl $11,%r14d + xorl %eax,%edi + addl %r12d,%r10d + rorl $6,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + rorl $2,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + rorl $14,%r13d + movl %r14d,%r10d + movl %edx,%r12d + rorl $9,%r14d + xorl %ecx,%r13d + xorl 
%r8d,%r12d + rorl $5,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + rorl $11,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + rorl $6,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + rorl $2,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + rorl $14,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + rorl $9,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + rorl $5,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + rorl $11,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + rorl $6,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + rorl $2,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + rorl $14,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + rorl $9,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + rorl $5,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + rorl $11,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + rorl $6,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + rorl $2,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + rorl $14,%r13d + movl %r14d,%edx + movl %eax,%r12d + rorl $9,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + rorl $5,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 52(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + rorl $11,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + rorl $6,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + rorl $2,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + rorl $14,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + rorl $9,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + rorl $5,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + rorl 
$11,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + rorl $6,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + rorl $2,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + rorl $14,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + rorl $9,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + rorl $5,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + rorl $11,%r14d + xorl %ecx,%edi + addl %r12d,%eax + rorl $6,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + rorl $2,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + movq 64+0(%rsp),%rdi + movl %r14d,%eax + + addl 0(%rdi),%eax + leaq 64(%rsi),%rsi + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb .Lloop_ssse3 + + movq 88(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue_ssse3: + RET +.cfi_endproc +SET_SIZE(zfs_sha256_transform_ssse3) + +ENTRY_ALIGN(zfs_sha256_transform_avx, 64) +.cfi_startproc + ENDBR + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + shlq $4,%rdx + subq $96,%rsp + leaq (%rsi,%rdx,4),%rdx + andq $-64,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq 
%rdx,64+16(%rsp) + movq %rax,88(%rsp) +.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08 +.Lprologue_avx: + + vzeroupper + movl 0(%rdi),%eax + movl 4(%rdi),%ebx + movl 8(%rdi),%ecx + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + vmovdqa K256+512+32(%rip),%xmm8 + vmovdqa K256+512+64(%rip),%xmm9 + jmp .Lloop_avx +.balign 16 +.Lloop_avx: + vmovdqa K256+512(%rip),%xmm7 + vmovdqu 0(%rsi),%xmm0 + vmovdqu 16(%rsi),%xmm1 + vmovdqu 32(%rsi),%xmm2 + vmovdqu 48(%rsi),%xmm3 + vpshufb %xmm7,%xmm0,%xmm0 + leaq K256(%rip),%rbp + vpshufb %xmm7,%xmm1,%xmm1 + vpshufb %xmm7,%xmm2,%xmm2 + vpaddd 0(%rbp),%xmm0,%xmm4 + vpshufb %xmm7,%xmm3,%xmm3 + vpaddd 32(%rbp),%xmm1,%xmm5 + vpaddd 64(%rbp),%xmm2,%xmm6 + vpaddd 96(%rbp),%xmm3,%xmm7 + vmovdqa %xmm4,0(%rsp) + movl %eax,%r14d + vmovdqa %xmm5,16(%rsp) + movl %ebx,%edi + vmovdqa %xmm6,32(%rsp) + xorl %ecx,%edi + vmovdqa %xmm7,48(%rsp) + movl %r8d,%r13d + jmp .Lavx_00_47 + +.balign 16 +.Lavx_00_47: + subq $-128,%rbp + vpalignr $4,%xmm0,%xmm1,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + vpalignr $4,%xmm2,%xmm3,%xmm7 + shrdl $9,%r14d,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vpaddd %xmm7,%xmm0,%xmm0 + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + vpsrld $3,%xmm4,%xmm7 + xorl %r10d,%r12d + shrdl $11,%r14d,%r14d + xorl %ebx,%r15d + vpslld $14,%xmm4,%xmm5 + addl %r12d,%r11d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + vpshufd $250,%xmm3,%xmm7 + shrdl $2,%r14d,%r14d + addl %r11d,%edx + addl %edi,%r11d + vpsrld $11,%xmm6,%xmm6 + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%r11d + movl %r8d,%r12d + shrdl $9,%r14d,%r14d + vpslld $11,%xmm5,%xmm5 + xorl %edx,%r13d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl 
%r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 4(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + vpxor %xmm5,%xmm4,%xmm4 + shrdl $11,%r14d,%r14d + xorl %eax,%edi + addl %r12d,%r10d + vpsrlq $17,%xmm7,%xmm7 + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + vpaddd %xmm4,%xmm0,%xmm0 + addl %r13d,%r10d + xorl %eax,%r15d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + vpxor %xmm7,%xmm6,%xmm6 + movl %edx,%r12d + shrdl $9,%r14d,%r14d + xorl %ecx,%r13d + vpshufb %xmm8,%xmm6,%xmm6 + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + vpaddd %xmm6,%xmm0,%xmm0 + andl %ecx,%r12d + xorl %ecx,%r13d + addl 8(%rsp),%r9d + vpshufd $80,%xmm0,%xmm7 + movl %r10d,%r15d + xorl %r8d,%r12d + shrdl $11,%r14d,%r14d + vpsrld $10,%xmm7,%xmm6 + xorl %r11d,%r15d + addl %r12d,%r9d + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + vpxor %xmm7,%xmm6,%xmm6 + xorl %r11d,%edi + shrdl $2,%r14d,%r14d + addl %r9d,%ebx + vpsrlq $2,%xmm7,%xmm7 + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + vpshufb %xmm9,%xmm6,%xmm6 + shrdl $9,%r14d,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + vpaddd %xmm6,%xmm0,%xmm0 + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vpaddd 0(%rbp),%xmm0,%xmm6 + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + shrdl $11,%r14d,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + shrdl $2,%r14d,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + vmovdqa %xmm6,0(%rsp) + vpalignr $4,%xmm1,%xmm2,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + vpalignr $4,%xmm3,%xmm0,%xmm7 + shrdl 
$9,%r14d,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vpaddd %xmm7,%xmm1,%xmm1 + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + vpsrld $3,%xmm4,%xmm7 + xorl %ecx,%r12d + shrdl $11,%r14d,%r14d + xorl %r9d,%r15d + vpslld $14,%xmm4,%xmm5 + addl %r12d,%edx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + vpxor %xmm6,%xmm7,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + vpshufd $250,%xmm0,%xmm7 + shrdl $2,%r14d,%r14d + addl %edx,%r11d + addl %edi,%edx + vpsrld $11,%xmm6,%xmm6 + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%edx + movl %eax,%r12d + shrdl $9,%r14d,%r14d + vpslld $11,%xmm5,%xmm5 + xorl %r11d,%r13d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 20(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + vpxor %xmm5,%xmm4,%xmm4 + shrdl $11,%r14d,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + vpsrlq $17,%xmm7,%xmm7 + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %edx,%r14d + vpaddd %xmm4,%xmm1,%xmm1 + addl %r13d,%ecx + xorl %r8d,%r15d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + vpxor %xmm7,%xmm6,%xmm6 + movl %r11d,%r12d + shrdl $9,%r14d,%r14d + xorl %r10d,%r13d + vpshufb %xmm8,%xmm6,%xmm6 + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + vpaddd %xmm6,%xmm1,%xmm1 + andl %r10d,%r12d + xorl %r10d,%r13d + addl 24(%rsp),%ebx + vpshufd $80,%xmm1,%xmm7 + movl %ecx,%r15d + xorl %eax,%r12d + shrdl $11,%r14d,%r14d + vpsrld $10,%xmm7,%xmm6 + xorl %edx,%r15d + addl %r12d,%ebx + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + vpxor %xmm7,%xmm6,%xmm6 + xorl %edx,%edi + shrdl $2,%r14d,%r14d + addl %ebx,%r9d + vpsrlq 
$2,%xmm7,%xmm7 + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + vpshufb %xmm9,%xmm6,%xmm6 + shrdl $9,%r14d,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + vpaddd %xmm6,%xmm1,%xmm1 + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + vpaddd 32(%rbp),%xmm1,%xmm6 + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + shrdl $11,%r14d,%r14d + xorl %ecx,%edi + addl %r12d,%eax + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + shrdl $2,%r14d,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + vmovdqa %xmm6,16(%rsp) + vpalignr $4,%xmm2,%xmm3,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + vpalignr $4,%xmm0,%xmm1,%xmm7 + shrdl $9,%r14d,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + vpaddd %xmm7,%xmm2,%xmm2 + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + vpsrld $3,%xmm4,%xmm7 + xorl %r10d,%r12d + shrdl $11,%r14d,%r14d + xorl %ebx,%r15d + vpslld $14,%xmm4,%xmm5 + addl %r12d,%r11d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + vpxor %xmm6,%xmm7,%xmm4 + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + vpshufd $250,%xmm1,%xmm7 + shrdl $2,%r14d,%r14d + addl %r11d,%edx + addl %edi,%r11d + vpsrld $11,%xmm6,%xmm6 + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%r11d + movl %r8d,%r12d + shrdl $9,%r14d,%r14d + vpslld $11,%xmm5,%xmm5 + xorl %edx,%r13d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 36(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + vpxor %xmm5,%xmm4,%xmm4 + shrdl $11,%r14d,%r14d + xorl %eax,%edi + addl %r12d,%r10d + vpsrlq $17,%xmm7,%xmm7 + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + vpaddd 
%xmm4,%xmm2,%xmm2 + addl %r13d,%r10d + xorl %eax,%r15d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + vpxor %xmm7,%xmm6,%xmm6 + movl %edx,%r12d + shrdl $9,%r14d,%r14d + xorl %ecx,%r13d + vpshufb %xmm8,%xmm6,%xmm6 + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + vpaddd %xmm6,%xmm2,%xmm2 + andl %ecx,%r12d + xorl %ecx,%r13d + addl 40(%rsp),%r9d + vpshufd $80,%xmm2,%xmm7 + movl %r10d,%r15d + xorl %r8d,%r12d + shrdl $11,%r14d,%r14d + vpsrld $10,%xmm7,%xmm6 + xorl %r11d,%r15d + addl %r12d,%r9d + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + vpxor %xmm7,%xmm6,%xmm6 + xorl %r11d,%edi + shrdl $2,%r14d,%r14d + addl %r9d,%ebx + vpsrlq $2,%xmm7,%xmm7 + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + vpshufb %xmm9,%xmm6,%xmm6 + shrdl $9,%r14d,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + vpaddd %xmm6,%xmm2,%xmm2 + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + vpaddd 64(%rbp),%xmm2,%xmm6 + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + shrdl $11,%r14d,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + shrdl $2,%r14d,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + vmovdqa %xmm6,32(%rsp) + vpalignr $4,%xmm3,%xmm0,%xmm4 + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + vpalignr $4,%xmm1,%xmm2,%xmm7 + shrdl $9,%r14d,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + vpsrld $7,%xmm4,%xmm6 + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + vpaddd %xmm7,%xmm3,%xmm3 + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + vpsrld $3,%xmm4,%xmm7 + xorl %ecx,%r12d + shrdl $11,%r14d,%r14d + xorl %r9d,%r15d + vpslld 
$14,%xmm4,%xmm5 + addl %r12d,%edx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + vpxor %xmm6,%xmm7,%xmm4 + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + vpshufd $250,%xmm2,%xmm7 + shrdl $2,%r14d,%r14d + addl %edx,%r11d + addl %edi,%edx + vpsrld $11,%xmm6,%xmm6 + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + vpxor %xmm5,%xmm4,%xmm4 + movl %r14d,%edx + movl %eax,%r12d + shrdl $9,%r14d,%r14d + vpslld $11,%xmm5,%xmm5 + xorl %r11d,%r13d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + vpxor %xmm6,%xmm4,%xmm4 + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + vpsrld $10,%xmm7,%xmm6 + addl 52(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + vpxor %xmm5,%xmm4,%xmm4 + shrdl $11,%r14d,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + vpsrlq $17,%xmm7,%xmm7 + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %edx,%r14d + vpaddd %xmm4,%xmm3,%xmm3 + addl %r13d,%ecx + xorl %r8d,%r15d + shrdl $2,%r14d,%r14d + vpxor %xmm7,%xmm6,%xmm6 + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + vpsrlq $2,%xmm7,%xmm7 + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + vpxor %xmm7,%xmm6,%xmm6 + movl %r11d,%r12d + shrdl $9,%r14d,%r14d + xorl %r10d,%r13d + vpshufb %xmm8,%xmm6,%xmm6 + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + vpaddd %xmm6,%xmm3,%xmm3 + andl %r10d,%r12d + xorl %r10d,%r13d + addl 56(%rsp),%ebx + vpshufd $80,%xmm3,%xmm7 + movl %ecx,%r15d + xorl %eax,%r12d + shrdl $11,%r14d,%r14d + vpsrld $10,%xmm7,%xmm6 + xorl %edx,%r15d + addl %r12d,%ebx + shrdl $6,%r13d,%r13d + vpsrlq $17,%xmm7,%xmm7 + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + vpxor %xmm7,%xmm6,%xmm6 + xorl %edx,%edi + shrdl $2,%r14d,%r14d + addl %ebx,%r9d + vpsrlq $2,%xmm7,%xmm7 + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + vpxor %xmm7,%xmm6,%xmm6 + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + vpshufb %xmm9,%xmm6,%xmm6 + shrdl $9,%r14d,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + vpaddd %xmm6,%xmm3,%xmm3 + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl 
%r9d,%r12d + vpaddd 96(%rbp),%xmm3,%xmm6 + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + shrdl $11,%r14d,%r14d + xorl %ecx,%edi + addl %r12d,%eax + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + shrdl $2,%r14d,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + vmovdqa %xmm6,48(%rsp) + cmpb $0,131(%rbp) + jne .Lavx_00_47 + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + shrdl $9,%r14d,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 0(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + shrdl $11,%r14d,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + shrdl $2,%r14d,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + shrdl $9,%r14d,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 4(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + shrdl $11,%r14d,%r14d + xorl %eax,%edi + addl %r12d,%r10d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + shrdl $2,%r14d,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + movl %edx,%r12d + shrdl $9,%r14d,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 8(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + shrdl $11,%r14d,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + shrdl $2,%r14d,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl 
%ecx,%r12d + shrdl $9,%r14d,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 12(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + shrdl $11,%r14d,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + shrdl $2,%r14d,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + shrdl $9,%r14d,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 16(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + shrdl $11,%r14d,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + shrdl $2,%r14d,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%edx + movl %eax,%r12d + shrdl $9,%r14d,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 20(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + shrdl $11,%r14d,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + shrdl $2,%r14d,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + shrdl $9,%r14d,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 24(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + shrdl $11,%r14d,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + shrdl $2,%r14d,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl %ebx,%r14d + shrdl $14,%r13d,%r13d + movl 
%r14d,%ebx + movl %r10d,%r12d + shrdl $9,%r14d,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 28(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + shrdl $11,%r14d,%r14d + xorl %ecx,%edi + addl %r12d,%eax + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + shrdl $2,%r14d,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%eax + movl %r9d,%r12d + shrdl $9,%r14d,%r14d + xorl %r8d,%r13d + xorl %r10d,%r12d + shrdl $5,%r13d,%r13d + xorl %eax,%r14d + andl %r8d,%r12d + xorl %r8d,%r13d + addl 32(%rsp),%r11d + movl %eax,%r15d + xorl %r10d,%r12d + shrdl $11,%r14d,%r14d + xorl %ebx,%r15d + addl %r12d,%r11d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %eax,%r14d + addl %r13d,%r11d + xorl %ebx,%edi + shrdl $2,%r14d,%r14d + addl %r11d,%edx + addl %edi,%r11d + movl %edx,%r13d + addl %r11d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r11d + movl %r8d,%r12d + shrdl $9,%r14d,%r14d + xorl %edx,%r13d + xorl %r9d,%r12d + shrdl $5,%r13d,%r13d + xorl %r11d,%r14d + andl %edx,%r12d + xorl %edx,%r13d + addl 36(%rsp),%r10d + movl %r11d,%edi + xorl %r9d,%r12d + shrdl $11,%r14d,%r14d + xorl %eax,%edi + addl %r12d,%r10d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r11d,%r14d + addl %r13d,%r10d + xorl %eax,%r15d + shrdl $2,%r14d,%r14d + addl %r10d,%ecx + addl %r15d,%r10d + movl %ecx,%r13d + addl %r10d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r10d + movl %edx,%r12d + shrdl $9,%r14d,%r14d + xorl %ecx,%r13d + xorl %r8d,%r12d + shrdl $5,%r13d,%r13d + xorl %r10d,%r14d + andl %ecx,%r12d + xorl %ecx,%r13d + addl 40(%rsp),%r9d + movl %r10d,%r15d + xorl %r8d,%r12d + shrdl $11,%r14d,%r14d + xorl %r11d,%r15d + addl %r12d,%r9d + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %r10d,%r14d + addl %r13d,%r9d + xorl %r11d,%edi + shrdl $2,%r14d,%r14d + addl %r9d,%ebx + addl %edi,%r9d + movl %ebx,%r13d + addl %r9d,%r14d + 
shrdl $14,%r13d,%r13d + movl %r14d,%r9d + movl %ecx,%r12d + shrdl $9,%r14d,%r14d + xorl %ebx,%r13d + xorl %edx,%r12d + shrdl $5,%r13d,%r13d + xorl %r9d,%r14d + andl %ebx,%r12d + xorl %ebx,%r13d + addl 44(%rsp),%r8d + movl %r9d,%edi + xorl %edx,%r12d + shrdl $11,%r14d,%r14d + xorl %r10d,%edi + addl %r12d,%r8d + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %r9d,%r14d + addl %r13d,%r8d + xorl %r10d,%r15d + shrdl $2,%r14d,%r14d + addl %r8d,%eax + addl %r15d,%r8d + movl %eax,%r13d + addl %r8d,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%r8d + movl %ebx,%r12d + shrdl $9,%r14d,%r14d + xorl %eax,%r13d + xorl %ecx,%r12d + shrdl $5,%r13d,%r13d + xorl %r8d,%r14d + andl %eax,%r12d + xorl %eax,%r13d + addl 48(%rsp),%edx + movl %r8d,%r15d + xorl %ecx,%r12d + shrdl $11,%r14d,%r14d + xorl %r9d,%r15d + addl %r12d,%edx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %r8d,%r14d + addl %r13d,%edx + xorl %r9d,%edi + shrdl $2,%r14d,%r14d + addl %edx,%r11d + addl %edi,%edx + movl %r11d,%r13d + addl %edx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%edx + movl %eax,%r12d + shrdl $9,%r14d,%r14d + xorl %r11d,%r13d + xorl %ebx,%r12d + shrdl $5,%r13d,%r13d + xorl %edx,%r14d + andl %r11d,%r12d + xorl %r11d,%r13d + addl 52(%rsp),%ecx + movl %edx,%edi + xorl %ebx,%r12d + shrdl $11,%r14d,%r14d + xorl %r8d,%edi + addl %r12d,%ecx + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %edx,%r14d + addl %r13d,%ecx + xorl %r8d,%r15d + shrdl $2,%r14d,%r14d + addl %ecx,%r10d + addl %r15d,%ecx + movl %r10d,%r13d + addl %ecx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ecx + movl %r11d,%r12d + shrdl $9,%r14d,%r14d + xorl %r10d,%r13d + xorl %eax,%r12d + shrdl $5,%r13d,%r13d + xorl %ecx,%r14d + andl %r10d,%r12d + xorl %r10d,%r13d + addl 56(%rsp),%ebx + movl %ecx,%r15d + xorl %eax,%r12d + shrdl $11,%r14d,%r14d + xorl %edx,%r15d + addl %r12d,%ebx + shrdl $6,%r13d,%r13d + andl %r15d,%edi + xorl %ecx,%r14d + addl %r13d,%ebx + xorl %edx,%edi + shrdl $2,%r14d,%r14d + addl %ebx,%r9d + addl %edi,%ebx + movl %r9d,%r13d + addl 
%ebx,%r14d + shrdl $14,%r13d,%r13d + movl %r14d,%ebx + movl %r10d,%r12d + shrdl $9,%r14d,%r14d + xorl %r9d,%r13d + xorl %r11d,%r12d + shrdl $5,%r13d,%r13d + xorl %ebx,%r14d + andl %r9d,%r12d + xorl %r9d,%r13d + addl 60(%rsp),%eax + movl %ebx,%edi + xorl %r11d,%r12d + shrdl $11,%r14d,%r14d + xorl %ecx,%edi + addl %r12d,%eax + shrdl $6,%r13d,%r13d + andl %edi,%r15d + xorl %ebx,%r14d + addl %r13d,%eax + xorl %ecx,%r15d + shrdl $2,%r14d,%r14d + addl %eax,%r8d + addl %r15d,%eax + movl %r8d,%r13d + addl %eax,%r14d + movq 64+0(%rsp),%rdi + movl %r14d,%eax + + addl 0(%rdi),%eax + leaq 64(%rsi),%rsi + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + jb .Lloop_avx + + movq 88(%rsp),%rsi +.cfi_def_cfa %rsi,8 + vzeroupper + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue_avx: + RET +.cfi_endproc +SET_SIZE(zfs_sha256_transform_avx) + +ENTRY_ALIGN(zfs_sha256_transform_avx2, 64) +.cfi_startproc + ENDBR + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + subq $544,%rsp + shlq $4,%rdx + andq $-1024,%rsp + leaq (%rsi,%rdx,4),%rdx + addq $448,%rsp + movq %rdi,64+0(%rsp) + movq %rsi,64+8(%rsp) + movq %rdx,64+16(%rsp) + movq %rax,88(%rsp) +.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08 +.Lprologue_avx2: + + vzeroupper + subq $-64,%rsi + movl 
0(%rdi),%eax + movq %rsi,%r12 + movl 4(%rdi),%ebx + cmpq %rdx,%rsi + movl 8(%rdi),%ecx + cmoveq %rsp,%r12 + movl 12(%rdi),%edx + movl 16(%rdi),%r8d + movl 20(%rdi),%r9d + movl 24(%rdi),%r10d + movl 28(%rdi),%r11d + vmovdqa K256+512+32(%rip),%ymm8 + vmovdqa K256+512+64(%rip),%ymm9 + jmp .Loop_avx2 +.balign 16 +.Loop_avx2: + vmovdqa K256+512(%rip),%ymm7 + vmovdqu -64+0(%rsi),%xmm0 + vmovdqu -64+16(%rsi),%xmm1 + vmovdqu -64+32(%rsi),%xmm2 + vmovdqu -64+48(%rsi),%xmm3 + + vinserti128 $1,(%r12),%ymm0,%ymm0 + vinserti128 $1,16(%r12),%ymm1,%ymm1 + vpshufb %ymm7,%ymm0,%ymm0 + vinserti128 $1,32(%r12),%ymm2,%ymm2 + vpshufb %ymm7,%ymm1,%ymm1 + vinserti128 $1,48(%r12),%ymm3,%ymm3 + + leaq K256(%rip),%rbp + vpshufb %ymm7,%ymm2,%ymm2 + vpaddd 0(%rbp),%ymm0,%ymm4 + vpshufb %ymm7,%ymm3,%ymm3 + vpaddd 32(%rbp),%ymm1,%ymm5 + vpaddd 64(%rbp),%ymm2,%ymm6 + vpaddd 96(%rbp),%ymm3,%ymm7 + vmovdqa %ymm4,0(%rsp) + xorl %r14d,%r14d + vmovdqa %ymm5,32(%rsp) + + movq 88(%rsp),%rdi +.cfi_def_cfa %rdi,8 + leaq -64(%rsp),%rsp + + + + movq %rdi,-8(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x78,0x06,0x23,0x08 + movl %ebx,%edi + vmovdqa %ymm6,0(%rsp) + xorl %ecx,%edi + vmovdqa %ymm7,32(%rsp) + movl %r9d,%r12d + subq $-32*4,%rbp + jmp .Lavx2_00_47 + +.balign 16 +.Lavx2_00_47: + leaq -64(%rsp),%rsp +.cfi_escape 0x0f,0x05,0x77,0x38,0x06,0x23,0x08 + + pushq 64-8(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x00,0x06,0x23,0x08 + leaq 8(%rsp),%rsp +.cfi_escape 0x0f,0x05,0x77,0x78,0x06,0x23,0x08 + vpalignr $4,%ymm0,%ymm1,%ymm4 + addl 0+128(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + vpalignr $4,%ymm2,%ymm3,%ymm7 + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + vpsrld $7,%ymm4,%ymm6 + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + vpaddd %ymm7,%ymm0,%ymm0 + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%eax,%r14d + rorxl 
$2,%eax,%r13d + leal (%rdx,%r11,1),%edx + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %ebx,%edi + vpshufd $250,%ymm3,%ymm7 + xorl %r13d,%r14d + leal (%r11,%rdi,1),%r11d + movl %r8d,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 4+128(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%edx,%edi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + vpslld $11,%ymm5,%ymm5 + andnl %r9d,%edx,%r12d + xorl %edi,%r13d + rorxl $6,%edx,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%edi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%edi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + vpsrlq $17,%ymm7,%ymm7 + andl %edi,%r15d + xorl %r12d,%r14d + xorl %eax,%r15d + vpaddd %ymm4,%ymm0,%ymm0 + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 8+128(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + vpxor %ymm7,%ymm6,%ymm6 + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + vpshufb %ymm8,%ymm6,%ymm6 + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + vpaddd %ymm6,%ymm0,%ymm0 + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + vpshufd $80,%ymm0,%ymm7 + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + vpsrld $10,%ymm7,%ymm6 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r11d,%edi + vpsrlq $17,%ymm7,%ymm7 + xorl %r13d,%r14d + leal (%r9,%rdi,1),%r9d + movl %ecx,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 12+128(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%ebx,%edi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + vpxor %ymm7,%ymm6,%ymm6 + andnl %edx,%ebx,%r12d + xorl %edi,%r13d + rorxl $6,%ebx,%r14d + vpshufb %ymm9,%ymm6,%ymm6 + leal 
(%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%edi + vpaddd %ymm6,%ymm0,%ymm0 + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%edi + vpaddd 0(%rbp),%ymm0,%ymm6 + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + vmovdqa %ymm6,0(%rsp) + vpalignr $4,%ymm1,%ymm2,%ymm4 + addl 32+128(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + vpalignr $4,%ymm3,%ymm0,%ymm7 + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + vpsrld $7,%ymm4,%ymm6 + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + vpaddd %ymm7,%ymm1,%ymm1 + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r9d,%edi + vpshufd $250,%ymm0,%ymm7 + xorl %r13d,%r14d + leal (%rdx,%rdi,1),%edx + movl %eax,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 36+128(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%r11d,%edi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + vpslld $11,%ymm5,%ymm5 + andnl %ebx,%r11d,%r12d + xorl %edi,%r13d + rorxl $6,%r11d,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%edi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%edi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + vpsrlq $17,%ymm7,%ymm7 + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r8d,%r15d + vpaddd %ymm4,%ymm1,%ymm1 + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 40+128(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl 
$11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + vpxor %ymm7,%ymm6,%ymm6 + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + vpshufb %ymm8,%ymm6,%ymm6 + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + vpaddd %ymm6,%ymm1,%ymm1 + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + vpshufd $80,%ymm1,%ymm7 + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + vpsrld $10,%ymm7,%ymm6 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %edx,%edi + vpsrlq $17,%ymm7,%ymm7 + xorl %r13d,%r14d + leal (%rbx,%rdi,1),%ebx + movl %r10d,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 44+128(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%r9d,%edi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + vpxor %ymm7,%ymm6,%ymm6 + andnl %r11d,%r9d,%r12d + xorl %edi,%r13d + rorxl $6,%r9d,%r14d + vpshufb %ymm9,%ymm6,%ymm6 + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%edi + vpaddd %ymm6,%ymm1,%ymm1 + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%edi + vpaddd 32(%rbp),%ymm1,%ymm6 + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + vmovdqa %ymm6,32(%rsp) + leaq -64(%rsp),%rsp +.cfi_escape 0x0f,0x05,0x77,0x38,0x06,0x23,0x08 + + pushq 64-8(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x00,0x06,0x23,0x08 + leaq 8(%rsp),%rsp +.cfi_escape 0x0f,0x05,0x77,0x78,0x06,0x23,0x08 + vpalignr $4,%ymm2,%ymm3,%ymm4 + addl 0+128(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + vpalignr $4,%ymm0,%ymm1,%ymm7 + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + vpsrld $7,%ymm4,%ymm6 + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + vpaddd %ymm7,%ymm2,%ymm2 + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl 
%ebx,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %ebx,%edi + vpshufd $250,%ymm1,%ymm7 + xorl %r13d,%r14d + leal (%r11,%rdi,1),%r11d + movl %r8d,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 4+128(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%edx,%edi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + vpslld $11,%ymm5,%ymm5 + andnl %r9d,%edx,%r12d + xorl %edi,%r13d + rorxl $6,%edx,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%edi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%edi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + vpsrlq $17,%ymm7,%ymm7 + andl %edi,%r15d + xorl %r12d,%r14d + xorl %eax,%r15d + vpaddd %ymm4,%ymm2,%ymm2 + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 8+128(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + vpxor %ymm7,%ymm6,%ymm6 + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + vpshufb %ymm8,%ymm6,%ymm6 + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + vpaddd %ymm6,%ymm2,%ymm2 + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + vpshufd $80,%ymm2,%ymm7 + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + vpsrld $10,%ymm7,%ymm6 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r11d,%edi + vpsrlq $17,%ymm7,%ymm7 + xorl %r13d,%r14d + leal (%r9,%rdi,1),%r9d + movl %ecx,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 12+128(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%ebx,%edi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + vpxor %ymm7,%ymm6,%ymm6 + andnl %edx,%ebx,%r12d + xorl 
%edi,%r13d + rorxl $6,%ebx,%r14d + vpshufb %ymm9,%ymm6,%ymm6 + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%edi + vpaddd %ymm6,%ymm2,%ymm2 + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%edi + vpaddd 64(%rbp),%ymm2,%ymm6 + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + vmovdqa %ymm6,0(%rsp) + vpalignr $4,%ymm3,%ymm0,%ymm4 + addl 32+128(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + vpalignr $4,%ymm1,%ymm2,%ymm7 + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + vpsrld $7,%ymm4,%ymm6 + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + vpaddd %ymm7,%ymm3,%ymm3 + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + vpsrld $3,%ymm4,%ymm7 + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + vpslld $14,%ymm4,%ymm5 + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + vpxor %ymm6,%ymm7,%ymm4 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r9d,%edi + vpshufd $250,%ymm2,%ymm7 + xorl %r13d,%r14d + leal (%rdx,%rdi,1),%edx + movl %eax,%r12d + vpsrld $11,%ymm6,%ymm6 + addl 36+128(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + vpxor %ymm5,%ymm4,%ymm4 + rorxl $11,%r11d,%edi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + vpslld $11,%ymm5,%ymm5 + andnl %ebx,%r11d,%r12d + xorl %edi,%r13d + rorxl $6,%r11d,%r14d + vpxor %ymm6,%ymm4,%ymm4 + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%edi + vpsrld $10,%ymm7,%ymm6 + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%edi + vpxor %ymm5,%ymm4,%ymm4 + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + vpsrlq $17,%ymm7,%ymm7 + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r8d,%r15d + vpaddd %ymm4,%ymm3,%ymm3 + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 40+128(%rsp),%ebx + andl 
%r10d,%r12d + rorxl $25,%r10d,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + vpxor %ymm7,%ymm6,%ymm6 + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + vpshufb %ymm8,%ymm6,%ymm6 + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + vpaddd %ymm6,%ymm3,%ymm3 + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + vpshufd $80,%ymm3,%ymm7 + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + vpsrld $10,%ymm7,%ymm6 + andl %r15d,%edi + xorl %r12d,%r14d + xorl %edx,%edi + vpsrlq $17,%ymm7,%ymm7 + xorl %r13d,%r14d + leal (%rbx,%rdi,1),%ebx + movl %r10d,%r12d + vpxor %ymm7,%ymm6,%ymm6 + addl 44+128(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + vpsrlq $2,%ymm7,%ymm7 + rorxl $11,%r9d,%edi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + vpxor %ymm7,%ymm6,%ymm6 + andnl %r11d,%r9d,%r12d + xorl %edi,%r13d + rorxl $6,%r9d,%r14d + vpshufb %ymm9,%ymm6,%ymm6 + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%edi + vpaddd %ymm6,%ymm3,%ymm3 + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%edi + vpaddd 96(%rbp),%ymm3,%ymm6 + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + vmovdqa %ymm6,32(%rsp) + leaq 128(%rbp),%rbp + cmpb $0,3(%rbp) + jne .Lavx2_00_47 + addl 0+64(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + andl %r15d,%edi + xorl %r12d,%r14d + xorl %ebx,%edi + xorl %r13d,%r14d + leal (%r11,%rdi,1),%r11d + movl %r8d,%r12d + addl 4+64(%rsp),%r10d + 
andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%edi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %edi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%edi + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%edi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %edi,%r15d + xorl %r12d,%r14d + xorl %eax,%r15d + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8+64(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r11d,%edi + xorl %r13d,%r14d + leal (%r9,%rdi,1),%r9d + movl %ecx,%r12d + addl 12+64(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + rorxl $11,%ebx,%edi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %edi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%edi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%edi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 32+64(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%edi + xorl %r12d,%r14d + xorl 
%r9d,%edi + xorl %r13d,%r14d + leal (%rdx,%rdi,1),%edx + movl %eax,%r12d + addl 36+64(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + rorxl $11,%r11d,%edi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %edi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%edi + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%edi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40+64(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%edi + xorl %r12d,%r14d + xorl %edx,%edi + xorl %r13d,%r14d + leal (%rbx,%rdi,1),%ebx + movl %r10d,%r12d + addl 44+64(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%edi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + andnl %r11d,%r9d,%r12d + xorl %edi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%edi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%edi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + addl 0(%rsp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl 
$13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + andl %r15d,%edi + xorl %r12d,%r14d + xorl %ebx,%edi + xorl %r13d,%r14d + leal (%r11,%rdi,1),%r11d + movl %r8d,%r12d + addl 4(%rsp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%edi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %edi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%edi + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%edi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %edi,%r15d + xorl %r12d,%r14d + xorl %eax,%r15d + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8(%rsp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r11d,%edi + xorl %r13d,%r14d + leal (%r9,%rdi,1),%r9d + movl %ecx,%r12d + addl 12(%rsp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + rorxl $11,%ebx,%edi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %edi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%edi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%edi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 32(%rsp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl 
%r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r9d,%edi + xorl %r13d,%r14d + leal (%rdx,%rdi,1),%edx + movl %eax,%r12d + addl 36(%rsp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + rorxl $11,%r11d,%edi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %edi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%edi + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%edi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40(%rsp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%edi + xorl %r12d,%r14d + xorl %edx,%edi + xorl %r13d,%r14d + leal (%rbx,%rdi,1),%ebx + movl %r10d,%r12d + addl 44(%rsp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%edi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + andnl %r11d,%r9d,%r12d + xorl %edi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%edi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%edi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + movq 512(%rsp),%rdi + addl %r14d,%eax + + leaq 448(%rsp),%rbp + + addl 0(%rdi),%eax + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 
16(%rdi),%r8d + addl 20(%rdi),%r9d + addl 24(%rdi),%r10d + addl 28(%rdi),%r11d + + movl %eax,0(%rdi) + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + + cmpq 80(%rbp),%rsi + je .Ldone_avx2 + + xorl %r14d,%r14d + movl %ebx,%edi + xorl %ecx,%edi + movl %r9d,%r12d + jmp .Lower_avx2 +.balign 16 +.Lower_avx2: + addl 0+16(%rbp),%r11d + andl %r8d,%r12d + rorxl $25,%r8d,%r13d + rorxl $11,%r8d,%r15d + leal (%rax,%r14,1),%eax + leal (%r11,%r12,1),%r11d + andnl %r10d,%r8d,%r12d + xorl %r15d,%r13d + rorxl $6,%r8d,%r14d + leal (%r11,%r12,1),%r11d + xorl %r14d,%r13d + movl %eax,%r15d + rorxl $22,%eax,%r12d + leal (%r11,%r13,1),%r11d + xorl %ebx,%r15d + rorxl $13,%eax,%r14d + rorxl $2,%eax,%r13d + leal (%rdx,%r11,1),%edx + andl %r15d,%edi + xorl %r12d,%r14d + xorl %ebx,%edi + xorl %r13d,%r14d + leal (%r11,%rdi,1),%r11d + movl %r8d,%r12d + addl 4+16(%rbp),%r10d + andl %edx,%r12d + rorxl $25,%edx,%r13d + rorxl $11,%edx,%edi + leal (%r11,%r14,1),%r11d + leal (%r10,%r12,1),%r10d + andnl %r9d,%edx,%r12d + xorl %edi,%r13d + rorxl $6,%edx,%r14d + leal (%r10,%r12,1),%r10d + xorl %r14d,%r13d + movl %r11d,%edi + rorxl $22,%r11d,%r12d + leal (%r10,%r13,1),%r10d + xorl %eax,%edi + rorxl $13,%r11d,%r14d + rorxl $2,%r11d,%r13d + leal (%rcx,%r10,1),%ecx + andl %edi,%r15d + xorl %r12d,%r14d + xorl %eax,%r15d + xorl %r13d,%r14d + leal (%r10,%r15,1),%r10d + movl %edx,%r12d + addl 8+16(%rbp),%r9d + andl %ecx,%r12d + rorxl $25,%ecx,%r13d + rorxl $11,%ecx,%r15d + leal (%r10,%r14,1),%r10d + leal (%r9,%r12,1),%r9d + andnl %r8d,%ecx,%r12d + xorl %r15d,%r13d + rorxl $6,%ecx,%r14d + leal (%r9,%r12,1),%r9d + xorl %r14d,%r13d + movl %r10d,%r15d + rorxl $22,%r10d,%r12d + leal (%r9,%r13,1),%r9d + xorl %r11d,%r15d + rorxl $13,%r10d,%r14d + rorxl $2,%r10d,%r13d + leal (%rbx,%r9,1),%ebx + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r11d,%edi + xorl %r13d,%r14d + leal (%r9,%rdi,1),%r9d + movl %ecx,%r12d + addl 
12+16(%rbp),%r8d + andl %ebx,%r12d + rorxl $25,%ebx,%r13d + rorxl $11,%ebx,%edi + leal (%r9,%r14,1),%r9d + leal (%r8,%r12,1),%r8d + andnl %edx,%ebx,%r12d + xorl %edi,%r13d + rorxl $6,%ebx,%r14d + leal (%r8,%r12,1),%r8d + xorl %r14d,%r13d + movl %r9d,%edi + rorxl $22,%r9d,%r12d + leal (%r8,%r13,1),%r8d + xorl %r10d,%edi + rorxl $13,%r9d,%r14d + rorxl $2,%r9d,%r13d + leal (%rax,%r8,1),%eax + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r10d,%r15d + xorl %r13d,%r14d + leal (%r8,%r15,1),%r8d + movl %ebx,%r12d + addl 32+16(%rbp),%edx + andl %eax,%r12d + rorxl $25,%eax,%r13d + rorxl $11,%eax,%r15d + leal (%r8,%r14,1),%r8d + leal (%rdx,%r12,1),%edx + andnl %ecx,%eax,%r12d + xorl %r15d,%r13d + rorxl $6,%eax,%r14d + leal (%rdx,%r12,1),%edx + xorl %r14d,%r13d + movl %r8d,%r15d + rorxl $22,%r8d,%r12d + leal (%rdx,%r13,1),%edx + xorl %r9d,%r15d + rorxl $13,%r8d,%r14d + rorxl $2,%r8d,%r13d + leal (%r11,%rdx,1),%r11d + andl %r15d,%edi + xorl %r12d,%r14d + xorl %r9d,%edi + xorl %r13d,%r14d + leal (%rdx,%rdi,1),%edx + movl %eax,%r12d + addl 36+16(%rbp),%ecx + andl %r11d,%r12d + rorxl $25,%r11d,%r13d + rorxl $11,%r11d,%edi + leal (%rdx,%r14,1),%edx + leal (%rcx,%r12,1),%ecx + andnl %ebx,%r11d,%r12d + xorl %edi,%r13d + rorxl $6,%r11d,%r14d + leal (%rcx,%r12,1),%ecx + xorl %r14d,%r13d + movl %edx,%edi + rorxl $22,%edx,%r12d + leal (%rcx,%r13,1),%ecx + xorl %r8d,%edi + rorxl $13,%edx,%r14d + rorxl $2,%edx,%r13d + leal (%r10,%rcx,1),%r10d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %r8d,%r15d + xorl %r13d,%r14d + leal (%rcx,%r15,1),%ecx + movl %r11d,%r12d + addl 40+16(%rbp),%ebx + andl %r10d,%r12d + rorxl $25,%r10d,%r13d + rorxl $11,%r10d,%r15d + leal (%rcx,%r14,1),%ecx + leal (%rbx,%r12,1),%ebx + andnl %eax,%r10d,%r12d + xorl %r15d,%r13d + rorxl $6,%r10d,%r14d + leal (%rbx,%r12,1),%ebx + xorl %r14d,%r13d + movl %ecx,%r15d + rorxl $22,%ecx,%r12d + leal (%rbx,%r13,1),%ebx + xorl %edx,%r15d + rorxl $13,%ecx,%r14d + rorxl $2,%ecx,%r13d + leal (%r9,%rbx,1),%r9d + andl %r15d,%edi + xorl 
%r12d,%r14d + xorl %edx,%edi + xorl %r13d,%r14d + leal (%rbx,%rdi,1),%ebx + movl %r10d,%r12d + addl 44+16(%rbp),%eax + andl %r9d,%r12d + rorxl $25,%r9d,%r13d + rorxl $11,%r9d,%edi + leal (%rbx,%r14,1),%ebx + leal (%rax,%r12,1),%eax + andnl %r11d,%r9d,%r12d + xorl %edi,%r13d + rorxl $6,%r9d,%r14d + leal (%rax,%r12,1),%eax + xorl %r14d,%r13d + movl %ebx,%edi + rorxl $22,%ebx,%r12d + leal (%rax,%r13,1),%eax + xorl %ecx,%edi + rorxl $13,%ebx,%r14d + rorxl $2,%ebx,%r13d + leal (%r8,%rax,1),%r8d + andl %edi,%r15d + xorl %r12d,%r14d + xorl %ecx,%r15d + xorl %r13d,%r14d + leal (%rax,%r15,1),%eax + movl %r9d,%r12d + leaq -64(%rbp),%rbp + cmpq %rsp,%rbp + jae .Lower_avx2 + + movq 512(%rsp),%rdi + addl %r14d,%eax + + leaq 448(%rsp),%rsp + +.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x08 + + addl 0(%rdi),%eax + addl 4(%rdi),%ebx + addl 8(%rdi),%ecx + addl 12(%rdi),%edx + addl 16(%rdi),%r8d + addl 20(%rdi),%r9d + leaq 128(%rsi),%rsi + addl 24(%rdi),%r10d + movq %rsi,%r12 + addl 28(%rdi),%r11d + cmpq 64+16(%rsp),%rsi + + movl %eax,0(%rdi) + cmoveq %rsp,%r12 + movl %ebx,4(%rdi) + movl %ecx,8(%rdi) + movl %edx,12(%rdi) + movl %r8d,16(%rdi) + movl %r9d,20(%rdi) + movl %r10d,24(%rdi) + movl %r11d,28(%rdi) + + jbe .Loop_avx2 + leaq (%rsp),%rbp + + +.cfi_escape 0x0f,0x06,0x76,0xd8,0x00,0x06,0x23,0x08 + +.Ldone_avx2: + movq 88(%rbp),%rsi +.cfi_def_cfa %rsi,8 + vzeroupper + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue_avx2: + RET +.cfi_endproc +SET_SIZE(zfs_sha256_transform_avx2) + +#if defined(__ELF__) + .section .note.GNU-stack,"",%progbits +#endif +#endif diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S deleted file mode 100644 index 
28b048d2db24..000000000000 --- a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S +++ /dev/null @@ -1,2089 +0,0 @@ -/* - * ==================================================================== - * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL - * project. Rights for redistribution and usage in source and binary - * forms are granted according to the OpenSSL license. - * ==================================================================== - * - * sha256/512_block procedure for x86_64. - * - * 40% improvement over compiler-generated code on Opteron. On EM64T - * sha256 was observed to run >80% faster and sha512 - >40%. No magical - * tricks, just straight implementation... I really wonder why gcc - * [being armed with inline assembler] fails to generate as fast code. - * The only thing which is cool about this module is that it's very - * same instruction sequence used for both SHA-256 and SHA-512. In - * former case the instructions operate on 32-bit operands, while in - * latter - on 64-bit ones. All I had to do is to get one flavor right, - * the other one passed the test right away:-) - * - * sha256_block runs in ~1005 cycles on Opteron, which gives you - * asymptotic performance of 64*1000/1005=63.7MBps times CPU clock - * frequency in GHz. sha512_block runs in ~1275 cycles, which results - * in 128*1000/1275=100MBps per GHz. Is there room for improvement? - * Well, if you compare it to IA-64 implementation, which maintains - * X[16] in register bank[!], tends to 4 instructions per CPU clock - * cycle and runs in 1003 cycles, 1275 is very good result for 3-way - * issue Opteron pipeline and X[16] maintained in memory. So that *if* - * there is a way to improve it, *then* the only way would be to try to - * offload X[16] updates to SSE unit, but that would require "deeper" - * loop unroll, which in turn would naturally cause size blow-up, not - * to mention increased complexity! 
And once again, only *if* it's - * actually possible to noticeably improve overall ILP, instruction - * level parallelism, on a given CPU implementation in this case. - * - * Special note on Intel EM64T. While Opteron CPU exhibits perfect - * performance ratio of 1.5 between 64- and 32-bit flavors [see above], - * [currently available] EM64T CPUs apparently are far from it. On the - * contrary, 64-bit version, sha512_block, is ~30% *slower* than 32-bit - * sha256_block:-( This is presumably because 64-bit shifts/rotates - * apparently are not atomic instructions, but implemented in microcode. - */ - -/* - * OpenSolaris OS modifications - * - * Sun elects to use this software under the BSD license. - * - * This source originates from OpenSSL file sha512-x86_64.pl at - * ftp://ftp.openssl.org/snapshot/openssl-0.9.8-stable-SNAP-20080131.tar.gz - * (presumably for future OpenSSL release 0.9.8h), with these changes: - * - * 1. Added perl "use strict" and declared variables. - * - * 2. Added OpenSolaris ENTRY_NP/SET_SIZE macros from - * /usr/include/sys/asm_linkage.h, .ident keywords, and lint(1B) guards. - * - * 3. Removed x86_64-xlate.pl script (not needed for as(1) or gas(1) - * assemblers). Replaced the .picmeup macro with assembler code. - * - * 4. Added 8 to $ctx, as OpenSolaris OS has an extra 4-byte field, "algotype", - * at the beginning of SHA2_CTX (the next field is 8-byte aligned). - */ - -/* - * This file was generated by a perl script (sha512-x86_64.pl) that were - * used to generate sha256 and sha512 variants from the same code base. - * The comments from the original file have been pasted above. 
- */ - -#if defined(lint) || defined(__lint) -#include <sys/stdint.h> -#include <sha2/sha2.h> - -/* ARGSUSED */ -void -SHA256TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num) -{ -} - - -#else -#define _ASM -#include <sys/asm_linkage.h> - -ENTRY_NP(SHA256TransformBlocks) -.cfi_startproc - movq %rsp, %rax -.cfi_def_cfa_register %rax - push %rbx -.cfi_offset %rbx,-16 - push %rbp -.cfi_offset %rbp,-24 - push %r12 -.cfi_offset %r12,-32 - push %r13 -.cfi_offset %r13,-40 - push %r14 -.cfi_offset %r14,-48 - push %r15 -.cfi_offset %r15,-56 - mov %rsp,%rbp # copy %rsp - shl $4,%rdx # num*16 - sub $16*4+4*8,%rsp - lea (%rsi,%rdx,4),%rdx # inp+num*16*4 - and $-64,%rsp # align stack frame - add $8,%rdi # Skip OpenSolaris field, "algotype" - mov %rdi,16*4+0*8(%rsp) # save ctx, 1st arg - mov %rsi,16*4+1*8(%rsp) # save inp, 2nd arg - mov %rdx,16*4+2*8(%rsp) # save end pointer, "3rd" arg - mov %rbp,16*4+3*8(%rsp) # save copy of %rsp -# echo ".cfi_cfa_expression %rsp+88,deref,+56" | -# openssl/crypto/perlasm/x86_64-xlate.pl -.cfi_escape 0x0f,0x06,0x77,0xd8,0x00,0x06,0x23,0x38 - - #.picmeup %rbp - # The .picmeup pseudo-directive, from perlasm/x86_64_xlate.pl, puts - # the address of the "next" instruction into the target register - # (%rbp). 
This generates these 2 instructions: - lea .Llea(%rip),%rbp - #nop # .picmeup generates a nop for mod 8 alignment--not needed here - -.Llea: - lea K256-.(%rbp),%rbp - - mov 4*0(%rdi),%eax - mov 4*1(%rdi),%ebx - mov 4*2(%rdi),%ecx - mov 4*3(%rdi),%edx - mov 4*4(%rdi),%r8d - mov 4*5(%rdi),%r9d - mov 4*6(%rdi),%r10d - mov 4*7(%rdi),%r11d - jmp .Lloop - -.align 16 -.Lloop: - xor %rdi,%rdi - mov 4*0(%rsi),%r12d - bswap %r12d - mov %r8d,%r13d - mov %r8d,%r14d - mov %r9d,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %r10d,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %r8d,%r15d # (f^g)&e - mov %r12d,0(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %r11d,%r12d # T1+=h - - mov %eax,%r11d - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %eax,%r13d - mov %eax,%r14d - - ror $2,%r11d - ror $13,%r13d - mov %eax,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%r11d - ror $9,%r13d - or %ecx,%r14d # a|c - - xor %r13d,%r11d # h=Sigma0(a) - and %ecx,%r15d # a&c - add %r12d,%edx # d+=T1 - - and %ebx,%r14d # (a|c)&b - add %r12d,%r11d # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%r11d # h+=Maj(a,b,c) - mov 4*1(%rsi),%r12d - bswap %r12d - mov %edx,%r13d - mov %edx,%r14d - mov %r8d,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %r9d,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %edx,%r15d # (f^g)&e - mov %r12d,4(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %r10d,%r12d # T1+=h - - mov %r11d,%r10d - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %r11d,%r13d - mov %r11d,%r14d - - ror $2,%r10d - ror $13,%r13d - mov %r11d,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%r10d - ror $9,%r13d - or %ebx,%r14d # a|c - - xor %r13d,%r10d # h=Sigma0(a) - and %ebx,%r15d # a&c - add %r12d,%ecx # d+=T1 - - and %eax,%r14d # (a|c)&b - add %r12d,%r10d # h+=T1 - - or %r15d,%r14d # 
Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%r10d # h+=Maj(a,b,c) - mov 4*2(%rsi),%r12d - bswap %r12d - mov %ecx,%r13d - mov %ecx,%r14d - mov %edx,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %r8d,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %ecx,%r15d # (f^g)&e - mov %r12d,8(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %r9d,%r12d # T1+=h - - mov %r10d,%r9d - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %r10d,%r13d - mov %r10d,%r14d - - ror $2,%r9d - ror $13,%r13d - mov %r10d,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%r9d - ror $9,%r13d - or %eax,%r14d # a|c - - xor %r13d,%r9d # h=Sigma0(a) - and %eax,%r15d # a&c - add %r12d,%ebx # d+=T1 - - and %r11d,%r14d # (a|c)&b - add %r12d,%r9d # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%r9d # h+=Maj(a,b,c) - mov 4*3(%rsi),%r12d - bswap %r12d - mov %ebx,%r13d - mov %ebx,%r14d - mov %ecx,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %edx,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %ebx,%r15d # (f^g)&e - mov %r12d,12(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %r8d,%r12d # T1+=h - - mov %r9d,%r8d - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %r9d,%r13d - mov %r9d,%r14d - - ror $2,%r8d - ror $13,%r13d - mov %r9d,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%r8d - ror $9,%r13d - or %r11d,%r14d # a|c - - xor %r13d,%r8d # h=Sigma0(a) - and %r11d,%r15d # a&c - add %r12d,%eax # d+=T1 - - and %r10d,%r14d # (a|c)&b - add %r12d,%r8d # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%r8d # h+=Maj(a,b,c) - mov 4*4(%rsi),%r12d - bswap %r12d - mov %eax,%r13d - mov %eax,%r14d - mov %ebx,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %ecx,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %eax,%r15d # (f^g)&e - mov 
%r12d,16(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %edx,%r12d # T1+=h - - mov %r8d,%edx - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %r8d,%r13d - mov %r8d,%r14d - - ror $2,%edx - ror $13,%r13d - mov %r8d,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%edx - ror $9,%r13d - or %r10d,%r14d # a|c - - xor %r13d,%edx # h=Sigma0(a) - and %r10d,%r15d # a&c - add %r12d,%r11d # d+=T1 - - and %r9d,%r14d # (a|c)&b - add %r12d,%edx # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%edx # h+=Maj(a,b,c) - mov 4*5(%rsi),%r12d - bswap %r12d - mov %r11d,%r13d - mov %r11d,%r14d - mov %eax,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %ebx,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %r11d,%r15d # (f^g)&e - mov %r12d,20(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %ecx,%r12d # T1+=h - - mov %edx,%ecx - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %edx,%r13d - mov %edx,%r14d - - ror $2,%ecx - ror $13,%r13d - mov %edx,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%ecx - ror $9,%r13d - or %r9d,%r14d # a|c - - xor %r13d,%ecx # h=Sigma0(a) - and %r9d,%r15d # a&c - add %r12d,%r10d # d+=T1 - - and %r8d,%r14d # (a|c)&b - add %r12d,%ecx # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%ecx # h+=Maj(a,b,c) - mov 4*6(%rsi),%r12d - bswap %r12d - mov %r10d,%r13d - mov %r10d,%r14d - mov %r11d,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %eax,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %r10d,%r15d # (f^g)&e - mov %r12d,24(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %ebx,%r12d # T1+=h - - mov %ecx,%ebx - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %ecx,%r13d - mov %ecx,%r14d - - ror $2,%ebx - ror $13,%r13d - mov %ecx,%r15d - add 
(%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%ebx - ror $9,%r13d - or %r8d,%r14d # a|c - - xor %r13d,%ebx # h=Sigma0(a) - and %r8d,%r15d # a&c - add %r12d,%r9d # d+=T1 - - and %edx,%r14d # (a|c)&b - add %r12d,%ebx # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%ebx # h+=Maj(a,b,c) - mov 4*7(%rsi),%r12d - bswap %r12d - mov %r9d,%r13d - mov %r9d,%r14d - mov %r10d,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %r11d,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %r9d,%r15d # (f^g)&e - mov %r12d,28(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %eax,%r12d # T1+=h - - mov %ebx,%eax - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %ebx,%r13d - mov %ebx,%r14d - - ror $2,%eax - ror $13,%r13d - mov %ebx,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%eax - ror $9,%r13d - or %edx,%r14d # a|c - - xor %r13d,%eax # h=Sigma0(a) - and %edx,%r15d # a&c - add %r12d,%r8d # d+=T1 - - and %ecx,%r14d # (a|c)&b - add %r12d,%eax # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%eax # h+=Maj(a,b,c) - mov 4*8(%rsi),%r12d - bswap %r12d - mov %r8d,%r13d - mov %r8d,%r14d - mov %r9d,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %r10d,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %r8d,%r15d # (f^g)&e - mov %r12d,32(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %r11d,%r12d # T1+=h - - mov %eax,%r11d - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %eax,%r13d - mov %eax,%r14d - - ror $2,%r11d - ror $13,%r13d - mov %eax,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%r11d - ror $9,%r13d - or %ecx,%r14d # a|c - - xor %r13d,%r11d # h=Sigma0(a) - and %ecx,%r15d # a&c - add %r12d,%edx # d+=T1 - - and %ebx,%r14d # (a|c)&b - add %r12d,%r11d # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - 
add %r14d,%r11d # h+=Maj(a,b,c) - mov 4*9(%rsi),%r12d - bswap %r12d - mov %edx,%r13d - mov %edx,%r14d - mov %r8d,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %r9d,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %edx,%r15d # (f^g)&e - mov %r12d,36(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %r10d,%r12d # T1+=h - - mov %r11d,%r10d - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %r11d,%r13d - mov %r11d,%r14d - - ror $2,%r10d - ror $13,%r13d - mov %r11d,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%r10d - ror $9,%r13d - or %ebx,%r14d # a|c - - xor %r13d,%r10d # h=Sigma0(a) - and %ebx,%r15d # a&c - add %r12d,%ecx # d+=T1 - - and %eax,%r14d # (a|c)&b - add %r12d,%r10d # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%r10d # h+=Maj(a,b,c) - mov 4*10(%rsi),%r12d - bswap %r12d - mov %ecx,%r13d - mov %ecx,%r14d - mov %edx,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %r8d,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %ecx,%r15d # (f^g)&e - mov %r12d,40(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %r9d,%r12d # T1+=h - - mov %r10d,%r9d - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %r10d,%r13d - mov %r10d,%r14d - - ror $2,%r9d - ror $13,%r13d - mov %r10d,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%r9d - ror $9,%r13d - or %eax,%r14d # a|c - - xor %r13d,%r9d # h=Sigma0(a) - and %eax,%r15d # a&c - add %r12d,%ebx # d+=T1 - - and %r11d,%r14d # (a|c)&b - add %r12d,%r9d # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%r9d # h+=Maj(a,b,c) - mov 4*11(%rsi),%r12d - bswap %r12d - mov %ebx,%r13d - mov %ebx,%r14d - mov %ecx,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %edx,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %ebx,%r15d # (f^g)&e - mov %r12d,44(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor 
%edx,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %r8d,%r12d # T1+=h - - mov %r9d,%r8d - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %r9d,%r13d - mov %r9d,%r14d - - ror $2,%r8d - ror $13,%r13d - mov %r9d,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%r8d - ror $9,%r13d - or %r11d,%r14d # a|c - - xor %r13d,%r8d # h=Sigma0(a) - and %r11d,%r15d # a&c - add %r12d,%eax # d+=T1 - - and %r10d,%r14d # (a|c)&b - add %r12d,%r8d # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%r8d # h+=Maj(a,b,c) - mov 4*12(%rsi),%r12d - bswap %r12d - mov %eax,%r13d - mov %eax,%r14d - mov %ebx,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %ecx,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %eax,%r15d # (f^g)&e - mov %r12d,48(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %edx,%r12d # T1+=h - - mov %r8d,%edx - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %r8d,%r13d - mov %r8d,%r14d - - ror $2,%edx - ror $13,%r13d - mov %r8d,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%edx - ror $9,%r13d - or %r10d,%r14d # a|c - - xor %r13d,%edx # h=Sigma0(a) - and %r10d,%r15d # a&c - add %r12d,%r11d # d+=T1 - - and %r9d,%r14d # (a|c)&b - add %r12d,%edx # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%edx # h+=Maj(a,b,c) - mov 4*13(%rsi),%r12d - bswap %r12d - mov %r11d,%r13d - mov %r11d,%r14d - mov %eax,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %ebx,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %r11d,%r15d # (f^g)&e - mov %r12d,52(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %ecx,%r12d # T1+=h - - mov %edx,%ecx - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %edx,%r13d - mov %edx,%r14d - - ror $2,%ecx - ror $13,%r13d - mov %edx,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%ecx - ror $9,%r13d - 
or %r9d,%r14d # a|c - - xor %r13d,%ecx # h=Sigma0(a) - and %r9d,%r15d # a&c - add %r12d,%r10d # d+=T1 - - and %r8d,%r14d # (a|c)&b - add %r12d,%ecx # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%ecx # h+=Maj(a,b,c) - mov 4*14(%rsi),%r12d - bswap %r12d - mov %r10d,%r13d - mov %r10d,%r14d - mov %r11d,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %eax,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %r10d,%r15d # (f^g)&e - mov %r12d,56(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %ebx,%r12d # T1+=h - - mov %ecx,%ebx - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %ecx,%r13d - mov %ecx,%r14d - - ror $2,%ebx - ror $13,%r13d - mov %ecx,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%ebx - ror $9,%r13d - or %r8d,%r14d # a|c - - xor %r13d,%ebx # h=Sigma0(a) - and %r8d,%r15d # a&c - add %r12d,%r9d # d+=T1 - - and %edx,%r14d # (a|c)&b - add %r12d,%ebx # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%ebx # h+=Maj(a,b,c) - mov 4*15(%rsi),%r12d - bswap %r12d - mov %r9d,%r13d - mov %r9d,%r14d - mov %r10d,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %r11d,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %r9d,%r15d # (f^g)&e - mov %r12d,60(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %eax,%r12d # T1+=h - - mov %ebx,%eax - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %ebx,%r13d - mov %ebx,%r14d - - ror $2,%eax - ror $13,%r13d - mov %ebx,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%eax - ror $9,%r13d - or %edx,%r14d # a|c - - xor %r13d,%eax # h=Sigma0(a) - and %edx,%r15d # a&c - add %r12d,%r8d # d+=T1 - - and %ecx,%r14d # (a|c)&b - add %r12d,%eax # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%eax # h+=Maj(a,b,c) - jmp .Lrounds_16_xx -.align 16 
-.Lrounds_16_xx: - mov 4(%rsp),%r13d - mov 56(%rsp),%r12d - - mov %r13d,%r15d - - shr $3,%r13d - ror $7,%r15d - - xor %r15d,%r13d - ror $11,%r15d - - xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) - mov %r12d,%r14d - - shr $10,%r12d - ror $17,%r14d - - xor %r14d,%r12d - ror $2,%r14d - - xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) - - add %r13d,%r12d - - add 36(%rsp),%r12d - - add 0(%rsp),%r12d - mov %r8d,%r13d - mov %r8d,%r14d - mov %r9d,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %r10d,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %r8d,%r15d # (f^g)&e - mov %r12d,0(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %r11d,%r12d # T1+=h - - mov %eax,%r11d - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %eax,%r13d - mov %eax,%r14d - - ror $2,%r11d - ror $13,%r13d - mov %eax,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%r11d - ror $9,%r13d - or %ecx,%r14d # a|c - - xor %r13d,%r11d # h=Sigma0(a) - and %ecx,%r15d # a&c - add %r12d,%edx # d+=T1 - - and %ebx,%r14d # (a|c)&b - add %r12d,%r11d # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%r11d # h+=Maj(a,b,c) - mov 8(%rsp),%r13d - mov 60(%rsp),%r12d - - mov %r13d,%r15d - - shr $3,%r13d - ror $7,%r15d - - xor %r15d,%r13d - ror $11,%r15d - - xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) - mov %r12d,%r14d - - shr $10,%r12d - ror $17,%r14d - - xor %r14d,%r12d - ror $2,%r14d - - xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) - - add %r13d,%r12d - - add 40(%rsp),%r12d - - add 4(%rsp),%r12d - mov %edx,%r13d - mov %edx,%r14d - mov %r8d,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %r9d,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %edx,%r15d # (f^g)&e - mov %r12d,4(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %r10d,%r12d # T1+=h - - mov %r11d,%r10d - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %r11d,%r13d - mov %r11d,%r14d - - ror $2,%r10d - 
ror $13,%r13d - mov %r11d,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%r10d - ror $9,%r13d - or %ebx,%r14d # a|c - - xor %r13d,%r10d # h=Sigma0(a) - and %ebx,%r15d # a&c - add %r12d,%ecx # d+=T1 - - and %eax,%r14d # (a|c)&b - add %r12d,%r10d # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%r10d # h+=Maj(a,b,c) - mov 12(%rsp),%r13d - mov 0(%rsp),%r12d - - mov %r13d,%r15d - - shr $3,%r13d - ror $7,%r15d - - xor %r15d,%r13d - ror $11,%r15d - - xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) - mov %r12d,%r14d - - shr $10,%r12d - ror $17,%r14d - - xor %r14d,%r12d - ror $2,%r14d - - xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) - - add %r13d,%r12d - - add 44(%rsp),%r12d - - add 8(%rsp),%r12d - mov %ecx,%r13d - mov %ecx,%r14d - mov %edx,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %r8d,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %ecx,%r15d # (f^g)&e - mov %r12d,8(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %r9d,%r12d # T1+=h - - mov %r10d,%r9d - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %r10d,%r13d - mov %r10d,%r14d - - ror $2,%r9d - ror $13,%r13d - mov %r10d,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%r9d - ror $9,%r13d - or %eax,%r14d # a|c - - xor %r13d,%r9d # h=Sigma0(a) - and %eax,%r15d # a&c - add %r12d,%ebx # d+=T1 - - and %r11d,%r14d # (a|c)&b - add %r12d,%r9d # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%r9d # h+=Maj(a,b,c) - mov 16(%rsp),%r13d - mov 4(%rsp),%r12d - - mov %r13d,%r15d - - shr $3,%r13d - ror $7,%r15d - - xor %r15d,%r13d - ror $11,%r15d - - xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) - mov %r12d,%r14d - - shr $10,%r12d - ror $17,%r14d - - xor %r14d,%r12d - ror $2,%r14d - - xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) - - add %r13d,%r12d - - add 48(%rsp),%r12d - - add 12(%rsp),%r12d - mov %ebx,%r13d - mov %ebx,%r14d - mov %ecx,%r15d - - ror $6,%r13d - ror 
$11,%r14d - xor %edx,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %ebx,%r15d # (f^g)&e - mov %r12d,12(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %r8d,%r12d # T1+=h - - mov %r9d,%r8d - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %r9d,%r13d - mov %r9d,%r14d - - ror $2,%r8d - ror $13,%r13d - mov %r9d,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%r8d - ror $9,%r13d - or %r11d,%r14d # a|c - - xor %r13d,%r8d # h=Sigma0(a) - and %r11d,%r15d # a&c - add %r12d,%eax # d+=T1 - - and %r10d,%r14d # (a|c)&b - add %r12d,%r8d # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%r8d # h+=Maj(a,b,c) - mov 20(%rsp),%r13d - mov 8(%rsp),%r12d - - mov %r13d,%r15d - - shr $3,%r13d - ror $7,%r15d - - xor %r15d,%r13d - ror $11,%r15d - - xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) - mov %r12d,%r14d - - shr $10,%r12d - ror $17,%r14d - - xor %r14d,%r12d - ror $2,%r14d - - xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) - - add %r13d,%r12d - - add 52(%rsp),%r12d - - add 16(%rsp),%r12d - mov %eax,%r13d - mov %eax,%r14d - mov %ebx,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %ecx,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %eax,%r15d # (f^g)&e - mov %r12d,16(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %edx,%r12d # T1+=h - - mov %r8d,%edx - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %r8d,%r13d - mov %r8d,%r14d - - ror $2,%edx - ror $13,%r13d - mov %r8d,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%edx - ror $9,%r13d - or %r10d,%r14d # a|c - - xor %r13d,%edx # h=Sigma0(a) - and %r10d,%r15d # a&c - add %r12d,%r11d # d+=T1 - - and %r9d,%r14d # (a|c)&b - add %r12d,%edx # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%edx # h+=Maj(a,b,c) - mov 24(%rsp),%r13d - mov 12(%rsp),%r12d - - mov %r13d,%r15d - - shr $3,%r13d - ror 
$7,%r15d - - xor %r15d,%r13d - ror $11,%r15d - - xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) - mov %r12d,%r14d - - shr $10,%r12d - ror $17,%r14d - - xor %r14d,%r12d - ror $2,%r14d - - xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) - - add %r13d,%r12d - - add 56(%rsp),%r12d - - add 20(%rsp),%r12d - mov %r11d,%r13d - mov %r11d,%r14d - mov %eax,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %ebx,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %r11d,%r15d # (f^g)&e - mov %r12d,20(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %ecx,%r12d # T1+=h - - mov %edx,%ecx - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %edx,%r13d - mov %edx,%r14d - - ror $2,%ecx - ror $13,%r13d - mov %edx,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%ecx - ror $9,%r13d - or %r9d,%r14d # a|c - - xor %r13d,%ecx # h=Sigma0(a) - and %r9d,%r15d # a&c - add %r12d,%r10d # d+=T1 - - and %r8d,%r14d # (a|c)&b - add %r12d,%ecx # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%ecx # h+=Maj(a,b,c) - mov 28(%rsp),%r13d - mov 16(%rsp),%r12d - - mov %r13d,%r15d - - shr $3,%r13d - ror $7,%r15d - - xor %r15d,%r13d - ror $11,%r15d - - xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) - mov %r12d,%r14d - - shr $10,%r12d - ror $17,%r14d - - xor %r14d,%r12d - ror $2,%r14d - - xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) - - add %r13d,%r12d - - add 60(%rsp),%r12d - - add 24(%rsp),%r12d - mov %r10d,%r13d - mov %r10d,%r14d - mov %r11d,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %eax,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %r10d,%r15d # (f^g)&e - mov %r12d,24(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %ebx,%r12d # T1+=h - - mov %ecx,%ebx - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %ecx,%r13d - mov %ecx,%r14d - - ror $2,%ebx - ror $13,%r13d - mov %ecx,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%ebx - ror 
$9,%r13d - or %r8d,%r14d # a|c - - xor %r13d,%ebx # h=Sigma0(a) - and %r8d,%r15d # a&c - add %r12d,%r9d # d+=T1 - - and %edx,%r14d # (a|c)&b - add %r12d,%ebx # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%ebx # h+=Maj(a,b,c) - mov 32(%rsp),%r13d - mov 20(%rsp),%r12d - - mov %r13d,%r15d - - shr $3,%r13d - ror $7,%r15d - - xor %r15d,%r13d - ror $11,%r15d - - xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) - mov %r12d,%r14d - - shr $10,%r12d - ror $17,%r14d - - xor %r14d,%r12d - ror $2,%r14d - - xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) - - add %r13d,%r12d - - add 0(%rsp),%r12d - - add 28(%rsp),%r12d - mov %r9d,%r13d - mov %r9d,%r14d - mov %r10d,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %r11d,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %r9d,%r15d # (f^g)&e - mov %r12d,28(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %eax,%r12d # T1+=h - - mov %ebx,%eax - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %ebx,%r13d - mov %ebx,%r14d - - ror $2,%eax - ror $13,%r13d - mov %ebx,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%eax - ror $9,%r13d - or %edx,%r14d # a|c - - xor %r13d,%eax # h=Sigma0(a) - and %edx,%r15d # a&c - add %r12d,%r8d # d+=T1 - - and %ecx,%r14d # (a|c)&b - add %r12d,%eax # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%eax # h+=Maj(a,b,c) - mov 36(%rsp),%r13d - mov 24(%rsp),%r12d - - mov %r13d,%r15d - - shr $3,%r13d - ror $7,%r15d - - xor %r15d,%r13d - ror $11,%r15d - - xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) - mov %r12d,%r14d - - shr $10,%r12d - ror $17,%r14d - - xor %r14d,%r12d - ror $2,%r14d - - xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) - - add %r13d,%r12d - - add 4(%rsp),%r12d - - add 32(%rsp),%r12d - mov %r8d,%r13d - mov %r8d,%r14d - mov %r9d,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %r10d,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %r8d,%r15d # (f^g)&e - mov 
%r12d,32(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %r10d,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %r11d,%r12d # T1+=h - - mov %eax,%r11d - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %eax,%r13d - mov %eax,%r14d - - ror $2,%r11d - ror $13,%r13d - mov %eax,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%r11d - ror $9,%r13d - or %ecx,%r14d # a|c - - xor %r13d,%r11d # h=Sigma0(a) - and %ecx,%r15d # a&c - add %r12d,%edx # d+=T1 - - and %ebx,%r14d # (a|c)&b - add %r12d,%r11d # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%r11d # h+=Maj(a,b,c) - mov 40(%rsp),%r13d - mov 28(%rsp),%r12d - - mov %r13d,%r15d - - shr $3,%r13d - ror $7,%r15d - - xor %r15d,%r13d - ror $11,%r15d - - xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) - mov %r12d,%r14d - - shr $10,%r12d - ror $17,%r14d - - xor %r14d,%r12d - ror $2,%r14d - - xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) - - add %r13d,%r12d - - add 8(%rsp),%r12d - - add 36(%rsp),%r12d - mov %edx,%r13d - mov %edx,%r14d - mov %r8d,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %r9d,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %edx,%r15d # (f^g)&e - mov %r12d,36(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %r9d,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %r10d,%r12d # T1+=h - - mov %r11d,%r10d - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %r11d,%r13d - mov %r11d,%r14d - - ror $2,%r10d - ror $13,%r13d - mov %r11d,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%r10d - ror $9,%r13d - or %ebx,%r14d # a|c - - xor %r13d,%r10d # h=Sigma0(a) - and %ebx,%r15d # a&c - add %r12d,%ecx # d+=T1 - - and %eax,%r14d # (a|c)&b - add %r12d,%r10d # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%r10d # h+=Maj(a,b,c) - mov 44(%rsp),%r13d - mov 32(%rsp),%r12d - - mov %r13d,%r15d - - shr $3,%r13d - ror $7,%r15d - - xor %r15d,%r13d - ror $11,%r15d - - xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) - mov 
%r12d,%r14d - - shr $10,%r12d - ror $17,%r14d - - xor %r14d,%r12d - ror $2,%r14d - - xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) - - add %r13d,%r12d - - add 12(%rsp),%r12d - - add 40(%rsp),%r12d - mov %ecx,%r13d - mov %ecx,%r14d - mov %edx,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %r8d,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %ecx,%r15d # (f^g)&e - mov %r12d,40(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %r8d,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %r9d,%r12d # T1+=h - - mov %r10d,%r9d - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %r10d,%r13d - mov %r10d,%r14d - - ror $2,%r9d - ror $13,%r13d - mov %r10d,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%r9d - ror $9,%r13d - or %eax,%r14d # a|c - - xor %r13d,%r9d # h=Sigma0(a) - and %eax,%r15d # a&c - add %r12d,%ebx # d+=T1 - - and %r11d,%r14d # (a|c)&b - add %r12d,%r9d # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%r9d # h+=Maj(a,b,c) - mov 48(%rsp),%r13d - mov 36(%rsp),%r12d - - mov %r13d,%r15d - - shr $3,%r13d - ror $7,%r15d - - xor %r15d,%r13d - ror $11,%r15d - - xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) - mov %r12d,%r14d - - shr $10,%r12d - ror $17,%r14d - - xor %r14d,%r12d - ror $2,%r14d - - xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) - - add %r13d,%r12d - - add 16(%rsp),%r12d - - add 44(%rsp),%r12d - mov %ebx,%r13d - mov %ebx,%r14d - mov %ecx,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %edx,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %ebx,%r15d # (f^g)&e - mov %r12d,44(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %edx,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %r8d,%r12d # T1+=h - - mov %r9d,%r8d - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %r9d,%r13d - mov %r9d,%r14d - - ror $2,%r8d - ror $13,%r13d - mov %r9d,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%r8d - ror $9,%r13d - or %r11d,%r14d # a|c - - xor %r13d,%r8d # h=Sigma0(a) - and %r11d,%r15d # a&c - add 
%r12d,%eax # d+=T1 - - and %r10d,%r14d # (a|c)&b - add %r12d,%r8d # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%r8d # h+=Maj(a,b,c) - mov 52(%rsp),%r13d - mov 40(%rsp),%r12d - - mov %r13d,%r15d - - shr $3,%r13d - ror $7,%r15d - - xor %r15d,%r13d - ror $11,%r15d - - xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) - mov %r12d,%r14d - - shr $10,%r12d - ror $17,%r14d - - xor %r14d,%r12d - ror $2,%r14d - - xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) - - add %r13d,%r12d - - add 20(%rsp),%r12d - - add 48(%rsp),%r12d - mov %eax,%r13d - mov %eax,%r14d - mov %ebx,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %ecx,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %eax,%r15d # (f^g)&e - mov %r12d,48(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %ecx,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %edx,%r12d # T1+=h - - mov %r8d,%edx - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %r8d,%r13d - mov %r8d,%r14d - - ror $2,%edx - ror $13,%r13d - mov %r8d,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%edx - ror $9,%r13d - or %r10d,%r14d # a|c - - xor %r13d,%edx # h=Sigma0(a) - and %r10d,%r15d # a&c - add %r12d,%r11d # d+=T1 - - and %r9d,%r14d # (a|c)&b - add %r12d,%edx # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%edx # h+=Maj(a,b,c) - mov 56(%rsp),%r13d - mov 44(%rsp),%r12d - - mov %r13d,%r15d - - shr $3,%r13d - ror $7,%r15d - - xor %r15d,%r13d - ror $11,%r15d - - xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) - mov %r12d,%r14d - - shr $10,%r12d - ror $17,%r14d - - xor %r14d,%r12d - ror $2,%r14d - - xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) - - add %r13d,%r12d - - add 24(%rsp),%r12d - - add 52(%rsp),%r12d - mov %r11d,%r13d - mov %r11d,%r14d - mov %eax,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %ebx,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %r11d,%r15d # (f^g)&e - mov %r12d,52(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %ebx,%r15d # Ch(e,f,g)=((f^g)&e)^g 
- add %ecx,%r12d # T1+=h - - mov %edx,%ecx - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %edx,%r13d - mov %edx,%r14d - - ror $2,%ecx - ror $13,%r13d - mov %edx,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%ecx - ror $9,%r13d - or %r9d,%r14d # a|c - - xor %r13d,%ecx # h=Sigma0(a) - and %r9d,%r15d # a&c - add %r12d,%r10d # d+=T1 - - and %r8d,%r14d # (a|c)&b - add %r12d,%ecx # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%ecx # h+=Maj(a,b,c) - mov 60(%rsp),%r13d - mov 48(%rsp),%r12d - - mov %r13d,%r15d - - shr $3,%r13d - ror $7,%r15d - - xor %r15d,%r13d - ror $11,%r15d - - xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) - mov %r12d,%r14d - - shr $10,%r12d - ror $17,%r14d - - xor %r14d,%r12d - ror $2,%r14d - - xor %r14d,%r12d # sigma1(X[(i+14)&0xf]) - - add %r13d,%r12d - - add 28(%rsp),%r12d - - add 56(%rsp),%r12d - mov %r10d,%r13d - mov %r10d,%r14d - mov %r11d,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %eax,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %r10d,%r15d # (f^g)&e - mov %r12d,56(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %eax,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %ebx,%r12d # T1+=h - - mov %ecx,%ebx - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %ecx,%r13d - mov %ecx,%r14d - - ror $2,%ebx - ror $13,%r13d - mov %ecx,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%ebx - ror $9,%r13d - or %r8d,%r14d # a|c - - xor %r13d,%ebx # h=Sigma0(a) - and %r8d,%r15d # a&c - add %r12d,%r9d # d+=T1 - - and %edx,%r14d # (a|c)&b - add %r12d,%ebx # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%ebx # h+=Maj(a,b,c) - mov 0(%rsp),%r13d - mov 52(%rsp),%r12d - - mov %r13d,%r15d - - shr $3,%r13d - ror $7,%r15d - - xor %r15d,%r13d - ror $11,%r15d - - xor %r15d,%r13d # sigma0(X[(i+1)&0xf]) - mov %r12d,%r14d - - shr $10,%r12d - ror $17,%r14d - - xor %r14d,%r12d - ror $2,%r14d - - xor %r14d,%r12d # 
sigma1(X[(i+14)&0xf]) - - add %r13d,%r12d - - add 32(%rsp),%r12d - - add 60(%rsp),%r12d - mov %r9d,%r13d - mov %r9d,%r14d - mov %r10d,%r15d - - ror $6,%r13d - ror $11,%r14d - xor %r11d,%r15d # f^g - - xor %r14d,%r13d - ror $14,%r14d - and %r9d,%r15d # (f^g)&e - mov %r12d,60(%rsp) - - xor %r14d,%r13d # Sigma1(e) - xor %r11d,%r15d # Ch(e,f,g)=((f^g)&e)^g - add %eax,%r12d # T1+=h - - mov %ebx,%eax - add %r13d,%r12d # T1+=Sigma1(e) - - add %r15d,%r12d # T1+=Ch(e,f,g) - mov %ebx,%r13d - mov %ebx,%r14d - - ror $2,%eax - ror $13,%r13d - mov %ebx,%r15d - add (%rbp,%rdi,4),%r12d # T1+=K[round] - - xor %r13d,%eax - ror $9,%r13d - or %edx,%r14d # a|c - - xor %r13d,%eax # h=Sigma0(a) - and %edx,%r15d # a&c - add %r12d,%r8d # d+=T1 - - and %ecx,%r14d # (a|c)&b - add %r12d,%eax # h+=T1 - - or %r15d,%r14d # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14d,%eax # h+=Maj(a,b,c) - cmp $64,%rdi - jb .Lrounds_16_xx - - mov 16*4+0*8(%rsp),%rdi - lea 16*4(%rsi),%rsi - - add 4*0(%rdi),%eax - add 4*1(%rdi),%ebx - add 4*2(%rdi),%ecx - add 4*3(%rdi),%edx - add 4*4(%rdi),%r8d - add 4*5(%rdi),%r9d - add 4*6(%rdi),%r10d - add 4*7(%rdi),%r11d - - cmp 16*4+2*8(%rsp),%rsi - - mov %eax,4*0(%rdi) - mov %ebx,4*1(%rdi) - mov %ecx,4*2(%rdi) - mov %edx,4*3(%rdi) - mov %r8d,4*4(%rdi) - mov %r9d,4*5(%rdi) - mov %r10d,4*6(%rdi) - mov %r11d,4*7(%rdi) - jb .Lloop - - mov 16*4+3*8(%rsp),%rsp -.cfi_def_cfa %rsp,56 - pop %r15 -.cfi_adjust_cfa_offset -8 -.cfi_restore %r15 - pop %r14 -.cfi_adjust_cfa_offset -8 -.cfi_restore %r14 - pop %r13 -.cfi_adjust_cfa_offset -8 -.cfi_restore %r13 - pop %r12 -.cfi_adjust_cfa_offset -8 -.cfi_restore %r12 - pop %rbp -.cfi_adjust_cfa_offset -8 -.cfi_restore %rbp - pop %rbx -.cfi_adjust_cfa_offset -8 -.cfi_restore %rbx - - ret -.cfi_endproc -SET_SIZE(SHA256TransformBlocks) - -.data -.align 64 -.type K256,@object -K256: - .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 - .long 0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5 - .long 
0xd807aa98,0x12835b01,0x243185be,0x550c7dc3 - .long 0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174 - .long 0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc - .long 0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da - .long 0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7 - .long 0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967 - .long 0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13 - .long 0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85 - .long 0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3 - .long 0xd192e819,0xd6990624,0xf40e3585,0x106aa070 - .long 0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5 - .long 0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3 - .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 - .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 -#endif /* !lint && !__lint */ - -#ifdef __ELF__ -.section .note.GNU-stack,"",%progbits -#endif diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512-x86_64.S b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512-x86_64.S new file mode 100644 index 000000000000..fbbcca650d10 --- /dev/null +++ b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512-x86_64.S @@ -0,0 +1,4011 @@ +/* + * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* + * Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de> + * - modified assembly to fit into OpenZFS + */ + +#if defined(__x86_64) + +#define _ASM +#include <sys/asm_linkage.h> + +SECTION_STATIC + +.balign 64 +SET_OBJ(K512) +K512: +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0x428a2f98d728ae22,0x7137449123ef65cd +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x3956c25bf348b538,0x59f111f1b605d019 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0xd807aa98a3030242,0x12835b0145706fbe +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0x9bdc06a725c71235,0xc19bf174cf692694 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 +.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0x983e5152ee66dfab,0xa831c66d2db43210 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 +.quad 0x06ca6351e003826f,0x142929670a0e6e70 +.quad 0x06ca6351e003826f,0x142929670a0e6e70 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 0x650a73548baf63de,0x766a0abb3c77b2a8 +.quad 
0x81c2c92e47edaee6,0x92722c851482353b +.quad 0x81c2c92e47edaee6,0x92722c851482353b +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xc24b8b70d0f89791,0xc76c51a30654be30 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xd192e819d6ef5218,0xd69906245565a910 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0xf40e35855771202a,0x106aa07032bbd1b8 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x748f82ee5defb2fc,0x78a5636f43172f60 +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x84c87814a1f0ab72,0x8cc702081a6439ec +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0x90befffa23631e28,0xa4506cebde82bde9 +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xbef9a3f7b2c67915,0xc67178f2e372532b +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xca273eceea26619c,0xd186b8c721c0c207 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 +.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 +.quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x113f9804bef90dae,0x1b710b35131c471b +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x28db77f523047d84,0x32caab7b40c72493 +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +.quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +.quad 0x0001020304050607,0x08090a0b0c0d0e0f +.quad 0x0001020304050607,0x08090a0b0c0d0e0f + 
+ENTRY_ALIGN(zfs_sha512_transform_x64, 16) +.cfi_startproc + ENDBR + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + shlq $4,%rdx + subq $128+32,%rsp + leaq (%rsi,%rdx,8),%rdx + andq $-64,%rsp + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %rax,152(%rsp) +.cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x08 +.Lprologue: + movq 0(%rdi),%rax + movq 8(%rdi),%rbx + movq 16(%rdi),%rcx + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + jmp .Lloop +.balign 16 +.Lloop: + movq %rbx,%rdi + leaq K512(%rip),%rbp + xorq %rcx,%rdi + movq 0(%rsi),%r12 + movq %r8,%r13 + movq %rax,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r9,%r15 + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + movq %r12,0(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + leaq 8(%rbp),%rbp + addq %r14,%r11 + movq 8(%rsi),%r12 + movq %rdx,%r13 + movq %r11,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r8,%rdi + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + movq %r12,8(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + leaq 24(%rbp),%rbp + addq %r14,%r10 + movq 16(%rsi),%r12 + movq %rcx,%r13 + movq %r10,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rdx,%r15 + 
xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + movq %r12,16(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + leaq 8(%rbp),%rbp + addq %r14,%r9 + movq 24(%rsi),%r12 + movq %rbx,%r13 + movq %r9,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rcx,%rdi + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + movq %r12,24(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + leaq 24(%rbp),%rbp + addq %r14,%r8 + movq 32(%rsi),%r12 + movq %rax,%r13 + movq %r8,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rbx,%r15 + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + movq %r12,32(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + leaq 8(%rbp),%rbp + addq %r14,%rdx + movq 40(%rsi),%r12 + movq %r11,%r13 + movq %rdx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rax,%rdi + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + movq %r12,40(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + xorq %r15,%rcx + addq %r12,%r10 
+ addq %r12,%rcx + leaq 24(%rbp),%rbp + addq %r14,%rcx + movq 48(%rsi),%r12 + movq %r10,%r13 + movq %rcx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r11,%r15 + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + movq %r12,48(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + leaq 8(%rbp),%rbp + addq %r14,%rbx + movq 56(%rsi),%r12 + movq %r9,%r13 + movq %rbx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r10,%rdi + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + movq %r12,56(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + leaq 24(%rbp),%rbp + addq %r14,%rax + movq 64(%rsi),%r12 + movq %r8,%r13 + movq %rax,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r9,%r15 + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + movq %r12,64(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + leaq 8(%rbp),%rbp + addq %r14,%r11 + movq 72(%rsi),%r12 + movq %rdx,%r13 + movq %r11,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r8,%rdi + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + movq %r12,72(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + movq %r11,%rdi + 
addq (%rbp),%r12 + xorq %r11,%r14 + xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + leaq 24(%rbp),%rbp + addq %r14,%r10 + movq 80(%rsi),%r12 + movq %rcx,%r13 + movq %r10,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rdx,%r15 + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + movq %r12,80(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + leaq 8(%rbp),%rbp + addq %r14,%r9 + movq 88(%rsi),%r12 + movq %rbx,%r13 + movq %r9,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rcx,%rdi + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + movq %r12,88(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + leaq 24(%rbp),%rbp + addq %r14,%r8 + movq 96(%rsi),%r12 + movq %rax,%r13 + movq %r8,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rbx,%r15 + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + movq %r12,96(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + leaq 8(%rbp),%rbp + addq %r14,%rdx + movq 104(%rsi),%r12 + movq %r11,%r13 + movq %rdx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %rax,%rdi + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + movq 
%r12,104(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + leaq 24(%rbp),%rbp + addq %r14,%rcx + movq 112(%rsi),%r12 + movq %r10,%r13 + movq %rcx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r11,%r15 + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + movq %r12,112(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + leaq 8(%rbp),%rbp + addq %r14,%rbx + movq 120(%rsi),%r12 + movq %r9,%r13 + movq %rbx,%r14 + bswapq %r12 + rorq $23,%r13 + movq %r10,%rdi + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + movq %r12,120(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + leaq 24(%rbp),%rbp + jmp .Lrounds_16_xx +.balign 16 +.Lrounds_16_xx: + movq 8(%rsp),%r13 + movq 112(%rsp),%r15 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rax + movq %r15,%r14 + rorq $42,%r15 + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 72(%rsp),%r12 + addq 0(%rsp),%r12 + movq %r8,%r13 + addq %r15,%r12 + movq %rax,%r14 + rorq $23,%r13 + movq %r9,%r15 + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + movq %r12,0(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + rorq 
$4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + leaq 8(%rbp),%rbp + movq 16(%rsp),%r13 + movq 120(%rsp),%rdi + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r11 + movq %rdi,%r14 + rorq $42,%rdi + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 80(%rsp),%r12 + addq 8(%rsp),%r12 + movq %rdx,%r13 + addq %rdi,%r12 + movq %r11,%r14 + rorq $23,%r13 + movq %r8,%rdi + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + movq %r12,8(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + leaq 24(%rbp),%rbp + movq 24(%rsp),%r13 + movq 0(%rsp),%r15 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r10 + movq %r15,%r14 + rorq $42,%r15 + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 88(%rsp),%r12 + addq 16(%rsp),%r12 + movq %rcx,%r13 + addq %r15,%r12 + movq %r10,%r14 + rorq $23,%r13 + movq %rdx,%r15 + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + movq %r12,16(%rsp) + xorq %r10,%r14 + andq %rcx,%r15 + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + leaq 8(%rbp),%rbp + movq 32(%rsp),%r13 + movq 8(%rsp),%rdi + movq %r13,%r12 + rorq $7,%r13 + addq 
%r14,%r9 + movq %rdi,%r14 + rorq $42,%rdi + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 96(%rsp),%r12 + addq 24(%rsp),%r12 + movq %rbx,%r13 + addq %rdi,%r12 + movq %r9,%r14 + rorq $23,%r13 + movq %rcx,%rdi + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + movq %r12,24(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + leaq 24(%rbp),%rbp + movq 40(%rsp),%r13 + movq 16(%rsp),%r15 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r8 + movq %r15,%r14 + rorq $42,%r15 + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 104(%rsp),%r12 + addq 32(%rsp),%r12 + movq %rax,%r13 + addq %r15,%r12 + movq %r8,%r14 + rorq $23,%r13 + movq %rbx,%r15 + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + movq %r12,32(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + leaq 8(%rbp),%rbp + movq 48(%rsp),%r13 + movq 24(%rsp),%rdi + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rdx + movq %rdi,%r14 + rorq $42,%rdi + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 112(%rsp),%r12 + addq 40(%rsp),%r12 + movq %r11,%r13 + addq %rdi,%r12 + movq %rdx,%r14 + rorq $23,%r13 + movq %rax,%rdi + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + movq %r12,40(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi 
+ rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + leaq 24(%rbp),%rbp + movq 56(%rsp),%r13 + movq 32(%rsp),%r15 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rcx + movq %r15,%r14 + rorq $42,%r15 + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 120(%rsp),%r12 + addq 48(%rsp),%r12 + movq %r10,%r13 + addq %r15,%r12 + movq %rcx,%r14 + rorq $23,%r13 + movq %r11,%r15 + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + movq %r12,48(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + leaq 8(%rbp),%rbp + movq 64(%rsp),%r13 + movq 40(%rsp),%rdi + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rbx + movq %rdi,%r14 + rorq $42,%rdi + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 0(%rsp),%r12 + addq 56(%rsp),%r12 + movq %r9,%r13 + addq %rdi,%r12 + movq %rbx,%r14 + rorq $23,%r13 + movq %r10,%rdi + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + movq %r12,56(%rsp) + xorq %rbx,%r14 + andq %r9,%rdi + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + leaq 24(%rbp),%rbp + movq 72(%rsp),%r13 + movq 48(%rsp),%r15 + movq %r13,%r12 + rorq $7,%r13 
+ addq %r14,%rax + movq %r15,%r14 + rorq $42,%r15 + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 8(%rsp),%r12 + addq 64(%rsp),%r12 + movq %r8,%r13 + addq %r15,%r12 + movq %rax,%r14 + rorq $23,%r13 + movq %r9,%r15 + xorq %r8,%r13 + rorq $5,%r14 + xorq %r10,%r15 + movq %r12,64(%rsp) + xorq %rax,%r14 + andq %r8,%r15 + rorq $4,%r13 + addq %r11,%r12 + xorq %r10,%r15 + rorq $6,%r14 + xorq %r8,%r13 + addq %r15,%r12 + movq %rax,%r15 + addq (%rbp),%r12 + xorq %rax,%r14 + xorq %rbx,%r15 + rorq $14,%r13 + movq %rbx,%r11 + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + xorq %rdi,%r11 + addq %r12,%rdx + addq %r12,%r11 + leaq 8(%rbp),%rbp + movq 80(%rsp),%r13 + movq 56(%rsp),%rdi + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r11 + movq %rdi,%r14 + rorq $42,%rdi + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 16(%rsp),%r12 + addq 72(%rsp),%r12 + movq %rdx,%r13 + addq %rdi,%r12 + movq %r11,%r14 + rorq $23,%r13 + movq %r8,%rdi + xorq %rdx,%r13 + rorq $5,%r14 + xorq %r9,%rdi + movq %r12,72(%rsp) + xorq %r11,%r14 + andq %rdx,%rdi + rorq $4,%r13 + addq %r10,%r12 + xorq %r9,%rdi + rorq $6,%r14 + xorq %rdx,%r13 + addq %rdi,%r12 + movq %r11,%rdi + addq (%rbp),%r12 + xorq %r11,%r14 + xorq %rax,%rdi + rorq $14,%r13 + movq %rax,%r10 + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + xorq %r15,%r10 + addq %r12,%rcx + addq %r12,%r10 + leaq 24(%rbp),%rbp + movq 88(%rsp),%r13 + movq 64(%rsp),%r15 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r10 + movq %r15,%r14 + rorq $42,%r15 + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 24(%rsp),%r12 + addq 80(%rsp),%r12 + movq %rcx,%r13 + addq %r15,%r12 + movq %r10,%r14 + rorq $23,%r13 + movq %rdx,%r15 + xorq %rcx,%r13 + rorq $5,%r14 + xorq %r8,%r15 + movq %r12,80(%rsp) + xorq %r10,%r14 + andq 
%rcx,%r15 + rorq $4,%r13 + addq %r9,%r12 + xorq %r8,%r15 + rorq $6,%r14 + xorq %rcx,%r13 + addq %r15,%r12 + movq %r10,%r15 + addq (%rbp),%r12 + xorq %r10,%r14 + xorq %r11,%r15 + rorq $14,%r13 + movq %r11,%r9 + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + xorq %rdi,%r9 + addq %r12,%rbx + addq %r12,%r9 + leaq 8(%rbp),%rbp + movq 96(%rsp),%r13 + movq 72(%rsp),%rdi + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r9 + movq %rdi,%r14 + rorq $42,%rdi + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 32(%rsp),%r12 + addq 88(%rsp),%r12 + movq %rbx,%r13 + addq %rdi,%r12 + movq %r9,%r14 + rorq $23,%r13 + movq %rcx,%rdi + xorq %rbx,%r13 + rorq $5,%r14 + xorq %rdx,%rdi + movq %r12,88(%rsp) + xorq %r9,%r14 + andq %rbx,%rdi + rorq $4,%r13 + addq %r8,%r12 + xorq %rdx,%rdi + rorq $6,%r14 + xorq %rbx,%r13 + addq %rdi,%r12 + movq %r9,%rdi + addq (%rbp),%r12 + xorq %r9,%r14 + xorq %r10,%rdi + rorq $14,%r13 + movq %r10,%r8 + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + xorq %r15,%r8 + addq %r12,%rax + addq %r12,%r8 + leaq 24(%rbp),%rbp + movq 104(%rsp),%r13 + movq 80(%rsp),%r15 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%r8 + movq %r15,%r14 + rorq $42,%r15 + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 40(%rsp),%r12 + addq 96(%rsp),%r12 + movq %rax,%r13 + addq %r15,%r12 + movq %r8,%r14 + rorq $23,%r13 + movq %rbx,%r15 + xorq %rax,%r13 + rorq $5,%r14 + xorq %rcx,%r15 + movq %r12,96(%rsp) + xorq %r8,%r14 + andq %rax,%r15 + rorq $4,%r13 + addq %rdx,%r12 + xorq %rcx,%r15 + rorq $6,%r14 + xorq %rax,%r13 + addq %r15,%r12 + movq %r8,%r15 + addq (%rbp),%r12 + xorq %r8,%r14 + xorq %r9,%r15 + rorq $14,%r13 + movq %r9,%rdx + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + xorq %rdi,%rdx + addq %r12,%r11 + addq %r12,%rdx + leaq 8(%rbp),%rbp + movq 112(%rsp),%r13 + movq 88(%rsp),%rdi + movq %r13,%r12 + rorq $7,%r13 + 
addq %r14,%rdx + movq %rdi,%r14 + rorq $42,%rdi + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 48(%rsp),%r12 + addq 104(%rsp),%r12 + movq %r11,%r13 + addq %rdi,%r12 + movq %rdx,%r14 + rorq $23,%r13 + movq %rax,%rdi + xorq %r11,%r13 + rorq $5,%r14 + xorq %rbx,%rdi + movq %r12,104(%rsp) + xorq %rdx,%r14 + andq %r11,%rdi + rorq $4,%r13 + addq %rcx,%r12 + xorq %rbx,%rdi + rorq $6,%r14 + xorq %r11,%r13 + addq %rdi,%r12 + movq %rdx,%rdi + addq (%rbp),%r12 + xorq %rdx,%r14 + xorq %r8,%rdi + rorq $14,%r13 + movq %r8,%rcx + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + xorq %r15,%rcx + addq %r12,%r10 + addq %r12,%rcx + leaq 24(%rbp),%rbp + movq 120(%rsp),%r13 + movq 96(%rsp),%r15 + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rcx + movq %r15,%r14 + rorq $42,%r15 + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%r15 + shrq $6,%r14 + rorq $19,%r15 + xorq %r13,%r12 + xorq %r14,%r15 + addq 56(%rsp),%r12 + addq 112(%rsp),%r12 + movq %r10,%r13 + addq %r15,%r12 + movq %rcx,%r14 + rorq $23,%r13 + movq %r11,%r15 + xorq %r10,%r13 + rorq $5,%r14 + xorq %rax,%r15 + movq %r12,112(%rsp) + xorq %rcx,%r14 + andq %r10,%r15 + rorq $4,%r13 + addq %rbx,%r12 + xorq %rax,%r15 + rorq $6,%r14 + xorq %r10,%r13 + addq %r15,%r12 + movq %rcx,%r15 + addq (%rbp),%r12 + xorq %rcx,%r14 + xorq %rdx,%r15 + rorq $14,%r13 + movq %rdx,%rbx + andq %r15,%rdi + rorq $28,%r14 + addq %r13,%r12 + xorq %rdi,%rbx + addq %r12,%r9 + addq %r12,%rbx + leaq 8(%rbp),%rbp + movq 0(%rsp),%r13 + movq 104(%rsp),%rdi + movq %r13,%r12 + rorq $7,%r13 + addq %r14,%rbx + movq %rdi,%r14 + rorq $42,%rdi + xorq %r12,%r13 + shrq $7,%r12 + rorq $1,%r13 + xorq %r14,%rdi + shrq $6,%r14 + rorq $19,%rdi + xorq %r13,%r12 + xorq %r14,%rdi + addq 64(%rsp),%r12 + addq 120(%rsp),%r12 + movq %r9,%r13 + addq %rdi,%r12 + movq %rbx,%r14 + rorq $23,%r13 + movq %r10,%rdi + xorq %r9,%r13 + rorq $5,%r14 + xorq %r11,%rdi + movq %r12,120(%rsp) + xorq 
%rbx,%r14 + andq %r9,%rdi + rorq $4,%r13 + addq %rax,%r12 + xorq %r11,%rdi + rorq $6,%r14 + xorq %r9,%r13 + addq %rdi,%r12 + movq %rbx,%rdi + addq (%rbp),%r12 + xorq %rbx,%r14 + xorq %rcx,%rdi + rorq $14,%r13 + movq %rcx,%rax + andq %rdi,%r15 + rorq $28,%r14 + addq %r13,%r12 + xorq %r15,%rax + addq %r12,%r8 + addq %r12,%rax + leaq 24(%rbp),%rbp + cmpb $0,7(%rbp) + jnz .Lrounds_16_xx + movq 128+0(%rsp),%rdi + addq %r14,%rax + leaq 128(%rsi),%rsi + addq 0(%rdi),%rax + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 56(%rdi),%r11 + cmpq 128+16(%rsp),%rsi + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + jb .Lloop + movq 152(%rsp),%rsi +.cfi_def_cfa %rsi,8 + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue: + RET +.cfi_endproc +SET_SIZE(zfs_sha512_transform_x64) + +ENTRY_ALIGN(zfs_sha512_transform_avx, 64) +.cfi_startproc + ENDBR + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + shlq $4,%rdx + subq $160,%rsp + leaq (%rsi,%rdx,8),%rdx + andq $-64,%rsp + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %rax,152(%rsp) +.cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x08 +.Lprologue_avx: + + vzeroupper + movq 0(%rdi),%rax + movq 8(%rdi),%rbx + movq 16(%rdi),%rcx + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + jmp .Lloop_avx +.balign 16 
+.Lloop_avx: + vmovdqa K512+1280(%rip),%xmm11 + vmovdqu 0(%rsi),%xmm0 + leaq K512+128(%rip),%rbp + vmovdqu 16(%rsi),%xmm1 + vmovdqu 32(%rsi),%xmm2 + vpshufb %xmm11,%xmm0,%xmm0 + vmovdqu 48(%rsi),%xmm3 + vpshufb %xmm11,%xmm1,%xmm1 + vmovdqu 64(%rsi),%xmm4 + vpshufb %xmm11,%xmm2,%xmm2 + vmovdqu 80(%rsi),%xmm5 + vpshufb %xmm11,%xmm3,%xmm3 + vmovdqu 96(%rsi),%xmm6 + vpshufb %xmm11,%xmm4,%xmm4 + vmovdqu 112(%rsi),%xmm7 + vpshufb %xmm11,%xmm5,%xmm5 + vpaddq -128(%rbp),%xmm0,%xmm8 + vpshufb %xmm11,%xmm6,%xmm6 + vpaddq -96(%rbp),%xmm1,%xmm9 + vpshufb %xmm11,%xmm7,%xmm7 + vpaddq -64(%rbp),%xmm2,%xmm10 + vpaddq -32(%rbp),%xmm3,%xmm11 + vmovdqa %xmm8,0(%rsp) + vpaddq 0(%rbp),%xmm4,%xmm8 + vmovdqa %xmm9,16(%rsp) + vpaddq 32(%rbp),%xmm5,%xmm9 + vmovdqa %xmm10,32(%rsp) + vpaddq 64(%rbp),%xmm6,%xmm10 + vmovdqa %xmm11,48(%rsp) + vpaddq 96(%rbp),%xmm7,%xmm11 + vmovdqa %xmm8,64(%rsp) + movq %rax,%r14 + vmovdqa %xmm9,80(%rsp) + movq %rbx,%rdi + vmovdqa %xmm10,96(%rsp) + xorq %rcx,%rdi + vmovdqa %xmm11,112(%rsp) + movq %r8,%r13 + jmp .Lavx_00_47 + +.balign 16 +.Lavx_00_47: + addq $256,%rbp + vpalignr $8,%xmm0,%xmm1,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%rax + vpalignr $8,%xmm4,%xmm5,%xmm11 + movq %r9,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %r8,%r13 + xorq %r10,%r12 + vpaddq %xmm11,%xmm0,%xmm0 + shrdq $4,%r13,%r13 + xorq %rax,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %r8,%r12 + xorq %r8,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 0(%rsp),%r11 + movq %rax,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %r10,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %rbx,%r15 + addq %r12,%r11 + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %rax,%r14 + addq %r13,%r11 + vpxor %xmm10,%xmm8,%xmm8 + xorq %rbx,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm7,%xmm11 + addq %r11,%rdx + addq %rdi,%r11 + vpxor %xmm9,%xmm8,%xmm8 + movq %rdx,%r13 + addq %r11,%r14 + vpsllq $3,%xmm7,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%r11 + vpaddq %xmm8,%xmm0,%xmm0 + 
movq %r8,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm7,%xmm9 + xorq %rdx,%r13 + xorq %r9,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %r11,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %rdx,%r12 + xorq %rdx,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 8(%rsp),%r10 + movq %r11,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %r9,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %rax,%rdi + addq %r12,%r10 + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm0,%xmm0 + xorq %r11,%r14 + addq %r13,%r10 + vpaddq -128(%rbp),%xmm0,%xmm10 + xorq %rax,%r15 + shrdq $28,%r14,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + vmovdqa %xmm10,0(%rsp) + vpalignr $8,%xmm1,%xmm2,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%r10 + vpalignr $8,%xmm5,%xmm6,%xmm11 + movq %rdx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %rcx,%r13 + xorq %r8,%r12 + vpaddq %xmm11,%xmm1,%xmm1 + shrdq $4,%r13,%r13 + xorq %r10,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %rcx,%r12 + xorq %rcx,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 16(%rsp),%r9 + movq %r10,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %r8,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %r11,%r15 + addq %r12,%r9 + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %r10,%r14 + addq %r13,%r9 + vpxor %xmm10,%xmm8,%xmm8 + xorq %r11,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm0,%xmm11 + addq %r9,%rbx + addq %rdi,%r9 + vpxor %xmm9,%xmm8,%xmm8 + movq %rbx,%r13 + addq %r9,%r14 + vpsllq $3,%xmm0,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%r9 + vpaddq %xmm8,%xmm1,%xmm1 + movq %rcx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm0,%xmm9 + xorq %rbx,%r13 + xorq %rdx,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %r9,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %rbx,%r12 + xorq %rbx,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 24(%rsp),%r8 + movq %r9,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %rdx,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq 
%r10,%rdi + addq %r12,%r8 + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm1,%xmm1 + xorq %r9,%r14 + addq %r13,%r8 + vpaddq -96(%rbp),%xmm1,%xmm10 + xorq %r10,%r15 + shrdq $28,%r14,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + vmovdqa %xmm10,16(%rsp) + vpalignr $8,%xmm2,%xmm3,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%r8 + vpalignr $8,%xmm6,%xmm7,%xmm11 + movq %rbx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %rax,%r13 + xorq %rcx,%r12 + vpaddq %xmm11,%xmm2,%xmm2 + shrdq $4,%r13,%r13 + xorq %r8,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %rax,%r12 + xorq %rax,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 32(%rsp),%rdx + movq %r8,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %rcx,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %r9,%r15 + addq %r12,%rdx + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %r8,%r14 + addq %r13,%rdx + vpxor %xmm10,%xmm8,%xmm8 + xorq %r9,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm1,%xmm11 + addq %rdx,%r11 + addq %rdi,%rdx + vpxor %xmm9,%xmm8,%xmm8 + movq %r11,%r13 + addq %rdx,%r14 + vpsllq $3,%xmm1,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%rdx + vpaddq %xmm8,%xmm2,%xmm2 + movq %rax,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm1,%xmm9 + xorq %r11,%r13 + xorq %rbx,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %rdx,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %r11,%r12 + xorq %r11,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 40(%rsp),%rcx + movq %rdx,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %rbx,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %r8,%rdi + addq %r12,%rcx + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm2,%xmm2 + xorq %rdx,%r14 + addq %r13,%rcx + vpaddq -64(%rbp),%xmm2,%xmm10 + xorq %r8,%r15 + shrdq $28,%r14,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + vmovdqa %xmm10,32(%rsp) + vpalignr $8,%xmm3,%xmm4,%xmm8 + shrdq $23,%r13,%r13 + movq 
%r14,%rcx + vpalignr $8,%xmm7,%xmm0,%xmm11 + movq %r11,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %r10,%r13 + xorq %rax,%r12 + vpaddq %xmm11,%xmm3,%xmm3 + shrdq $4,%r13,%r13 + xorq %rcx,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %r10,%r12 + xorq %r10,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 48(%rsp),%rbx + movq %rcx,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %rax,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %rdx,%r15 + addq %r12,%rbx + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %rcx,%r14 + addq %r13,%rbx + vpxor %xmm10,%xmm8,%xmm8 + xorq %rdx,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm2,%xmm11 + addq %rbx,%r9 + addq %rdi,%rbx + vpxor %xmm9,%xmm8,%xmm8 + movq %r9,%r13 + addq %rbx,%r14 + vpsllq $3,%xmm2,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%rbx + vpaddq %xmm8,%xmm3,%xmm3 + movq %r10,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm2,%xmm9 + xorq %r9,%r13 + xorq %r11,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %rbx,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %r9,%r12 + xorq %r9,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 56(%rsp),%rax + movq %rbx,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %r11,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %rcx,%rdi + addq %r12,%rax + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm3,%xmm3 + xorq %rbx,%r14 + addq %r13,%rax + vpaddq -32(%rbp),%xmm3,%xmm10 + xorq %rcx,%r15 + shrdq $28,%r14,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + vmovdqa %xmm10,48(%rsp) + vpalignr $8,%xmm4,%xmm5,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%rax + vpalignr $8,%xmm0,%xmm1,%xmm11 + movq %r9,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %r8,%r13 + xorq %r10,%r12 + vpaddq %xmm11,%xmm4,%xmm4 + shrdq $4,%r13,%r13 + xorq %rax,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %r8,%r12 + xorq %r8,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 64(%rsp),%r11 + movq %rax,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %r10,%r12 + shrdq 
$6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %rbx,%r15 + addq %r12,%r11 + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %rax,%r14 + addq %r13,%r11 + vpxor %xmm10,%xmm8,%xmm8 + xorq %rbx,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm3,%xmm11 + addq %r11,%rdx + addq %rdi,%r11 + vpxor %xmm9,%xmm8,%xmm8 + movq %rdx,%r13 + addq %r11,%r14 + vpsllq $3,%xmm3,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%r11 + vpaddq %xmm8,%xmm4,%xmm4 + movq %r8,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm3,%xmm9 + xorq %rdx,%r13 + xorq %r9,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %r11,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %rdx,%r12 + xorq %rdx,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 72(%rsp),%r10 + movq %r11,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %r9,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %rax,%rdi + addq %r12,%r10 + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm4,%xmm4 + xorq %r11,%r14 + addq %r13,%r10 + vpaddq 0(%rbp),%xmm4,%xmm10 + xorq %rax,%r15 + shrdq $28,%r14,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + vmovdqa %xmm10,64(%rsp) + vpalignr $8,%xmm5,%xmm6,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%r10 + vpalignr $8,%xmm1,%xmm2,%xmm11 + movq %rdx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %rcx,%r13 + xorq %r8,%r12 + vpaddq %xmm11,%xmm5,%xmm5 + shrdq $4,%r13,%r13 + xorq %r10,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %rcx,%r12 + xorq %rcx,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 80(%rsp),%r9 + movq %r10,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %r8,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %r11,%r15 + addq %r12,%r9 + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %r10,%r14 + addq %r13,%r9 + vpxor %xmm10,%xmm8,%xmm8 + xorq %r11,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm4,%xmm11 + addq %r9,%rbx + addq %rdi,%r9 + vpxor %xmm9,%xmm8,%xmm8 + movq %rbx,%r13 + addq %r9,%r14 + 
vpsllq $3,%xmm4,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%r9 + vpaddq %xmm8,%xmm5,%xmm5 + movq %rcx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm4,%xmm9 + xorq %rbx,%r13 + xorq %rdx,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %r9,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %rbx,%r12 + xorq %rbx,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 88(%rsp),%r8 + movq %r9,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %rdx,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %r10,%rdi + addq %r12,%r8 + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm5,%xmm5 + xorq %r9,%r14 + addq %r13,%r8 + vpaddq 32(%rbp),%xmm5,%xmm10 + xorq %r10,%r15 + shrdq $28,%r14,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + vmovdqa %xmm10,80(%rsp) + vpalignr $8,%xmm6,%xmm7,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%r8 + vpalignr $8,%xmm2,%xmm3,%xmm11 + movq %rbx,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %rax,%r13 + xorq %rcx,%r12 + vpaddq %xmm11,%xmm6,%xmm6 + shrdq $4,%r13,%r13 + xorq %r8,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %rax,%r12 + xorq %rax,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 96(%rsp),%rdx + movq %r8,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %rcx,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %r9,%r15 + addq %r12,%rdx + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %r8,%r14 + addq %r13,%rdx + vpxor %xmm10,%xmm8,%xmm8 + xorq %r9,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm5,%xmm11 + addq %rdx,%r11 + addq %rdi,%rdx + vpxor %xmm9,%xmm8,%xmm8 + movq %r11,%r13 + addq %rdx,%r14 + vpsllq $3,%xmm5,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%rdx + vpaddq %xmm8,%xmm6,%xmm6 + movq %rax,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm5,%xmm9 + xorq %r11,%r13 + xorq %rbx,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %rdx,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %r11,%r12 + xorq %r11,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 104(%rsp),%rcx + movq %rdx,%rdi + vpsrlq 
$42,%xmm9,%xmm9 + xorq %rbx,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %r8,%rdi + addq %r12,%rcx + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm6,%xmm6 + xorq %rdx,%r14 + addq %r13,%rcx + vpaddq 64(%rbp),%xmm6,%xmm10 + xorq %r8,%r15 + shrdq $28,%r14,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + vmovdqa %xmm10,96(%rsp) + vpalignr $8,%xmm7,%xmm0,%xmm8 + shrdq $23,%r13,%r13 + movq %r14,%rcx + vpalignr $8,%xmm3,%xmm4,%xmm11 + movq %r11,%r12 + shrdq $5,%r14,%r14 + vpsrlq $1,%xmm8,%xmm10 + xorq %r10,%r13 + xorq %rax,%r12 + vpaddq %xmm11,%xmm7,%xmm7 + shrdq $4,%r13,%r13 + xorq %rcx,%r14 + vpsrlq $7,%xmm8,%xmm11 + andq %r10,%r12 + xorq %r10,%r13 + vpsllq $56,%xmm8,%xmm9 + addq 112(%rsp),%rbx + movq %rcx,%r15 + vpxor %xmm10,%xmm11,%xmm8 + xorq %rax,%r12 + shrdq $6,%r14,%r14 + vpsrlq $7,%xmm10,%xmm10 + xorq %rdx,%r15 + addq %r12,%rbx + vpxor %xmm9,%xmm8,%xmm8 + shrdq $14,%r13,%r13 + andq %r15,%rdi + vpsllq $7,%xmm9,%xmm9 + xorq %rcx,%r14 + addq %r13,%rbx + vpxor %xmm10,%xmm8,%xmm8 + xorq %rdx,%rdi + shrdq $28,%r14,%r14 + vpsrlq $6,%xmm6,%xmm11 + addq %rbx,%r9 + addq %rdi,%rbx + vpxor %xmm9,%xmm8,%xmm8 + movq %r9,%r13 + addq %rbx,%r14 + vpsllq $3,%xmm6,%xmm10 + shrdq $23,%r13,%r13 + movq %r14,%rbx + vpaddq %xmm8,%xmm7,%xmm7 + movq %r10,%r12 + shrdq $5,%r14,%r14 + vpsrlq $19,%xmm6,%xmm9 + xorq %r9,%r13 + xorq %r11,%r12 + vpxor %xmm10,%xmm11,%xmm11 + shrdq $4,%r13,%r13 + xorq %rbx,%r14 + vpsllq $42,%xmm10,%xmm10 + andq %r9,%r12 + xorq %r9,%r13 + vpxor %xmm9,%xmm11,%xmm11 + addq 120(%rsp),%rax + movq %rbx,%rdi + vpsrlq $42,%xmm9,%xmm9 + xorq %r11,%r12 + shrdq $6,%r14,%r14 + vpxor %xmm10,%xmm11,%xmm11 + xorq %rcx,%rdi + addq %r12,%rax + vpxor %xmm9,%xmm11,%xmm11 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + vpaddq %xmm11,%xmm7,%xmm7 + xorq %rbx,%r14 + addq %r13,%rax + vpaddq 96(%rbp),%xmm7,%xmm10 + xorq %rcx,%r15 + shrdq $28,%r14,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq 
%rax,%r14 + vmovdqa %xmm10,112(%rsp) + cmpb $0,135(%rbp) + jne .Lavx_00_47 + shrdq $23,%r13,%r13 + movq %r14,%rax + movq %r9,%r12 + shrdq $5,%r14,%r14 + xorq %r8,%r13 + xorq %r10,%r12 + shrdq $4,%r13,%r13 + xorq %rax,%r14 + andq %r8,%r12 + xorq %r8,%r13 + addq 0(%rsp),%r11 + movq %rax,%r15 + xorq %r10,%r12 + shrdq $6,%r14,%r14 + xorq %rbx,%r15 + addq %r12,%r11 + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %rax,%r14 + addq %r13,%r11 + xorq %rbx,%rdi + shrdq $28,%r14,%r14 + addq %r11,%rdx + addq %rdi,%r11 + movq %rdx,%r13 + addq %r11,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r11 + movq %r8,%r12 + shrdq $5,%r14,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + shrdq $4,%r13,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + addq 8(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + shrdq $6,%r14,%r14 + xorq %rax,%rdi + addq %r12,%r10 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + shrdq $28,%r14,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r10 + movq %rdx,%r12 + shrdq $5,%r14,%r14 + xorq %rcx,%r13 + xorq %r8,%r12 + shrdq $4,%r13,%r13 + xorq %r10,%r14 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 16(%rsp),%r9 + movq %r10,%r15 + xorq %r8,%r12 + shrdq $6,%r14,%r14 + xorq %r11,%r15 + addq %r12,%r9 + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %r10,%r14 + addq %r13,%r9 + xorq %r11,%rdi + shrdq $28,%r14,%r14 + addq %r9,%rbx + addq %rdi,%r9 + movq %rbx,%r13 + addq %r9,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r9 + movq %rcx,%r12 + shrdq $5,%r14,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + shrdq $4,%r13,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + addq 24(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + shrdq $6,%r14,%r14 + xorq %r10,%rdi + addq %r12,%r8 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + shrdq $28,%r14,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r8 + movq %rbx,%r12 + 
shrdq $5,%r14,%r14 + xorq %rax,%r13 + xorq %rcx,%r12 + shrdq $4,%r13,%r13 + xorq %r8,%r14 + andq %rax,%r12 + xorq %rax,%r13 + addq 32(%rsp),%rdx + movq %r8,%r15 + xorq %rcx,%r12 + shrdq $6,%r14,%r14 + xorq %r9,%r15 + addq %r12,%rdx + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %r8,%r14 + addq %r13,%rdx + xorq %r9,%rdi + shrdq $28,%r14,%r14 + addq %rdx,%r11 + addq %rdi,%rdx + movq %r11,%r13 + addq %rdx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rdx + movq %rax,%r12 + shrdq $5,%r14,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + shrdq $4,%r13,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + addq 40(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + shrdq $6,%r14,%r14 + xorq %r8,%rdi + addq %r12,%rcx + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + shrdq $28,%r14,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rcx + movq %r11,%r12 + shrdq $5,%r14,%r14 + xorq %r10,%r13 + xorq %rax,%r12 + shrdq $4,%r13,%r13 + xorq %rcx,%r14 + andq %r10,%r12 + xorq %r10,%r13 + addq 48(%rsp),%rbx + movq %rcx,%r15 + xorq %rax,%r12 + shrdq $6,%r14,%r14 + xorq %rdx,%r15 + addq %r12,%rbx + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %rcx,%r14 + addq %r13,%rbx + xorq %rdx,%rdi + shrdq $28,%r14,%r14 + addq %rbx,%r9 + addq %rdi,%rbx + movq %r9,%r13 + addq %rbx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rbx + movq %r10,%r12 + shrdq $5,%r14,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + shrdq $4,%r13,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + addq 56(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + shrdq $6,%r14,%r14 + xorq %rcx,%rdi + addq %r12,%rax + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + shrdq $28,%r14,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rax + movq %r9,%r12 + shrdq $5,%r14,%r14 + xorq %r8,%r13 + xorq %r10,%r12 + shrdq $4,%r13,%r13 + xorq %rax,%r14 + andq %r8,%r12 + xorq %r8,%r13 
+ addq 64(%rsp),%r11 + movq %rax,%r15 + xorq %r10,%r12 + shrdq $6,%r14,%r14 + xorq %rbx,%r15 + addq %r12,%r11 + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %rax,%r14 + addq %r13,%r11 + xorq %rbx,%rdi + shrdq $28,%r14,%r14 + addq %r11,%rdx + addq %rdi,%r11 + movq %rdx,%r13 + addq %r11,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r11 + movq %r8,%r12 + shrdq $5,%r14,%r14 + xorq %rdx,%r13 + xorq %r9,%r12 + shrdq $4,%r13,%r13 + xorq %r11,%r14 + andq %rdx,%r12 + xorq %rdx,%r13 + addq 72(%rsp),%r10 + movq %r11,%rdi + xorq %r9,%r12 + shrdq $6,%r14,%r14 + xorq %rax,%rdi + addq %r12,%r10 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %r11,%r14 + addq %r13,%r10 + xorq %rax,%r15 + shrdq $28,%r14,%r14 + addq %r10,%rcx + addq %r15,%r10 + movq %rcx,%r13 + addq %r10,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r10 + movq %rdx,%r12 + shrdq $5,%r14,%r14 + xorq %rcx,%r13 + xorq %r8,%r12 + shrdq $4,%r13,%r13 + xorq %r10,%r14 + andq %rcx,%r12 + xorq %rcx,%r13 + addq 80(%rsp),%r9 + movq %r10,%r15 + xorq %r8,%r12 + shrdq $6,%r14,%r14 + xorq %r11,%r15 + addq %r12,%r9 + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %r10,%r14 + addq %r13,%r9 + xorq %r11,%rdi + shrdq $28,%r14,%r14 + addq %r9,%rbx + addq %rdi,%r9 + movq %rbx,%r13 + addq %r9,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r9 + movq %rcx,%r12 + shrdq $5,%r14,%r14 + xorq %rbx,%r13 + xorq %rdx,%r12 + shrdq $4,%r13,%r13 + xorq %r9,%r14 + andq %rbx,%r12 + xorq %rbx,%r13 + addq 88(%rsp),%r8 + movq %r9,%rdi + xorq %rdx,%r12 + shrdq $6,%r14,%r14 + xorq %r10,%rdi + addq %r12,%r8 + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %r9,%r14 + addq %r13,%r8 + xorq %r10,%r15 + shrdq $28,%r14,%r14 + addq %r8,%rax + addq %r15,%r8 + movq %rax,%r13 + addq %r8,%r14 + shrdq $23,%r13,%r13 + movq %r14,%r8 + movq %rbx,%r12 + shrdq $5,%r14,%r14 + xorq %rax,%r13 + xorq %rcx,%r12 + shrdq $4,%r13,%r13 + xorq %r8,%r14 + andq %rax,%r12 + xorq %rax,%r13 + addq 96(%rsp),%rdx + movq %r8,%r15 + xorq %rcx,%r12 + shrdq $6,%r14,%r14 + xorq %r9,%r15 + addq %r12,%rdx + shrdq $14,%r13,%r13 
+ andq %r15,%rdi + xorq %r8,%r14 + addq %r13,%rdx + xorq %r9,%rdi + shrdq $28,%r14,%r14 + addq %rdx,%r11 + addq %rdi,%rdx + movq %r11,%r13 + addq %rdx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rdx + movq %rax,%r12 + shrdq $5,%r14,%r14 + xorq %r11,%r13 + xorq %rbx,%r12 + shrdq $4,%r13,%r13 + xorq %rdx,%r14 + andq %r11,%r12 + xorq %r11,%r13 + addq 104(%rsp),%rcx + movq %rdx,%rdi + xorq %rbx,%r12 + shrdq $6,%r14,%r14 + xorq %r8,%rdi + addq %r12,%rcx + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %rdx,%r14 + addq %r13,%rcx + xorq %r8,%r15 + shrdq $28,%r14,%r14 + addq %rcx,%r10 + addq %r15,%rcx + movq %r10,%r13 + addq %rcx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rcx + movq %r11,%r12 + shrdq $5,%r14,%r14 + xorq %r10,%r13 + xorq %rax,%r12 + shrdq $4,%r13,%r13 + xorq %rcx,%r14 + andq %r10,%r12 + xorq %r10,%r13 + addq 112(%rsp),%rbx + movq %rcx,%r15 + xorq %rax,%r12 + shrdq $6,%r14,%r14 + xorq %rdx,%r15 + addq %r12,%rbx + shrdq $14,%r13,%r13 + andq %r15,%rdi + xorq %rcx,%r14 + addq %r13,%rbx + xorq %rdx,%rdi + shrdq $28,%r14,%r14 + addq %rbx,%r9 + addq %rdi,%rbx + movq %r9,%r13 + addq %rbx,%r14 + shrdq $23,%r13,%r13 + movq %r14,%rbx + movq %r10,%r12 + shrdq $5,%r14,%r14 + xorq %r9,%r13 + xorq %r11,%r12 + shrdq $4,%r13,%r13 + xorq %rbx,%r14 + andq %r9,%r12 + xorq %r9,%r13 + addq 120(%rsp),%rax + movq %rbx,%rdi + xorq %r11,%r12 + shrdq $6,%r14,%r14 + xorq %rcx,%rdi + addq %r12,%rax + shrdq $14,%r13,%r13 + andq %rdi,%r15 + xorq %rbx,%r14 + addq %r13,%rax + xorq %rcx,%r15 + shrdq $28,%r14,%r14 + addq %rax,%r8 + addq %r15,%rax + movq %r8,%r13 + addq %rax,%r14 + movq 128+0(%rsp),%rdi + movq %r14,%rax + + addq 0(%rdi),%rax + leaq 128(%rsi),%rsi + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 56(%rdi),%r11 + + cmpq 128+16(%rsp),%rsi + + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + jb 
.Lloop_avx + + movq 152(%rsp),%rsi +.cfi_def_cfa %rsi,8 + vzeroupper + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue_avx: + RET +.cfi_endproc +SET_SIZE(zfs_sha512_transform_avx) + +ENTRY_ALIGN(zfs_sha512_transform_avx2, 64) +.cfi_startproc + ENDBR + movq %rsp,%rax +.cfi_def_cfa_register %rax + pushq %rbx +.cfi_offset %rbx,-16 + pushq %rbp +.cfi_offset %rbp,-24 + pushq %r12 +.cfi_offset %r12,-32 + pushq %r13 +.cfi_offset %r13,-40 + pushq %r14 +.cfi_offset %r14,-48 + pushq %r15 +.cfi_offset %r15,-56 + subq $1312,%rsp + shlq $4,%rdx + andq $-2048,%rsp + leaq (%rsi,%rdx,8),%rdx + addq $1152,%rsp + movq %rdi,128+0(%rsp) + movq %rsi,128+8(%rsp) + movq %rdx,128+16(%rsp) + movq %rax,152(%rsp) +.cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x08 +.Lprologue_avx2: + + vzeroupper + subq $-128,%rsi + movq 0(%rdi),%rax + movq %rsi,%r12 + movq 8(%rdi),%rbx + cmpq %rdx,%rsi + movq 16(%rdi),%rcx + cmoveq %rsp,%r12 + movq 24(%rdi),%rdx + movq 32(%rdi),%r8 + movq 40(%rdi),%r9 + movq 48(%rdi),%r10 + movq 56(%rdi),%r11 + jmp .Loop_avx2 +.balign 16 +.Loop_avx2: + vmovdqu -128(%rsi),%xmm0 + vmovdqu -128+16(%rsi),%xmm1 + vmovdqu -128+32(%rsi),%xmm2 + leaq K512+128(%rip),%rbp + vmovdqu -128+48(%rsi),%xmm3 + vmovdqu -128+64(%rsi),%xmm4 + vmovdqu -128+80(%rsi),%xmm5 + vmovdqu -128+96(%rsi),%xmm6 + vmovdqu -128+112(%rsi),%xmm7 + + vmovdqa 1152(%rbp),%ymm10 + vinserti128 $1,(%r12),%ymm0,%ymm0 + vinserti128 $1,16(%r12),%ymm1,%ymm1 + vpshufb %ymm10,%ymm0,%ymm0 + vinserti128 $1,32(%r12),%ymm2,%ymm2 + vpshufb %ymm10,%ymm1,%ymm1 + vinserti128 $1,48(%r12),%ymm3,%ymm3 + vpshufb %ymm10,%ymm2,%ymm2 + vinserti128 $1,64(%r12),%ymm4,%ymm4 + vpshufb %ymm10,%ymm3,%ymm3 + vinserti128 $1,80(%r12),%ymm5,%ymm5 + vpshufb %ymm10,%ymm4,%ymm4 + vinserti128 
$1,96(%r12),%ymm6,%ymm6 + vpshufb %ymm10,%ymm5,%ymm5 + vinserti128 $1,112(%r12),%ymm7,%ymm7 + + vpaddq -128(%rbp),%ymm0,%ymm8 + vpshufb %ymm10,%ymm6,%ymm6 + vpaddq -96(%rbp),%ymm1,%ymm9 + vpshufb %ymm10,%ymm7,%ymm7 + vpaddq -64(%rbp),%ymm2,%ymm10 + vpaddq -32(%rbp),%ymm3,%ymm11 + vmovdqa %ymm8,0(%rsp) + vpaddq 0(%rbp),%ymm4,%ymm8 + vmovdqa %ymm9,32(%rsp) + vpaddq 32(%rbp),%ymm5,%ymm9 + vmovdqa %ymm10,64(%rsp) + vpaddq 64(%rbp),%ymm6,%ymm10 + vmovdqa %ymm11,96(%rsp) + + movq 152(%rsp),%rdi +.cfi_def_cfa %rdi,8 + leaq -128(%rsp),%rsp + + + + movq %rdi,-8(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x78,0x06,0x23,0x08 + vpaddq 96(%rbp),%ymm7,%ymm11 + vmovdqa %ymm8,0(%rsp) + xorq %r14,%r14 + vmovdqa %ymm9,32(%rsp) + movq %rbx,%rdi + vmovdqa %ymm10,64(%rsp) + xorq %rcx,%rdi + vmovdqa %ymm11,96(%rsp) + movq %r9,%r12 + addq $32*8,%rbp + jmp .Lavx2_00_47 + +.balign 16 +.Lavx2_00_47: + leaq -128(%rsp),%rsp +.cfi_escape 0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08 + + pushq 128-8(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x00,0x06,0x23,0x08 + leaq 8(%rsp),%rsp +.cfi_escape 0x0f,0x05,0x77,0x78,0x06,0x23,0x08 + vpalignr $8,%ymm0,%ymm1,%ymm8 + addq 0+256(%rsp),%r11 + andq %r8,%r12 + rorxq $41,%r8,%r13 + vpalignr $8,%ymm4,%ymm5,%ymm11 + rorxq $18,%r8,%r15 + leaq (%rax,%r14,1),%rax + leaq (%r11,%r12,1),%r11 + vpsrlq $1,%ymm8,%ymm10 + andnq %r10,%r8,%r12 + xorq %r15,%r13 + rorxq $14,%r8,%r14 + vpaddq %ymm11,%ymm0,%ymm0 + vpsrlq $7,%ymm8,%ymm11 + leaq (%r11,%r12,1),%r11 + xorq %r14,%r13 + movq %rax,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%rax,%r12 + leaq (%r11,%r13,1),%r11 + xorq %rbx,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%rax,%r14 + rorxq $28,%rax,%r13 + leaq (%rdx,%r11,1),%rdx + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rbx,%rdi + vpsrlq $6,%ymm7,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%r11,%rdi,1),%r11 + movq %r8,%r12 + vpsllq $3,%ymm7,%ymm10 + vpaddq %ymm8,%ymm0,%ymm0 + addq 
8+256(%rsp),%r10 + andq %rdx,%r12 + rorxq $41,%rdx,%r13 + vpsrlq $19,%ymm7,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%rdx,%rdi + leaq (%r11,%r14,1),%r11 + leaq (%r10,%r12,1),%r10 + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %r9,%rdx,%r12 + xorq %rdi,%r13 + rorxq $14,%rdx,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%r10,%r12,1),%r10 + xorq %r14,%r13 + movq %r11,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%r11,%r12 + leaq (%r10,%r13,1),%r10 + xorq %rax,%rdi + vpaddq %ymm11,%ymm0,%ymm0 + rorxq $34,%r11,%r14 + rorxq $28,%r11,%r13 + leaq (%rcx,%r10,1),%rcx + vpaddq -128(%rbp),%ymm0,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rax,%r15 + xorq %r13,%r14 + leaq (%r10,%r15,1),%r10 + movq %rdx,%r12 + vmovdqa %ymm10,0(%rsp) + vpalignr $8,%ymm1,%ymm2,%ymm8 + addq 32+256(%rsp),%r9 + andq %rcx,%r12 + rorxq $41,%rcx,%r13 + vpalignr $8,%ymm5,%ymm6,%ymm11 + rorxq $18,%rcx,%r15 + leaq (%r10,%r14,1),%r10 + leaq (%r9,%r12,1),%r9 + vpsrlq $1,%ymm8,%ymm10 + andnq %r8,%rcx,%r12 + xorq %r15,%r13 + rorxq $14,%rcx,%r14 + vpaddq %ymm11,%ymm1,%ymm1 + vpsrlq $7,%ymm8,%ymm11 + leaq (%r9,%r12,1),%r9 + xorq %r14,%r13 + movq %r10,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%r10,%r12 + leaq (%r9,%r13,1),%r9 + xorq %r11,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%r10,%r14 + rorxq $28,%r10,%r13 + leaq (%rbx,%r9,1),%rbx + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r11,%rdi + vpsrlq $6,%ymm0,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%r9,%rdi,1),%r9 + movq %rcx,%r12 + vpsllq $3,%ymm0,%ymm10 + vpaddq %ymm8,%ymm1,%ymm1 + addq 40+256(%rsp),%r8 + andq %rbx,%r12 + rorxq $41,%rbx,%r13 + vpsrlq $19,%ymm0,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%rbx,%rdi + leaq (%r9,%r14,1),%r9 + leaq (%r8,%r12,1),%r8 + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %rdx,%rbx,%r12 + xorq %rdi,%r13 + rorxq $14,%rbx,%r14 + vpsrlq $42,%ymm9,%ymm9 
+ vpxor %ymm10,%ymm11,%ymm11 + leaq (%r8,%r12,1),%r8 + xorq %r14,%r13 + movq %r9,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%r9,%r12 + leaq (%r8,%r13,1),%r8 + xorq %r10,%rdi + vpaddq %ymm11,%ymm1,%ymm1 + rorxq $34,%r9,%r14 + rorxq $28,%r9,%r13 + leaq (%rax,%r8,1),%rax + vpaddq -96(%rbp),%ymm1,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r10,%r15 + xorq %r13,%r14 + leaq (%r8,%r15,1),%r8 + movq %rbx,%r12 + vmovdqa %ymm10,32(%rsp) + vpalignr $8,%ymm2,%ymm3,%ymm8 + addq 64+256(%rsp),%rdx + andq %rax,%r12 + rorxq $41,%rax,%r13 + vpalignr $8,%ymm6,%ymm7,%ymm11 + rorxq $18,%rax,%r15 + leaq (%r8,%r14,1),%r8 + leaq (%rdx,%r12,1),%rdx + vpsrlq $1,%ymm8,%ymm10 + andnq %rcx,%rax,%r12 + xorq %r15,%r13 + rorxq $14,%rax,%r14 + vpaddq %ymm11,%ymm2,%ymm2 + vpsrlq $7,%ymm8,%ymm11 + leaq (%rdx,%r12,1),%rdx + xorq %r14,%r13 + movq %r8,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%r8,%r12 + leaq (%rdx,%r13,1),%rdx + xorq %r9,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%r8,%r14 + rorxq $28,%r8,%r13 + leaq (%r11,%rdx,1),%r11 + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r9,%rdi + vpsrlq $6,%ymm1,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%rdx,%rdi,1),%rdx + movq %rax,%r12 + vpsllq $3,%ymm1,%ymm10 + vpaddq %ymm8,%ymm2,%ymm2 + addq 72+256(%rsp),%rcx + andq %r11,%r12 + rorxq $41,%r11,%r13 + vpsrlq $19,%ymm1,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%r11,%rdi + leaq (%rdx,%r14,1),%rdx + leaq (%rcx,%r12,1),%rcx + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %rbx,%r11,%r12 + xorq %rdi,%r13 + rorxq $14,%r11,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%rcx,%r12,1),%rcx + xorq %r14,%r13 + movq %rdx,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%rdx,%r12 + leaq (%rcx,%r13,1),%rcx + xorq %r8,%rdi + vpaddq %ymm11,%ymm2,%ymm2 + rorxq $34,%rdx,%r14 + rorxq $28,%rdx,%r13 + leaq (%r10,%rcx,1),%r10 + vpaddq -64(%rbp),%ymm2,%ymm10 + andq %rdi,%r15 
+ xorq %r12,%r14 + xorq %r8,%r15 + xorq %r13,%r14 + leaq (%rcx,%r15,1),%rcx + movq %r11,%r12 + vmovdqa %ymm10,64(%rsp) + vpalignr $8,%ymm3,%ymm4,%ymm8 + addq 96+256(%rsp),%rbx + andq %r10,%r12 + rorxq $41,%r10,%r13 + vpalignr $8,%ymm7,%ymm0,%ymm11 + rorxq $18,%r10,%r15 + leaq (%rcx,%r14,1),%rcx + leaq (%rbx,%r12,1),%rbx + vpsrlq $1,%ymm8,%ymm10 + andnq %rax,%r10,%r12 + xorq %r15,%r13 + rorxq $14,%r10,%r14 + vpaddq %ymm11,%ymm3,%ymm3 + vpsrlq $7,%ymm8,%ymm11 + leaq (%rbx,%r12,1),%rbx + xorq %r14,%r13 + movq %rcx,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%rcx,%r12 + leaq (%rbx,%r13,1),%rbx + xorq %rdx,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%rcx,%r14 + rorxq $28,%rcx,%r13 + leaq (%r9,%rbx,1),%r9 + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rdx,%rdi + vpsrlq $6,%ymm2,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%rbx,%rdi,1),%rbx + movq %r10,%r12 + vpsllq $3,%ymm2,%ymm10 + vpaddq %ymm8,%ymm3,%ymm3 + addq 104+256(%rsp),%rax + andq %r9,%r12 + rorxq $41,%r9,%r13 + vpsrlq $19,%ymm2,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%r9,%rdi + leaq (%rbx,%r14,1),%rbx + leaq (%rax,%r12,1),%rax + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %r11,%r9,%r12 + xorq %rdi,%r13 + rorxq $14,%r9,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%rax,%r12,1),%rax + xorq %r14,%r13 + movq %rbx,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%rbx,%r12 + leaq (%rax,%r13,1),%rax + xorq %rcx,%rdi + vpaddq %ymm11,%ymm3,%ymm3 + rorxq $34,%rbx,%r14 + rorxq $28,%rbx,%r13 + leaq (%r8,%rax,1),%r8 + vpaddq -32(%rbp),%ymm3,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rcx,%r15 + xorq %r13,%r14 + leaq (%rax,%r15,1),%rax + movq %r9,%r12 + vmovdqa %ymm10,96(%rsp) + leaq -128(%rsp),%rsp +.cfi_escape 0x0f,0x06,0x77,0xf8,0x00,0x06,0x23,0x08 + + pushq 128-8(%rsp) +.cfi_escape 0x0f,0x05,0x77,0x00,0x06,0x23,0x08 + leaq 8(%rsp),%rsp +.cfi_escape 
0x0f,0x05,0x77,0x78,0x06,0x23,0x08 + vpalignr $8,%ymm4,%ymm5,%ymm8 + addq 0+256(%rsp),%r11 + andq %r8,%r12 + rorxq $41,%r8,%r13 + vpalignr $8,%ymm0,%ymm1,%ymm11 + rorxq $18,%r8,%r15 + leaq (%rax,%r14,1),%rax + leaq (%r11,%r12,1),%r11 + vpsrlq $1,%ymm8,%ymm10 + andnq %r10,%r8,%r12 + xorq %r15,%r13 + rorxq $14,%r8,%r14 + vpaddq %ymm11,%ymm4,%ymm4 + vpsrlq $7,%ymm8,%ymm11 + leaq (%r11,%r12,1),%r11 + xorq %r14,%r13 + movq %rax,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%rax,%r12 + leaq (%r11,%r13,1),%r11 + xorq %rbx,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%rax,%r14 + rorxq $28,%rax,%r13 + leaq (%rdx,%r11,1),%rdx + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rbx,%rdi + vpsrlq $6,%ymm3,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%r11,%rdi,1),%r11 + movq %r8,%r12 + vpsllq $3,%ymm3,%ymm10 + vpaddq %ymm8,%ymm4,%ymm4 + addq 8+256(%rsp),%r10 + andq %rdx,%r12 + rorxq $41,%rdx,%r13 + vpsrlq $19,%ymm3,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%rdx,%rdi + leaq (%r11,%r14,1),%r11 + leaq (%r10,%r12,1),%r10 + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %r9,%rdx,%r12 + xorq %rdi,%r13 + rorxq $14,%rdx,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%r10,%r12,1),%r10 + xorq %r14,%r13 + movq %r11,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%r11,%r12 + leaq (%r10,%r13,1),%r10 + xorq %rax,%rdi + vpaddq %ymm11,%ymm4,%ymm4 + rorxq $34,%r11,%r14 + rorxq $28,%r11,%r13 + leaq (%rcx,%r10,1),%rcx + vpaddq 0(%rbp),%ymm4,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rax,%r15 + xorq %r13,%r14 + leaq (%r10,%r15,1),%r10 + movq %rdx,%r12 + vmovdqa %ymm10,0(%rsp) + vpalignr $8,%ymm5,%ymm6,%ymm8 + addq 32+256(%rsp),%r9 + andq %rcx,%r12 + rorxq $41,%rcx,%r13 + vpalignr $8,%ymm1,%ymm2,%ymm11 + rorxq $18,%rcx,%r15 + leaq (%r10,%r14,1),%r10 + leaq (%r9,%r12,1),%r9 + vpsrlq $1,%ymm8,%ymm10 + andnq %r8,%rcx,%r12 + xorq %r15,%r13 + rorxq $14,%rcx,%r14 
+ vpaddq %ymm11,%ymm5,%ymm5 + vpsrlq $7,%ymm8,%ymm11 + leaq (%r9,%r12,1),%r9 + xorq %r14,%r13 + movq %r10,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%r10,%r12 + leaq (%r9,%r13,1),%r9 + xorq %r11,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%r10,%r14 + rorxq $28,%r10,%r13 + leaq (%rbx,%r9,1),%rbx + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r11,%rdi + vpsrlq $6,%ymm4,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%r9,%rdi,1),%r9 + movq %rcx,%r12 + vpsllq $3,%ymm4,%ymm10 + vpaddq %ymm8,%ymm5,%ymm5 + addq 40+256(%rsp),%r8 + andq %rbx,%r12 + rorxq $41,%rbx,%r13 + vpsrlq $19,%ymm4,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%rbx,%rdi + leaq (%r9,%r14,1),%r9 + leaq (%r8,%r12,1),%r8 + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %rdx,%rbx,%r12 + xorq %rdi,%r13 + rorxq $14,%rbx,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%r8,%r12,1),%r8 + xorq %r14,%r13 + movq %r9,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%r9,%r12 + leaq (%r8,%r13,1),%r8 + xorq %r10,%rdi + vpaddq %ymm11,%ymm5,%ymm5 + rorxq $34,%r9,%r14 + rorxq $28,%r9,%r13 + leaq (%rax,%r8,1),%rax + vpaddq 32(%rbp),%ymm5,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r10,%r15 + xorq %r13,%r14 + leaq (%r8,%r15,1),%r8 + movq %rbx,%r12 + vmovdqa %ymm10,32(%rsp) + vpalignr $8,%ymm6,%ymm7,%ymm8 + addq 64+256(%rsp),%rdx + andq %rax,%r12 + rorxq $41,%rax,%r13 + vpalignr $8,%ymm2,%ymm3,%ymm11 + rorxq $18,%rax,%r15 + leaq (%r8,%r14,1),%r8 + leaq (%rdx,%r12,1),%rdx + vpsrlq $1,%ymm8,%ymm10 + andnq %rcx,%rax,%r12 + xorq %r15,%r13 + rorxq $14,%rax,%r14 + vpaddq %ymm11,%ymm6,%ymm6 + vpsrlq $7,%ymm8,%ymm11 + leaq (%rdx,%r12,1),%rdx + xorq %r14,%r13 + movq %r8,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%r8,%r12 + leaq (%rdx,%r13,1),%rdx + xorq %r9,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%r8,%r14 + rorxq $28,%r8,%r13 + leaq 
(%r11,%rdx,1),%r11 + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r9,%rdi + vpsrlq $6,%ymm5,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%rdx,%rdi,1),%rdx + movq %rax,%r12 + vpsllq $3,%ymm5,%ymm10 + vpaddq %ymm8,%ymm6,%ymm6 + addq 72+256(%rsp),%rcx + andq %r11,%r12 + rorxq $41,%r11,%r13 + vpsrlq $19,%ymm5,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%r11,%rdi + leaq (%rdx,%r14,1),%rdx + leaq (%rcx,%r12,1),%rcx + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %rbx,%r11,%r12 + xorq %rdi,%r13 + rorxq $14,%r11,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%rcx,%r12,1),%rcx + xorq %r14,%r13 + movq %rdx,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%rdx,%r12 + leaq (%rcx,%r13,1),%rcx + xorq %r8,%rdi + vpaddq %ymm11,%ymm6,%ymm6 + rorxq $34,%rdx,%r14 + rorxq $28,%rdx,%r13 + leaq (%r10,%rcx,1),%r10 + vpaddq 64(%rbp),%ymm6,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r8,%r15 + xorq %r13,%r14 + leaq (%rcx,%r15,1),%rcx + movq %r11,%r12 + vmovdqa %ymm10,64(%rsp) + vpalignr $8,%ymm7,%ymm0,%ymm8 + addq 96+256(%rsp),%rbx + andq %r10,%r12 + rorxq $41,%r10,%r13 + vpalignr $8,%ymm3,%ymm4,%ymm11 + rorxq $18,%r10,%r15 + leaq (%rcx,%r14,1),%rcx + leaq (%rbx,%r12,1),%rbx + vpsrlq $1,%ymm8,%ymm10 + andnq %rax,%r10,%r12 + xorq %r15,%r13 + rorxq $14,%r10,%r14 + vpaddq %ymm11,%ymm7,%ymm7 + vpsrlq $7,%ymm8,%ymm11 + leaq (%rbx,%r12,1),%rbx + xorq %r14,%r13 + movq %rcx,%r15 + vpsllq $56,%ymm8,%ymm9 + vpxor %ymm10,%ymm11,%ymm8 + rorxq $39,%rcx,%r12 + leaq (%rbx,%r13,1),%rbx + xorq %rdx,%r15 + vpsrlq $7,%ymm10,%ymm10 + vpxor %ymm9,%ymm8,%ymm8 + rorxq $34,%rcx,%r14 + rorxq $28,%rcx,%r13 + leaq (%r9,%rbx,1),%r9 + vpsllq $7,%ymm9,%ymm9 + vpxor %ymm10,%ymm8,%ymm8 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rdx,%rdi + vpsrlq $6,%ymm6,%ymm11 + vpxor %ymm9,%ymm8,%ymm8 + xorq %r13,%r14 + leaq (%rbx,%rdi,1),%rbx + movq %r10,%r12 + vpsllq $3,%ymm6,%ymm10 + vpaddq %ymm8,%ymm7,%ymm7 + addq 104+256(%rsp),%rax + 
andq %r9,%r12 + rorxq $41,%r9,%r13 + vpsrlq $19,%ymm6,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + rorxq $18,%r9,%rdi + leaq (%rbx,%r14,1),%rbx + leaq (%rax,%r12,1),%rax + vpsllq $42,%ymm10,%ymm10 + vpxor %ymm9,%ymm11,%ymm11 + andnq %r11,%r9,%r12 + xorq %rdi,%r13 + rorxq $14,%r9,%r14 + vpsrlq $42,%ymm9,%ymm9 + vpxor %ymm10,%ymm11,%ymm11 + leaq (%rax,%r12,1),%rax + xorq %r14,%r13 + movq %rbx,%rdi + vpxor %ymm9,%ymm11,%ymm11 + rorxq $39,%rbx,%r12 + leaq (%rax,%r13,1),%rax + xorq %rcx,%rdi + vpaddq %ymm11,%ymm7,%ymm7 + rorxq $34,%rbx,%r14 + rorxq $28,%rbx,%r13 + leaq (%r8,%rax,1),%r8 + vpaddq 96(%rbp),%ymm7,%ymm10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rcx,%r15 + xorq %r13,%r14 + leaq (%rax,%r15,1),%rax + movq %r9,%r12 + vmovdqa %ymm10,96(%rsp) + leaq 256(%rbp),%rbp + cmpb $0,-121(%rbp) + jne .Lavx2_00_47 + addq 0+128(%rsp),%r11 + andq %r8,%r12 + rorxq $41,%r8,%r13 + rorxq $18,%r8,%r15 + leaq (%rax,%r14,1),%rax + leaq (%r11,%r12,1),%r11 + andnq %r10,%r8,%r12 + xorq %r15,%r13 + rorxq $14,%r8,%r14 + leaq (%r11,%r12,1),%r11 + xorq %r14,%r13 + movq %rax,%r15 + rorxq $39,%rax,%r12 + leaq (%r11,%r13,1),%r11 + xorq %rbx,%r15 + rorxq $34,%rax,%r14 + rorxq $28,%rax,%r13 + leaq (%rdx,%r11,1),%rdx + andq %r15,%rdi + xorq %r12,%r14 + xorq %rbx,%rdi + xorq %r13,%r14 + leaq (%r11,%rdi,1),%r11 + movq %r8,%r12 + addq 8+128(%rsp),%r10 + andq %rdx,%r12 + rorxq $41,%rdx,%r13 + rorxq $18,%rdx,%rdi + leaq (%r11,%r14,1),%r11 + leaq (%r10,%r12,1),%r10 + andnq %r9,%rdx,%r12 + xorq %rdi,%r13 + rorxq $14,%rdx,%r14 + leaq (%r10,%r12,1),%r10 + xorq %r14,%r13 + movq %r11,%rdi + rorxq $39,%r11,%r12 + leaq (%r10,%r13,1),%r10 + xorq %rax,%rdi + rorxq $34,%r11,%r14 + rorxq $28,%r11,%r13 + leaq (%rcx,%r10,1),%rcx + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rax,%r15 + xorq %r13,%r14 + leaq (%r10,%r15,1),%r10 + movq %rdx,%r12 + addq 32+128(%rsp),%r9 + andq %rcx,%r12 + rorxq $41,%rcx,%r13 + rorxq $18,%rcx,%r15 + leaq (%r10,%r14,1),%r10 + leaq (%r9,%r12,1),%r9 + andnq %r8,%rcx,%r12 + xorq %r15,%r13 + rorxq 
$14,%rcx,%r14 + leaq (%r9,%r12,1),%r9 + xorq %r14,%r13 + movq %r10,%r15 + rorxq $39,%r10,%r12 + leaq (%r9,%r13,1),%r9 + xorq %r11,%r15 + rorxq $34,%r10,%r14 + rorxq $28,%r10,%r13 + leaq (%rbx,%r9,1),%rbx + andq %r15,%rdi + xorq %r12,%r14 + xorq %r11,%rdi + xorq %r13,%r14 + leaq (%r9,%rdi,1),%r9 + movq %rcx,%r12 + addq 40+128(%rsp),%r8 + andq %rbx,%r12 + rorxq $41,%rbx,%r13 + rorxq $18,%rbx,%rdi + leaq (%r9,%r14,1),%r9 + leaq (%r8,%r12,1),%r8 + andnq %rdx,%rbx,%r12 + xorq %rdi,%r13 + rorxq $14,%rbx,%r14 + leaq (%r8,%r12,1),%r8 + xorq %r14,%r13 + movq %r9,%rdi + rorxq $39,%r9,%r12 + leaq (%r8,%r13,1),%r8 + xorq %r10,%rdi + rorxq $34,%r9,%r14 + rorxq $28,%r9,%r13 + leaq (%rax,%r8,1),%rax + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r10,%r15 + xorq %r13,%r14 + leaq (%r8,%r15,1),%r8 + movq %rbx,%r12 + addq 64+128(%rsp),%rdx + andq %rax,%r12 + rorxq $41,%rax,%r13 + rorxq $18,%rax,%r15 + leaq (%r8,%r14,1),%r8 + leaq (%rdx,%r12,1),%rdx + andnq %rcx,%rax,%r12 + xorq %r15,%r13 + rorxq $14,%rax,%r14 + leaq (%rdx,%r12,1),%rdx + xorq %r14,%r13 + movq %r8,%r15 + rorxq $39,%r8,%r12 + leaq (%rdx,%r13,1),%rdx + xorq %r9,%r15 + rorxq $34,%r8,%r14 + rorxq $28,%r8,%r13 + leaq (%r11,%rdx,1),%r11 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r9,%rdi + xorq %r13,%r14 + leaq (%rdx,%rdi,1),%rdx + movq %rax,%r12 + addq 72+128(%rsp),%rcx + andq %r11,%r12 + rorxq $41,%r11,%r13 + rorxq $18,%r11,%rdi + leaq (%rdx,%r14,1),%rdx + leaq (%rcx,%r12,1),%rcx + andnq %rbx,%r11,%r12 + xorq %rdi,%r13 + rorxq $14,%r11,%r14 + leaq (%rcx,%r12,1),%rcx + xorq %r14,%r13 + movq %rdx,%rdi + rorxq $39,%rdx,%r12 + leaq (%rcx,%r13,1),%rcx + xorq %r8,%rdi + rorxq $34,%rdx,%r14 + rorxq $28,%rdx,%r13 + leaq (%r10,%rcx,1),%r10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r8,%r15 + xorq %r13,%r14 + leaq (%rcx,%r15,1),%rcx + movq %r11,%r12 + addq 96+128(%rsp),%rbx + andq %r10,%r12 + rorxq $41,%r10,%r13 + rorxq $18,%r10,%r15 + leaq (%rcx,%r14,1),%rcx + leaq (%rbx,%r12,1),%rbx + andnq %rax,%r10,%r12 + xorq %r15,%r13 + rorxq 
$14,%r10,%r14 + leaq (%rbx,%r12,1),%rbx + xorq %r14,%r13 + movq %rcx,%r15 + rorxq $39,%rcx,%r12 + leaq (%rbx,%r13,1),%rbx + xorq %rdx,%r15 + rorxq $34,%rcx,%r14 + rorxq $28,%rcx,%r13 + leaq (%r9,%rbx,1),%r9 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rdx,%rdi + xorq %r13,%r14 + leaq (%rbx,%rdi,1),%rbx + movq %r10,%r12 + addq 104+128(%rsp),%rax + andq %r9,%r12 + rorxq $41,%r9,%r13 + rorxq $18,%r9,%rdi + leaq (%rbx,%r14,1),%rbx + leaq (%rax,%r12,1),%rax + andnq %r11,%r9,%r12 + xorq %rdi,%r13 + rorxq $14,%r9,%r14 + leaq (%rax,%r12,1),%rax + xorq %r14,%r13 + movq %rbx,%rdi + rorxq $39,%rbx,%r12 + leaq (%rax,%r13,1),%rax + xorq %rcx,%rdi + rorxq $34,%rbx,%r14 + rorxq $28,%rbx,%r13 + leaq (%r8,%rax,1),%r8 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rcx,%r15 + xorq %r13,%r14 + leaq (%rax,%r15,1),%rax + movq %r9,%r12 + addq 0(%rsp),%r11 + andq %r8,%r12 + rorxq $41,%r8,%r13 + rorxq $18,%r8,%r15 + leaq (%rax,%r14,1),%rax + leaq (%r11,%r12,1),%r11 + andnq %r10,%r8,%r12 + xorq %r15,%r13 + rorxq $14,%r8,%r14 + leaq (%r11,%r12,1),%r11 + xorq %r14,%r13 + movq %rax,%r15 + rorxq $39,%rax,%r12 + leaq (%r11,%r13,1),%r11 + xorq %rbx,%r15 + rorxq $34,%rax,%r14 + rorxq $28,%rax,%r13 + leaq (%rdx,%r11,1),%rdx + andq %r15,%rdi + xorq %r12,%r14 + xorq %rbx,%rdi + xorq %r13,%r14 + leaq (%r11,%rdi,1),%r11 + movq %r8,%r12 + addq 8(%rsp),%r10 + andq %rdx,%r12 + rorxq $41,%rdx,%r13 + rorxq $18,%rdx,%rdi + leaq (%r11,%r14,1),%r11 + leaq (%r10,%r12,1),%r10 + andnq %r9,%rdx,%r12 + xorq %rdi,%r13 + rorxq $14,%rdx,%r14 + leaq (%r10,%r12,1),%r10 + xorq %r14,%r13 + movq %r11,%rdi + rorxq $39,%r11,%r12 + leaq (%r10,%r13,1),%r10 + xorq %rax,%rdi + rorxq $34,%r11,%r14 + rorxq $28,%r11,%r13 + leaq (%rcx,%r10,1),%rcx + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rax,%r15 + xorq %r13,%r14 + leaq (%r10,%r15,1),%r10 + movq %rdx,%r12 + addq 32(%rsp),%r9 + andq %rcx,%r12 + rorxq $41,%rcx,%r13 + rorxq $18,%rcx,%r15 + leaq (%r10,%r14,1),%r10 + leaq (%r9,%r12,1),%r9 + andnq %r8,%rcx,%r12 + xorq %r15,%r13 + rorxq 
$14,%rcx,%r14 + leaq (%r9,%r12,1),%r9 + xorq %r14,%r13 + movq %r10,%r15 + rorxq $39,%r10,%r12 + leaq (%r9,%r13,1),%r9 + xorq %r11,%r15 + rorxq $34,%r10,%r14 + rorxq $28,%r10,%r13 + leaq (%rbx,%r9,1),%rbx + andq %r15,%rdi + xorq %r12,%r14 + xorq %r11,%rdi + xorq %r13,%r14 + leaq (%r9,%rdi,1),%r9 + movq %rcx,%r12 + addq 40(%rsp),%r8 + andq %rbx,%r12 + rorxq $41,%rbx,%r13 + rorxq $18,%rbx,%rdi + leaq (%r9,%r14,1),%r9 + leaq (%r8,%r12,1),%r8 + andnq %rdx,%rbx,%r12 + xorq %rdi,%r13 + rorxq $14,%rbx,%r14 + leaq (%r8,%r12,1),%r8 + xorq %r14,%r13 + movq %r9,%rdi + rorxq $39,%r9,%r12 + leaq (%r8,%r13,1),%r8 + xorq %r10,%rdi + rorxq $34,%r9,%r14 + rorxq $28,%r9,%r13 + leaq (%rax,%r8,1),%rax + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r10,%r15 + xorq %r13,%r14 + leaq (%r8,%r15,1),%r8 + movq %rbx,%r12 + addq 64(%rsp),%rdx + andq %rax,%r12 + rorxq $41,%rax,%r13 + rorxq $18,%rax,%r15 + leaq (%r8,%r14,1),%r8 + leaq (%rdx,%r12,1),%rdx + andnq %rcx,%rax,%r12 + xorq %r15,%r13 + rorxq $14,%rax,%r14 + leaq (%rdx,%r12,1),%rdx + xorq %r14,%r13 + movq %r8,%r15 + rorxq $39,%r8,%r12 + leaq (%rdx,%r13,1),%rdx + xorq %r9,%r15 + rorxq $34,%r8,%r14 + rorxq $28,%r8,%r13 + leaq (%r11,%rdx,1),%r11 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r9,%rdi + xorq %r13,%r14 + leaq (%rdx,%rdi,1),%rdx + movq %rax,%r12 + addq 72(%rsp),%rcx + andq %r11,%r12 + rorxq $41,%r11,%r13 + rorxq $18,%r11,%rdi + leaq (%rdx,%r14,1),%rdx + leaq (%rcx,%r12,1),%rcx + andnq %rbx,%r11,%r12 + xorq %rdi,%r13 + rorxq $14,%r11,%r14 + leaq (%rcx,%r12,1),%rcx + xorq %r14,%r13 + movq %rdx,%rdi + rorxq $39,%rdx,%r12 + leaq (%rcx,%r13,1),%rcx + xorq %r8,%rdi + rorxq $34,%rdx,%r14 + rorxq $28,%rdx,%r13 + leaq (%r10,%rcx,1),%r10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r8,%r15 + xorq %r13,%r14 + leaq (%rcx,%r15,1),%rcx + movq %r11,%r12 + addq 96(%rsp),%rbx + andq %r10,%r12 + rorxq $41,%r10,%r13 + rorxq $18,%r10,%r15 + leaq (%rcx,%r14,1),%rcx + leaq (%rbx,%r12,1),%rbx + andnq %rax,%r10,%r12 + xorq %r15,%r13 + rorxq $14,%r10,%r14 + leaq 
(%rbx,%r12,1),%rbx + xorq %r14,%r13 + movq %rcx,%r15 + rorxq $39,%rcx,%r12 + leaq (%rbx,%r13,1),%rbx + xorq %rdx,%r15 + rorxq $34,%rcx,%r14 + rorxq $28,%rcx,%r13 + leaq (%r9,%rbx,1),%r9 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rdx,%rdi + xorq %r13,%r14 + leaq (%rbx,%rdi,1),%rbx + movq %r10,%r12 + addq 104(%rsp),%rax + andq %r9,%r12 + rorxq $41,%r9,%r13 + rorxq $18,%r9,%rdi + leaq (%rbx,%r14,1),%rbx + leaq (%rax,%r12,1),%rax + andnq %r11,%r9,%r12 + xorq %rdi,%r13 + rorxq $14,%r9,%r14 + leaq (%rax,%r12,1),%rax + xorq %r14,%r13 + movq %rbx,%rdi + rorxq $39,%rbx,%r12 + leaq (%rax,%r13,1),%rax + xorq %rcx,%rdi + rorxq $34,%rbx,%r14 + rorxq $28,%rbx,%r13 + leaq (%r8,%rax,1),%r8 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rcx,%r15 + xorq %r13,%r14 + leaq (%rax,%r15,1),%rax + movq %r9,%r12 + movq 1280(%rsp),%rdi + addq %r14,%rax + + leaq 1152(%rsp),%rbp + + addq 0(%rdi),%rax + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + addq 48(%rdi),%r10 + addq 56(%rdi),%r11 + + movq %rax,0(%rdi) + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + + cmpq 144(%rbp),%rsi + je .Ldone_avx2 + + xorq %r14,%r14 + movq %rbx,%rdi + xorq %rcx,%rdi + movq %r9,%r12 + jmp .Lower_avx2 +.balign 16 +.Lower_avx2: + addq 0+16(%rbp),%r11 + andq %r8,%r12 + rorxq $41,%r8,%r13 + rorxq $18,%r8,%r15 + leaq (%rax,%r14,1),%rax + leaq (%r11,%r12,1),%r11 + andnq %r10,%r8,%r12 + xorq %r15,%r13 + rorxq $14,%r8,%r14 + leaq (%r11,%r12,1),%r11 + xorq %r14,%r13 + movq %rax,%r15 + rorxq $39,%rax,%r12 + leaq (%r11,%r13,1),%r11 + xorq %rbx,%r15 + rorxq $34,%rax,%r14 + rorxq $28,%rax,%r13 + leaq (%rdx,%r11,1),%rdx + andq %r15,%rdi + xorq %r12,%r14 + xorq %rbx,%rdi + xorq %r13,%r14 + leaq (%r11,%rdi,1),%r11 + movq %r8,%r12 + addq 8+16(%rbp),%r10 + andq %rdx,%r12 + rorxq $41,%rdx,%r13 + rorxq $18,%rdx,%rdi + leaq (%r11,%r14,1),%r11 + leaq (%r10,%r12,1),%r10 + andnq %r9,%rdx,%r12 
+ xorq %rdi,%r13 + rorxq $14,%rdx,%r14 + leaq (%r10,%r12,1),%r10 + xorq %r14,%r13 + movq %r11,%rdi + rorxq $39,%r11,%r12 + leaq (%r10,%r13,1),%r10 + xorq %rax,%rdi + rorxq $34,%r11,%r14 + rorxq $28,%r11,%r13 + leaq (%rcx,%r10,1),%rcx + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rax,%r15 + xorq %r13,%r14 + leaq (%r10,%r15,1),%r10 + movq %rdx,%r12 + addq 32+16(%rbp),%r9 + andq %rcx,%r12 + rorxq $41,%rcx,%r13 + rorxq $18,%rcx,%r15 + leaq (%r10,%r14,1),%r10 + leaq (%r9,%r12,1),%r9 + andnq %r8,%rcx,%r12 + xorq %r15,%r13 + rorxq $14,%rcx,%r14 + leaq (%r9,%r12,1),%r9 + xorq %r14,%r13 + movq %r10,%r15 + rorxq $39,%r10,%r12 + leaq (%r9,%r13,1),%r9 + xorq %r11,%r15 + rorxq $34,%r10,%r14 + rorxq $28,%r10,%r13 + leaq (%rbx,%r9,1),%rbx + andq %r15,%rdi + xorq %r12,%r14 + xorq %r11,%rdi + xorq %r13,%r14 + leaq (%r9,%rdi,1),%r9 + movq %rcx,%r12 + addq 40+16(%rbp),%r8 + andq %rbx,%r12 + rorxq $41,%rbx,%r13 + rorxq $18,%rbx,%rdi + leaq (%r9,%r14,1),%r9 + leaq (%r8,%r12,1),%r8 + andnq %rdx,%rbx,%r12 + xorq %rdi,%r13 + rorxq $14,%rbx,%r14 + leaq (%r8,%r12,1),%r8 + xorq %r14,%r13 + movq %r9,%rdi + rorxq $39,%r9,%r12 + leaq (%r8,%r13,1),%r8 + xorq %r10,%rdi + rorxq $34,%r9,%r14 + rorxq $28,%r9,%r13 + leaq (%rax,%r8,1),%rax + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r10,%r15 + xorq %r13,%r14 + leaq (%r8,%r15,1),%r8 + movq %rbx,%r12 + addq 64+16(%rbp),%rdx + andq %rax,%r12 + rorxq $41,%rax,%r13 + rorxq $18,%rax,%r15 + leaq (%r8,%r14,1),%r8 + leaq (%rdx,%r12,1),%rdx + andnq %rcx,%rax,%r12 + xorq %r15,%r13 + rorxq $14,%rax,%r14 + leaq (%rdx,%r12,1),%rdx + xorq %r14,%r13 + movq %r8,%r15 + rorxq $39,%r8,%r12 + leaq (%rdx,%r13,1),%rdx + xorq %r9,%r15 + rorxq $34,%r8,%r14 + rorxq $28,%r8,%r13 + leaq (%r11,%rdx,1),%r11 + andq %r15,%rdi + xorq %r12,%r14 + xorq %r9,%rdi + xorq %r13,%r14 + leaq (%rdx,%rdi,1),%rdx + movq %rax,%r12 + addq 72+16(%rbp),%rcx + andq %r11,%r12 + rorxq $41,%r11,%r13 + rorxq $18,%r11,%rdi + leaq (%rdx,%r14,1),%rdx + leaq (%rcx,%r12,1),%rcx + andnq %rbx,%r11,%r12 + xorq 
%rdi,%r13 + rorxq $14,%r11,%r14 + leaq (%rcx,%r12,1),%rcx + xorq %r14,%r13 + movq %rdx,%rdi + rorxq $39,%rdx,%r12 + leaq (%rcx,%r13,1),%rcx + xorq %r8,%rdi + rorxq $34,%rdx,%r14 + rorxq $28,%rdx,%r13 + leaq (%r10,%rcx,1),%r10 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %r8,%r15 + xorq %r13,%r14 + leaq (%rcx,%r15,1),%rcx + movq %r11,%r12 + addq 96+16(%rbp),%rbx + andq %r10,%r12 + rorxq $41,%r10,%r13 + rorxq $18,%r10,%r15 + leaq (%rcx,%r14,1),%rcx + leaq (%rbx,%r12,1),%rbx + andnq %rax,%r10,%r12 + xorq %r15,%r13 + rorxq $14,%r10,%r14 + leaq (%rbx,%r12,1),%rbx + xorq %r14,%r13 + movq %rcx,%r15 + rorxq $39,%rcx,%r12 + leaq (%rbx,%r13,1),%rbx + xorq %rdx,%r15 + rorxq $34,%rcx,%r14 + rorxq $28,%rcx,%r13 + leaq (%r9,%rbx,1),%r9 + andq %r15,%rdi + xorq %r12,%r14 + xorq %rdx,%rdi + xorq %r13,%r14 + leaq (%rbx,%rdi,1),%rbx + movq %r10,%r12 + addq 104+16(%rbp),%rax + andq %r9,%r12 + rorxq $41,%r9,%r13 + rorxq $18,%r9,%rdi + leaq (%rbx,%r14,1),%rbx + leaq (%rax,%r12,1),%rax + andnq %r11,%r9,%r12 + xorq %rdi,%r13 + rorxq $14,%r9,%r14 + leaq (%rax,%r12,1),%rax + xorq %r14,%r13 + movq %rbx,%rdi + rorxq $39,%rbx,%r12 + leaq (%rax,%r13,1),%rax + xorq %rcx,%rdi + rorxq $34,%rbx,%r14 + rorxq $28,%rbx,%r13 + leaq (%r8,%rax,1),%r8 + andq %rdi,%r15 + xorq %r12,%r14 + xorq %rcx,%r15 + xorq %r13,%r14 + leaq (%rax,%r15,1),%rax + movq %r9,%r12 + leaq -128(%rbp),%rbp + cmpq %rsp,%rbp + jae .Lower_avx2 + + movq 1280(%rsp),%rdi + addq %r14,%rax + + leaq 1152(%rsp),%rsp + +.cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x08 + + addq 0(%rdi),%rax + addq 8(%rdi),%rbx + addq 16(%rdi),%rcx + addq 24(%rdi),%rdx + addq 32(%rdi),%r8 + addq 40(%rdi),%r9 + leaq 256(%rsi),%rsi + addq 48(%rdi),%r10 + movq %rsi,%r12 + addq 56(%rdi),%r11 + cmpq 128+16(%rsp),%rsi + + movq %rax,0(%rdi) + cmoveq %rsp,%r12 + movq %rbx,8(%rdi) + movq %rcx,16(%rdi) + movq %rdx,24(%rdi) + movq %r8,32(%rdi) + movq %r9,40(%rdi) + movq %r10,48(%rdi) + movq %r11,56(%rdi) + + jbe .Loop_avx2 + leaq (%rsp),%rbp + +.cfi_escape 
0x0f,0x06,0x76,0x98,0x01,0x06,0x23,0x08 + +.Ldone_avx2: + movq 152(%rbp),%rsi +.cfi_def_cfa %rsi,8 + vzeroupper + movq -48(%rsi),%r15 +.cfi_restore %r15 + movq -40(%rsi),%r14 +.cfi_restore %r14 + movq -32(%rsi),%r13 +.cfi_restore %r13 + movq -24(%rsi),%r12 +.cfi_restore %r12 + movq -16(%rsi),%rbp +.cfi_restore %rbp + movq -8(%rsi),%rbx +.cfi_restore %rbx + leaq (%rsi),%rsp +.cfi_def_cfa_register %rsp +.Lepilogue_avx2: + RET +.cfi_endproc +SET_SIZE(zfs_sha512_transform_avx2) + +#if defined(__ELF__) + .section .note.GNU-stack,"",%progbits +#endif +#endif diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S deleted file mode 100644 index 746c85a98566..000000000000 --- a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S +++ /dev/null @@ -1,2114 +0,0 @@ -/* - * ==================================================================== - * Written by Andy Polyakov <appro@fy.chalmers.se> for the OpenSSL - * project. Rights for redistribution and usage in source and binary - * forms are granted according to the OpenSSL license. - * ==================================================================== - * - * sha256/512_block procedure for x86_64. - * - * 40% improvement over compiler-generated code on Opteron. On EM64T - * sha256 was observed to run >80% faster and sha512 - >40%. No magical - * tricks, just straight implementation... I really wonder why gcc - * [being armed with inline assembler] fails to generate as fast code. - * The only thing which is cool about this module is that it's very - * same instruction sequence used for both SHA-256 and SHA-512. In - * former case the instructions operate on 32-bit operands, while in - * latter - on 64-bit ones. 
All I had to do is to get one flavor right, - * the other one passed the test right away:-) - * - * sha256_block runs in ~1005 cycles on Opteron, which gives you - * asymptotic performance of 64*1000/1005=63.7MBps times CPU clock - * frequency in GHz. sha512_block runs in ~1275 cycles, which results - * in 128*1000/1275=100MBps per GHz. Is there room for improvement? - * Well, if you compare it to IA-64 implementation, which maintains - * X[16] in register bank[!], tends to 4 instructions per CPU clock - * cycle and runs in 1003 cycles, 1275 is very good result for 3-way - * issue Opteron pipeline and X[16] maintained in memory. So that *if* - * there is a way to improve it, *then* the only way would be to try to - * offload X[16] updates to SSE unit, but that would require "deeper" - * loop unroll, which in turn would naturally cause size blow-up, not - * to mention increased complexity! And once again, only *if* it's - * actually possible to noticeably improve overall ILP, instruction - * level parallelism, on a given CPU implementation in this case. - * - * Special note on Intel EM64T. While Opteron CPU exhibits perfect - * performance ratio of 1.5 between 64- and 32-bit flavors [see above], - * [currently available] EM64T CPUs apparently are far from it. On the - * contrary, 64-bit version, sha512_block, is ~30% *slower* than 32-bit - * sha256_block:-( This is presumably because 64-bit shifts/rotates - * apparently are not atomic instructions, but implemented in microcode. - */ - -/* - * OpenSolaris OS modifications - * - * Sun elects to use this software under the BSD license. - * - * This source originates from OpenSSL file sha512-x86_64.pl at - * ftp://ftp.openssl.org/snapshot/openssl-0.9.8-stable-SNAP-20080131.tar.gz - * (presumably for future OpenSSL release 0.9.8h), with these changes: - * - * 1. Added perl "use strict" and declared variables. - * - * 2. 
Added OpenSolaris ENTRY_NP/SET_SIZE macros from - * /usr/include/sys/asm_linkage.h, .ident keywords, and lint(1B) guards. - * - * 3. Removed x86_64-xlate.pl script (not needed for as(1) or gas(1) - * assemblers). Replaced the .picmeup macro with assembler code. - * - * 4. Added 8 to $ctx, as OpenSolaris OS has an extra 4-byte field, "algotype", - * at the beginning of SHA2_CTX (the next field is 8-byte aligned). - */ - -/* - * This file was generated by a perl script (sha512-x86_64.pl) that were - * used to generate sha256 and sha512 variants from the same code base. - * The comments from the original file have been pasted above. - */ - - -#if defined(lint) || defined(__lint) -#include <sys/stdint.h> -#include <sha2/sha2.h> - -/* ARGSUSED */ -void -SHA512TransformBlocks(SHA2_CTX *ctx, const void *in, size_t num) -{ -} - - -#else -#define _ASM -#include <sys/asm_linkage.h> - -ENTRY_NP(SHA512TransformBlocks) -.cfi_startproc - movq %rsp, %rax -.cfi_def_cfa_register %rax - push %rbx -.cfi_offset %rbx,-16 - push %rbp -.cfi_offset %rbp,-24 - push %r12 -.cfi_offset %r12,-32 - push %r13 -.cfi_offset %r13,-40 - push %r14 -.cfi_offset %r14,-48 - push %r15 -.cfi_offset %r15,-56 - mov %rsp,%rbp # copy %rsp - shl $4,%rdx # num*16 - sub $16*8+4*8,%rsp - lea (%rsi,%rdx,8),%rdx # inp+num*16*8 - and $-64,%rsp # align stack frame - add $8,%rdi # Skip OpenSolaris field, "algotype" - mov %rdi,16*8+0*8(%rsp) # save ctx, 1st arg - mov %rsi,16*8+1*8(%rsp) # save inp, 2nd arg - mov %rdx,16*8+2*8(%rsp) # save end pointer, "3rd" arg - mov %rbp,16*8+3*8(%rsp) # save copy of %rsp -# echo ".cfi_cfa_expression %rsp+152,deref,+56" | -# openssl/crypto/perlasm/x86_64-xlate.pl -.cfi_escape 0x0f,0x06,0x77,0x98,0x01,0x06,0x23,0x38 - - #.picmeup %rbp - # The .picmeup pseudo-directive, from perlasm/x86_64_xlate.pl, puts - # the address of the "next" instruction into the target register - # (%rbp). 
This generates these 2 instructions: - lea .Llea(%rip),%rbp - #nop # .picmeup generates a nop for mod 8 alignment--not needed here - -.Llea: - lea K512-.(%rbp),%rbp - - mov 8*0(%rdi),%rax - mov 8*1(%rdi),%rbx - mov 8*2(%rdi),%rcx - mov 8*3(%rdi),%rdx - mov 8*4(%rdi),%r8 - mov 8*5(%rdi),%r9 - mov 8*6(%rdi),%r10 - mov 8*7(%rdi),%r11 - jmp .Lloop - -.align 16 -.Lloop: - xor %rdi,%rdi - mov 8*0(%rsi),%r12 - bswap %r12 - mov %r8,%r13 - mov %r8,%r14 - mov %r9,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %r10,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %r8,%r15 # (f^g)&e - mov %r12,0(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %r11,%r12 # T1+=h - - mov %rax,%r11 - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %rax,%r13 - mov %rax,%r14 - - ror $28,%r11 - ror $34,%r13 - mov %rax,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%r11 - ror $5,%r13 - or %rcx,%r14 # a|c - - xor %r13,%r11 # h=Sigma0(a) - and %rcx,%r15 # a&c - add %r12,%rdx # d+=T1 - - and %rbx,%r14 # (a|c)&b - add %r12,%r11 # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%r11 # h+=Maj(a,b,c) - mov 8*1(%rsi),%r12 - bswap %r12 - mov %rdx,%r13 - mov %rdx,%r14 - mov %r8,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %r9,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %rdx,%r15 # (f^g)&e - mov %r12,8(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %r10,%r12 # T1+=h - - mov %r11,%r10 - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %r11,%r13 - mov %r11,%r14 - - ror $28,%r10 - ror $34,%r13 - mov %r11,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%r10 - ror $5,%r13 - or %rbx,%r14 # a|c - - xor %r13,%r10 # h=Sigma0(a) - and %rbx,%r15 # a&c - add %r12,%rcx # d+=T1 - - and %rax,%r14 # (a|c)&b - add %r12,%r10 # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%r10 # h+=Maj(a,b,c) - mov 
8*2(%rsi),%r12 - bswap %r12 - mov %rcx,%r13 - mov %rcx,%r14 - mov %rdx,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %r8,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %rcx,%r15 # (f^g)&e - mov %r12,16(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %r9,%r12 # T1+=h - - mov %r10,%r9 - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %r10,%r13 - mov %r10,%r14 - - ror $28,%r9 - ror $34,%r13 - mov %r10,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%r9 - ror $5,%r13 - or %rax,%r14 # a|c - - xor %r13,%r9 # h=Sigma0(a) - and %rax,%r15 # a&c - add %r12,%rbx # d+=T1 - - and %r11,%r14 # (a|c)&b - add %r12,%r9 # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%r9 # h+=Maj(a,b,c) - mov 8*3(%rsi),%r12 - bswap %r12 - mov %rbx,%r13 - mov %rbx,%r14 - mov %rcx,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %rdx,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %rbx,%r15 # (f^g)&e - mov %r12,24(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %r8,%r12 # T1+=h - - mov %r9,%r8 - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %r9,%r13 - mov %r9,%r14 - - ror $28,%r8 - ror $34,%r13 - mov %r9,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%r8 - ror $5,%r13 - or %r11,%r14 # a|c - - xor %r13,%r8 # h=Sigma0(a) - and %r11,%r15 # a&c - add %r12,%rax # d+=T1 - - and %r10,%r14 # (a|c)&b - add %r12,%r8 # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%r8 # h+=Maj(a,b,c) - mov 8*4(%rsi),%r12 - bswap %r12 - mov %rax,%r13 - mov %rax,%r14 - mov %rbx,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %rcx,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %rax,%r15 # (f^g)&e - mov %r12,32(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %rdx,%r12 # T1+=h - - mov %r8,%rdx - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %r8,%r13 - mov 
%r8,%r14 - - ror $28,%rdx - ror $34,%r13 - mov %r8,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%rdx - ror $5,%r13 - or %r10,%r14 # a|c - - xor %r13,%rdx # h=Sigma0(a) - and %r10,%r15 # a&c - add %r12,%r11 # d+=T1 - - and %r9,%r14 # (a|c)&b - add %r12,%rdx # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%rdx # h+=Maj(a,b,c) - mov 8*5(%rsi),%r12 - bswap %r12 - mov %r11,%r13 - mov %r11,%r14 - mov %rax,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %rbx,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %r11,%r15 # (f^g)&e - mov %r12,40(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %rcx,%r12 # T1+=h - - mov %rdx,%rcx - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %rdx,%r13 - mov %rdx,%r14 - - ror $28,%rcx - ror $34,%r13 - mov %rdx,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%rcx - ror $5,%r13 - or %r9,%r14 # a|c - - xor %r13,%rcx # h=Sigma0(a) - and %r9,%r15 # a&c - add %r12,%r10 # d+=T1 - - and %r8,%r14 # (a|c)&b - add %r12,%rcx # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%rcx # h+=Maj(a,b,c) - mov 8*6(%rsi),%r12 - bswap %r12 - mov %r10,%r13 - mov %r10,%r14 - mov %r11,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %rax,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %r10,%r15 # (f^g)&e - mov %r12,48(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %rbx,%r12 # T1+=h - - mov %rcx,%rbx - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %rcx,%r13 - mov %rcx,%r14 - - ror $28,%rbx - ror $34,%r13 - mov %rcx,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%rbx - ror $5,%r13 - or %r8,%r14 # a|c - - xor %r13,%rbx # h=Sigma0(a) - and %r8,%r15 # a&c - add %r12,%r9 # d+=T1 - - and %rdx,%r14 # (a|c)&b - add %r12,%rbx # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%rbx # h+=Maj(a,b,c) - mov 
8*7(%rsi),%r12 - bswap %r12 - mov %r9,%r13 - mov %r9,%r14 - mov %r10,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %r11,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %r9,%r15 # (f^g)&e - mov %r12,56(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %rax,%r12 # T1+=h - - mov %rbx,%rax - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %rbx,%r13 - mov %rbx,%r14 - - ror $28,%rax - ror $34,%r13 - mov %rbx,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%rax - ror $5,%r13 - or %rdx,%r14 # a|c - - xor %r13,%rax # h=Sigma0(a) - and %rdx,%r15 # a&c - add %r12,%r8 # d+=T1 - - and %rcx,%r14 # (a|c)&b - add %r12,%rax # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%rax # h+=Maj(a,b,c) - mov 8*8(%rsi),%r12 - bswap %r12 - mov %r8,%r13 - mov %r8,%r14 - mov %r9,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %r10,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %r8,%r15 # (f^g)&e - mov %r12,64(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %r11,%r12 # T1+=h - - mov %rax,%r11 - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %rax,%r13 - mov %rax,%r14 - - ror $28,%r11 - ror $34,%r13 - mov %rax,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%r11 - ror $5,%r13 - or %rcx,%r14 # a|c - - xor %r13,%r11 # h=Sigma0(a) - and %rcx,%r15 # a&c - add %r12,%rdx # d+=T1 - - and %rbx,%r14 # (a|c)&b - add %r12,%r11 # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%r11 # h+=Maj(a,b,c) - mov 8*9(%rsi),%r12 - bswap %r12 - mov %rdx,%r13 - mov %rdx,%r14 - mov %r8,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %r9,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %rdx,%r15 # (f^g)&e - mov %r12,72(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %r10,%r12 # T1+=h - - mov %r11,%r10 - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov 
%r11,%r13 - mov %r11,%r14 - - ror $28,%r10 - ror $34,%r13 - mov %r11,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%r10 - ror $5,%r13 - or %rbx,%r14 # a|c - - xor %r13,%r10 # h=Sigma0(a) - and %rbx,%r15 # a&c - add %r12,%rcx # d+=T1 - - and %rax,%r14 # (a|c)&b - add %r12,%r10 # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%r10 # h+=Maj(a,b,c) - mov 8*10(%rsi),%r12 - bswap %r12 - mov %rcx,%r13 - mov %rcx,%r14 - mov %rdx,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %r8,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %rcx,%r15 # (f^g)&e - mov %r12,80(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %r9,%r12 # T1+=h - - mov %r10,%r9 - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %r10,%r13 - mov %r10,%r14 - - ror $28,%r9 - ror $34,%r13 - mov %r10,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%r9 - ror $5,%r13 - or %rax,%r14 # a|c - - xor %r13,%r9 # h=Sigma0(a) - and %rax,%r15 # a&c - add %r12,%rbx # d+=T1 - - and %r11,%r14 # (a|c)&b - add %r12,%r9 # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%r9 # h+=Maj(a,b,c) - mov 8*11(%rsi),%r12 - bswap %r12 - mov %rbx,%r13 - mov %rbx,%r14 - mov %rcx,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %rdx,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %rbx,%r15 # (f^g)&e - mov %r12,88(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %r8,%r12 # T1+=h - - mov %r9,%r8 - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %r9,%r13 - mov %r9,%r14 - - ror $28,%r8 - ror $34,%r13 - mov %r9,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%r8 - ror $5,%r13 - or %r11,%r14 # a|c - - xor %r13,%r8 # h=Sigma0(a) - and %r11,%r15 # a&c - add %r12,%rax # d+=T1 - - and %r10,%r14 # (a|c)&b - add %r12,%r8 # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%r8 # h+=Maj(a,b,c) - 
mov 8*12(%rsi),%r12 - bswap %r12 - mov %rax,%r13 - mov %rax,%r14 - mov %rbx,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %rcx,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %rax,%r15 # (f^g)&e - mov %r12,96(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %rdx,%r12 # T1+=h - - mov %r8,%rdx - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %r8,%r13 - mov %r8,%r14 - - ror $28,%rdx - ror $34,%r13 - mov %r8,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%rdx - ror $5,%r13 - or %r10,%r14 # a|c - - xor %r13,%rdx # h=Sigma0(a) - and %r10,%r15 # a&c - add %r12,%r11 # d+=T1 - - and %r9,%r14 # (a|c)&b - add %r12,%rdx # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%rdx # h+=Maj(a,b,c) - mov 8*13(%rsi),%r12 - bswap %r12 - mov %r11,%r13 - mov %r11,%r14 - mov %rax,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %rbx,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %r11,%r15 # (f^g)&e - mov %r12,104(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %rcx,%r12 # T1+=h - - mov %rdx,%rcx - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %rdx,%r13 - mov %rdx,%r14 - - ror $28,%rcx - ror $34,%r13 - mov %rdx,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%rcx - ror $5,%r13 - or %r9,%r14 # a|c - - xor %r13,%rcx # h=Sigma0(a) - and %r9,%r15 # a&c - add %r12,%r10 # d+=T1 - - and %r8,%r14 # (a|c)&b - add %r12,%rcx # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%rcx # h+=Maj(a,b,c) - mov 8*14(%rsi),%r12 - bswap %r12 - mov %r10,%r13 - mov %r10,%r14 - mov %r11,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %rax,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %r10,%r15 # (f^g)&e - mov %r12,112(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %rbx,%r12 # T1+=h - - mov %rcx,%rbx - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - 
mov %rcx,%r13 - mov %rcx,%r14 - - ror $28,%rbx - ror $34,%r13 - mov %rcx,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%rbx - ror $5,%r13 - or %r8,%r14 # a|c - - xor %r13,%rbx # h=Sigma0(a) - and %r8,%r15 # a&c - add %r12,%r9 # d+=T1 - - and %rdx,%r14 # (a|c)&b - add %r12,%rbx # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%rbx # h+=Maj(a,b,c) - mov 8*15(%rsi),%r12 - bswap %r12 - mov %r9,%r13 - mov %r9,%r14 - mov %r10,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %r11,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %r9,%r15 # (f^g)&e - mov %r12,120(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %rax,%r12 # T1+=h - - mov %rbx,%rax - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %rbx,%r13 - mov %rbx,%r14 - - ror $28,%rax - ror $34,%r13 - mov %rbx,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%rax - ror $5,%r13 - or %rdx,%r14 # a|c - - xor %r13,%rax # h=Sigma0(a) - and %rdx,%r15 # a&c - add %r12,%r8 # d+=T1 - - and %rcx,%r14 # (a|c)&b - add %r12,%rax # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%rax # h+=Maj(a,b,c) - jmp .Lrounds_16_xx -.align 16 -.Lrounds_16_xx: - mov 8(%rsp),%r13 - mov 112(%rsp),%r12 - - mov %r13,%r15 - - shr $7,%r13 - ror $1,%r15 - - xor %r15,%r13 - ror $7,%r15 - - xor %r15,%r13 # sigma0(X[(i+1)&0xf]) - mov %r12,%r14 - - shr $6,%r12 - ror $19,%r14 - - xor %r14,%r12 - ror $42,%r14 - - xor %r14,%r12 # sigma1(X[(i+14)&0xf]) - - add %r13,%r12 - - add 72(%rsp),%r12 - - add 0(%rsp),%r12 - mov %r8,%r13 - mov %r8,%r14 - mov %r9,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %r10,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %r8,%r15 # (f^g)&e - mov %r12,0(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %r11,%r12 # T1+=h - - mov %rax,%r11 - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %rax,%r13 - mov %rax,%r14 - - ror 
$28,%r11 - ror $34,%r13 - mov %rax,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%r11 - ror $5,%r13 - or %rcx,%r14 # a|c - - xor %r13,%r11 # h=Sigma0(a) - and %rcx,%r15 # a&c - add %r12,%rdx # d+=T1 - - and %rbx,%r14 # (a|c)&b - add %r12,%r11 # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%r11 # h+=Maj(a,b,c) - mov 16(%rsp),%r13 - mov 120(%rsp),%r12 - - mov %r13,%r15 - - shr $7,%r13 - ror $1,%r15 - - xor %r15,%r13 - ror $7,%r15 - - xor %r15,%r13 # sigma0(X[(i+1)&0xf]) - mov %r12,%r14 - - shr $6,%r12 - ror $19,%r14 - - xor %r14,%r12 - ror $42,%r14 - - xor %r14,%r12 # sigma1(X[(i+14)&0xf]) - - add %r13,%r12 - - add 80(%rsp),%r12 - - add 8(%rsp),%r12 - mov %rdx,%r13 - mov %rdx,%r14 - mov %r8,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %r9,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %rdx,%r15 # (f^g)&e - mov %r12,8(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %r10,%r12 # T1+=h - - mov %r11,%r10 - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %r11,%r13 - mov %r11,%r14 - - ror $28,%r10 - ror $34,%r13 - mov %r11,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%r10 - ror $5,%r13 - or %rbx,%r14 # a|c - - xor %r13,%r10 # h=Sigma0(a) - and %rbx,%r15 # a&c - add %r12,%rcx # d+=T1 - - and %rax,%r14 # (a|c)&b - add %r12,%r10 # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%r10 # h+=Maj(a,b,c) - mov 24(%rsp),%r13 - mov 0(%rsp),%r12 - - mov %r13,%r15 - - shr $7,%r13 - ror $1,%r15 - - xor %r15,%r13 - ror $7,%r15 - - xor %r15,%r13 # sigma0(X[(i+1)&0xf]) - mov %r12,%r14 - - shr $6,%r12 - ror $19,%r14 - - xor %r14,%r12 - ror $42,%r14 - - xor %r14,%r12 # sigma1(X[(i+14)&0xf]) - - add %r13,%r12 - - add 88(%rsp),%r12 - - add 16(%rsp),%r12 - mov %rcx,%r13 - mov %rcx,%r14 - mov %rdx,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %r8,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %rcx,%r15 # (f^g)&e - mov 
%r12,16(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %r9,%r12 # T1+=h - - mov %r10,%r9 - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %r10,%r13 - mov %r10,%r14 - - ror $28,%r9 - ror $34,%r13 - mov %r10,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%r9 - ror $5,%r13 - or %rax,%r14 # a|c - - xor %r13,%r9 # h=Sigma0(a) - and %rax,%r15 # a&c - add %r12,%rbx # d+=T1 - - and %r11,%r14 # (a|c)&b - add %r12,%r9 # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%r9 # h+=Maj(a,b,c) - mov 32(%rsp),%r13 - mov 8(%rsp),%r12 - - mov %r13,%r15 - - shr $7,%r13 - ror $1,%r15 - - xor %r15,%r13 - ror $7,%r15 - - xor %r15,%r13 # sigma0(X[(i+1)&0xf]) - mov %r12,%r14 - - shr $6,%r12 - ror $19,%r14 - - xor %r14,%r12 - ror $42,%r14 - - xor %r14,%r12 # sigma1(X[(i+14)&0xf]) - - add %r13,%r12 - - add 96(%rsp),%r12 - - add 24(%rsp),%r12 - mov %rbx,%r13 - mov %rbx,%r14 - mov %rcx,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %rdx,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %rbx,%r15 # (f^g)&e - mov %r12,24(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %r8,%r12 # T1+=h - - mov %r9,%r8 - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %r9,%r13 - mov %r9,%r14 - - ror $28,%r8 - ror $34,%r13 - mov %r9,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%r8 - ror $5,%r13 - or %r11,%r14 # a|c - - xor %r13,%r8 # h=Sigma0(a) - and %r11,%r15 # a&c - add %r12,%rax # d+=T1 - - and %r10,%r14 # (a|c)&b - add %r12,%r8 # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%r8 # h+=Maj(a,b,c) - mov 40(%rsp),%r13 - mov 16(%rsp),%r12 - - mov %r13,%r15 - - shr $7,%r13 - ror $1,%r15 - - xor %r15,%r13 - ror $7,%r15 - - xor %r15,%r13 # sigma0(X[(i+1)&0xf]) - mov %r12,%r14 - - shr $6,%r12 - ror $19,%r14 - - xor %r14,%r12 - ror $42,%r14 - - xor %r14,%r12 # sigma1(X[(i+14)&0xf]) - - add 
%r13,%r12 - - add 104(%rsp),%r12 - - add 32(%rsp),%r12 - mov %rax,%r13 - mov %rax,%r14 - mov %rbx,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %rcx,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %rax,%r15 # (f^g)&e - mov %r12,32(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %rdx,%r12 # T1+=h - - mov %r8,%rdx - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %r8,%r13 - mov %r8,%r14 - - ror $28,%rdx - ror $34,%r13 - mov %r8,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%rdx - ror $5,%r13 - or %r10,%r14 # a|c - - xor %r13,%rdx # h=Sigma0(a) - and %r10,%r15 # a&c - add %r12,%r11 # d+=T1 - - and %r9,%r14 # (a|c)&b - add %r12,%rdx # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%rdx # h+=Maj(a,b,c) - mov 48(%rsp),%r13 - mov 24(%rsp),%r12 - - mov %r13,%r15 - - shr $7,%r13 - ror $1,%r15 - - xor %r15,%r13 - ror $7,%r15 - - xor %r15,%r13 # sigma0(X[(i+1)&0xf]) - mov %r12,%r14 - - shr $6,%r12 - ror $19,%r14 - - xor %r14,%r12 - ror $42,%r14 - - xor %r14,%r12 # sigma1(X[(i+14)&0xf]) - - add %r13,%r12 - - add 112(%rsp),%r12 - - add 40(%rsp),%r12 - mov %r11,%r13 - mov %r11,%r14 - mov %rax,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %rbx,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %r11,%r15 # (f^g)&e - mov %r12,40(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %rcx,%r12 # T1+=h - - mov %rdx,%rcx - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %rdx,%r13 - mov %rdx,%r14 - - ror $28,%rcx - ror $34,%r13 - mov %rdx,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%rcx - ror $5,%r13 - or %r9,%r14 # a|c - - xor %r13,%rcx # h=Sigma0(a) - and %r9,%r15 # a&c - add %r12,%r10 # d+=T1 - - and %r8,%r14 # (a|c)&b - add %r12,%rcx # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%rcx # h+=Maj(a,b,c) - mov 56(%rsp),%r13 - mov 32(%rsp),%r12 - - mov %r13,%r15 - - 
shr $7,%r13 - ror $1,%r15 - - xor %r15,%r13 - ror $7,%r15 - - xor %r15,%r13 # sigma0(X[(i+1)&0xf]) - mov %r12,%r14 - - shr $6,%r12 - ror $19,%r14 - - xor %r14,%r12 - ror $42,%r14 - - xor %r14,%r12 # sigma1(X[(i+14)&0xf]) - - add %r13,%r12 - - add 120(%rsp),%r12 - - add 48(%rsp),%r12 - mov %r10,%r13 - mov %r10,%r14 - mov %r11,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %rax,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %r10,%r15 # (f^g)&e - mov %r12,48(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %rbx,%r12 # T1+=h - - mov %rcx,%rbx - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %rcx,%r13 - mov %rcx,%r14 - - ror $28,%rbx - ror $34,%r13 - mov %rcx,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%rbx - ror $5,%r13 - or %r8,%r14 # a|c - - xor %r13,%rbx # h=Sigma0(a) - and %r8,%r15 # a&c - add %r12,%r9 # d+=T1 - - and %rdx,%r14 # (a|c)&b - add %r12,%rbx # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%rbx # h+=Maj(a,b,c) - mov 64(%rsp),%r13 - mov 40(%rsp),%r12 - - mov %r13,%r15 - - shr $7,%r13 - ror $1,%r15 - - xor %r15,%r13 - ror $7,%r15 - - xor %r15,%r13 # sigma0(X[(i+1)&0xf]) - mov %r12,%r14 - - shr $6,%r12 - ror $19,%r14 - - xor %r14,%r12 - ror $42,%r14 - - xor %r14,%r12 # sigma1(X[(i+14)&0xf]) - - add %r13,%r12 - - add 0(%rsp),%r12 - - add 56(%rsp),%r12 - mov %r9,%r13 - mov %r9,%r14 - mov %r10,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %r11,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %r9,%r15 # (f^g)&e - mov %r12,56(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %rax,%r12 # T1+=h - - mov %rbx,%rax - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %rbx,%r13 - mov %rbx,%r14 - - ror $28,%rax - ror $34,%r13 - mov %rbx,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%rax - ror $5,%r13 - or %rdx,%r14 # a|c - - xor %r13,%rax # h=Sigma0(a) - and %rdx,%r15 # a&c - add %r12,%r8 # 
d+=T1 - - and %rcx,%r14 # (a|c)&b - add %r12,%rax # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%rax # h+=Maj(a,b,c) - mov 72(%rsp),%r13 - mov 48(%rsp),%r12 - - mov %r13,%r15 - - shr $7,%r13 - ror $1,%r15 - - xor %r15,%r13 - ror $7,%r15 - - xor %r15,%r13 # sigma0(X[(i+1)&0xf]) - mov %r12,%r14 - - shr $6,%r12 - ror $19,%r14 - - xor %r14,%r12 - ror $42,%r14 - - xor %r14,%r12 # sigma1(X[(i+14)&0xf]) - - add %r13,%r12 - - add 8(%rsp),%r12 - - add 64(%rsp),%r12 - mov %r8,%r13 - mov %r8,%r14 - mov %r9,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %r10,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %r8,%r15 # (f^g)&e - mov %r12,64(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %r10,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %r11,%r12 # T1+=h - - mov %rax,%r11 - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %rax,%r13 - mov %rax,%r14 - - ror $28,%r11 - ror $34,%r13 - mov %rax,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%r11 - ror $5,%r13 - or %rcx,%r14 # a|c - - xor %r13,%r11 # h=Sigma0(a) - and %rcx,%r15 # a&c - add %r12,%rdx # d+=T1 - - and %rbx,%r14 # (a|c)&b - add %r12,%r11 # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%r11 # h+=Maj(a,b,c) - mov 80(%rsp),%r13 - mov 56(%rsp),%r12 - - mov %r13,%r15 - - shr $7,%r13 - ror $1,%r15 - - xor %r15,%r13 - ror $7,%r15 - - xor %r15,%r13 # sigma0(X[(i+1)&0xf]) - mov %r12,%r14 - - shr $6,%r12 - ror $19,%r14 - - xor %r14,%r12 - ror $42,%r14 - - xor %r14,%r12 # sigma1(X[(i+14)&0xf]) - - add %r13,%r12 - - add 16(%rsp),%r12 - - add 72(%rsp),%r12 - mov %rdx,%r13 - mov %rdx,%r14 - mov %r8,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %r9,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %rdx,%r15 # (f^g)&e - mov %r12,72(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %r9,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %r10,%r12 # T1+=h - - mov %r11,%r10 - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %r11,%r13 - 
mov %r11,%r14 - - ror $28,%r10 - ror $34,%r13 - mov %r11,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%r10 - ror $5,%r13 - or %rbx,%r14 # a|c - - xor %r13,%r10 # h=Sigma0(a) - and %rbx,%r15 # a&c - add %r12,%rcx # d+=T1 - - and %rax,%r14 # (a|c)&b - add %r12,%r10 # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%r10 # h+=Maj(a,b,c) - mov 88(%rsp),%r13 - mov 64(%rsp),%r12 - - mov %r13,%r15 - - shr $7,%r13 - ror $1,%r15 - - xor %r15,%r13 - ror $7,%r15 - - xor %r15,%r13 # sigma0(X[(i+1)&0xf]) - mov %r12,%r14 - - shr $6,%r12 - ror $19,%r14 - - xor %r14,%r12 - ror $42,%r14 - - xor %r14,%r12 # sigma1(X[(i+14)&0xf]) - - add %r13,%r12 - - add 24(%rsp),%r12 - - add 80(%rsp),%r12 - mov %rcx,%r13 - mov %rcx,%r14 - mov %rdx,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %r8,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %rcx,%r15 # (f^g)&e - mov %r12,80(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %r8,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %r9,%r12 # T1+=h - - mov %r10,%r9 - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %r10,%r13 - mov %r10,%r14 - - ror $28,%r9 - ror $34,%r13 - mov %r10,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%r9 - ror $5,%r13 - or %rax,%r14 # a|c - - xor %r13,%r9 # h=Sigma0(a) - and %rax,%r15 # a&c - add %r12,%rbx # d+=T1 - - and %r11,%r14 # (a|c)&b - add %r12,%r9 # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%r9 # h+=Maj(a,b,c) - mov 96(%rsp),%r13 - mov 72(%rsp),%r12 - - mov %r13,%r15 - - shr $7,%r13 - ror $1,%r15 - - xor %r15,%r13 - ror $7,%r15 - - xor %r15,%r13 # sigma0(X[(i+1)&0xf]) - mov %r12,%r14 - - shr $6,%r12 - ror $19,%r14 - - xor %r14,%r12 - ror $42,%r14 - - xor %r14,%r12 # sigma1(X[(i+14)&0xf]) - - add %r13,%r12 - - add 32(%rsp),%r12 - - add 88(%rsp),%r12 - mov %rbx,%r13 - mov %rbx,%r14 - mov %rcx,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %rdx,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %rbx,%r15 # 
(f^g)&e - mov %r12,88(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %rdx,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %r8,%r12 # T1+=h - - mov %r9,%r8 - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %r9,%r13 - mov %r9,%r14 - - ror $28,%r8 - ror $34,%r13 - mov %r9,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%r8 - ror $5,%r13 - or %r11,%r14 # a|c - - xor %r13,%r8 # h=Sigma0(a) - and %r11,%r15 # a&c - add %r12,%rax # d+=T1 - - and %r10,%r14 # (a|c)&b - add %r12,%r8 # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%r8 # h+=Maj(a,b,c) - mov 104(%rsp),%r13 - mov 80(%rsp),%r12 - - mov %r13,%r15 - - shr $7,%r13 - ror $1,%r15 - - xor %r15,%r13 - ror $7,%r15 - - xor %r15,%r13 # sigma0(X[(i+1)&0xf]) - mov %r12,%r14 - - shr $6,%r12 - ror $19,%r14 - - xor %r14,%r12 - ror $42,%r14 - - xor %r14,%r12 # sigma1(X[(i+14)&0xf]) - - add %r13,%r12 - - add 40(%rsp),%r12 - - add 96(%rsp),%r12 - mov %rax,%r13 - mov %rax,%r14 - mov %rbx,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %rcx,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %rax,%r15 # (f^g)&e - mov %r12,96(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %rcx,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %rdx,%r12 # T1+=h - - mov %r8,%rdx - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %r8,%r13 - mov %r8,%r14 - - ror $28,%rdx - ror $34,%r13 - mov %r8,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%rdx - ror $5,%r13 - or %r10,%r14 # a|c - - xor %r13,%rdx # h=Sigma0(a) - and %r10,%r15 # a&c - add %r12,%r11 # d+=T1 - - and %r9,%r14 # (a|c)&b - add %r12,%rdx # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%rdx # h+=Maj(a,b,c) - mov 112(%rsp),%r13 - mov 88(%rsp),%r12 - - mov %r13,%r15 - - shr $7,%r13 - ror $1,%r15 - - xor %r15,%r13 - ror $7,%r15 - - xor %r15,%r13 # sigma0(X[(i+1)&0xf]) - mov %r12,%r14 - - shr $6,%r12 - ror $19,%r14 - - xor %r14,%r12 - ror $42,%r14 - - xor %r14,%r12 # 
sigma1(X[(i+14)&0xf]) - - add %r13,%r12 - - add 48(%rsp),%r12 - - add 104(%rsp),%r12 - mov %r11,%r13 - mov %r11,%r14 - mov %rax,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %rbx,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %r11,%r15 # (f^g)&e - mov %r12,104(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %rbx,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %rcx,%r12 # T1+=h - - mov %rdx,%rcx - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %rdx,%r13 - mov %rdx,%r14 - - ror $28,%rcx - ror $34,%r13 - mov %rdx,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%rcx - ror $5,%r13 - or %r9,%r14 # a|c - - xor %r13,%rcx # h=Sigma0(a) - and %r9,%r15 # a&c - add %r12,%r10 # d+=T1 - - and %r8,%r14 # (a|c)&b - add %r12,%rcx # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%rcx # h+=Maj(a,b,c) - mov 120(%rsp),%r13 - mov 96(%rsp),%r12 - - mov %r13,%r15 - - shr $7,%r13 - ror $1,%r15 - - xor %r15,%r13 - ror $7,%r15 - - xor %r15,%r13 # sigma0(X[(i+1)&0xf]) - mov %r12,%r14 - - shr $6,%r12 - ror $19,%r14 - - xor %r14,%r12 - ror $42,%r14 - - xor %r14,%r12 # sigma1(X[(i+14)&0xf]) - - add %r13,%r12 - - add 56(%rsp),%r12 - - add 112(%rsp),%r12 - mov %r10,%r13 - mov %r10,%r14 - mov %r11,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %rax,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %r10,%r15 # (f^g)&e - mov %r12,112(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %rax,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %rbx,%r12 # T1+=h - - mov %rcx,%rbx - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %rcx,%r13 - mov %rcx,%r14 - - ror $28,%rbx - ror $34,%r13 - mov %rcx,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%rbx - ror $5,%r13 - or %r8,%r14 # a|c - - xor %r13,%rbx # h=Sigma0(a) - and %r8,%r15 # a&c - add %r12,%r9 # d+=T1 - - and %rdx,%r14 # (a|c)&b - add %r12,%rbx # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%rbx # h+=Maj(a,b,c) - mov 0(%rsp),%r13 - mov 
104(%rsp),%r12 - - mov %r13,%r15 - - shr $7,%r13 - ror $1,%r15 - - xor %r15,%r13 - ror $7,%r15 - - xor %r15,%r13 # sigma0(X[(i+1)&0xf]) - mov %r12,%r14 - - shr $6,%r12 - ror $19,%r14 - - xor %r14,%r12 - ror $42,%r14 - - xor %r14,%r12 # sigma1(X[(i+14)&0xf]) - - add %r13,%r12 - - add 64(%rsp),%r12 - - add 120(%rsp),%r12 - mov %r9,%r13 - mov %r9,%r14 - mov %r10,%r15 - - ror $14,%r13 - ror $18,%r14 - xor %r11,%r15 # f^g - - xor %r14,%r13 - ror $23,%r14 - and %r9,%r15 # (f^g)&e - mov %r12,120(%rsp) - - xor %r14,%r13 # Sigma1(e) - xor %r11,%r15 # Ch(e,f,g)=((f^g)&e)^g - add %rax,%r12 # T1+=h - - mov %rbx,%rax - add %r13,%r12 # T1+=Sigma1(e) - - add %r15,%r12 # T1+=Ch(e,f,g) - mov %rbx,%r13 - mov %rbx,%r14 - - ror $28,%rax - ror $34,%r13 - mov %rbx,%r15 - add (%rbp,%rdi,8),%r12 # T1+=K[round] - - xor %r13,%rax - ror $5,%r13 - or %rdx,%r14 # a|c - - xor %r13,%rax # h=Sigma0(a) - and %rdx,%r15 # a&c - add %r12,%r8 # d+=T1 - - and %rcx,%r14 # (a|c)&b - add %r12,%rax # h+=T1 - - or %r15,%r14 # Maj(a,b,c)=((a|c)&b)|(a&c) - lea 1(%rdi),%rdi # round++ - - add %r14,%rax # h+=Maj(a,b,c) - cmp $80,%rdi - jb .Lrounds_16_xx - - mov 16*8+0*8(%rsp),%rdi - lea 16*8(%rsi),%rsi - - add 8*0(%rdi),%rax - add 8*1(%rdi),%rbx - add 8*2(%rdi),%rcx - add 8*3(%rdi),%rdx - add 8*4(%rdi),%r8 - add 8*5(%rdi),%r9 - add 8*6(%rdi),%r10 - add 8*7(%rdi),%r11 - - cmp 16*8+2*8(%rsp),%rsi - - mov %rax,8*0(%rdi) - mov %rbx,8*1(%rdi) - mov %rcx,8*2(%rdi) - mov %rdx,8*3(%rdi) - mov %r8,8*4(%rdi) - mov %r9,8*5(%rdi) - mov %r10,8*6(%rdi) - mov %r11,8*7(%rdi) - jb .Lloop - - mov 16*8+3*8(%rsp),%rsp -.cfi_def_cfa %rsp,56 - pop %r15 -.cfi_adjust_cfa_offset -8 -.cfi_restore %r15 - pop %r14 -.cfi_adjust_cfa_offset -8 -.cfi_restore %r14 - pop %r13 -.cfi_adjust_cfa_offset -8 -.cfi_restore %r13 - pop %r12 -.cfi_adjust_cfa_offset -8 -.cfi_restore %r12 - pop %rbp -.cfi_adjust_cfa_offset -8 -.cfi_restore %rbp - pop %rbx -.cfi_adjust_cfa_offset -8 -.cfi_restore %rbx - - ret -.cfi_endproc -SET_SIZE(SHA512TransformBlocks) - 
-.data -.align 64 -.type K512,@object -K512: - .quad 0x428a2f98d728ae22,0x7137449123ef65cd - .quad 0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc - .quad 0x3956c25bf348b538,0x59f111f1b605d019 - .quad 0x923f82a4af194f9b,0xab1c5ed5da6d8118 - .quad 0xd807aa98a3030242,0x12835b0145706fbe - .quad 0x243185be4ee4b28c,0x550c7dc3d5ffb4e2 - .quad 0x72be5d74f27b896f,0x80deb1fe3b1696b1 - .quad 0x9bdc06a725c71235,0xc19bf174cf692694 - .quad 0xe49b69c19ef14ad2,0xefbe4786384f25e3 - .quad 0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65 - .quad 0x2de92c6f592b0275,0x4a7484aa6ea6e483 - .quad 0x5cb0a9dcbd41fbd4,0x76f988da831153b5 - .quad 0x983e5152ee66dfab,0xa831c66d2db43210 - .quad 0xb00327c898fb213f,0xbf597fc7beef0ee4 - .quad 0xc6e00bf33da88fc2,0xd5a79147930aa725 - .quad 0x06ca6351e003826f,0x142929670a0e6e70 - .quad 0x27b70a8546d22ffc,0x2e1b21385c26c926 - .quad 0x4d2c6dfc5ac42aed,0x53380d139d95b3df - .quad 0x650a73548baf63de,0x766a0abb3c77b2a8 - .quad 0x81c2c92e47edaee6,0x92722c851482353b - .quad 0xa2bfe8a14cf10364,0xa81a664bbc423001 - .quad 0xc24b8b70d0f89791,0xc76c51a30654be30 - .quad 0xd192e819d6ef5218,0xd69906245565a910 - .quad 0xf40e35855771202a,0x106aa07032bbd1b8 - .quad 0x19a4c116b8d2d0c8,0x1e376c085141ab53 - .quad 0x2748774cdf8eeb99,0x34b0bcb5e19b48a8 - .quad 0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb - .quad 0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3 - .quad 0x748f82ee5defb2fc,0x78a5636f43172f60 - .quad 0x84c87814a1f0ab72,0x8cc702081a6439ec - .quad 0x90befffa23631e28,0xa4506cebde82bde9 - .quad 0xbef9a3f7b2c67915,0xc67178f2e372532b - .quad 0xca273eceea26619c,0xd186b8c721c0c207 - .quad 0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178 - .quad 0x06f067aa72176fba,0x0a637dc5a2c898a6 - .quad 0x113f9804bef90dae,0x1b710b35131c471b - .quad 0x28db77f523047d84,0x32caab7b40c72493 - .quad 0x3c9ebe0a15c9bebc,0x431d67c49c100d4c - .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a - .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 -#endif /* !lint && !__lint */ - -#ifdef __ELF__ -.section .note.GNU-stack,"",%progbits -#endif diff --git 
a/sys/contrib/openzfs/module/icp/core/kcf_callprov.c b/sys/contrib/openzfs/module/icp/core/kcf_callprov.c index fd2f7e1aac3d..b1822dd5b878 100644 --- a/sys/contrib/openzfs/module/icp/core/kcf_callprov.c +++ b/sys/contrib/openzfs/module/icp/core/kcf_callprov.c @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * @@ -27,9 +27,6 @@ #include <sys/crypto/impl.h> #include <sys/crypto/sched_impl.h> -static int kcf_emulate_dual(kcf_provider_desc_t *, crypto_ctx_t *, - kcf_req_params_t *); - void kcf_free_triedlist(kcf_prov_tried_t *list) { @@ -66,171 +63,9 @@ is_in_triedlist(kcf_provider_desc_t *pd, kcf_prov_tried_t *triedl) if (triedl->pt_pd == pd) return (B_TRUE); triedl = triedl->pt_next; - }; - - return (B_FALSE); -} - -/* - * Search a mech entry's hardware provider list for the specified - * provider. Return true if found. - */ -static boolean_t -is_valid_provider_for_mech(kcf_provider_desc_t *pd, kcf_mech_entry_t *me, - crypto_func_group_t fg) -{ - kcf_prov_mech_desc_t *prov_chain; - - prov_chain = me->me_hw_prov_chain; - if (prov_chain != NULL) { - ASSERT(me->me_num_hwprov > 0); - for (; prov_chain != NULL; prov_chain = prov_chain->pm_next) { - if (prov_chain->pm_prov_desc == pd && - IS_FG_SUPPORTED(prov_chain, fg)) { - return (B_TRUE); - } - } - } - return (B_FALSE); -} - -/* - * This routine, given a logical provider, returns the least loaded - * provider belonging to the logical provider. The provider must be - * able to do the specified mechanism, i.e. check that the mechanism - * hasn't been disabled. In addition, just in case providers are not - * entirely equivalent, the provider's entry point is checked for - * non-nullness. 
This is accomplished by having the caller pass, as - * arguments, the offset of the function group (offset_1), and the - * offset of the function within the function group (offset_2). - * Returns NULL if no provider can be found. - */ -int -kcf_get_hardware_provider(crypto_mech_type_t mech_type_1, - crypto_mech_type_t mech_type_2, boolean_t call_restrict, - kcf_provider_desc_t *old, kcf_provider_desc_t **new, crypto_func_group_t fg) -{ - kcf_provider_desc_t *provider, *real_pd = old; - kcf_provider_desc_t *gpd = NULL; /* good provider */ - kcf_provider_desc_t *bpd = NULL; /* busy provider */ - kcf_provider_list_t *p; - kcf_ops_class_t class; - kcf_mech_entry_t *me; - kcf_mech_entry_tab_t *me_tab; - int index, len, gqlen = INT_MAX, rv = CRYPTO_SUCCESS; - - /* get the mech entry for the specified mechanism */ - class = KCF_MECH2CLASS(mech_type_1); - if ((class < KCF_FIRST_OPSCLASS) || (class > KCF_LAST_OPSCLASS)) { - return (CRYPTO_MECHANISM_INVALID); - } - - me_tab = &kcf_mech_tabs_tab[class]; - index = KCF_MECH2INDEX(mech_type_1); - if ((index < 0) || (index >= me_tab->met_size)) { - return (CRYPTO_MECHANISM_INVALID); } - me = &((me_tab->met_tab)[index]); - mutex_enter(&me->me_mutex); - - /* - * We assume the provider descriptor will not go away because - * it is being held somewhere, i.e. its reference count has been - * incremented. In the case of the crypto module, the provider - * descriptor is held by the session structure. - */ - if (old->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) { - if (old->pd_provider_list == NULL) { - real_pd = NULL; - rv = CRYPTO_DEVICE_ERROR; - goto out; - } - /* - * Find the least loaded real provider. KCF_PROV_LOAD gives - * the load (number of pending requests) of the provider. 
- */ - mutex_enter(&old->pd_lock); - p = old->pd_provider_list; - while (p != NULL) { - provider = p->pl_provider; - - ASSERT(provider->pd_prov_type != - CRYPTO_LOGICAL_PROVIDER); - - if (call_restrict && - (provider->pd_flags & KCF_PROV_RESTRICTED)) { - p = p->pl_next; - continue; - } - - if (!is_valid_provider_for_mech(provider, me, fg)) { - p = p->pl_next; - continue; - } - - /* provider does second mech */ - if (mech_type_2 != CRYPTO_MECH_INVALID) { - int i; - - i = KCF_TO_PROV_MECH_INDX(provider, - mech_type_2); - if (i == KCF_INVALID_INDX) { - p = p->pl_next; - continue; - } - } - - if (provider->pd_state != KCF_PROV_READY) { - /* choose BUSY if no READY providers */ - if (provider->pd_state == KCF_PROV_BUSY) - bpd = provider; - p = p->pl_next; - continue; - } - - len = KCF_PROV_LOAD(provider); - if (len < gqlen) { - gqlen = len; - gpd = provider; - } - - p = p->pl_next; - } - - if (gpd != NULL) { - real_pd = gpd; - KCF_PROV_REFHOLD(real_pd); - } else if (bpd != NULL) { - real_pd = bpd; - KCF_PROV_REFHOLD(real_pd); - } else { - /* can't find provider */ - real_pd = NULL; - rv = CRYPTO_MECHANISM_INVALID; - } - mutex_exit(&old->pd_lock); - - } else { - if (!KCF_IS_PROV_USABLE(old) || - (call_restrict && (old->pd_flags & KCF_PROV_RESTRICTED))) { - real_pd = NULL; - rv = CRYPTO_DEVICE_ERROR; - goto out; - } - - if (!is_valid_provider_for_mech(old, me, fg)) { - real_pd = NULL; - rv = CRYPTO_MECHANISM_INVALID; - goto out; - } - - KCF_PROV_REFHOLD(real_pd); - } -out: - mutex_exit(&me->me_mutex); - *new = real_pd; - return (rv); + return (B_FALSE); } /* @@ -243,22 +78,17 @@ out: * search to find one. This is fine as we assume there are only a few * number of providers in this list. If this assumption ever changes, * we should revisit this. - * - * call_restrict represents if the caller should not be allowed to - * use restricted providers. 
*/ kcf_provider_desc_t * kcf_get_mech_provider(crypto_mech_type_t mech_type, kcf_mech_entry_t **mepp, - int *error, kcf_prov_tried_t *triedl, crypto_func_group_t fg, - boolean_t call_restrict, size_t data_size) + int *error, kcf_prov_tried_t *triedl, crypto_func_group_t fg) { - kcf_provider_desc_t *pd = NULL, *gpd = NULL; - kcf_prov_mech_desc_t *prov_chain, *mdesc; - int len, gqlen = INT_MAX; + kcf_provider_desc_t *pd = NULL; + kcf_prov_mech_desc_t *mdesc; kcf_ops_class_t class; int index; kcf_mech_entry_t *me; - kcf_mech_entry_tab_t *me_tab; + const kcf_mech_entry_tab_t *me_tab; class = KCF_MECH2CLASS(mech_type); if ((class < KCF_FIRST_OPSCLASS) || (class > KCF_LAST_OPSCLASS)) { @@ -277,58 +107,12 @@ kcf_get_mech_provider(crypto_mech_type_t mech_type, kcf_mech_entry_t **mepp, if (mepp != NULL) *mepp = me; - mutex_enter(&me->me_mutex); - - prov_chain = me->me_hw_prov_chain; - - /* - * We check for the threshold for using a hardware provider for - * this amount of data. If there is no software provider available - * for the mechanism, then the threshold is ignored. - */ - if ((prov_chain != NULL) && - ((data_size == 0) || (me->me_threshold == 0) || - (data_size >= me->me_threshold) || - ((mdesc = me->me_sw_prov) == NULL) || - (!IS_FG_SUPPORTED(mdesc, fg)) || - (!KCF_IS_PROV_USABLE(mdesc->pm_prov_desc)))) { - ASSERT(me->me_num_hwprov > 0); - /* there is at least one provider */ - - /* - * Find the least loaded real provider. KCF_PROV_LOAD gives - * the load (number of pending requests) of the provider. 
- */ - while (prov_chain != NULL) { - pd = prov_chain->pm_prov_desc; - - if (!IS_FG_SUPPORTED(prov_chain, fg) || - !KCF_IS_PROV_USABLE(pd) || - IS_PROVIDER_TRIED(pd, triedl) || - (call_restrict && - (pd->pd_flags & KCF_PROV_RESTRICTED))) { - prov_chain = prov_chain->pm_next; - continue; - } - - if ((len = KCF_PROV_LOAD(pd)) < gqlen) { - gqlen = len; - gpd = pd; - } - - prov_chain = prov_chain->pm_next; - } - - pd = gpd; - } - - /* No HW provider for this mech, is there a SW provider? */ + /* Is there a provider? */ if (pd == NULL && (mdesc = me->me_sw_prov) != NULL) { pd = mdesc->pm_prov_desc; if (!IS_FG_SUPPORTED(mdesc, fg) || !KCF_IS_PROV_USABLE(pd) || - IS_PROVIDER_TRIED(pd, triedl) || - (call_restrict && (pd->pd_flags & KCF_PROV_RESTRICTED))) + IS_PROVIDER_TRIED(pd, triedl)) pd = NULL; } @@ -344,1224 +128,5 @@ kcf_get_mech_provider(crypto_mech_type_t mech_type, kcf_mech_entry_t **mepp, } else KCF_PROV_REFHOLD(pd); - mutex_exit(&me->me_mutex); return (pd); } - -/* - * Very similar to kcf_get_mech_provider(). Finds the best provider capable of - * a dual operation with both me1 and me2. - * When no dual-ops capable providers are available, return the best provider - * for me1 only, and sets *prov_mt2 to CRYPTO_INVALID_MECHID; - * We assume/expect that a slower HW capable of the dual is still - * faster than the 2 fastest providers capable of the individual ops - * separately. 
- */ -kcf_provider_desc_t * -kcf_get_dual_provider(crypto_mechanism_t *mech1, crypto_mechanism_t *mech2, - kcf_mech_entry_t **mepp, crypto_mech_type_t *prov_mt1, - crypto_mech_type_t *prov_mt2, int *error, kcf_prov_tried_t *triedl, - crypto_func_group_t fg1, crypto_func_group_t fg2, boolean_t call_restrict, - size_t data_size) -{ - kcf_provider_desc_t *pd = NULL, *pdm1 = NULL, *pdm1m2 = NULL; - kcf_prov_mech_desc_t *prov_chain, *mdesc; - int len, gqlen = INT_MAX, dgqlen = INT_MAX; - crypto_mech_info_list_t *mil; - crypto_mech_type_t m2id = mech2->cm_type; - kcf_mech_entry_t *me; - - /* when mech is a valid mechanism, me will be its mech_entry */ - if (kcf_get_mech_entry(mech1->cm_type, &me) != KCF_SUCCESS) { - *error = CRYPTO_MECHANISM_INVALID; - return (NULL); - } - - *prov_mt2 = CRYPTO_MECH_INVALID; - - if (mepp != NULL) - *mepp = me; - mutex_enter(&me->me_mutex); - - prov_chain = me->me_hw_prov_chain; - /* - * We check the threshold for using a hardware provider for - * this amount of data. If there is no software provider available - * for the first mechanism, then the threshold is ignored. - */ - if ((prov_chain != NULL) && - ((data_size == 0) || (me->me_threshold == 0) || - (data_size >= me->me_threshold) || - ((mdesc = me->me_sw_prov) == NULL) || - (!IS_FG_SUPPORTED(mdesc, fg1)) || - (!KCF_IS_PROV_USABLE(mdesc->pm_prov_desc)))) { - /* there is at least one provider */ - ASSERT(me->me_num_hwprov > 0); - - /* - * Find the least loaded provider capable of the combo - * me1 + me2, and save a pointer to the least loaded - * provider capable of me1 only. 
- */ - while (prov_chain != NULL) { - pd = prov_chain->pm_prov_desc; - len = KCF_PROV_LOAD(pd); - - if (!IS_FG_SUPPORTED(prov_chain, fg1) || - !KCF_IS_PROV_USABLE(pd) || - IS_PROVIDER_TRIED(pd, triedl) || - (call_restrict && - (pd->pd_flags & KCF_PROV_RESTRICTED))) { - prov_chain = prov_chain->pm_next; - continue; - } - - /* Save the best provider capable of m1 */ - if (len < gqlen) { - *prov_mt1 = - prov_chain->pm_mech_info.cm_mech_number; - gqlen = len; - pdm1 = pd; - } - - /* See if pd can do me2 too */ - for (mil = prov_chain->pm_mi_list; - mil != NULL; mil = mil->ml_next) { - if ((mil->ml_mech_info.cm_func_group_mask & - fg2) == 0) - continue; - - if ((mil->ml_kcf_mechid == m2id) && - (len < dgqlen)) { - /* Bingo! */ - dgqlen = len; - pdm1m2 = pd; - *prov_mt2 = - mil->ml_mech_info.cm_mech_number; - *prov_mt1 = prov_chain-> - pm_mech_info.cm_mech_number; - break; - } - } - - prov_chain = prov_chain->pm_next; - } - - pd = (pdm1m2 != NULL) ? pdm1m2 : pdm1; - } - - /* no HW provider for this mech, is there a SW provider? */ - if (pd == NULL && (mdesc = me->me_sw_prov) != NULL) { - pd = mdesc->pm_prov_desc; - if (!IS_FG_SUPPORTED(mdesc, fg1) || - !KCF_IS_PROV_USABLE(pd) || - IS_PROVIDER_TRIED(pd, triedl) || - (call_restrict && (pd->pd_flags & KCF_PROV_RESTRICTED))) - pd = NULL; - else { - /* See if pd can do me2 too */ - for (mil = me->me_sw_prov->pm_mi_list; - mil != NULL; mil = mil->ml_next) { - if ((mil->ml_mech_info.cm_func_group_mask & - fg2) == 0) - continue; - - if (mil->ml_kcf_mechid == m2id) { - /* Bingo! */ - *prov_mt2 = - mil->ml_mech_info.cm_mech_number; - break; - } - } - *prov_mt1 = me->me_sw_prov->pm_mech_info.cm_mech_number; - } - } - - if (pd == NULL) - *error = CRYPTO_MECH_NOT_SUPPORTED; - else - KCF_PROV_REFHOLD(pd); - - mutex_exit(&me->me_mutex); - return (pd); -} - -/* - * Do the actual work of calling the provider routines. 
- * - * pd - Provider structure - * ctx - Context for this operation - * params - Parameters for this operation - * rhndl - Request handle to use for notification - * - * The return values are the same as that of the respective SPI. - */ -int -common_submit_request(kcf_provider_desc_t *pd, crypto_ctx_t *ctx, - kcf_req_params_t *params, crypto_req_handle_t rhndl) -{ - int err = CRYPTO_ARGUMENTS_BAD; - kcf_op_type_t optype; - - optype = params->rp_optype; - - switch (params->rp_opgrp) { - case KCF_OG_DIGEST: { - kcf_digest_ops_params_t *dops = &params->rp_u.digest_params; - - switch (optype) { - case KCF_OP_INIT: - /* - * We should do this only here and not in KCF_WRAP_* - * macros. This is because we may want to try other - * providers, in case we recover from a failure. - */ - KCF_SET_PROVIDER_MECHNUM(dops->do_framework_mechtype, - pd, &dops->do_mech); - - err = KCF_PROV_DIGEST_INIT(pd, ctx, &dops->do_mech, - rhndl); - break; - - case KCF_OP_SINGLE: - err = KCF_PROV_DIGEST(pd, ctx, dops->do_data, - dops->do_digest, rhndl); - break; - - case KCF_OP_UPDATE: - err = KCF_PROV_DIGEST_UPDATE(pd, ctx, - dops->do_data, rhndl); - break; - - case KCF_OP_FINAL: - err = KCF_PROV_DIGEST_FINAL(pd, ctx, - dops->do_digest, rhndl); - break; - - case KCF_OP_ATOMIC: - ASSERT(ctx == NULL); - KCF_SET_PROVIDER_MECHNUM(dops->do_framework_mechtype, - pd, &dops->do_mech); - err = KCF_PROV_DIGEST_ATOMIC(pd, dops->do_sid, - &dops->do_mech, dops->do_data, dops->do_digest, - rhndl); - break; - - case KCF_OP_DIGEST_KEY: - err = KCF_PROV_DIGEST_KEY(pd, ctx, dops->do_digest_key, - rhndl); - break; - - default: - break; - } - break; - } - - case KCF_OG_MAC: { - kcf_mac_ops_params_t *mops = &params->rp_u.mac_params; - - switch (optype) { - case KCF_OP_INIT: - KCF_SET_PROVIDER_MECHNUM(mops->mo_framework_mechtype, - pd, &mops->mo_mech); - - err = KCF_PROV_MAC_INIT(pd, ctx, &mops->mo_mech, - mops->mo_key, mops->mo_templ, rhndl); - break; - - case KCF_OP_SINGLE: - err = KCF_PROV_MAC(pd, ctx, mops->mo_data, -
mops->mo_mac, rhndl); - break; - - case KCF_OP_UPDATE: - err = KCF_PROV_MAC_UPDATE(pd, ctx, mops->mo_data, - rhndl); - break; - - case KCF_OP_FINAL: - err = KCF_PROV_MAC_FINAL(pd, ctx, mops->mo_mac, rhndl); - break; - - case KCF_OP_ATOMIC: - ASSERT(ctx == NULL); - KCF_SET_PROVIDER_MECHNUM(mops->mo_framework_mechtype, - pd, &mops->mo_mech); - - err = KCF_PROV_MAC_ATOMIC(pd, mops->mo_sid, - &mops->mo_mech, mops->mo_key, mops->mo_data, - mops->mo_mac, mops->mo_templ, rhndl); - break; - - case KCF_OP_MAC_VERIFY_ATOMIC: - ASSERT(ctx == NULL); - KCF_SET_PROVIDER_MECHNUM(mops->mo_framework_mechtype, - pd, &mops->mo_mech); - - err = KCF_PROV_MAC_VERIFY_ATOMIC(pd, mops->mo_sid, - &mops->mo_mech, mops->mo_key, mops->mo_data, - mops->mo_mac, mops->mo_templ, rhndl); - break; - - default: - break; - } - break; - } - - case KCF_OG_ENCRYPT: { - kcf_encrypt_ops_params_t *eops = &params->rp_u.encrypt_params; - - switch (optype) { - case KCF_OP_INIT: - KCF_SET_PROVIDER_MECHNUM(eops->eo_framework_mechtype, - pd, &eops->eo_mech); - - err = KCF_PROV_ENCRYPT_INIT(pd, ctx, &eops->eo_mech, - eops->eo_key, eops->eo_templ, rhndl); - break; - - case KCF_OP_SINGLE: - err = KCF_PROV_ENCRYPT(pd, ctx, eops->eo_plaintext, - eops->eo_ciphertext, rhndl); - break; - - case KCF_OP_UPDATE: - err = KCF_PROV_ENCRYPT_UPDATE(pd, ctx, - eops->eo_plaintext, eops->eo_ciphertext, rhndl); - break; - - case KCF_OP_FINAL: - err = KCF_PROV_ENCRYPT_FINAL(pd, ctx, - eops->eo_ciphertext, rhndl); - break; - - case KCF_OP_ATOMIC: - ASSERT(ctx == NULL); - KCF_SET_PROVIDER_MECHNUM(eops->eo_framework_mechtype, - pd, &eops->eo_mech); - - err = KCF_PROV_ENCRYPT_ATOMIC(pd, eops->eo_sid, - &eops->eo_mech, eops->eo_key, eops->eo_plaintext, - eops->eo_ciphertext, eops->eo_templ, rhndl); - break; - - default: - break; - } - break; - } - - case KCF_OG_DECRYPT: { - kcf_decrypt_ops_params_t *dcrops = &params->rp_u.decrypt_params; - - switch (optype) { - case KCF_OP_INIT: - KCF_SET_PROVIDER_MECHNUM(dcrops->dop_framework_mechtype, - pd,
&dcrops->dop_mech); - - err = KCF_PROV_DECRYPT_INIT(pd, ctx, &dcrops->dop_mech, - dcrops->dop_key, dcrops->dop_templ, rhndl); - break; - - case KCF_OP_SINGLE: - err = KCF_PROV_DECRYPT(pd, ctx, dcrops->dop_ciphertext, - dcrops->dop_plaintext, rhndl); - break; - - case KCF_OP_UPDATE: - err = KCF_PROV_DECRYPT_UPDATE(pd, ctx, - dcrops->dop_ciphertext, dcrops->dop_plaintext, - rhndl); - break; - - case KCF_OP_FINAL: - err = KCF_PROV_DECRYPT_FINAL(pd, ctx, - dcrops->dop_plaintext, rhndl); - break; - - case KCF_OP_ATOMIC: - ASSERT(ctx == NULL); - KCF_SET_PROVIDER_MECHNUM(dcrops->dop_framework_mechtype, - pd, &dcrops->dop_mech); - - err = KCF_PROV_DECRYPT_ATOMIC(pd, dcrops->dop_sid, - &dcrops->dop_mech, dcrops->dop_key, - dcrops->dop_ciphertext, dcrops->dop_plaintext, - dcrops->dop_templ, rhndl); - break; - - default: - break; - } - break; - } - - case KCF_OG_SIGN: { - kcf_sign_ops_params_t *sops = &params->rp_u.sign_params; - - switch (optype) { - case KCF_OP_INIT: - KCF_SET_PROVIDER_MECHNUM(sops->so_framework_mechtype, - pd, &sops->so_mech); - - err = KCF_PROV_SIGN_INIT(pd, ctx, &sops->so_mech, - sops->so_key, sops->so_templ, rhndl); - break; - - case KCF_OP_SIGN_RECOVER_INIT: - KCF_SET_PROVIDER_MECHNUM(sops->so_framework_mechtype, - pd, &sops->so_mech); - - err = KCF_PROV_SIGN_RECOVER_INIT(pd, ctx, - &sops->so_mech, sops->so_key, sops->so_templ, - rhndl); - break; - - case KCF_OP_SINGLE: - err = KCF_PROV_SIGN(pd, ctx, sops->so_data, - sops->so_signature, rhndl); - break; - - case KCF_OP_SIGN_RECOVER: - err = KCF_PROV_SIGN_RECOVER(pd, ctx, - sops->so_data, sops->so_signature, rhndl); - break; - - case KCF_OP_UPDATE: - err = KCF_PROV_SIGN_UPDATE(pd, ctx, sops->so_data, - rhndl); - break; - - case KCF_OP_FINAL: - err = KCF_PROV_SIGN_FINAL(pd, ctx, sops->so_signature, - rhndl); - break; - - case KCF_OP_ATOMIC: - ASSERT(ctx == NULL); - KCF_SET_PROVIDER_MECHNUM(sops->so_framework_mechtype, - pd, &sops->so_mech); - - err = KCF_PROV_SIGN_ATOMIC(pd, sops->so_sid, - &sops->so_mech,
sops->so_key, sops->so_data, - sops->so_templ, sops->so_signature, rhndl); - break; - - case KCF_OP_SIGN_RECOVER_ATOMIC: - ASSERT(ctx == NULL); - KCF_SET_PROVIDER_MECHNUM(sops->so_framework_mechtype, - pd, &sops->so_mech); - - err = KCF_PROV_SIGN_RECOVER_ATOMIC(pd, sops->so_sid, - &sops->so_mech, sops->so_key, sops->so_data, - sops->so_templ, sops->so_signature, rhndl); - break; - - default: - break; - } - break; - } - - case KCF_OG_VERIFY: { - kcf_verify_ops_params_t *vops = &params->rp_u.verify_params; - - switch (optype) { - case KCF_OP_INIT: - KCF_SET_PROVIDER_MECHNUM(vops->vo_framework_mechtype, - pd, &vops->vo_mech); - - err = KCF_PROV_VERIFY_INIT(pd, ctx, &vops->vo_mech, - vops->vo_key, vops->vo_templ, rhndl); - break; - - case KCF_OP_VERIFY_RECOVER_INIT: - KCF_SET_PROVIDER_MECHNUM(vops->vo_framework_mechtype, - pd, &vops->vo_mech); - - err = KCF_PROV_VERIFY_RECOVER_INIT(pd, ctx, - &vops->vo_mech, vops->vo_key, vops->vo_templ, - rhndl); - break; - - case KCF_OP_SINGLE: - err = KCF_PROV_VERIFY(pd, ctx, vops->vo_data, - vops->vo_signature, rhndl); - break; - - case KCF_OP_VERIFY_RECOVER: - err = KCF_PROV_VERIFY_RECOVER(pd, ctx, - vops->vo_signature, vops->vo_data, rhndl); - break; - - case KCF_OP_UPDATE: - err = KCF_PROV_VERIFY_UPDATE(pd, ctx, vops->vo_data, - rhndl); - break; - - case KCF_OP_FINAL: - err = KCF_PROV_VERIFY_FINAL(pd, ctx, vops->vo_signature, - rhndl); - break; - - case KCF_OP_ATOMIC: - ASSERT(ctx == NULL); - KCF_SET_PROVIDER_MECHNUM(vops->vo_framework_mechtype, - pd, &vops->vo_mech); - - err = KCF_PROV_VERIFY_ATOMIC(pd, vops->vo_sid, - &vops->vo_mech, vops->vo_key, vops->vo_data, - vops->vo_templ, vops->vo_signature, rhndl); - break; - - case KCF_OP_VERIFY_RECOVER_ATOMIC: - ASSERT(ctx == NULL); - KCF_SET_PROVIDER_MECHNUM(vops->vo_framework_mechtype, - pd, &vops->vo_mech); - - err = KCF_PROV_VERIFY_RECOVER_ATOMIC(pd, vops->vo_sid, - &vops->vo_mech, vops->vo_key, vops->vo_signature, - vops->vo_templ, vops->vo_data, rhndl); - break; - - default: -
break; - } - break; - } - - case KCF_OG_ENCRYPT_MAC: { - kcf_encrypt_mac_ops_params_t *eops = - &params->rp_u.encrypt_mac_params; - kcf_context_t *kcf_secondctx; - - switch (optype) { - case KCF_OP_INIT: - kcf_secondctx = ((kcf_context_t *) - (ctx->cc_framework_private))->kc_secondctx; - - if (kcf_secondctx != NULL) { - err = kcf_emulate_dual(pd, ctx, params); - break; - } - KCF_SET_PROVIDER_MECHNUM( - eops->em_framework_encr_mechtype, - pd, &eops->em_encr_mech); - - KCF_SET_PROVIDER_MECHNUM( - eops->em_framework_mac_mechtype, - pd, &eops->em_mac_mech); - - err = KCF_PROV_ENCRYPT_MAC_INIT(pd, ctx, - &eops->em_encr_mech, eops->em_encr_key, - &eops->em_mac_mech, eops->em_mac_key, - eops->em_encr_templ, eops->em_mac_templ, - rhndl); - - break; - - case KCF_OP_SINGLE: - err = KCF_PROV_ENCRYPT_MAC(pd, ctx, - eops->em_plaintext, eops->em_ciphertext, - eops->em_mac, rhndl); - break; - - case KCF_OP_UPDATE: - kcf_secondctx = ((kcf_context_t *) - (ctx->cc_framework_private))->kc_secondctx; - if (kcf_secondctx != NULL) { - err = kcf_emulate_dual(pd, ctx, params); - break; - } - err = KCF_PROV_ENCRYPT_MAC_UPDATE(pd, ctx, - eops->em_plaintext, eops->em_ciphertext, rhndl); - break; - - case KCF_OP_FINAL: - kcf_secondctx = ((kcf_context_t *) - (ctx->cc_framework_private))->kc_secondctx; - if (kcf_secondctx != NULL) { - err = kcf_emulate_dual(pd, ctx, params); - break; - } - err = KCF_PROV_ENCRYPT_MAC_FINAL(pd, ctx, - eops->em_ciphertext, eops->em_mac, rhndl); - break; - - case KCF_OP_ATOMIC: - ASSERT(ctx == NULL); - - KCF_SET_PROVIDER_MECHNUM( - eops->em_framework_encr_mechtype, - pd, &eops->em_encr_mech); - - KCF_SET_PROVIDER_MECHNUM( - eops->em_framework_mac_mechtype, - pd, &eops->em_mac_mech); - - err = KCF_PROV_ENCRYPT_MAC_ATOMIC(pd, eops->em_sid, - &eops->em_encr_mech, eops->em_encr_key, - &eops->em_mac_mech, eops->em_mac_key, - eops->em_plaintext, eops->em_ciphertext, - eops->em_mac, - eops->em_encr_templ, eops->em_mac_templ, - rhndl); - - break; - - default: - break; - } -
break; - } - - case KCF_OG_MAC_DECRYPT: { - kcf_mac_decrypt_ops_params_t *dops = - &params->rp_u.mac_decrypt_params; - kcf_context_t *kcf_secondctx; - - switch (optype) { - case KCF_OP_INIT: - kcf_secondctx = ((kcf_context_t *) - (ctx->cc_framework_private))->kc_secondctx; - - if (kcf_secondctx != NULL) { - err = kcf_emulate_dual(pd, ctx, params); - break; - } - KCF_SET_PROVIDER_MECHNUM( - dops->md_framework_mac_mechtype, - pd, &dops->md_mac_mech); - - KCF_SET_PROVIDER_MECHNUM( - dops->md_framework_decr_mechtype, - pd, &dops->md_decr_mech); - - err = KCF_PROV_MAC_DECRYPT_INIT(pd, ctx, - &dops->md_mac_mech, dops->md_mac_key, - &dops->md_decr_mech, dops->md_decr_key, - dops->md_mac_templ, dops->md_decr_templ, - rhndl); - - break; - - case KCF_OP_SINGLE: - err = KCF_PROV_MAC_DECRYPT(pd, ctx, - dops->md_ciphertext, dops->md_mac, - dops->md_plaintext, rhndl); - break; - - case KCF_OP_UPDATE: - kcf_secondctx = ((kcf_context_t *) - (ctx->cc_framework_private))->kc_secondctx; - if (kcf_secondctx != NULL) { - err = kcf_emulate_dual(pd, ctx, params); - break; - } - err = KCF_PROV_MAC_DECRYPT_UPDATE(pd, ctx, - dops->md_ciphertext, dops->md_plaintext, rhndl); - break; - - case KCF_OP_FINAL: - kcf_secondctx = ((kcf_context_t *) - (ctx->cc_framework_private))->kc_secondctx; - if (kcf_secondctx != NULL) { - err = kcf_emulate_dual(pd, ctx, params); - break; - } - err = KCF_PROV_MAC_DECRYPT_FINAL(pd, ctx, - dops->md_mac, dops->md_plaintext, rhndl); - break; - - case KCF_OP_ATOMIC: - ASSERT(ctx == NULL); - - KCF_SET_PROVIDER_MECHNUM( - dops->md_framework_mac_mechtype, - pd, &dops->md_mac_mech); - - KCF_SET_PROVIDER_MECHNUM( - dops->md_framework_decr_mechtype, - pd, &dops->md_decr_mech); - - err = KCF_PROV_MAC_DECRYPT_ATOMIC(pd, dops->md_sid, - &dops->md_mac_mech, dops->md_mac_key, - &dops->md_decr_mech, dops->md_decr_key, - dops->md_ciphertext, dops->md_mac, - dops->md_plaintext, - dops->md_mac_templ, dops->md_decr_templ, - rhndl); - - break; - - case KCF_OP_MAC_VERIFY_DECRYPT_ATOMIC: -
ASSERT(ctx == NULL); - - KCF_SET_PROVIDER_MECHNUM( - dops->md_framework_mac_mechtype, - pd, &dops->md_mac_mech); - - KCF_SET_PROVIDER_MECHNUM( - dops->md_framework_decr_mechtype, - pd, &dops->md_decr_mech); - - err = KCF_PROV_MAC_VERIFY_DECRYPT_ATOMIC(pd, - dops->md_sid, &dops->md_mac_mech, dops->md_mac_key, - &dops->md_decr_mech, dops->md_decr_key, - dops->md_ciphertext, dops->md_mac, - dops->md_plaintext, - dops->md_mac_templ, dops->md_decr_templ, - rhndl); - - break; - - default: - break; - } - break; - } - - case KCF_OG_KEY: { - kcf_key_ops_params_t *kops = &params->rp_u.key_params; - - ASSERT(ctx == NULL); - KCF_SET_PROVIDER_MECHNUM(kops->ko_framework_mechtype, pd, - &kops->ko_mech); - - switch (optype) { - case KCF_OP_KEY_GENERATE: - err = KCF_PROV_KEY_GENERATE(pd, kops->ko_sid, - &kops->ko_mech, - kops->ko_key_template, kops->ko_key_attribute_count, - kops->ko_key_object_id_ptr, rhndl); - break; - - case KCF_OP_KEY_GENERATE_PAIR: - err = KCF_PROV_KEY_GENERATE_PAIR(pd, kops->ko_sid, - &kops->ko_mech, - kops->ko_key_template, kops->ko_key_attribute_count, - kops->ko_private_key_template, - kops->ko_private_key_attribute_count, - kops->ko_key_object_id_ptr, - kops->ko_private_key_object_id_ptr, rhndl); - break; - - case KCF_OP_KEY_WRAP: - err = KCF_PROV_KEY_WRAP(pd, kops->ko_sid, - &kops->ko_mech, - kops->ko_key, kops->ko_key_object_id_ptr, - kops->ko_wrapped_key, kops->ko_wrapped_key_len_ptr, - rhndl); - break; - - case KCF_OP_KEY_UNWRAP: - err = KCF_PROV_KEY_UNWRAP(pd, kops->ko_sid, - &kops->ko_mech, - kops->ko_key, kops->ko_wrapped_key, - kops->ko_wrapped_key_len_ptr, - kops->ko_key_template, kops->ko_key_attribute_count, - kops->ko_key_object_id_ptr, rhndl); - break; - - case KCF_OP_KEY_DERIVE: - err = KCF_PROV_KEY_DERIVE(pd, kops->ko_sid, - &kops->ko_mech, - kops->ko_key, kops->ko_key_template, - kops->ko_key_attribute_count, - kops->ko_key_object_id_ptr, rhndl); - break; - - default: - break; - } - break; - } - - case KCF_OG_RANDOM: { -
kcf_random_number_ops_params_t *rops = - &params->rp_u.random_number_params; - - ASSERT(ctx == NULL); - - switch (optype) { - case KCF_OP_RANDOM_SEED: - err = KCF_PROV_SEED_RANDOM(pd, rops->rn_sid, - rops->rn_buf, rops->rn_buflen, rops->rn_entropy_est, - rops->rn_flags, rhndl); - break; - - case KCF_OP_RANDOM_GENERATE: - err = KCF_PROV_GENERATE_RANDOM(pd, rops->rn_sid, - rops->rn_buf, rops->rn_buflen, rhndl); - break; - - default: - break; - } - break; - } - - case KCF_OG_SESSION: { - kcf_session_ops_params_t *sops = &params->rp_u.session_params; - - ASSERT(ctx == NULL); - switch (optype) { - case KCF_OP_SESSION_OPEN: - /* - * so_pd may be a logical provider, in which case - * we need to check whether it has been removed. - */ - if (KCF_IS_PROV_REMOVED(sops->so_pd)) { - err = CRYPTO_DEVICE_ERROR; - break; - } - err = KCF_PROV_SESSION_OPEN(pd, sops->so_sid_ptr, - rhndl, sops->so_pd); - break; - - case KCF_OP_SESSION_CLOSE: - /* - * so_pd may be a logical provider, in which case - * we need to check whether it has been removed.
- */ - if (KCF_IS_PROV_REMOVED(sops->so_pd)) { - err = CRYPTO_DEVICE_ERROR; - break; - } - err = KCF_PROV_SESSION_CLOSE(pd, sops->so_sid, - rhndl, sops->so_pd); - break; - - case KCF_OP_SESSION_LOGIN: - err = KCF_PROV_SESSION_LOGIN(pd, sops->so_sid, - sops->so_user_type, sops->so_pin, - sops->so_pin_len, rhndl); - break; - - case KCF_OP_SESSION_LOGOUT: - err = KCF_PROV_SESSION_LOGOUT(pd, sops->so_sid, rhndl); - break; - - default: - break; - } - break; - } - - case KCF_OG_OBJECT: { - kcf_object_ops_params_t *jops = &params->rp_u.object_params; - - ASSERT(ctx == NULL); - switch (optype) { - case KCF_OP_OBJECT_CREATE: - err = KCF_PROV_OBJECT_CREATE(pd, jops->oo_sid, - jops->oo_template, jops->oo_attribute_count, - jops->oo_object_id_ptr, rhndl); - break; - - case KCF_OP_OBJECT_COPY: - err = KCF_PROV_OBJECT_COPY(pd, jops->oo_sid, - jops->oo_object_id, - jops->oo_template, jops->oo_attribute_count, - jops->oo_object_id_ptr, rhndl); - break; - - case KCF_OP_OBJECT_DESTROY: - err = KCF_PROV_OBJECT_DESTROY(pd, jops->oo_sid, - jops->oo_object_id, rhndl); - break; - - case KCF_OP_OBJECT_GET_SIZE: - err = KCF_PROV_OBJECT_GET_SIZE(pd, jops->oo_sid, - jops->oo_object_id, jops->oo_object_size, rhndl); - break; - - case KCF_OP_OBJECT_GET_ATTRIBUTE_VALUE: - err = KCF_PROV_OBJECT_GET_ATTRIBUTE_VALUE(pd, - jops->oo_sid, jops->oo_object_id, - jops->oo_template, jops->oo_attribute_count, rhndl); - break; - - case KCF_OP_OBJECT_SET_ATTRIBUTE_VALUE: - err = KCF_PROV_OBJECT_SET_ATTRIBUTE_VALUE(pd, - jops->oo_sid, jops->oo_object_id, - jops->oo_template, jops->oo_attribute_count, rhndl); - break; - - case KCF_OP_OBJECT_FIND_INIT: - err = KCF_PROV_OBJECT_FIND_INIT(pd, jops->oo_sid, - jops->oo_template, jops->oo_attribute_count, - jops->oo_find_init_pp_ptr, rhndl); - break; - - case KCF_OP_OBJECT_FIND: - err = KCF_PROV_OBJECT_FIND(pd, jops->oo_find_pp, - jops->oo_object_id_ptr, jops->oo_max_object_count, - jops->oo_object_count_ptr, rhndl); - break; - - case KCF_OP_OBJECT_FIND_FINAL: - err =
KCF_PROV_OBJECT_FIND_FINAL(pd, jops->oo_find_pp, - rhndl); - break; - - default: - break; - } - break; - } - - case KCF_OG_PROVMGMT: { - kcf_provmgmt_ops_params_t *pops = &params->rp_u.provmgmt_params; - - ASSERT(ctx == NULL); - switch (optype) { - case KCF_OP_MGMT_EXTINFO: - /* - * po_pd may be a logical provider, in which case - * we need to check whether it has been removed. - */ - if (KCF_IS_PROV_REMOVED(pops->po_pd)) { - err = CRYPTO_DEVICE_ERROR; - break; - } - err = KCF_PROV_EXT_INFO(pd, pops->po_ext_info, rhndl, - pops->po_pd); - break; - - case KCF_OP_MGMT_INITTOKEN: - err = KCF_PROV_INIT_TOKEN(pd, pops->po_pin, - pops->po_pin_len, pops->po_label, rhndl); - break; - - case KCF_OP_MGMT_INITPIN: - err = KCF_PROV_INIT_PIN(pd, pops->po_sid, pops->po_pin, - pops->po_pin_len, rhndl); - break; - - case KCF_OP_MGMT_SETPIN: - err = KCF_PROV_SET_PIN(pd, pops->po_sid, - pops->po_old_pin, pops->po_old_pin_len, - pops->po_pin, pops->po_pin_len, rhndl); - break; - - default: - break; - } - break; - } - - case KCF_OG_NOSTORE_KEY: { - kcf_key_ops_params_t *kops = &params->rp_u.key_params; - - ASSERT(ctx == NULL); - KCF_SET_PROVIDER_MECHNUM(kops->ko_framework_mechtype, pd, - &kops->ko_mech); - - switch (optype) { - case KCF_OP_KEY_GENERATE: - err = KCF_PROV_NOSTORE_KEY_GENERATE(pd, kops->ko_sid, - &kops->ko_mech, kops->ko_key_template, - kops->ko_key_attribute_count, - kops->ko_out_template1, - kops->ko_out_attribute_count1, rhndl); - break; - - case KCF_OP_KEY_GENERATE_PAIR: - err = KCF_PROV_NOSTORE_KEY_GENERATE_PAIR(pd, - kops->ko_sid, &kops->ko_mech, - kops->ko_key_template, kops->ko_key_attribute_count, - kops->ko_private_key_template, - kops->ko_private_key_attribute_count, - kops->ko_out_template1, - kops->ko_out_attribute_count1, - kops->ko_out_template2, - kops->ko_out_attribute_count2, - rhndl); - break; - - case KCF_OP_KEY_DERIVE: - err = KCF_PROV_NOSTORE_KEY_DERIVE(pd, kops->ko_sid, - &kops->ko_mech, kops->ko_key, - kops->ko_key_template, - kops->ko_key_attribute_count,
- kops->ko_out_template1, - kops->ko_out_attribute_count1, rhndl); - break; - - default: - break; - } - break; - } - default: - break; - } /* end of switch(params->rp_opgrp) */ - - KCF_PROV_INCRSTATS(pd, err); - return (err); -} - - -/* - * Emulate the call for a multipart dual ops with 2 single steps. - * This routine is always called in the context of a working thread - * running kcf_svc_do_run(). - * The single steps are submitted in a pure synchronous way (blocking). - * When this routine returns, kcf_svc_do_run() will call kcf_aop_done() - * so the originating consumer's callback gets invoked. kcf_aop_done() - * takes care of freeing the operation context. So, this routine does - * not free the operation context. - * - * The provider descriptor is assumed held by the callers. - */ -static int -kcf_emulate_dual(kcf_provider_desc_t *pd, crypto_ctx_t *ctx, - kcf_req_params_t *params) -{ - int err = CRYPTO_ARGUMENTS_BAD; - kcf_op_type_t optype; - size_t save_len; - off_t save_offset; - - optype = params->rp_optype; - - switch (params->rp_opgrp) { - case KCF_OG_ENCRYPT_MAC: { - kcf_encrypt_mac_ops_params_t *cmops = - &params->rp_u.encrypt_mac_params; - kcf_context_t *encr_kcf_ctx; - crypto_ctx_t *mac_ctx; - kcf_req_params_t encr_params; - - encr_kcf_ctx = (kcf_context_t *)(ctx->cc_framework_private); - - switch (optype) { - case KCF_OP_INIT: { - encr_kcf_ctx->kc_secondctx = NULL; - - KCF_WRAP_ENCRYPT_OPS_PARAMS(&encr_params, KCF_OP_INIT, - pd->pd_sid, &cmops->em_encr_mech, - cmops->em_encr_key, NULL, NULL, - cmops->em_encr_templ); - - err = kcf_submit_request(pd, ctx, NULL, &encr_params, - B_FALSE); - - /* It can't be CRYPTO_QUEUED */ - if (err != CRYPTO_SUCCESS) { - break; - } - - err = crypto_mac_init(&cmops->em_mac_mech, - cmops->em_mac_key, cmops->em_mac_templ, - (crypto_context_t *)&mac_ctx, NULL); - - if (err == CRYPTO_SUCCESS) { - encr_kcf_ctx->kc_secondctx = (kcf_context_t *) - mac_ctx->cc_framework_private; - KCF_CONTEXT_REFHOLD((kcf_context_t *) -
mac_ctx->cc_framework_private); - } - - break; - - } - case KCF_OP_UPDATE: { - crypto_dual_data_t *ct = cmops->em_ciphertext; - crypto_data_t *pt = cmops->em_plaintext; - kcf_context_t *mac_kcf_ctx = encr_kcf_ctx->kc_secondctx; - crypto_ctx_t *mac_ctx = &mac_kcf_ctx->kc_glbl_ctx; - - KCF_WRAP_ENCRYPT_OPS_PARAMS(&encr_params, KCF_OP_UPDATE, - pd->pd_sid, NULL, NULL, pt, (crypto_data_t *)ct, - NULL); - - err = kcf_submit_request(pd, ctx, NULL, &encr_params, - B_FALSE); - - /* It can't be CRYPTO_QUEUED */ - if (err != CRYPTO_SUCCESS) { - break; - } - - save_offset = ct->dd_offset1; - save_len = ct->dd_len1; - if (ct->dd_len2 == 0) { - /* - * The previous encrypt step was an - * accumulation only and didn't produce any - * partial output - */ - if (ct->dd_len1 == 0) - break; - - } else { - ct->dd_offset1 = ct->dd_offset2; - ct->dd_len1 = ct->dd_len2; - } - err = crypto_mac_update((crypto_context_t)mac_ctx, - (crypto_data_t *)ct, NULL); - - ct->dd_offset1 = save_offset; - ct->dd_len1 = save_len; - - break; - } - case KCF_OP_FINAL: { - crypto_dual_data_t *ct = cmops->em_ciphertext; - crypto_data_t *mac = cmops->em_mac; - kcf_context_t *mac_kcf_ctx = encr_kcf_ctx->kc_secondctx; - crypto_ctx_t *mac_ctx = &mac_kcf_ctx->kc_glbl_ctx; - crypto_context_t mac_context = mac_ctx; - - KCF_WRAP_ENCRYPT_OPS_PARAMS(&encr_params, KCF_OP_FINAL, - pd->pd_sid, NULL, NULL, NULL, (crypto_data_t *)ct, - NULL); - - err = kcf_submit_request(pd, ctx, NULL, &encr_params, - B_FALSE); - - /* It can't be CRYPTO_QUEUED */ - if (err != CRYPTO_SUCCESS) { - crypto_cancel_ctx(mac_context); - break; - } - - if (ct->dd_len2 > 0) { - save_offset = ct->dd_offset1; - save_len = ct->dd_len1; - ct->dd_offset1 = ct->dd_offset2; - ct->dd_len1 = ct->dd_len2; - - err = crypto_mac_update(mac_context, - (crypto_data_t *)ct, NULL); - - ct->dd_offset1 = save_offset; - ct->dd_len1 = save_len; - - if (err != CRYPTO_SUCCESS) { - crypto_cancel_ctx(mac_context); - return (err); - } - } - - /* and finally, collect the MAC 
*/ - err = crypto_mac_final(mac_context, mac, NULL); - break; - } - - default: - break; - } - KCF_PROV_INCRSTATS(pd, err); - break; - } - case KCF_OG_MAC_DECRYPT: { - kcf_mac_decrypt_ops_params_t *mdops = - &params->rp_u.mac_decrypt_params; - kcf_context_t *decr_kcf_ctx; - crypto_ctx_t *mac_ctx; - kcf_req_params_t decr_params; - - decr_kcf_ctx = (kcf_context_t *)(ctx->cc_framework_private); - - switch (optype) { - case KCF_OP_INIT: { - decr_kcf_ctx->kc_secondctx = NULL; - - err = crypto_mac_init(&mdops->md_mac_mech, - mdops->md_mac_key, mdops->md_mac_templ, - (crypto_context_t *)&mac_ctx, NULL); - - /* It can't be CRYPTO_QUEUED */ - if (err != CRYPTO_SUCCESS) { - break; - } - - KCF_WRAP_DECRYPT_OPS_PARAMS(&decr_params, KCF_OP_INIT, - pd->pd_sid, &mdops->md_decr_mech, - mdops->md_decr_key, NULL, NULL, - mdops->md_decr_templ); - - err = kcf_submit_request(pd, ctx, NULL, &decr_params, - B_FALSE); - - /* It can't be CRYPTO_QUEUED */ - if (err != CRYPTO_SUCCESS) { - crypto_cancel_ctx((crypto_context_t)mac_ctx); - break; - } - - decr_kcf_ctx->kc_secondctx = (kcf_context_t *) - mac_ctx->cc_framework_private; - KCF_CONTEXT_REFHOLD((kcf_context_t *) - mac_ctx->cc_framework_private); - - break; - default: - break; - - } - case KCF_OP_UPDATE: { - crypto_dual_data_t *ct = mdops->md_ciphertext; - crypto_data_t *pt = mdops->md_plaintext; - kcf_context_t *mac_kcf_ctx = decr_kcf_ctx->kc_secondctx; - crypto_ctx_t *mac_ctx = &mac_kcf_ctx->kc_glbl_ctx; - - err = crypto_mac_update((crypto_context_t)mac_ctx, - (crypto_data_t *)ct, NULL); - - if (err != CRYPTO_SUCCESS) - break; - - save_offset = ct->dd_offset1; - save_len = ct->dd_len1; - - /* zero ct->dd_len2 means decrypt everything */ - if (ct->dd_len2 > 0) { - ct->dd_offset1 = ct->dd_offset2; - ct->dd_len1 = ct->dd_len2; - } - - err = crypto_decrypt_update((crypto_context_t)ctx, - (crypto_data_t *)ct, pt, NULL); - - ct->dd_offset1 = save_offset; - ct->dd_len1 = save_len; - - break; - } - case KCF_OP_FINAL: { - crypto_data_t *pt =
mdops->md_plaintext; - crypto_data_t *mac = mdops->md_mac; - kcf_context_t *mac_kcf_ctx = decr_kcf_ctx->kc_secondctx; - crypto_ctx_t *mac_ctx = &mac_kcf_ctx->kc_glbl_ctx; - - err = crypto_mac_final((crypto_context_t)mac_ctx, - mac, NULL); - - if (err != CRYPTO_SUCCESS) { - crypto_cancel_ctx(ctx); - break; - } - - /* Get the last chunk of plaintext */ - KCF_CONTEXT_REFHOLD(decr_kcf_ctx); - err = crypto_decrypt_final((crypto_context_t)ctx, pt, - NULL); - - break; - } - } - break; - } - default: - - break; - } /* end of switch(params->rp_opgrp) */ - - return (err); -} diff --git a/sys/contrib/openzfs/module/icp/core/kcf_mech_tabs.c b/sys/contrib/openzfs/module/icp/core/kcf_mech_tabs.c index 2642b317d698..41705e84bc4b 100644 --- a/sys/contrib/openzfs/module/icp/core/kcf_mech_tabs.c +++ b/sys/contrib/openzfs/module/icp/core/kcf_mech_tabs.c @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * @@ -27,7 +27,6 @@ #include <sys/crypto/common.h> #include <sys/crypto/api.h> #include <sys/crypto/impl.h> -#include <sys/modhash.h> /* Cryptographic mechanisms tables and their access functions */ @@ -55,9 +54,6 @@ /* * Locking conventions: * -------------------- - * A global mutex, kcf_mech_tabs_lock, serializes writes to the - * mechanism table via kcf_create_mech_entry(). - * * A mutex is associated with every entry of the tables. * The mutex is acquired whenever the entry is accessed for * 1) retrieving the mech_id (comparing the mech name) @@ -72,9 +68,6 @@ * long enough to justify the cost of using rwlocks, so the per-mechanism * entry mutex won't be very *hot*. 
* - * When both kcf_mech_tabs_lock and a mech_entry mutex need to be held, - * kcf_mech_tabs_lock must always be acquired first. - * */ /* Mechanisms tables */ @@ -82,79 +75,33 @@ /* RFE 4687834 Will deal with the extensibility of these tables later */ -kcf_mech_entry_t kcf_digest_mechs_tab[KCF_MAXDIGEST]; -kcf_mech_entry_t kcf_cipher_mechs_tab[KCF_MAXCIPHER]; -kcf_mech_entry_t kcf_mac_mechs_tab[KCF_MAXMAC]; -kcf_mech_entry_t kcf_sign_mechs_tab[KCF_MAXSIGN]; -kcf_mech_entry_t kcf_keyops_mechs_tab[KCF_MAXKEYOPS]; -kcf_mech_entry_t kcf_misc_mechs_tab[KCF_MAXMISC]; +static kcf_mech_entry_t kcf_digest_mechs_tab[KCF_MAXDIGEST]; +static kcf_mech_entry_t kcf_cipher_mechs_tab[KCF_MAXCIPHER]; +static kcf_mech_entry_t kcf_mac_mechs_tab[KCF_MAXMAC]; -kcf_mech_entry_tab_t kcf_mech_tabs_tab[KCF_LAST_OPSCLASS + 1] = { +const kcf_mech_entry_tab_t kcf_mech_tabs_tab[KCF_LAST_OPSCLASS + 1] = { {0, NULL}, /* No class zero */ {KCF_MAXDIGEST, kcf_digest_mechs_tab}, {KCF_MAXCIPHER, kcf_cipher_mechs_tab}, {KCF_MAXMAC, kcf_mac_mechs_tab}, - {KCF_MAXSIGN, kcf_sign_mechs_tab}, - {KCF_MAXKEYOPS, kcf_keyops_mechs_tab}, - {KCF_MAXMISC, kcf_misc_mechs_tab} }; -/* - * Per-algorithm internal thresholds for the minimum input size of before - * offloading to hardware provider. - * Dispatching a crypto operation to a hardware provider entails paying the - * cost of an additional context switch. Measurements with Sun Accelerator 4000 - * shows that 512-byte jobs or smaller are better handled in software. - * There is room for refinement here. 
- * - */ -int kcf_md5_threshold = 512; -int kcf_sha1_threshold = 512; -int kcf_des_threshold = 512; -int kcf_des3_threshold = 512; -int kcf_aes_threshold = 512; -int kcf_bf_threshold = 512; -int kcf_rc4_threshold = 512; - -kmutex_t kcf_mech_tabs_lock; -static uint32_t kcf_gen_swprov = 0; - -int kcf_mech_hash_size = 256; -mod_hash_t *kcf_mech_hash; /* mech name to id hash */ - -static crypto_mech_type_t -kcf_mech_hash_find(char *mechname) -{ - mod_hash_val_t hv; - crypto_mech_type_t mt; - - mt = CRYPTO_MECH_INVALID; - if (mod_hash_find(kcf_mech_hash, (mod_hash_key_t)mechname, &hv) == 0) { - mt = *(crypto_mech_type_t *)hv; - ASSERT(mt != CRYPTO_MECH_INVALID); - } +static avl_tree_t kcf_mech_hash; - return (mt); +static int +kcf_mech_hash_compar(const void *lhs, const void *rhs) +{ + const kcf_mech_entry_t *l = lhs, *r = rhs; + int cmp = strncmp(l->me_name, r->me_name, CRYPTO_MAX_MECH_NAME); + return ((0 < cmp) - (cmp < 0)); } void kcf_destroy_mech_tabs(void) { - int i, max; - kcf_ops_class_t class; - kcf_mech_entry_t *me_tab; - - if (kcf_mech_hash) - mod_hash_destroy_hash(kcf_mech_hash); - - mutex_destroy(&kcf_mech_tabs_lock); - - for (class = KCF_FIRST_OPSCLASS; class <= KCF_LAST_OPSCLASS; class++) { - max = kcf_mech_tabs_tab[class].met_size; - me_tab = kcf_mech_tabs_tab[class].met_tab; - for (i = 0; i < max; i++) - mutex_destroy(&(me_tab[i].me_mutex)); - } + for (void *cookie = NULL; avl_destroy_nodes(&kcf_mech_hash, &cookie); ) + ; + avl_destroy(&kcf_mech_hash); } /* @@ -166,102 +113,8 @@ kcf_destroy_mech_tabs(void) void kcf_init_mech_tabs(void) { - int i, max; - kcf_ops_class_t class; - kcf_mech_entry_t *me_tab; - - /* Initializes the mutex locks. 
*/ - - mutex_init(&kcf_mech_tabs_lock, NULL, MUTEX_DEFAULT, NULL); - - /* Then the pre-defined mechanism entries */ - - /* Two digests */ - (void) strncpy(kcf_digest_mechs_tab[0].me_name, SUN_CKM_MD5, - CRYPTO_MAX_MECH_NAME); - kcf_digest_mechs_tab[0].me_threshold = kcf_md5_threshold; - - (void) strncpy(kcf_digest_mechs_tab[1].me_name, SUN_CKM_SHA1, - CRYPTO_MAX_MECH_NAME); - kcf_digest_mechs_tab[1].me_threshold = kcf_sha1_threshold; - - /* The symmetric ciphers in various modes */ - (void) strncpy(kcf_cipher_mechs_tab[0].me_name, SUN_CKM_DES_CBC, - CRYPTO_MAX_MECH_NAME); - kcf_cipher_mechs_tab[0].me_threshold = kcf_des_threshold; - - (void) strncpy(kcf_cipher_mechs_tab[1].me_name, SUN_CKM_DES3_CBC, - CRYPTO_MAX_MECH_NAME); - kcf_cipher_mechs_tab[1].me_threshold = kcf_des3_threshold; - - (void) strncpy(kcf_cipher_mechs_tab[2].me_name, SUN_CKM_DES_ECB, - CRYPTO_MAX_MECH_NAME); - kcf_cipher_mechs_tab[2].me_threshold = kcf_des_threshold; - - (void) strncpy(kcf_cipher_mechs_tab[3].me_name, SUN_CKM_DES3_ECB, - CRYPTO_MAX_MECH_NAME); - kcf_cipher_mechs_tab[3].me_threshold = kcf_des3_threshold; - - (void) strncpy(kcf_cipher_mechs_tab[4].me_name, SUN_CKM_BLOWFISH_CBC, - CRYPTO_MAX_MECH_NAME); - kcf_cipher_mechs_tab[4].me_threshold = kcf_bf_threshold; - - (void) strncpy(kcf_cipher_mechs_tab[5].me_name, SUN_CKM_BLOWFISH_ECB, - CRYPTO_MAX_MECH_NAME); - kcf_cipher_mechs_tab[5].me_threshold = kcf_bf_threshold; - - (void) strncpy(kcf_cipher_mechs_tab[6].me_name, SUN_CKM_AES_CBC, - CRYPTO_MAX_MECH_NAME); - kcf_cipher_mechs_tab[6].me_threshold = kcf_aes_threshold; - - (void) strncpy(kcf_cipher_mechs_tab[7].me_name, SUN_CKM_AES_ECB, - CRYPTO_MAX_MECH_NAME); - kcf_cipher_mechs_tab[7].me_threshold = kcf_aes_threshold; - - (void) strncpy(kcf_cipher_mechs_tab[8].me_name, SUN_CKM_RC4, - CRYPTO_MAX_MECH_NAME); - kcf_cipher_mechs_tab[8].me_threshold = kcf_rc4_threshold; - - - /* 4 HMACs */ - (void) strncpy(kcf_mac_mechs_tab[0].me_name, SUN_CKM_MD5_HMAC, - CRYPTO_MAX_MECH_NAME); - 
kcf_mac_mechs_tab[0].me_threshold = kcf_md5_threshold; - - (void) strncpy(kcf_mac_mechs_tab[1].me_name, SUN_CKM_MD5_HMAC_GENERAL, - CRYPTO_MAX_MECH_NAME); - kcf_mac_mechs_tab[1].me_threshold = kcf_md5_threshold; - - (void) strncpy(kcf_mac_mechs_tab[2].me_name, SUN_CKM_SHA1_HMAC, - CRYPTO_MAX_MECH_NAME); - kcf_mac_mechs_tab[2].me_threshold = kcf_sha1_threshold; - - (void) strncpy(kcf_mac_mechs_tab[3].me_name, SUN_CKM_SHA1_HMAC_GENERAL, - CRYPTO_MAX_MECH_NAME); - kcf_mac_mechs_tab[3].me_threshold = kcf_sha1_threshold; - - - /* 1 random number generation pseudo mechanism */ - (void) strncpy(kcf_misc_mechs_tab[0].me_name, SUN_RANDOM, - CRYPTO_MAX_MECH_NAME); - - kcf_mech_hash = mod_hash_create_strhash_nodtr("kcf mech2id hash", - kcf_mech_hash_size, mod_hash_null_valdtor); - - for (class = KCF_FIRST_OPSCLASS; class <= KCF_LAST_OPSCLASS; class++) { - max = kcf_mech_tabs_tab[class].met_size; - me_tab = kcf_mech_tabs_tab[class].met_tab; - for (i = 0; i < max; i++) { - mutex_init(&(me_tab[i].me_mutex), NULL, - MUTEX_DEFAULT, NULL); - if (me_tab[i].me_name[0] != 0) { - me_tab[i].me_mechid = KCF_MECHID(class, i); - (void) mod_hash_insert(kcf_mech_hash, - (mod_hash_key_t)me_tab[i].me_name, - (mod_hash_val_t)&(me_tab[i].me_mechid)); - } - } - } + avl_create(&kcf_mech_hash, kcf_mech_hash_compar, + sizeof (kcf_mech_entry_t), offsetof(kcf_mech_entry_t, me_node)); } /* @@ -291,12 +144,8 @@ kcf_init_mech_tabs(void) * KCF_SUCCESS otherwise. */ static int -kcf_create_mech_entry(kcf_ops_class_t class, char *mechname) +kcf_create_mech_entry(kcf_ops_class_t class, const char *mechname) { - crypto_mech_type_t mt; - kcf_mech_entry_t *me_tab; - int i = 0, size; - if ((class < KCF_FIRST_OPSCLASS) || (class > KCF_LAST_OPSCLASS)) return (KCF_INVALID_MECH_CLASS); @@ -306,49 +155,28 @@ kcf_create_mech_entry(kcf_ops_class_t class, char *mechname) * First check if the mechanism is already in one of the tables. * The mech_entry could be in another class. 
*/ - mutex_enter(&kcf_mech_tabs_lock); - mt = kcf_mech_hash_find(mechname); - if (mt != CRYPTO_MECH_INVALID) { - /* Nothing to do, regardless the suggested class. */ - mutex_exit(&kcf_mech_tabs_lock); + avl_index_t where = 0; + kcf_mech_entry_t tmptab; + strlcpy(tmptab.me_name, mechname, CRYPTO_MAX_MECH_NAME); + if (avl_find(&kcf_mech_hash, &tmptab, &where) != NULL) return (KCF_SUCCESS); - } /* Now take the next unused mech entry in the class's tab */ - me_tab = kcf_mech_tabs_tab[class].met_tab; - size = kcf_mech_tabs_tab[class].met_size; + kcf_mech_entry_t *me_tab = kcf_mech_tabs_tab[class].met_tab; + int size = kcf_mech_tabs_tab[class].met_size; - while (i < size) { - mutex_enter(&(me_tab[i].me_mutex)); + for (int i = 0; i < size; ++i) if (me_tab[i].me_name[0] == 0) { /* Found an empty spot */ - (void) strlcpy(me_tab[i].me_name, mechname, + strlcpy(me_tab[i].me_name, mechname, CRYPTO_MAX_MECH_NAME); - me_tab[i].me_name[CRYPTO_MAX_MECH_NAME-1] = '\0'; me_tab[i].me_mechid = KCF_MECHID(class, i); - /* - * No a-priori information about the new mechanism, so - * the threshold is set to zero. 
- */ - me_tab[i].me_threshold = 0; - mutex_exit(&(me_tab[i].me_mutex)); /* Add the new mechanism to the hash table */ - (void) mod_hash_insert(kcf_mech_hash, - (mod_hash_key_t)me_tab[i].me_name, - (mod_hash_val_t)&(me_tab[i].me_mechid)); - break; + avl_insert(&kcf_mech_hash, &me_tab[i], where); + return (KCF_SUCCESS); } - mutex_exit(&(me_tab[i].me_mutex)); - i++; - } - mutex_exit(&kcf_mech_tabs_lock); - - if (i == size) { - return (KCF_MECH_TAB_FULL); - } - - return (KCF_SUCCESS); + return (KCF_MECH_TAB_FULL); } /* @@ -376,16 +204,9 @@ kcf_add_mech_provider(short mech_indx, { int error; kcf_mech_entry_t *mech_entry = NULL; - crypto_mech_info_t *mech_info; - crypto_mech_type_t kcf_mech_type, mt; - kcf_prov_mech_desc_t *prov_mech, *prov_mech2; - crypto_func_group_t simple_fg_mask, dual_fg_mask; - crypto_mech_info_t *dmi; - crypto_mech_info_list_t *mil, *mil2; - kcf_mech_entry_t *me; - int i; - - ASSERT(prov_desc->pd_prov_type != CRYPTO_LOGICAL_PROVIDER); + const crypto_mech_info_t *mech_info; + crypto_mech_type_t kcf_mech_type; + kcf_prov_mech_desc_t *prov_mech; mech_info = &prov_desc->pd_mechanisms[mech_indx]; @@ -394,7 +215,7 @@ kcf_add_mech_provider(short mech_indx, * Find the class corresponding to the function group flag of * the mechanism. 
*/ - kcf_mech_type = kcf_mech_hash_find(mech_info->cm_mech_name); + kcf_mech_type = crypto_mech2id(mech_info->cm_mech_name); if (kcf_mech_type == CRYPTO_MECH_INVALID) { crypto_func_group_t fg = mech_info->cm_func_group_mask; kcf_ops_class_t class; @@ -407,19 +228,8 @@ kcf_add_mech_provider(short mech_indx, class = KCF_CIPHER_CLASS; else if (fg & CRYPTO_FG_MAC || fg & CRYPTO_FG_MAC_ATOMIC) class = KCF_MAC_CLASS; - else if (fg & CRYPTO_FG_SIGN || fg & CRYPTO_FG_VERIFY || - fg & CRYPTO_FG_SIGN_ATOMIC || - fg & CRYPTO_FG_VERIFY_ATOMIC || - fg & CRYPTO_FG_SIGN_RECOVER || - fg & CRYPTO_FG_VERIFY_RECOVER) - class = KCF_SIGN_CLASS; - else if (fg & CRYPTO_FG_GENERATE || - fg & CRYPTO_FG_GENERATE_KEY_PAIR || - fg & CRYPTO_FG_WRAP || fg & CRYPTO_FG_UNWRAP || - fg & CRYPTO_FG_DERIVE) - class = KCF_KEYOPS_CLASS; else - class = KCF_MISC_CLASS; + __builtin_unreachable(); /* * Attempt to create a new mech_entry for the specified @@ -431,7 +241,7 @@ kcf_add_mech_provider(short mech_indx, return (error); } /* get the KCF mech type that was assigned to the mechanism */ - kcf_mech_type = kcf_mech_hash_find(mech_info->cm_mech_name); + kcf_mech_type = crypto_mech2id(mech_info->cm_mech_name); ASSERT(kcf_mech_type != CRYPTO_MECH_INVALID); } @@ -440,7 +250,8 @@ kcf_add_mech_provider(short mech_indx, /* allocate and initialize new kcf_prov_mech_desc */ prov_mech = kmem_zalloc(sizeof (kcf_prov_mech_desc_t), KM_SLEEP); - bcopy(mech_info, &prov_mech->pm_mech_info, sizeof (crypto_mech_info_t)); + memcpy(&prov_mech->pm_mech_info, mech_info, + sizeof (crypto_mech_info_t)); prov_mech->pm_prov_desc = prov_desc; prov_desc->pd_mech_indx[KCF_MECH2CLASS(kcf_mech_type)] [KCF_MECH2INDEX(kcf_mech_type)] = mech_indx; @@ -448,142 +259,32 @@ kcf_add_mech_provider(short mech_indx, KCF_PROV_REFHOLD(prov_desc); KCF_PROV_IREFHOLD(prov_desc); - dual_fg_mask = mech_info->cm_func_group_mask & CRYPTO_FG_DUAL_MASK; - - if (dual_fg_mask == ((crypto_func_group_t)0)) - goto add_entry; - - simple_fg_mask = 
(mech_info->cm_func_group_mask & - CRYPTO_FG_SIMPLEOP_MASK) | CRYPTO_FG_RANDOM; - - for (i = 0; i < prov_desc->pd_mech_list_count; i++) { - dmi = &prov_desc->pd_mechanisms[i]; - - /* skip self */ - if (dmi->cm_mech_number == mech_info->cm_mech_number) - continue; - - /* skip if not a dual operation mechanism */ - if (!(dmi->cm_func_group_mask & dual_fg_mask) || - (dmi->cm_func_group_mask & simple_fg_mask)) - continue; - - mt = kcf_mech_hash_find(dmi->cm_mech_name); - if (mt == CRYPTO_MECH_INVALID) - continue; - - if (kcf_get_mech_entry(mt, &me) != KCF_SUCCESS) - continue; - - mil = kmem_zalloc(sizeof (*mil), KM_SLEEP); - mil2 = kmem_zalloc(sizeof (*mil2), KM_SLEEP); - - /* - * Ignore hard-coded entries in the mech table - * if the provider hasn't registered. - */ - mutex_enter(&me->me_mutex); - if (me->me_hw_prov_chain == NULL && me->me_sw_prov == NULL) { - mutex_exit(&me->me_mutex); - kmem_free(mil, sizeof (*mil)); - kmem_free(mil2, sizeof (*mil2)); - continue; - } - - /* - * Add other dual mechanisms that have registered - * with the framework to this mechanism's - * cross-reference list. - */ - mil->ml_mech_info = *dmi; /* struct assignment */ - mil->ml_kcf_mechid = mt; - - /* add to head of list */ - mil->ml_next = prov_mech->pm_mi_list; - prov_mech->pm_mi_list = mil; - - if (prov_desc->pd_prov_type == CRYPTO_HW_PROVIDER) - prov_mech2 = me->me_hw_prov_chain; - else - prov_mech2 = me->me_sw_prov; - - if (prov_mech2 == NULL) { - kmem_free(mil2, sizeof (*mil2)); - mutex_exit(&me->me_mutex); - continue; - } - - /* - * Update all other cross-reference lists by - * adding this new mechanism. 
- */ - while (prov_mech2 != NULL) { - if (prov_mech2->pm_prov_desc == prov_desc) { - /* struct assignment */ - mil2->ml_mech_info = *mech_info; - mil2->ml_kcf_mechid = kcf_mech_type; - - /* add to head of list */ - mil2->ml_next = prov_mech2->pm_mi_list; - prov_mech2->pm_mi_list = mil2; - break; - } - prov_mech2 = prov_mech2->pm_next; - } - if (prov_mech2 == NULL) - kmem_free(mil2, sizeof (*mil2)); - - mutex_exit(&me->me_mutex); - } - -add_entry: /* * Add new kcf_prov_mech_desc at the front of HW providers * chain. */ - switch (prov_desc->pd_prov_type) { - - case CRYPTO_HW_PROVIDER: - mutex_enter(&mech_entry->me_mutex); - prov_mech->pm_me = mech_entry; - prov_mech->pm_next = mech_entry->me_hw_prov_chain; - mech_entry->me_hw_prov_chain = prov_mech; - mech_entry->me_num_hwprov++; - mutex_exit(&mech_entry->me_mutex); - break; - - case CRYPTO_SW_PROVIDER: - mutex_enter(&mech_entry->me_mutex); - if (mech_entry->me_sw_prov != NULL) { - /* - * There is already a SW provider for this mechanism. - * Since we allow only one SW provider per mechanism, - * report this condition. - */ - cmn_err(CE_WARN, "The cryptographic software provider " - "\"%s\" will not be used for %s. The provider " - "\"%s\" will be used for this mechanism " - "instead.", prov_desc->pd_description, - mech_info->cm_mech_name, - mech_entry->me_sw_prov->pm_prov_desc-> - pd_description); - KCF_PROV_REFRELE(prov_desc); - kmem_free(prov_mech, sizeof (kcf_prov_mech_desc_t)); - prov_mech = NULL; - } else { - /* - * Set the provider as the software provider for - * this mechanism. - */ - mech_entry->me_sw_prov = prov_mech; - - /* We'll wrap around after 4 billion registrations! */ - mech_entry->me_gen_swprov = kcf_gen_swprov++; - } - mutex_exit(&mech_entry->me_mutex); - break; - default: - break; + if (mech_entry->me_sw_prov != NULL) { + /* + * There is already a provider for this mechanism. + * Since we allow only one provider per mechanism, + * report this condition. 
+ */ + cmn_err(CE_WARN, "The cryptographic provider " + "\"%s\" will not be used for %s. The provider " + "\"%s\" will be used for this mechanism " + "instead.", prov_desc->pd_description, + mech_info->cm_mech_name, + mech_entry->me_sw_prov->pm_prov_desc-> + pd_description); + KCF_PROV_REFRELE(prov_desc); + kmem_free(prov_mech, sizeof (kcf_prov_mech_desc_t)); + prov_mech = NULL; + } else { + /* + * Set the provider as the provider for + * this mechanism. + */ + mech_entry->me_sw_prov = prov_mech; } *pmdpp = prov_mech; @@ -607,18 +308,14 @@ add_entry: * User context only. */ void -kcf_remove_mech_provider(char *mech_name, kcf_provider_desc_t *prov_desc) +kcf_remove_mech_provider(const char *mech_name, kcf_provider_desc_t *prov_desc) { crypto_mech_type_t mech_type; - kcf_prov_mech_desc_t *prov_mech = NULL, *prov_chain; - kcf_prov_mech_desc_t **prev_entry_next; + kcf_prov_mech_desc_t *prov_mech = NULL; kcf_mech_entry_t *mech_entry; - crypto_mech_info_list_t *mil, *mil2, *next, **prev_next; - - ASSERT(prov_desc->pd_prov_type != CRYPTO_LOGICAL_PROVIDER); /* get the KCF mech type that was assigned to the mechanism */ - if ((mech_type = kcf_mech_hash_find(mech_name)) == + if ((mech_type = crypto_mech2id(mech_name)) == CRYPTO_MECH_INVALID) { /* * Provider was not allowed for this mech due to policy or @@ -636,92 +333,17 @@ kcf_remove_mech_provider(char *mech_name, kcf_provider_desc_t *prov_desc) return; } - mutex_enter(&mech_entry->me_mutex); - - switch (prov_desc->pd_prov_type) { - - case CRYPTO_HW_PROVIDER: - /* find the provider in the mech_entry chain */ - prev_entry_next = &mech_entry->me_hw_prov_chain; - prov_mech = mech_entry->me_hw_prov_chain; - while (prov_mech != NULL && - prov_mech->pm_prov_desc != prov_desc) { - prev_entry_next = &prov_mech->pm_next; - prov_mech = prov_mech->pm_next; - } - - if (prov_mech == NULL) { - /* entry not found, simply return */ - mutex_exit(&mech_entry->me_mutex); - return; - } - - /* remove provider entry from mech_entry chain */ - 
*prev_entry_next = prov_mech->pm_next; - ASSERT(mech_entry->me_num_hwprov > 0); - mech_entry->me_num_hwprov--; - break; - - case CRYPTO_SW_PROVIDER: - if (mech_entry->me_sw_prov == NULL || - mech_entry->me_sw_prov->pm_prov_desc != prov_desc) { - /* not the software provider for this mechanism */ - mutex_exit(&mech_entry->me_mutex); - return; - } - prov_mech = mech_entry->me_sw_prov; - mech_entry->me_sw_prov = NULL; - break; - default: - /* unexpected crypto_provider_type_t */ - mutex_exit(&mech_entry->me_mutex); + if (mech_entry->me_sw_prov == NULL || + mech_entry->me_sw_prov->pm_prov_desc != prov_desc) { + /* not the provider for this mechanism */ return; } - - mutex_exit(&mech_entry->me_mutex); - - /* Free the dual ops cross-reference lists */ - mil = prov_mech->pm_mi_list; - while (mil != NULL) { - next = mil->ml_next; - if (kcf_get_mech_entry(mil->ml_kcf_mechid, - &mech_entry) != KCF_SUCCESS) { - mil = next; - continue; - } - - mutex_enter(&mech_entry->me_mutex); - if (prov_desc->pd_prov_type == CRYPTO_HW_PROVIDER) - prov_chain = mech_entry->me_hw_prov_chain; - else - prov_chain = mech_entry->me_sw_prov; - - while (prov_chain != NULL) { - if (prov_chain->pm_prov_desc == prov_desc) { - prev_next = &prov_chain->pm_mi_list; - mil2 = prov_chain->pm_mi_list; - while (mil2 != NULL && - mil2->ml_kcf_mechid != mech_type) { - prev_next = &mil2->ml_next; - mil2 = mil2->ml_next; - } - if (mil2 != NULL) { - *prev_next = mil2->ml_next; - kmem_free(mil2, sizeof (*mil2)); - } - break; - } - prov_chain = prov_chain->pm_next; - } - - mutex_exit(&mech_entry->me_mutex); - kmem_free(mil, sizeof (crypto_mech_info_list_t)); - mil = next; - } + prov_mech = mech_entry->me_sw_prov; + mech_entry->me_sw_prov = NULL; /* free entry */ - KCF_PROV_REFRELE(prov_mech->pm_prov_desc); KCF_PROV_IREFRELE(prov_mech->pm_prov_desc); + KCF_PROV_REFRELE(prov_mech->pm_prov_desc); kmem_free(prov_mech, sizeof (kcf_prov_mech_desc_t)); } @@ -747,7 +369,7 @@ kcf_get_mech_entry(crypto_mech_type_t mech_type, 
kcf_mech_entry_t **mep) { kcf_ops_class_t class; int index; - kcf_mech_entry_tab_t *me_tab; + const kcf_mech_entry_tab_t *me_tab; ASSERT(mep != NULL); @@ -770,22 +392,44 @@ kcf_get_mech_entry(crypto_mech_type_t mech_type, kcf_mech_entry_t **mep) return (KCF_SUCCESS); } -/* CURRENTLY UNSUPPORTED: attempting to load the module if it isn't found */ +/* + * crypto_mech2id() + * + * Arguments: + * . mechname: A null-terminated string identifying the mechanism name. + * + * Description: + * Walks the mechanisms tables, looking for an entry that matches the + * mechname. Once it find it, it builds the 64-bit mech_type and returns + * it. + * + * Context: + * Process and interruption. + * + * Returns: + * The unique mechanism identified by 'mechname', if found. + * CRYPTO_MECH_INVALID otherwise. + */ /* * Lookup the hash table for an entry that matches the mechname. - * If there are no hardware or software providers for the mechanism, - * but there is an unloaded software provider, this routine will attempt + * If there are no providers for the mechanism, + * but there is an unloaded provider, this routine will attempt * to load it. - * - * If the MOD_NOAUTOUNLOAD flag is not set, a software provider is - * in constant danger of being unloaded. For consumers that call - * crypto_mech2id() only once, the provider will not be reloaded - * if it becomes unloaded. If a provider gets loaded elsewhere - * without the MOD_NOAUTOUNLOAD flag being set, we set it now. 
*/ crypto_mech_type_t -crypto_mech2id_common(char *mechname, boolean_t load_module) +crypto_mech2id(const char *mechname) { - crypto_mech_type_t mt = kcf_mech_hash_find(mechname); - return (mt); + kcf_mech_entry_t tmptab, *found; + strlcpy(tmptab.me_name, mechname, CRYPTO_MAX_MECH_NAME); + + if ((found = avl_find(&kcf_mech_hash, &tmptab, NULL))) { + ASSERT(found->me_mechid != CRYPTO_MECH_INVALID); + return (found->me_mechid); + } + + return (CRYPTO_MECH_INVALID); } + +#if defined(_KERNEL) +EXPORT_SYMBOL(crypto_mech2id); +#endif diff --git a/sys/contrib/openzfs/module/icp/core/kcf_prov_lib.c b/sys/contrib/openzfs/module/icp/core/kcf_prov_lib.c index 1b115d976232..9dca3882e174 100644 --- a/sys/contrib/openzfs/module/icp/core/kcf_prov_lib.c +++ b/sys/contrib/openzfs/module/icp/core/kcf_prov_lib.c @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * @@ -33,12 +33,11 @@ */ /* - * Utility routine to apply the command, 'cmd', to the + * Utility routine to apply the command COPY_TO_DATA to the * data in the uio structure. 
*/ -int -crypto_uio_data(crypto_data_t *data, uchar_t *buf, int len, cmd_type_t cmd, - void *digest_ctx, void (*update)(void)) +static int +crypto_uio_copy_to_data(crypto_data_t *data, uchar_t *buf, int len) { zfs_uio_t *uiop = data->cd_uio; off_t offset = data->cd_offset; @@ -71,26 +70,8 @@ crypto_uio_data(crypto_data_t *data, uchar_t *buf, int len, cmd_type_t cmd, offset, length); datap = (uchar_t *)(zfs_uio_iovbase(uiop, vec_idx) + offset); - switch (cmd) { - case COPY_FROM_DATA: - bcopy(datap, buf, cur_len); - buf += cur_len; - break; - case COPY_TO_DATA: - bcopy(buf, datap, cur_len); - buf += cur_len; - break; - case COMPARE_TO_DATA: - if (bcmp(datap, buf, cur_len)) - return (CRYPTO_SIGNATURE_INVALID); - buf += cur_len; - break; - case MD5_DIGEST_DATA: - case SHA1_DIGEST_DATA: - case SHA2_DIGEST_DATA: - case GHASH_DATA: - return (CRYPTO_ARGUMENTS_BAD); - } + memcpy(datap, buf, cur_len); + buf += cur_len; length -= cur_len; vec_idx++; @@ -99,16 +80,11 @@ crypto_uio_data(crypto_data_t *data, uchar_t *buf, int len, cmd_type_t cmd, if (vec_idx == zfs_uio_iovcnt(uiop) && length > 0) { /* - * The end of the specified iovec's was reached but + * The end of the specified iovecs was reached but * the length requested could not be processed. 
*/ - switch (cmd) { - case COPY_TO_DATA: - data->cd_length = len; - return (CRYPTO_BUFFER_TOO_SMALL); - default: - return (CRYPTO_DATA_LEN_RANGE); - } + data->cd_length = len; + return (CRYPTO_BUFFER_TOO_SMALL); } return (CRYPTO_SUCCESS); @@ -123,13 +99,12 @@ crypto_put_output_data(uchar_t *buf, crypto_data_t *output, int len) output->cd_length = len; return (CRYPTO_BUFFER_TOO_SMALL); } - bcopy(buf, (uchar_t *)(output->cd_raw.iov_base + - output->cd_offset), len); + memcpy((uchar_t *)(output->cd_raw.iov_base + + output->cd_offset), buf, len); break; case CRYPTO_DATA_UIO: - return (crypto_uio_data(output, buf, len, - COPY_TO_DATA, NULL, NULL)); + return (crypto_uio_copy_to_data(output, buf, len)); default: return (CRYPTO_ARGUMENTS_BAD); } @@ -139,33 +114,21 @@ crypto_put_output_data(uchar_t *buf, crypto_data_t *output, int len) int crypto_update_iov(void *ctx, crypto_data_t *input, crypto_data_t *output, - int (*cipher)(void *, caddr_t, size_t, crypto_data_t *), - void (*copy_block)(uint8_t *, uint64_t *)) + int (*cipher)(void *, caddr_t, size_t, crypto_data_t *)) { - common_ctx_t *common_ctx = ctx; - int rv; - ASSERT(input != output); - if (input->cd_miscdata != NULL) { - copy_block((uint8_t *)input->cd_miscdata, - &common_ctx->cc_iv[0]); - } if (input->cd_raw.iov_len < input->cd_length) return (CRYPTO_ARGUMENTS_BAD); - rv = (cipher)(ctx, input->cd_raw.iov_base + input->cd_offset, - input->cd_length, output); - - return (rv); + return ((cipher)(ctx, input->cd_raw.iov_base + input->cd_offset, + input->cd_length, output)); } int crypto_update_uio(void *ctx, crypto_data_t *input, crypto_data_t *output, - int (*cipher)(void *, caddr_t, size_t, crypto_data_t *), - void (*copy_block)(uint8_t *, uint64_t *)) + int (*cipher)(void *, caddr_t, size_t, crypto_data_t *)) { - common_ctx_t *common_ctx = ctx; zfs_uio_t *uiop = input->cd_uio; off_t offset = input->cd_offset; size_t length = input->cd_length; @@ -173,10 +136,6 @@ crypto_update_uio(void *ctx, crypto_data_t *input, 
crypto_data_t *output, size_t cur_len; ASSERT(input != output); - if (input->cd_miscdata != NULL) { - copy_block((uint8_t *)input->cd_miscdata, - &common_ctx->cc_iv[0]); - } if (zfs_uio_segflg(input->cd_uio) != UIO_SYSSPACE) { return (CRYPTO_ARGUMENTS_BAD); diff --git a/sys/contrib/openzfs/module/icp/core/kcf_prov_tabs.c b/sys/contrib/openzfs/module/icp/core/kcf_prov_tabs.c index 9d303d022517..93af61a235d0 100644 --- a/sys/contrib/openzfs/module/icp/core/kcf_prov_tabs.c +++ b/sys/contrib/openzfs/module/icp/core/kcf_prov_tabs.c @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * @@ -45,7 +45,7 @@ #include <sys/crypto/sched_impl.h> #include <sys/crypto/spi.h> -#define KCF_MAX_PROVIDERS 512 /* max number of providers */ +#define KCF_MAX_PROVIDERS 8 /* max number of providers */ /* * Prov_tab is an array of providers which is updated when @@ -59,33 +59,25 @@ * * prov_tab entries are not updated from kcf.conf or by cryptoadm(1M). */ -static kcf_provider_desc_t **prov_tab = NULL; +static kcf_provider_desc_t *prov_tab[KCF_MAX_PROVIDERS]; static kmutex_t prov_tab_mutex; /* ensure exclusive access to the table */ static uint_t prov_tab_num = 0; /* number of providers in table */ -static uint_t prov_tab_max = KCF_MAX_PROVIDERS; void kcf_prov_tab_destroy(void) { mutex_destroy(&prov_tab_mutex); - - if (prov_tab) - kmem_free(prov_tab, prov_tab_max * - sizeof (kcf_provider_desc_t *)); } /* * Initialize a mutex and the KCF providers table, prov_tab. - * The providers table is dynamically allocated with prov_tab_max entries. + * The providers table is dynamically allocated with KCF_MAX_PROVIDERS entries. * Called from kcf module _init(). 
*/ void kcf_prov_tab_init(void) { mutex_init(&prov_tab_mutex, NULL, MUTEX_DEFAULT, NULL); - - prov_tab = kmem_zalloc(prov_tab_max * sizeof (kcf_provider_desc_t *), - KM_SLEEP); } /* @@ -101,8 +93,6 @@ kcf_prov_tab_add_provider(kcf_provider_desc_t *prov_desc) { uint_t i; - ASSERT(prov_tab != NULL); - mutex_enter(&prov_tab_mutex); /* find free slot in providers table */ @@ -146,9 +136,6 @@ kcf_prov_tab_rem_provider(crypto_provider_id_t prov_id) { kcf_provider_desc_t *prov_desc; - ASSERT(prov_tab != NULL); - ASSERT(prov_tab_num >= 0); - /* * Validate provider id, since it can be specified by a 3rd-party * provider. @@ -171,8 +158,8 @@ kcf_prov_tab_rem_provider(crypto_provider_id_t prov_id) * at that time. */ - KCF_PROV_REFRELE(prov_desc); KCF_PROV_IREFRELE(prov_desc); + KCF_PROV_REFRELE(prov_desc); return (CRYPTO_SUCCESS); } @@ -204,91 +191,6 @@ kcf_prov_tab_lookup(crypto_provider_id_t prov_id) return (prov_desc); } -static void -allocate_ops_v1(crypto_ops_t *src, crypto_ops_t *dst, uint_t *mech_list_count) -{ - if (src->co_control_ops != NULL) - dst->co_control_ops = kmem_alloc(sizeof (crypto_control_ops_t), - KM_SLEEP); - - if (src->co_digest_ops != NULL) - dst->co_digest_ops = kmem_alloc(sizeof (crypto_digest_ops_t), - KM_SLEEP); - - if (src->co_cipher_ops != NULL) - dst->co_cipher_ops = kmem_alloc(sizeof (crypto_cipher_ops_t), - KM_SLEEP); - - if (src->co_mac_ops != NULL) - dst->co_mac_ops = kmem_alloc(sizeof (crypto_mac_ops_t), - KM_SLEEP); - - if (src->co_sign_ops != NULL) - dst->co_sign_ops = kmem_alloc(sizeof (crypto_sign_ops_t), - KM_SLEEP); - - if (src->co_verify_ops != NULL) - dst->co_verify_ops = kmem_alloc(sizeof (crypto_verify_ops_t), - KM_SLEEP); - - if (src->co_dual_ops != NULL) - dst->co_dual_ops = kmem_alloc(sizeof (crypto_dual_ops_t), - KM_SLEEP); - - if (src->co_dual_cipher_mac_ops != NULL) - dst->co_dual_cipher_mac_ops = kmem_alloc( - sizeof (crypto_dual_cipher_mac_ops_t), KM_SLEEP); - - if (src->co_random_ops != NULL) { - dst->co_random_ops = 
kmem_alloc( - sizeof (crypto_random_number_ops_t), KM_SLEEP); - - /* - * Allocate storage to store the array of supported mechanisms - * specified by provider. We allocate extra mechanism storage - * if the provider has random_ops since we keep an internal - * mechanism, SUN_RANDOM, in this case. - */ - (*mech_list_count)++; - } - - if (src->co_session_ops != NULL) - dst->co_session_ops = kmem_alloc(sizeof (crypto_session_ops_t), - KM_SLEEP); - - if (src->co_object_ops != NULL) - dst->co_object_ops = kmem_alloc(sizeof (crypto_object_ops_t), - KM_SLEEP); - - if (src->co_key_ops != NULL) - dst->co_key_ops = kmem_alloc(sizeof (crypto_key_ops_t), - KM_SLEEP); - - if (src->co_provider_ops != NULL) - dst->co_provider_ops = kmem_alloc( - sizeof (crypto_provider_management_ops_t), KM_SLEEP); - - if (src->co_ctx_ops != NULL) - dst->co_ctx_ops = kmem_alloc(sizeof (crypto_ctx_ops_t), - KM_SLEEP); -} - -static void -allocate_ops_v2(crypto_ops_t *src, crypto_ops_t *dst) -{ - if (src->co_mech_ops != NULL) - dst->co_mech_ops = kmem_alloc(sizeof (crypto_mech_ops_t), - KM_SLEEP); -} - -static void -allocate_ops_v3(crypto_ops_t *src, crypto_ops_t *dst) -{ - if (src->co_nostore_key_ops != NULL) - dst->co_nostore_key_ops = - kmem_alloc(sizeof (crypto_nostore_key_ops_t), KM_SLEEP); -} - /* * Allocate a provider descriptor. mech_list_count specifies the * number of mechanisms supported by the providers, and is used @@ -297,61 +199,19 @@ allocate_ops_v3(crypto_ops_t *src, crypto_ops_t *dst) * since it is invoked from user context during provider registration. 
*/ kcf_provider_desc_t * -kcf_alloc_provider_desc(crypto_provider_info_t *info) +kcf_alloc_provider_desc(void) { - int i, j; - kcf_provider_desc_t *desc; - uint_t mech_list_count = info->pi_mech_list_count; - crypto_ops_t *src_ops = info->pi_ops_vector; - - desc = kmem_zalloc(sizeof (kcf_provider_desc_t), KM_SLEEP); + kcf_provider_desc_t *desc = + kmem_zalloc(sizeof (kcf_provider_desc_t), KM_SLEEP); - /* - * pd_description serves two purposes - * - Appears as a blank padded PKCS#11 style string, that will be - * returned to applications in CK_SLOT_INFO.slotDescription. - * This means that we should not have a null character in the - * first CRYPTO_PROVIDER_DESCR_MAX_LEN bytes. - * - Appears as a null-terminated string that can be used by - * other kcf routines. - * - * So, we allocate enough room for one extra null terminator - * which keeps every one happy. - */ - desc->pd_description = kmem_alloc(CRYPTO_PROVIDER_DESCR_MAX_LEN + 1, - KM_SLEEP); - (void) memset(desc->pd_description, ' ', - CRYPTO_PROVIDER_DESCR_MAX_LEN); - desc->pd_description[CRYPTO_PROVIDER_DESCR_MAX_LEN] = '\0'; - - /* - * Since the framework does not require the ops vector specified - * by the providers during registration to be persistent, - * KCF needs to allocate storage where copies of the ops - * vectors are copied. 
- */ - desc->pd_ops_vector = kmem_zalloc(sizeof (crypto_ops_t), KM_SLEEP); - - if (info->pi_provider_type != CRYPTO_LOGICAL_PROVIDER) { - allocate_ops_v1(src_ops, desc->pd_ops_vector, &mech_list_count); - if (info->pi_interface_version >= CRYPTO_SPI_VERSION_2) - allocate_ops_v2(src_ops, desc->pd_ops_vector); - if (info->pi_interface_version == CRYPTO_SPI_VERSION_3) - allocate_ops_v3(src_ops, desc->pd_ops_vector); - } - - desc->pd_mech_list_count = mech_list_count; - desc->pd_mechanisms = kmem_zalloc(sizeof (crypto_mech_info_t) * - mech_list_count, KM_SLEEP); - for (i = 0; i < KCF_OPS_CLASSSIZE; i++) - for (j = 0; j < KCF_MAXMECHTAB; j++) + for (int i = 0; i < KCF_OPS_CLASSSIZE; i++) + for (int j = 0; j < KCF_MAXMECHTAB; j++) desc->pd_mech_indx[i][j] = KCF_INVALID_INDX; desc->pd_prov_id = KCF_PROVID_INVALID; desc->pd_state = KCF_PROV_ALLOCATED; mutex_init(&desc->pd_lock, NULL, MUTEX_DEFAULT, NULL); - cv_init(&desc->pd_resume_cv, NULL, CV_DEFAULT, NULL); cv_init(&desc->pd_remove_cv, NULL, CV_DEFAULT, NULL); return (desc); @@ -360,7 +220,7 @@ kcf_alloc_provider_desc(crypto_provider_info_t *info) /* * Called by KCF_PROV_REFRELE when a provider's reference count drops * to zero. We free the descriptor when the last reference is released. - * However, for software providers, we do not free it when there is an + * However, for providers, we do not free it when there is an * unregister thread waiting. We signal that thread in this case and * that thread is responsible for freeing the descriptor. 
*/ @@ -368,22 +228,16 @@ void kcf_provider_zero_refcnt(kcf_provider_desc_t *desc) { mutex_enter(&desc->pd_lock); - switch (desc->pd_prov_type) { - case CRYPTO_SW_PROVIDER: - if (desc->pd_state == KCF_PROV_REMOVED || - desc->pd_state == KCF_PROV_DISABLED) { - desc->pd_state = KCF_PROV_FREED; - cv_broadcast(&desc->pd_remove_cv); - mutex_exit(&desc->pd_lock); - break; - } - fallthrough; - - case CRYPTO_HW_PROVIDER: - case CRYPTO_LOGICAL_PROVIDER: + if (desc->pd_state == KCF_PROV_REMOVED || + desc->pd_state == KCF_PROV_DISABLED) { + desc->pd_state = KCF_PROV_FREED; + cv_broadcast(&desc->pd_remove_cv); mutex_exit(&desc->pd_lock); - kcf_free_provider_desc(desc); + return; } + + mutex_exit(&desc->pd_lock); + kcf_free_provider_desc(desc); } /* @@ -406,202 +260,15 @@ kcf_free_provider_desc(kcf_provider_desc_t *desc) /* free the kernel memory associated with the provider descriptor */ - if (desc->pd_description != NULL) - kmem_free(desc->pd_description, - CRYPTO_PROVIDER_DESCR_MAX_LEN + 1); - - if (desc->pd_ops_vector != NULL) { - - if (desc->pd_ops_vector->co_control_ops != NULL) - kmem_free(desc->pd_ops_vector->co_control_ops, - sizeof (crypto_control_ops_t)); - - if (desc->pd_ops_vector->co_digest_ops != NULL) - kmem_free(desc->pd_ops_vector->co_digest_ops, - sizeof (crypto_digest_ops_t)); - - if (desc->pd_ops_vector->co_cipher_ops != NULL) - kmem_free(desc->pd_ops_vector->co_cipher_ops, - sizeof (crypto_cipher_ops_t)); - - if (desc->pd_ops_vector->co_mac_ops != NULL) - kmem_free(desc->pd_ops_vector->co_mac_ops, - sizeof (crypto_mac_ops_t)); - - if (desc->pd_ops_vector->co_sign_ops != NULL) - kmem_free(desc->pd_ops_vector->co_sign_ops, - sizeof (crypto_sign_ops_t)); - - if (desc->pd_ops_vector->co_verify_ops != NULL) - kmem_free(desc->pd_ops_vector->co_verify_ops, - sizeof (crypto_verify_ops_t)); - - if (desc->pd_ops_vector->co_dual_ops != NULL) - kmem_free(desc->pd_ops_vector->co_dual_ops, - sizeof (crypto_dual_ops_t)); - - if (desc->pd_ops_vector->co_dual_cipher_mac_ops 
!= NULL) - kmem_free(desc->pd_ops_vector->co_dual_cipher_mac_ops, - sizeof (crypto_dual_cipher_mac_ops_t)); - - if (desc->pd_ops_vector->co_random_ops != NULL) - kmem_free(desc->pd_ops_vector->co_random_ops, - sizeof (crypto_random_number_ops_t)); - - if (desc->pd_ops_vector->co_session_ops != NULL) - kmem_free(desc->pd_ops_vector->co_session_ops, - sizeof (crypto_session_ops_t)); - - if (desc->pd_ops_vector->co_object_ops != NULL) - kmem_free(desc->pd_ops_vector->co_object_ops, - sizeof (crypto_object_ops_t)); - - if (desc->pd_ops_vector->co_key_ops != NULL) - kmem_free(desc->pd_ops_vector->co_key_ops, - sizeof (crypto_key_ops_t)); - - if (desc->pd_ops_vector->co_provider_ops != NULL) - kmem_free(desc->pd_ops_vector->co_provider_ops, - sizeof (crypto_provider_management_ops_t)); - - if (desc->pd_ops_vector->co_ctx_ops != NULL) - kmem_free(desc->pd_ops_vector->co_ctx_ops, - sizeof (crypto_ctx_ops_t)); - - if (desc->pd_ops_vector->co_mech_ops != NULL) - kmem_free(desc->pd_ops_vector->co_mech_ops, - sizeof (crypto_mech_ops_t)); - - if (desc->pd_ops_vector->co_nostore_key_ops != NULL) - kmem_free(desc->pd_ops_vector->co_nostore_key_ops, - sizeof (crypto_nostore_key_ops_t)); - - kmem_free(desc->pd_ops_vector, sizeof (crypto_ops_t)); - } - - if (desc->pd_mechanisms != NULL) - /* free the memory associated with the mechanism info's */ - kmem_free(desc->pd_mechanisms, sizeof (crypto_mech_info_t) * - desc->pd_mech_list_count); - - if (desc->pd_sched_info.ks_taskq != NULL) - taskq_destroy(desc->pd_sched_info.ks_taskq); - mutex_destroy(&desc->pd_lock); - cv_destroy(&desc->pd_resume_cv); cv_destroy(&desc->pd_remove_cv); kmem_free(desc, sizeof (kcf_provider_desc_t)); } /* - * Returns an array of hardware and logical provider descriptors, - * a.k.a the PKCS#11 slot list. A REFHOLD is done on each descriptor - * before the array is returned. The entire table can be freed by - * calling kcf_free_provider_tab(). 
- */ -int -kcf_get_slot_list(uint_t *count, kcf_provider_desc_t ***array, - boolean_t unverified) -{ - kcf_provider_desc_t *prov_desc; - kcf_provider_desc_t **p = NULL; - char *last; - uint_t cnt = 0; - uint_t i, j; - int rval = CRYPTO_SUCCESS; - size_t n, final_size; - - /* count the providers */ - mutex_enter(&prov_tab_mutex); - for (i = 0; i < KCF_MAX_PROVIDERS; i++) { - if ((prov_desc = prov_tab[i]) != NULL && - ((prov_desc->pd_prov_type == CRYPTO_HW_PROVIDER && - (prov_desc->pd_flags & CRYPTO_HIDE_PROVIDER) == 0) || - prov_desc->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)) { - if (KCF_IS_PROV_USABLE(prov_desc) || - (unverified && KCF_IS_PROV_UNVERIFIED(prov_desc))) { - cnt++; - } - } - } - mutex_exit(&prov_tab_mutex); - - if (cnt == 0) - goto out; - - n = cnt * sizeof (kcf_provider_desc_t *); -again: - p = kmem_zalloc(n, KM_SLEEP); - - /* pointer to last entry in the array */ - last = (char *)&p[cnt-1]; - - mutex_enter(&prov_tab_mutex); - /* fill the slot list */ - for (i = 0, j = 0; i < KCF_MAX_PROVIDERS; i++) { - if ((prov_desc = prov_tab[i]) != NULL && - ((prov_desc->pd_prov_type == CRYPTO_HW_PROVIDER && - (prov_desc->pd_flags & CRYPTO_HIDE_PROVIDER) == 0) || - prov_desc->pd_prov_type == CRYPTO_LOGICAL_PROVIDER)) { - if (KCF_IS_PROV_USABLE(prov_desc) || - (unverified && KCF_IS_PROV_UNVERIFIED(prov_desc))) { - if ((char *)&p[j] > last) { - mutex_exit(&prov_tab_mutex); - kcf_free_provider_tab(cnt, p); - n = n << 1; - cnt = cnt << 1; - goto again; - } - p[j++] = prov_desc; - KCF_PROV_REFHOLD(prov_desc); - } - } - } - mutex_exit(&prov_tab_mutex); - - final_size = j * sizeof (kcf_provider_desc_t *); - cnt = j; - ASSERT(final_size <= n); - - /* check if buffer we allocated is too large */ - if (final_size < n) { - char *final_buffer = NULL; - - if (final_size > 0) { - final_buffer = kmem_alloc(final_size, KM_SLEEP); - bcopy(p, final_buffer, final_size); - } - kmem_free(p, n); - p = (kcf_provider_desc_t **)final_buffer; - } -out: - *count = cnt; - *array = p; - 
return (rval); -} - -/* - * Free an array of hardware provider descriptors. A REFRELE - * is done on each descriptor before the table is freed. - */ -void -kcf_free_provider_tab(uint_t count, kcf_provider_desc_t **array) -{ - kcf_provider_desc_t *prov_desc; - int i; - - for (i = 0; i < count; i++) { - if ((prov_desc = array[i]) != NULL) { - KCF_PROV_REFRELE(prov_desc); - } - } - kmem_free(array, count * sizeof (kcf_provider_desc_t *)); -} - -/* * Returns in the location pointed to by pd a pointer to the descriptor - * for the software provider for the specified mechanism. + * for the provider for the specified mechanism. * The provider descriptor is returned held and it is the caller's * responsibility to release it when done. The mechanism entry * is returned if the optional argument mep is non NULL. @@ -619,24 +286,17 @@ kcf_get_sw_prov(crypto_mech_type_t mech_type, kcf_provider_desc_t **pd, if (kcf_get_mech_entry(mech_type, &me) != KCF_SUCCESS) return (CRYPTO_MECHANISM_INVALID); - /* - * Get the software provider for this mechanism. - * Lock the mech_entry until we grab the 'pd'. - */ - mutex_enter(&me->me_mutex); - + /* Get the provider for this mechanism. */ if (me->me_sw_prov == NULL || (*pd = me->me_sw_prov->pm_prov_desc) == NULL) { - /* no SW provider for this mechanism */ + /* no provider for this mechanism */ if (log_warn) - cmn_err(CE_WARN, "no SW provider for \"%s\"\n", + cmn_err(CE_WARN, "no provider for \"%s\"\n", me->me_name); - mutex_exit(&me->me_mutex); return (CRYPTO_MECH_NOT_SUPPORTED); } KCF_PROV_REFHOLD(*pd); - mutex_exit(&me->me_mutex); if (mep != NULL) *mep = me; diff --git a/sys/contrib/openzfs/module/icp/core/kcf_sched.c b/sys/contrib/openzfs/module/icp/core/kcf_sched.c index 81fd15f8ea26..360ecfb2be19 100644 --- a/sys/contrib/openzfs/module/icp/core/kcf_sched.c +++ b/sys/contrib/openzfs/module/icp/core/kcf_sched.c @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. 
* * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * @@ -35,774 +35,36 @@ #include <sys/crypto/sched_impl.h> #include <sys/crypto/api.h> -kcf_global_swq_t *gswq; /* Global software queue */ - -/* Thread pool related variables */ -static kcf_pool_t *kcfpool; /* Thread pool of kcfd LWPs */ -int kcf_maxthreads = 2; -int kcf_minthreads = 1; -int kcf_thr_multiple = 2; /* Boot-time tunable for experimentation */ -static ulong_t kcf_idlethr_timeout; -#define KCF_DEFAULT_THRTIMEOUT 60000000 /* 60 seconds */ - /* kmem caches used by the scheduler */ -static kmem_cache_t *kcf_sreq_cache; -static kmem_cache_t *kcf_areq_cache; static kmem_cache_t *kcf_context_cache; -/* Global request ID table */ -static kcf_reqid_table_t *kcf_reqid_table[REQID_TABLES]; - -/* KCF stats. Not protected. 
*/ -static kcf_stats_t kcf_ksdata = { - { "total threads in pool", KSTAT_DATA_UINT32}, - { "idle threads in pool", KSTAT_DATA_UINT32}, - { "min threads in pool", KSTAT_DATA_UINT32}, - { "max threads in pool", KSTAT_DATA_UINT32}, - { "requests in gswq", KSTAT_DATA_UINT32}, - { "max requests in gswq", KSTAT_DATA_UINT32}, - { "threads for HW taskq", KSTAT_DATA_UINT32}, - { "minalloc for HW taskq", KSTAT_DATA_UINT32}, - { "maxalloc for HW taskq", KSTAT_DATA_UINT32} -}; - -static kstat_t *kcf_misc_kstat = NULL; -ulong_t kcf_swprov_hndl = 0; - -static kcf_areq_node_t *kcf_areqnode_alloc(kcf_provider_desc_t *, - kcf_context_t *, crypto_call_req_t *, kcf_req_params_t *, boolean_t); -static int kcf_disp_sw_request(kcf_areq_node_t *); -static void process_req_hwp(void *); -static int kcf_enqueue(kcf_areq_node_t *); -static void kcfpool_alloc(void); -static void kcf_reqid_delete(kcf_areq_node_t *areq); -static crypto_req_id_t kcf_reqid_insert(kcf_areq_node_t *areq); -static int kcf_misc_kstat_update(kstat_t *ksp, int rw); - /* * Create a new context. */ crypto_ctx_t * -kcf_new_ctx(crypto_call_req_t *crq, kcf_provider_desc_t *pd, - crypto_session_id_t sid) +kcf_new_ctx(kcf_provider_desc_t *pd) { crypto_ctx_t *ctx; kcf_context_t *kcf_ctx; - kcf_ctx = kmem_cache_alloc(kcf_context_cache, - (crq == NULL) ? KM_SLEEP : KM_NOSLEEP); + kcf_ctx = kmem_cache_alloc(kcf_context_cache, KM_SLEEP); if (kcf_ctx == NULL) return (NULL); /* initialize the context for the consumer */ kcf_ctx->kc_refcnt = 1; - kcf_ctx->kc_req_chain_first = NULL; - kcf_ctx->kc_req_chain_last = NULL; - kcf_ctx->kc_secondctx = NULL; KCF_PROV_REFHOLD(pd); kcf_ctx->kc_prov_desc = pd; kcf_ctx->kc_sw_prov_desc = NULL; - kcf_ctx->kc_mech = NULL; ctx = &kcf_ctx->kc_glbl_ctx; - ctx->cc_provider = pd->pd_prov_handle; - ctx->cc_session = sid; ctx->cc_provider_private = NULL; ctx->cc_framework_private = (void *)kcf_ctx; - ctx->cc_flags = 0; - ctx->cc_opstate = NULL; return (ctx); } /* - * Allocate a new async request node. 
- * - * ictx - Framework private context pointer - * crq - Has callback function and argument. Should be non NULL. - * req - The parameters to pass to the SPI - */ -static kcf_areq_node_t * -kcf_areqnode_alloc(kcf_provider_desc_t *pd, kcf_context_t *ictx, - crypto_call_req_t *crq, kcf_req_params_t *req, boolean_t isdual) -{ - kcf_areq_node_t *arptr, *areq; - - ASSERT(crq != NULL); - arptr = kmem_cache_alloc(kcf_areq_cache, KM_NOSLEEP); - if (arptr == NULL) - return (NULL); - - arptr->an_state = REQ_ALLOCATED; - arptr->an_reqarg = *crq; - arptr->an_params = *req; - arptr->an_context = ictx; - arptr->an_isdual = isdual; - - arptr->an_next = arptr->an_prev = NULL; - KCF_PROV_REFHOLD(pd); - arptr->an_provider = pd; - arptr->an_tried_plist = NULL; - arptr->an_refcnt = 1; - arptr->an_idnext = arptr->an_idprev = NULL; - - /* - * Requests for context-less operations do not use the - * fields - an_is_my_turn, and an_ctxchain_next. - */ - if (ictx == NULL) - return (arptr); - - KCF_CONTEXT_REFHOLD(ictx); - /* - * Chain this request to the context. - */ - mutex_enter(&ictx->kc_in_use_lock); - arptr->an_ctxchain_next = NULL; - if ((areq = ictx->kc_req_chain_last) == NULL) { - arptr->an_is_my_turn = B_TRUE; - ictx->kc_req_chain_last = - ictx->kc_req_chain_first = arptr; - } else { - ASSERT(ictx->kc_req_chain_first != NULL); - arptr->an_is_my_turn = B_FALSE; - /* Insert the new request to the end of the chain. */ - areq->an_ctxchain_next = arptr; - ictx->kc_req_chain_last = arptr; - } - mutex_exit(&ictx->kc_in_use_lock); - - return (arptr); -} - -/* - * Queue the request node and do one of the following: - * - If there is an idle thread signal it to run. - * - If there is no idle thread and max running threads is not - * reached, signal the creator thread for more threads. - * - * If the two conditions above are not met, we don't need to do - * anything. The request will be picked up by one of the - * worker threads when it becomes available. 
- */ -static int -kcf_disp_sw_request(kcf_areq_node_t *areq) -{ - int err; - int cnt = 0; - - if ((err = kcf_enqueue(areq)) != 0) - return (err); - - if (kcfpool->kp_idlethreads > 0) { - /* Signal an idle thread to run */ - mutex_enter(&gswq->gs_lock); - cv_signal(&gswq->gs_cv); - mutex_exit(&gswq->gs_lock); - - return (CRYPTO_QUEUED); - } - - /* - * We keep the number of running threads to be at - * kcf_minthreads to reduce gs_lock contention. - */ - cnt = kcf_minthreads - - (kcfpool->kp_threads - kcfpool->kp_blockedthreads); - if (cnt > 0) { - /* - * The following ensures the number of threads in pool - * does not exceed kcf_maxthreads. - */ - cnt = MIN(cnt, kcf_maxthreads - (int)kcfpool->kp_threads); - if (cnt > 0) { - /* Signal the creator thread for more threads */ - mutex_enter(&kcfpool->kp_user_lock); - if (!kcfpool->kp_signal_create_thread) { - kcfpool->kp_signal_create_thread = B_TRUE; - kcfpool->kp_nthrs = cnt; - cv_signal(&kcfpool->kp_user_cv); - } - mutex_exit(&kcfpool->kp_user_lock); - } - } - - return (CRYPTO_QUEUED); -} - -/* - * This routine is called by the taskq associated with - * each hardware provider. We notify the kernel consumer - * via the callback routine in case of CRYPTO_SUCCESS or - * a failure. - * - * A request can be of type kcf_areq_node_t or of type - * kcf_sreq_node_t. - */ -static void -process_req_hwp(void *ireq) -{ - int error = 0; - crypto_ctx_t *ctx; - kcf_call_type_t ctype; - kcf_provider_desc_t *pd; - kcf_areq_node_t *areq = (kcf_areq_node_t *)ireq; - kcf_sreq_node_t *sreq = (kcf_sreq_node_t *)ireq; - - pd = ((ctype = GET_REQ_TYPE(ireq)) == CRYPTO_SYNCH) ? - sreq->sn_provider : areq->an_provider; - - /* - * Wait if flow control is in effect for the provider. A - * CRYPTO_PROVIDER_READY or CRYPTO_PROVIDER_FAILED - * notification will signal us. We also get signaled if - * the provider is unregistering. 
- */ - if (pd->pd_state == KCF_PROV_BUSY) { - mutex_enter(&pd->pd_lock); - while (pd->pd_state == KCF_PROV_BUSY) - cv_wait(&pd->pd_resume_cv, &pd->pd_lock); - mutex_exit(&pd->pd_lock); - } - - /* - * Bump the internal reference count while the request is being - * processed. This is how we know when it's safe to unregister - * a provider. This step must precede the pd_state check below. - */ - KCF_PROV_IREFHOLD(pd); - - /* - * Fail the request if the provider has failed. We return a - * recoverable error and the notified clients attempt any - * recovery. For async clients this is done in kcf_aop_done() - * and for sync clients it is done in the k-api routines. - */ - if (pd->pd_state >= KCF_PROV_FAILED) { - error = CRYPTO_DEVICE_ERROR; - goto bail; - } - - if (ctype == CRYPTO_SYNCH) { - mutex_enter(&sreq->sn_lock); - sreq->sn_state = REQ_INPROGRESS; - mutex_exit(&sreq->sn_lock); - - ctx = sreq->sn_context ? &sreq->sn_context->kc_glbl_ctx : NULL; - error = common_submit_request(sreq->sn_provider, ctx, - sreq->sn_params, sreq); - } else { - kcf_context_t *ictx; - ASSERT(ctype == CRYPTO_ASYNCH); - - /* - * We are in the per-hardware provider thread context and - * hence can sleep. Note that the caller would have done - * a taskq_dispatch(..., TQ_NOSLEEP) and would have returned. - */ - ctx = (ictx = areq->an_context) ? &ictx->kc_glbl_ctx : NULL; - - mutex_enter(&areq->an_lock); - /* - * We need to maintain ordering for multi-part requests. - * an_is_my_turn is set to B_TRUE initially for a request - * when it is enqueued and there are no other requests - * for that context. It is set later from kcf_aop_done() when - * the request before us in the chain of requests for the - * context completes. We get signaled at that point. 
- */ - if (ictx != NULL) { - ASSERT(ictx->kc_prov_desc == areq->an_provider); - - while (areq->an_is_my_turn == B_FALSE) { - cv_wait(&areq->an_turn_cv, &areq->an_lock); - } - } - areq->an_state = REQ_INPROGRESS; - mutex_exit(&areq->an_lock); - - error = common_submit_request(areq->an_provider, ctx, - &areq->an_params, areq); - } - -bail: - if (error == CRYPTO_QUEUED) { - /* - * The request is queued by the provider and we should - * get a crypto_op_notification() from the provider later. - * We notify the consumer at that time. - */ - return; - } else { /* CRYPTO_SUCCESS or other failure */ - KCF_PROV_IREFRELE(pd); - if (ctype == CRYPTO_SYNCH) - kcf_sop_done(sreq, error); - else - kcf_aop_done(areq, error); - } -} - -/* - * This routine checks if a request can be retried on another - * provider. If true, mech1 is initialized to point to the mechanism - * structure. mech2 is also initialized in case of a dual operation. fg - * is initialized to the correct crypto_func_group_t bit flag. They are - * initialized by this routine, so that the caller can pass them to a - * kcf_get_mech_provider() or kcf_get_dual_provider() with no further change. - * - * We check that the request is for a init or atomic routine and that - * it is for one of the operation groups used from k-api . - */ -static boolean_t -can_resubmit(kcf_areq_node_t *areq, crypto_mechanism_t **mech1, - crypto_mechanism_t **mech2, crypto_func_group_t *fg) -{ - kcf_req_params_t *params; - kcf_op_type_t optype; - - params = &areq->an_params; - optype = params->rp_optype; - - if (!(IS_INIT_OP(optype) || IS_ATOMIC_OP(optype))) - return (B_FALSE); - - switch (params->rp_opgrp) { - case KCF_OG_DIGEST: { - kcf_digest_ops_params_t *dops = ¶ms->rp_u.digest_params; - - dops->do_mech.cm_type = dops->do_framework_mechtype; - *mech1 = &dops->do_mech; - *fg = (optype == KCF_OP_INIT) ? 
CRYPTO_FG_DIGEST : - CRYPTO_FG_DIGEST_ATOMIC; - break; - } - - case KCF_OG_MAC: { - kcf_mac_ops_params_t *mops = ¶ms->rp_u.mac_params; - - mops->mo_mech.cm_type = mops->mo_framework_mechtype; - *mech1 = &mops->mo_mech; - *fg = (optype == KCF_OP_INIT) ? CRYPTO_FG_MAC : - CRYPTO_FG_MAC_ATOMIC; - break; - } - - case KCF_OG_SIGN: { - kcf_sign_ops_params_t *sops = ¶ms->rp_u.sign_params; - - sops->so_mech.cm_type = sops->so_framework_mechtype; - *mech1 = &sops->so_mech; - switch (optype) { - case KCF_OP_INIT: - *fg = CRYPTO_FG_SIGN; - break; - case KCF_OP_ATOMIC: - *fg = CRYPTO_FG_SIGN_ATOMIC; - break; - default: - ASSERT(optype == KCF_OP_SIGN_RECOVER_ATOMIC); - *fg = CRYPTO_FG_SIGN_RECOVER_ATOMIC; - } - break; - } - - case KCF_OG_VERIFY: { - kcf_verify_ops_params_t *vops = ¶ms->rp_u.verify_params; - - vops->vo_mech.cm_type = vops->vo_framework_mechtype; - *mech1 = &vops->vo_mech; - switch (optype) { - case KCF_OP_INIT: - *fg = CRYPTO_FG_VERIFY; - break; - case KCF_OP_ATOMIC: - *fg = CRYPTO_FG_VERIFY_ATOMIC; - break; - default: - ASSERT(optype == KCF_OP_VERIFY_RECOVER_ATOMIC); - *fg = CRYPTO_FG_VERIFY_RECOVER_ATOMIC; - } - break; - } - - case KCF_OG_ENCRYPT: { - kcf_encrypt_ops_params_t *eops = ¶ms->rp_u.encrypt_params; - - eops->eo_mech.cm_type = eops->eo_framework_mechtype; - *mech1 = &eops->eo_mech; - *fg = (optype == KCF_OP_INIT) ? CRYPTO_FG_ENCRYPT : - CRYPTO_FG_ENCRYPT_ATOMIC; - break; - } - - case KCF_OG_DECRYPT: { - kcf_decrypt_ops_params_t *dcrops = ¶ms->rp_u.decrypt_params; - - dcrops->dop_mech.cm_type = dcrops->dop_framework_mechtype; - *mech1 = &dcrops->dop_mech; - *fg = (optype == KCF_OP_INIT) ? 
CRYPTO_FG_DECRYPT : - CRYPTO_FG_DECRYPT_ATOMIC; - break; - } - - case KCF_OG_ENCRYPT_MAC: { - kcf_encrypt_mac_ops_params_t *eops = - ¶ms->rp_u.encrypt_mac_params; - - eops->em_encr_mech.cm_type = eops->em_framework_encr_mechtype; - *mech1 = &eops->em_encr_mech; - eops->em_mac_mech.cm_type = eops->em_framework_mac_mechtype; - *mech2 = &eops->em_mac_mech; - *fg = (optype == KCF_OP_INIT) ? CRYPTO_FG_ENCRYPT_MAC : - CRYPTO_FG_ENCRYPT_MAC_ATOMIC; - break; - } - - case KCF_OG_MAC_DECRYPT: { - kcf_mac_decrypt_ops_params_t *dops = - ¶ms->rp_u.mac_decrypt_params; - - dops->md_mac_mech.cm_type = dops->md_framework_mac_mechtype; - *mech1 = &dops->md_mac_mech; - dops->md_decr_mech.cm_type = dops->md_framework_decr_mechtype; - *mech2 = &dops->md_decr_mech; - *fg = (optype == KCF_OP_INIT) ? CRYPTO_FG_MAC_DECRYPT : - CRYPTO_FG_MAC_DECRYPT_ATOMIC; - break; - } - - default: - return (B_FALSE); - } - - return (B_TRUE); -} - -/* - * This routine is called when a request to a provider has failed - * with a recoverable error. This routine tries to find another provider - * and dispatches the request to the new provider, if one is available. - * We reuse the request structure. - * - * A return value of NULL from kcf_get_mech_provider() indicates - * we have tried the last provider. - */ -static int -kcf_resubmit_request(kcf_areq_node_t *areq) -{ - int error = CRYPTO_FAILED; - kcf_context_t *ictx; - kcf_provider_desc_t *old_pd; - kcf_provider_desc_t *new_pd; - crypto_mechanism_t *mech1 = NULL, *mech2 = NULL; - crypto_mech_type_t prov_mt1, prov_mt2; - crypto_func_group_t fg = 0; - - if (!can_resubmit(areq, &mech1, &mech2, &fg)) - return (error); - - old_pd = areq->an_provider; - /* - * Add old_pd to the list of providers already tried. We release - * the hold on old_pd (from the earlier kcf_get_mech_provider()) in - * kcf_free_triedlist(). 
- */ - if (kcf_insert_triedlist(&areq->an_tried_plist, old_pd, - KM_NOSLEEP) == NULL) - return (error); - - if (mech1 && !mech2) { - new_pd = kcf_get_mech_provider(mech1->cm_type, NULL, &error, - areq->an_tried_plist, fg, - (areq->an_reqarg.cr_flag & CRYPTO_RESTRICTED), 0); - } else { - ASSERT(mech1 != NULL && mech2 != NULL); - - new_pd = kcf_get_dual_provider(mech1, mech2, NULL, &prov_mt1, - &prov_mt2, &error, areq->an_tried_plist, fg, fg, - (areq->an_reqarg.cr_flag & CRYPTO_RESTRICTED), 0); - } - - if (new_pd == NULL) - return (error); - - /* - * We reuse the old context by resetting provider specific - * fields in it. - */ - if ((ictx = areq->an_context) != NULL) { - crypto_ctx_t *ctx; - - ASSERT(old_pd == ictx->kc_prov_desc); - KCF_PROV_REFRELE(ictx->kc_prov_desc); - KCF_PROV_REFHOLD(new_pd); - ictx->kc_prov_desc = new_pd; - - ctx = &ictx->kc_glbl_ctx; - ctx->cc_provider = new_pd->pd_prov_handle; - ctx->cc_session = new_pd->pd_sid; - ctx->cc_provider_private = NULL; - } - - /* We reuse areq. by resetting the provider and context fields. */ - KCF_PROV_REFRELE(old_pd); - KCF_PROV_REFHOLD(new_pd); - areq->an_provider = new_pd; - mutex_enter(&areq->an_lock); - areq->an_state = REQ_WAITING; - mutex_exit(&areq->an_lock); - - switch (new_pd->pd_prov_type) { - case CRYPTO_SW_PROVIDER: - error = kcf_disp_sw_request(areq); - break; - - case CRYPTO_HW_PROVIDER: { - taskq_t *taskq = new_pd->pd_sched_info.ks_taskq; - - if (taskq_dispatch(taskq, process_req_hwp, areq, TQ_NOSLEEP) == - TASKQID_INVALID) { - error = CRYPTO_HOST_MEMORY; - } else { - error = CRYPTO_QUEUED; - } - - break; - default: - break; - } - } - - return (error); -} - -static inline int EMPTY_TASKQ(taskq_t *tq) -{ -#ifdef _KERNEL - return (tq->tq_lowest_id == tq->tq_next_id); -#else - return (tq->tq_task.tqent_next == &tq->tq_task || tq->tq_active == 0); -#endif -} - -/* - * Routine called by both ioctl and k-api. The consumer should - * bundle the parameters into a kcf_req_params_t structure. 
A bunch - * of macros are available in ops_impl.h for this bundling. They are: - * - * KCF_WRAP_DIGEST_OPS_PARAMS() - * KCF_WRAP_MAC_OPS_PARAMS() - * KCF_WRAP_ENCRYPT_OPS_PARAMS() - * KCF_WRAP_DECRYPT_OPS_PARAMS() ... etc. - * - * It is the caller's responsibility to free the ctx argument when - * appropriate. See the KCF_CONTEXT_COND_RELEASE macro for details. - */ -int -kcf_submit_request(kcf_provider_desc_t *pd, crypto_ctx_t *ctx, - crypto_call_req_t *crq, kcf_req_params_t *params, boolean_t cont) -{ - int error = CRYPTO_SUCCESS; - kcf_areq_node_t *areq; - kcf_sreq_node_t *sreq; - kcf_context_t *kcf_ctx; - taskq_t *taskq = pd->pd_sched_info.ks_taskq; - - kcf_ctx = ctx ? (kcf_context_t *)ctx->cc_framework_private : NULL; - - /* Synchronous cases */ - if (crq == NULL) { - switch (pd->pd_prov_type) { - case CRYPTO_SW_PROVIDER: - error = common_submit_request(pd, ctx, params, - KCF_RHNDL(KM_SLEEP)); - break; - - case CRYPTO_HW_PROVIDER: - /* - * Special case for CRYPTO_SYNCHRONOUS providers that - * never return a CRYPTO_QUEUED error. We skip any - * request allocation and call the SPI directly. - */ - if ((pd->pd_flags & CRYPTO_SYNCHRONOUS) && - EMPTY_TASKQ(taskq)) { - KCF_PROV_IREFHOLD(pd); - if (pd->pd_state == KCF_PROV_READY) { - error = common_submit_request(pd, ctx, - params, KCF_RHNDL(KM_SLEEP)); - KCF_PROV_IREFRELE(pd); - ASSERT(error != CRYPTO_QUEUED); - break; - } - KCF_PROV_IREFRELE(pd); - } - - sreq = kmem_cache_alloc(kcf_sreq_cache, KM_SLEEP); - sreq->sn_state = REQ_ALLOCATED; - sreq->sn_rv = CRYPTO_FAILED; - sreq->sn_params = params; - - /* - * Note that we do not need to hold the context - * for synchronous case as the context will never - * become invalid underneath us. We do not need to hold - * the provider here either as the caller has a hold. 
- */ - sreq->sn_context = kcf_ctx; - ASSERT(KCF_PROV_REFHELD(pd)); - sreq->sn_provider = pd; - - ASSERT(taskq != NULL); - /* - * Call the SPI directly if the taskq is empty and the - * provider is not busy, else dispatch to the taskq. - * Calling directly is fine as this is the synchronous - * case. This is unlike the asynchronous case where we - * must always dispatch to the taskq. - */ - if (EMPTY_TASKQ(taskq) && - pd->pd_state == KCF_PROV_READY) { - process_req_hwp(sreq); - } else { - /* - * We can not tell from taskq_dispatch() return - * value if we exceeded maxalloc. Hence the - * check here. Since we are allowed to wait in - * the synchronous case, we wait for the taskq - * to become empty. - */ - if (taskq->tq_nalloc >= crypto_taskq_maxalloc) { - taskq_wait(taskq); - } - - (void) taskq_dispatch(taskq, process_req_hwp, - sreq, TQ_SLEEP); - } - - /* - * Wait for the notification to arrive, - * if the operation is not done yet. - * Bug# 4722589 will make the wait a cv_wait_sig(). - */ - mutex_enter(&sreq->sn_lock); - while (sreq->sn_state < REQ_DONE) - cv_wait(&sreq->sn_cv, &sreq->sn_lock); - mutex_exit(&sreq->sn_lock); - - error = sreq->sn_rv; - kmem_cache_free(kcf_sreq_cache, sreq); - - break; - - default: - error = CRYPTO_FAILED; - break; - } - - } else { /* Asynchronous cases */ - switch (pd->pd_prov_type) { - case CRYPTO_SW_PROVIDER: - if (!(crq->cr_flag & CRYPTO_ALWAYS_QUEUE)) { - /* - * This case has less overhead since there is - * no switching of context. - */ - error = common_submit_request(pd, ctx, params, - KCF_RHNDL(KM_NOSLEEP)); - } else { - /* - * CRYPTO_ALWAYS_QUEUE is set. We need to - * queue the request and return. - */ - areq = kcf_areqnode_alloc(pd, kcf_ctx, crq, - params, cont); - if (areq == NULL) - error = CRYPTO_HOST_MEMORY; - else { - if (!(crq->cr_flag - & CRYPTO_SKIP_REQID)) { - /* - * Set the request handle. This handle - * is used for any crypto_cancel_req(9f) - * calls from the consumer. 
We have to - * do this before dispatching the - * request. - */ - crq->cr_reqid = kcf_reqid_insert(areq); - } - - error = kcf_disp_sw_request(areq); - /* - * There is an error processing this - * request. Remove the handle and - * release the request structure. - */ - if (error != CRYPTO_QUEUED) { - if (!(crq->cr_flag - & CRYPTO_SKIP_REQID)) - kcf_reqid_delete(areq); - KCF_AREQ_REFRELE(areq); - } - } - } - break; - - case CRYPTO_HW_PROVIDER: - /* - * We need to queue the request and return. - */ - areq = kcf_areqnode_alloc(pd, kcf_ctx, crq, params, - cont); - if (areq == NULL) { - error = CRYPTO_HOST_MEMORY; - goto done; - } - - ASSERT(taskq != NULL); - /* - * We can not tell from taskq_dispatch() return - * value if we exceeded maxalloc. Hence the check - * here. - */ - if (taskq->tq_nalloc >= crypto_taskq_maxalloc) { - error = CRYPTO_BUSY; - KCF_AREQ_REFRELE(areq); - goto done; - } - - if (!(crq->cr_flag & CRYPTO_SKIP_REQID)) { - /* - * Set the request handle. This handle is used - * for any crypto_cancel_req(9f) calls from the - * consumer. We have to do this before dispatching - * the request. - */ - crq->cr_reqid = kcf_reqid_insert(areq); - } - - if (taskq_dispatch(taskq, - process_req_hwp, areq, TQ_NOSLEEP) == - TASKQID_INVALID) { - error = CRYPTO_HOST_MEMORY; - if (!(crq->cr_flag & CRYPTO_SKIP_REQID)) - kcf_reqid_delete(areq); - KCF_AREQ_REFRELE(areq); - } else { - error = CRYPTO_QUEUED; - } - break; - - default: - error = CRYPTO_FAILED; - break; - } - } - -done: - return (error); -} - -/* * We're done with this framework context, so free it. Note that freeing * framework context (kcf_context) frees the global context (crypto_ctx). 
* @@ -817,12 +79,6 @@ kcf_free_context(kcf_context_t *kcf_ctx) { kcf_provider_desc_t *pd = kcf_ctx->kc_prov_desc; crypto_ctx_t *gctx = &kcf_ctx->kc_glbl_ctx; - kcf_context_t *kcf_secondctx = kcf_ctx->kc_secondctx; - - /* Release the second context, if any */ - - if (kcf_secondctx != NULL) - KCF_CONTEXT_REFRELE(kcf_secondctx); if (gctx->cc_provider_private != NULL) { mutex_enter(&pd->pd_lock); @@ -844,250 +100,37 @@ kcf_free_context(kcf_context_t *kcf_ctx) /* kcf_ctx->kc_prov_desc has a hold on pd */ KCF_PROV_REFRELE(kcf_ctx->kc_prov_desc); - /* check if this context is shared with a software provider */ - if ((gctx->cc_flags & CRYPTO_INIT_OPSTATE) && - kcf_ctx->kc_sw_prov_desc != NULL) { - KCF_PROV_REFRELE(kcf_ctx->kc_sw_prov_desc); - } - kmem_cache_free(kcf_context_cache, kcf_ctx); } /* - * Free the request after releasing all the holds. - */ -void -kcf_free_req(kcf_areq_node_t *areq) -{ - KCF_PROV_REFRELE(areq->an_provider); - if (areq->an_context != NULL) - KCF_CONTEXT_REFRELE(areq->an_context); - - if (areq->an_tried_plist != NULL) - kcf_free_triedlist(areq->an_tried_plist); - kmem_cache_free(kcf_areq_cache, areq); -} - -/* - * Utility routine to remove a request from the chain of requests - * hanging off a context. - */ -static void -kcf_removereq_in_ctxchain(kcf_context_t *ictx, kcf_areq_node_t *areq) -{ - kcf_areq_node_t *cur, *prev; - - /* - * Get context lock, search for areq in the chain and remove it. 
- */ - ASSERT(ictx != NULL); - mutex_enter(&ictx->kc_in_use_lock); - prev = cur = ictx->kc_req_chain_first; - - while (cur != NULL) { - if (cur == areq) { - if (prev == cur) { - if ((ictx->kc_req_chain_first = - cur->an_ctxchain_next) == NULL) - ictx->kc_req_chain_last = NULL; - } else { - if (cur == ictx->kc_req_chain_last) - ictx->kc_req_chain_last = prev; - prev->an_ctxchain_next = cur->an_ctxchain_next; - } - - break; - } - prev = cur; - cur = cur->an_ctxchain_next; - } - mutex_exit(&ictx->kc_in_use_lock); -} - -/* - * Remove the specified node from the global software queue. - * - * The caller must hold the queue lock and request lock (an_lock). - */ -static void -kcf_remove_node(kcf_areq_node_t *node) -{ - kcf_areq_node_t *nextp = node->an_next; - kcf_areq_node_t *prevp = node->an_prev; - - if (nextp != NULL) - nextp->an_prev = prevp; - else - gswq->gs_last = prevp; - - if (prevp != NULL) - prevp->an_next = nextp; - else - gswq->gs_first = nextp; - - node->an_state = REQ_CANCELED; -} - -/* - * Add the request node to the end of the global software queue. - * - * The caller should not hold the queue lock. Returns 0 if the - * request is successfully queued. Returns CRYPTO_BUSY if the limit - * on the number of jobs is exceeded. - */ -static int -kcf_enqueue(kcf_areq_node_t *node) -{ - kcf_areq_node_t *tnode; - - mutex_enter(&gswq->gs_lock); - - if (gswq->gs_njobs >= gswq->gs_maxjobs) { - mutex_exit(&gswq->gs_lock); - return (CRYPTO_BUSY); - } - - if (gswq->gs_last == NULL) { - gswq->gs_first = gswq->gs_last = node; - } else { - ASSERT(gswq->gs_last->an_next == NULL); - tnode = gswq->gs_last; - tnode->an_next = node; - gswq->gs_last = node; - node->an_prev = tnode; - } - - gswq->gs_njobs++; - - /* an_lock not needed here as we hold gs_lock */ - node->an_state = REQ_WAITING; - - mutex_exit(&gswq->gs_lock); - - return (0); -} - -/* - * kmem_cache_alloc constructor for sync request structure. 
- */ -/* ARGSUSED */ -static int -kcf_sreq_cache_constructor(void *buf, void *cdrarg, int kmflags) -{ - kcf_sreq_node_t *sreq = (kcf_sreq_node_t *)buf; - - sreq->sn_type = CRYPTO_SYNCH; - cv_init(&sreq->sn_cv, NULL, CV_DEFAULT, NULL); - mutex_init(&sreq->sn_lock, NULL, MUTEX_DEFAULT, NULL); - - return (0); -} - -/* ARGSUSED */ -static void -kcf_sreq_cache_destructor(void *buf, void *cdrarg) -{ - kcf_sreq_node_t *sreq = (kcf_sreq_node_t *)buf; - - mutex_destroy(&sreq->sn_lock); - cv_destroy(&sreq->sn_cv); -} - -/* - * kmem_cache_alloc constructor for async request structure. - */ -/* ARGSUSED */ -static int -kcf_areq_cache_constructor(void *buf, void *cdrarg, int kmflags) -{ - kcf_areq_node_t *areq = (kcf_areq_node_t *)buf; - - areq->an_type = CRYPTO_ASYNCH; - areq->an_refcnt = 0; - mutex_init(&areq->an_lock, NULL, MUTEX_DEFAULT, NULL); - cv_init(&areq->an_done, NULL, CV_DEFAULT, NULL); - cv_init(&areq->an_turn_cv, NULL, CV_DEFAULT, NULL); - - return (0); -} - -/* ARGSUSED */ -static void -kcf_areq_cache_destructor(void *buf, void *cdrarg) -{ - kcf_areq_node_t *areq = (kcf_areq_node_t *)buf; - - ASSERT(areq->an_refcnt == 0); - mutex_destroy(&areq->an_lock); - cv_destroy(&areq->an_done); - cv_destroy(&areq->an_turn_cv); -} - -/* * kmem_cache_alloc constructor for kcf_context structure. 
*/ -/* ARGSUSED */ static int kcf_context_cache_constructor(void *buf, void *cdrarg, int kmflags) { + (void) cdrarg, (void) kmflags; kcf_context_t *kctx = (kcf_context_t *)buf; kctx->kc_refcnt = 0; - mutex_init(&kctx->kc_in_use_lock, NULL, MUTEX_DEFAULT, NULL); return (0); } -/* ARGSUSED */ static void kcf_context_cache_destructor(void *buf, void *cdrarg) { + (void) cdrarg; kcf_context_t *kctx = (kcf_context_t *)buf; ASSERT(kctx->kc_refcnt == 0); - mutex_destroy(&kctx->kc_in_use_lock); } void kcf_sched_destroy(void) { - int i; - - if (kcf_misc_kstat) - kstat_delete(kcf_misc_kstat); - - if (kcfpool) { - mutex_destroy(&kcfpool->kp_thread_lock); - cv_destroy(&kcfpool->kp_nothr_cv); - mutex_destroy(&kcfpool->kp_user_lock); - cv_destroy(&kcfpool->kp_user_cv); - - kmem_free(kcfpool, sizeof (kcf_pool_t)); - } - - for (i = 0; i < REQID_TABLES; i++) { - if (kcf_reqid_table[i]) { - mutex_destroy(&(kcf_reqid_table[i]->rt_lock)); - kmem_free(kcf_reqid_table[i], - sizeof (kcf_reqid_table_t)); - } - } - - if (gswq) { - mutex_destroy(&gswq->gs_lock); - cv_destroy(&gswq->gs_cv); - kmem_free(gswq, sizeof (kcf_global_swq_t)); - } - if (kcf_context_cache) kmem_cache_destroy(kcf_context_cache); - if (kcf_areq_cache) - kmem_cache_destroy(kcf_areq_cache); - if (kcf_sreq_cache) - kmem_cache_destroy(kcf_sreq_cache); - - mutex_destroy(&ntfy_list_lock); - cv_destroy(&ntfy_list_cv); } /* @@ -1096,9 +139,6 @@ kcf_sched_destroy(void) void kcf_sched_init(void) { - int i; - kcf_reqid_table_t *rt; - /* * Create all the kmem caches needed by the framework. We set the * align argument to 64, to get a slab aligned to 64-byte as well as @@ -1106,675 +146,7 @@ kcf_sched_init(void) * This helps to avoid false sharing as this is the size of the * CPU cache line. 
*/ - kcf_sreq_cache = kmem_cache_create("kcf_sreq_cache", - sizeof (struct kcf_sreq_node), 64, kcf_sreq_cache_constructor, - kcf_sreq_cache_destructor, NULL, NULL, NULL, 0); - - kcf_areq_cache = kmem_cache_create("kcf_areq_cache", - sizeof (struct kcf_areq_node), 64, kcf_areq_cache_constructor, - kcf_areq_cache_destructor, NULL, NULL, NULL, 0); - kcf_context_cache = kmem_cache_create("kcf_context_cache", sizeof (struct kcf_context), 64, kcf_context_cache_constructor, kcf_context_cache_destructor, NULL, NULL, NULL, 0); - - gswq = kmem_alloc(sizeof (kcf_global_swq_t), KM_SLEEP); - - mutex_init(&gswq->gs_lock, NULL, MUTEX_DEFAULT, NULL); - cv_init(&gswq->gs_cv, NULL, CV_DEFAULT, NULL); - gswq->gs_njobs = 0; - gswq->gs_maxjobs = kcf_maxthreads * crypto_taskq_maxalloc; - gswq->gs_first = gswq->gs_last = NULL; - - /* Initialize the global reqid table */ - for (i = 0; i < REQID_TABLES; i++) { - rt = kmem_zalloc(sizeof (kcf_reqid_table_t), KM_SLEEP); - kcf_reqid_table[i] = rt; - mutex_init(&rt->rt_lock, NULL, MUTEX_DEFAULT, NULL); - rt->rt_curid = i; - } - - /* Allocate and initialize the thread pool */ - kcfpool_alloc(); - - /* Initialize the event notification list variables */ - mutex_init(&ntfy_list_lock, NULL, MUTEX_DEFAULT, NULL); - cv_init(&ntfy_list_cv, NULL, CV_DEFAULT, NULL); - - /* Create the kcf kstat */ - kcf_misc_kstat = kstat_create("kcf", 0, "framework_stats", "crypto", - KSTAT_TYPE_NAMED, sizeof (kcf_stats_t) / sizeof (kstat_named_t), - KSTAT_FLAG_VIRTUAL); - - if (kcf_misc_kstat != NULL) { - kcf_misc_kstat->ks_data = &kcf_ksdata; - kcf_misc_kstat->ks_update = kcf_misc_kstat_update; - kstat_install(kcf_misc_kstat); - } -} - -/* - * Signal the waiting sync client. - */ -void -kcf_sop_done(kcf_sreq_node_t *sreq, int error) -{ - mutex_enter(&sreq->sn_lock); - sreq->sn_state = REQ_DONE; - sreq->sn_rv = error; - cv_signal(&sreq->sn_cv); - mutex_exit(&sreq->sn_lock); -} - -/* - * Callback the async client with the operation status. 
- * We free the async request node and possibly the context. - * We also handle any chain of requests hanging off of - * the context. - */ -void -kcf_aop_done(kcf_areq_node_t *areq, int error) -{ - kcf_op_type_t optype; - boolean_t skip_notify = B_FALSE; - kcf_context_t *ictx; - kcf_areq_node_t *nextreq; - - /* - * Handle recoverable errors. This has to be done first - * before doing anything else in this routine so that - * we do not change the state of the request. - */ - if (error != CRYPTO_SUCCESS && IS_RECOVERABLE(error)) { - /* - * We try another provider, if one is available. Else - * we continue with the failure notification to the - * client. - */ - if (kcf_resubmit_request(areq) == CRYPTO_QUEUED) - return; - } - - mutex_enter(&areq->an_lock); - areq->an_state = REQ_DONE; - mutex_exit(&areq->an_lock); - - optype = (&areq->an_params)->rp_optype; - if ((ictx = areq->an_context) != NULL) { - /* - * A request after it is removed from the request - * queue, still stays on a chain of requests hanging - * of its context structure. It needs to be removed - * from this chain at this point. - */ - mutex_enter(&ictx->kc_in_use_lock); - nextreq = areq->an_ctxchain_next; - if (nextreq != NULL) { - mutex_enter(&nextreq->an_lock); - nextreq->an_is_my_turn = B_TRUE; - cv_signal(&nextreq->an_turn_cv); - mutex_exit(&nextreq->an_lock); - } - - ictx->kc_req_chain_first = nextreq; - if (nextreq == NULL) - ictx->kc_req_chain_last = NULL; - mutex_exit(&ictx->kc_in_use_lock); - - if (IS_SINGLE_OP(optype) || IS_FINAL_OP(optype)) { - ASSERT(nextreq == NULL); - KCF_CONTEXT_REFRELE(ictx); - } else if (error != CRYPTO_SUCCESS && IS_INIT_OP(optype)) { - /* - * NOTE - We do not release the context in case of update - * operations. We require the consumer to free it explicitly, - * in case it wants to abandon an update operation. This is done - * as there may be mechanisms in ECB mode that can continue - * even if an operation on a block fails. 
- */ - KCF_CONTEXT_REFRELE(ictx); - } - } - - /* Deal with the internal continuation to this request first */ - - if (areq->an_isdual) { - kcf_dual_req_t *next_arg; - next_arg = (kcf_dual_req_t *)areq->an_reqarg.cr_callback_arg; - next_arg->kr_areq = areq; - KCF_AREQ_REFHOLD(areq); - areq->an_isdual = B_FALSE; - - NOTIFY_CLIENT(areq, error); - return; - } - - /* - * If CRYPTO_NOTIFY_OPDONE flag is set, we should notify - * always. If this flag is clear, we skip the notification - * provided there are no errors. We check this flag for only - * init or update operations. It is ignored for single, final or - * atomic operations. - */ - skip_notify = (IS_UPDATE_OP(optype) || IS_INIT_OP(optype)) && - (!(areq->an_reqarg.cr_flag & CRYPTO_NOTIFY_OPDONE)) && - (error == CRYPTO_SUCCESS); - - if (!skip_notify) { - NOTIFY_CLIENT(areq, error); - } - - if (!(areq->an_reqarg.cr_flag & CRYPTO_SKIP_REQID)) - kcf_reqid_delete(areq); - - KCF_AREQ_REFRELE(areq); -} - -/* - * Allocate the thread pool and initialize all the fields. - */ -static void -kcfpool_alloc() -{ - kcfpool = kmem_alloc(sizeof (kcf_pool_t), KM_SLEEP); - - kcfpool->kp_threads = kcfpool->kp_idlethreads = 0; - kcfpool->kp_blockedthreads = 0; - kcfpool->kp_signal_create_thread = B_FALSE; - kcfpool->kp_nthrs = 0; - kcfpool->kp_user_waiting = B_FALSE; - - mutex_init(&kcfpool->kp_thread_lock, NULL, MUTEX_DEFAULT, NULL); - cv_init(&kcfpool->kp_nothr_cv, NULL, CV_DEFAULT, NULL); - - mutex_init(&kcfpool->kp_user_lock, NULL, MUTEX_DEFAULT, NULL); - cv_init(&kcfpool->kp_user_cv, NULL, CV_DEFAULT, NULL); - - kcf_idlethr_timeout = KCF_DEFAULT_THRTIMEOUT; -} - -/* - * Insert the async request in the hash table after assigning it - * an ID. Returns the ID. - * - * The ID is used by the caller to pass as an argument to a - * cancel_req() routine later. 
- */ -static crypto_req_id_t -kcf_reqid_insert(kcf_areq_node_t *areq) -{ - int indx; - crypto_req_id_t id; - kcf_areq_node_t *headp; - kcf_reqid_table_t *rt; - - rt = kcf_reqid_table[CPU_SEQID_UNSTABLE & REQID_TABLE_MASK]; - - mutex_enter(&rt->rt_lock); - - rt->rt_curid = id = - (rt->rt_curid - REQID_COUNTER_LOW) | REQID_COUNTER_HIGH; - SET_REQID(areq, id); - indx = REQID_HASH(id); - headp = areq->an_idnext = rt->rt_idhash[indx]; - areq->an_idprev = NULL; - if (headp != NULL) - headp->an_idprev = areq; - - rt->rt_idhash[indx] = areq; - mutex_exit(&rt->rt_lock); - - return (id); -} - -/* - * Delete the async request from the hash table. - */ -static void -kcf_reqid_delete(kcf_areq_node_t *areq) -{ - int indx; - kcf_areq_node_t *nextp, *prevp; - crypto_req_id_t id = GET_REQID(areq); - kcf_reqid_table_t *rt; - - rt = kcf_reqid_table[id & REQID_TABLE_MASK]; - indx = REQID_HASH(id); - - mutex_enter(&rt->rt_lock); - - nextp = areq->an_idnext; - prevp = areq->an_idprev; - if (nextp != NULL) - nextp->an_idprev = prevp; - if (prevp != NULL) - prevp->an_idnext = nextp; - else - rt->rt_idhash[indx] = nextp; - - SET_REQID(areq, 0); - cv_broadcast(&areq->an_done); - - mutex_exit(&rt->rt_lock); -} - -/* - * Cancel a single asynchronous request. - * - * We guarantee that no problems will result from calling - * crypto_cancel_req() for a request which is either running, or - * has already completed. We remove the request from any queues - * if it is possible. We wait for request completion if the - * request is dispatched to a provider. - * - * Calling context: - * Can be called from user context only. - * - * NOTE: We acquire the following locks in this routine (in order): - * - rt_lock (kcf_reqid_table_t) - * - gswq->gs_lock - * - areq->an_lock - * - ictx->kc_in_use_lock (from kcf_removereq_in_ctxchain()) - * - * This locking order MUST be maintained in code every where else. 
- */ -void -crypto_cancel_req(crypto_req_id_t id) -{ - int indx; - kcf_areq_node_t *areq; - kcf_provider_desc_t *pd; - kcf_context_t *ictx; - kcf_reqid_table_t *rt; - - rt = kcf_reqid_table[id & REQID_TABLE_MASK]; - indx = REQID_HASH(id); - - mutex_enter(&rt->rt_lock); - for (areq = rt->rt_idhash[indx]; areq; areq = areq->an_idnext) { - if (GET_REQID(areq) == id) { - /* - * We found the request. It is either still waiting - * in the framework queues or running at the provider. - */ - pd = areq->an_provider; - ASSERT(pd != NULL); - - switch (pd->pd_prov_type) { - case CRYPTO_SW_PROVIDER: - mutex_enter(&gswq->gs_lock); - mutex_enter(&areq->an_lock); - - /* This request can be safely canceled. */ - if (areq->an_state <= REQ_WAITING) { - /* Remove from gswq, global software queue. */ - kcf_remove_node(areq); - if ((ictx = areq->an_context) != NULL) - kcf_removereq_in_ctxchain(ictx, areq); - - mutex_exit(&areq->an_lock); - mutex_exit(&gswq->gs_lock); - mutex_exit(&rt->rt_lock); - - /* Remove areq from hash table and free it. */ - kcf_reqid_delete(areq); - KCF_AREQ_REFRELE(areq); - return; - } - - mutex_exit(&areq->an_lock); - mutex_exit(&gswq->gs_lock); - break; - - case CRYPTO_HW_PROVIDER: - /* - * There is no interface to remove an entry - * once it is on the taskq. So, we do not do - * anything for a hardware provider. - */ - break; - default: - break; - } - - /* - * The request is running. Wait for the request completion - * to notify us. - */ - KCF_AREQ_REFHOLD(areq); - while (GET_REQID(areq) == id) - cv_wait(&areq->an_done, &rt->rt_lock); - KCF_AREQ_REFRELE(areq); - break; - } - } - - mutex_exit(&rt->rt_lock); -} - -/* - * Cancel all asynchronous requests associated with the - * passed in crypto context and free it. - * - * A client SHOULD NOT call this routine after calling a crypto_*_final - * routine. This routine is called only during intermediate operations. - * The client should not use the crypto context after this function returns - * since we destroy it. 
- * - * Calling context: - * Can be called from user context only. - */ -void -crypto_cancel_ctx(crypto_context_t ctx) -{ - kcf_context_t *ictx; - kcf_areq_node_t *areq; - - if (ctx == NULL) - return; - - ictx = (kcf_context_t *)((crypto_ctx_t *)ctx)->cc_framework_private; - - mutex_enter(&ictx->kc_in_use_lock); - - /* Walk the chain and cancel each request */ - while ((areq = ictx->kc_req_chain_first) != NULL) { - /* - * We have to drop the lock here as we may have - * to wait for request completion. We hold the - * request before dropping the lock though, so that it - * won't be freed underneath us. - */ - KCF_AREQ_REFHOLD(areq); - mutex_exit(&ictx->kc_in_use_lock); - - crypto_cancel_req(GET_REQID(areq)); - KCF_AREQ_REFRELE(areq); - - mutex_enter(&ictx->kc_in_use_lock); - } - - mutex_exit(&ictx->kc_in_use_lock); - KCF_CONTEXT_REFRELE(ictx); -} - -/* - * Update kstats. - */ -static int -kcf_misc_kstat_update(kstat_t *ksp, int rw) -{ - uint_t tcnt; - kcf_stats_t *ks_data; - - if (rw == KSTAT_WRITE) - return (EACCES); - - ks_data = ksp->ks_data; - - ks_data->ks_thrs_in_pool.value.ui32 = kcfpool->kp_threads; - /* - * The failover thread is counted in kp_idlethreads in - * some corner cases. This is done to avoid doing more checks - * when submitting a request. We account for those cases below. 
- */ - if ((tcnt = kcfpool->kp_idlethreads) == (kcfpool->kp_threads + 1)) - tcnt--; - ks_data->ks_idle_thrs.value.ui32 = tcnt; - ks_data->ks_minthrs.value.ui32 = kcf_minthreads; - ks_data->ks_maxthrs.value.ui32 = kcf_maxthreads; - ks_data->ks_swq_njobs.value.ui32 = gswq->gs_njobs; - ks_data->ks_swq_maxjobs.value.ui32 = gswq->gs_maxjobs; - ks_data->ks_taskq_threads.value.ui32 = crypto_taskq_threads; - ks_data->ks_taskq_minalloc.value.ui32 = crypto_taskq_minalloc; - ks_data->ks_taskq_maxalloc.value.ui32 = crypto_taskq_maxalloc; - - return (0); -} - -/* - * Allocate and initialize a kcf_dual_req, used for saving the arguments of - * a dual operation or an atomic operation that has to be internally - * simulated with multiple single steps. - * crq determines the memory allocation flags. - */ - -kcf_dual_req_t * -kcf_alloc_req(crypto_call_req_t *crq) -{ - kcf_dual_req_t *kcr; - - kcr = kmem_alloc(sizeof (kcf_dual_req_t), KCF_KMFLAG(crq)); - - if (kcr == NULL) - return (NULL); - - /* Copy the whole crypto_call_req struct, as it isn't persistent */ - if (crq != NULL) - kcr->kr_callreq = *crq; - else - bzero(&(kcr->kr_callreq), sizeof (crypto_call_req_t)); - kcr->kr_areq = NULL; - kcr->kr_saveoffset = 0; - kcr->kr_savelen = 0; - - return (kcr); -} - -/* - * Callback routine for the next part of a simulated dual part. - * Schedules the next step. - * - * This routine can be called from interrupt context. 
- */ -void -kcf_next_req(void *next_req_arg, int status) -{ - kcf_dual_req_t *next_req = (kcf_dual_req_t *)next_req_arg; - kcf_req_params_t *params = &(next_req->kr_params); - kcf_areq_node_t *areq = next_req->kr_areq; - int error = status; - kcf_provider_desc_t *pd = NULL; - crypto_dual_data_t *ct = NULL; - - /* Stop the processing if an error occurred at this step */ - if (error != CRYPTO_SUCCESS) { -out: - areq->an_reqarg = next_req->kr_callreq; - KCF_AREQ_REFRELE(areq); - kmem_free(next_req, sizeof (kcf_dual_req_t)); - areq->an_isdual = B_FALSE; - kcf_aop_done(areq, error); - return; - } - - switch (params->rp_opgrp) { - case KCF_OG_MAC: { - - /* - * The next req is submitted with the same reqid as the - * first part. The consumer only got back that reqid, and - * should still be able to cancel the operation during its - * second step. - */ - kcf_mac_ops_params_t *mops = &(params->rp_u.mac_params); - crypto_ctx_template_t mac_tmpl; - kcf_mech_entry_t *me; - - ct = (crypto_dual_data_t *)mops->mo_data; - mac_tmpl = (crypto_ctx_template_t)mops->mo_templ; - - /* No expected recoverable failures, so no retry list */ - pd = kcf_get_mech_provider(mops->mo_framework_mechtype, - &me, &error, NULL, CRYPTO_FG_MAC_ATOMIC, - (areq->an_reqarg.cr_flag & CRYPTO_RESTRICTED), ct->dd_len2); - - if (pd == NULL) { - error = CRYPTO_MECH_NOT_SUPPORTED; - goto out; - } - /* Validate the MAC context template here */ - if ((pd->pd_prov_type == CRYPTO_SW_PROVIDER) && - (mac_tmpl != NULL)) { - kcf_ctx_template_t *ctx_mac_tmpl; - - ctx_mac_tmpl = (kcf_ctx_template_t *)mac_tmpl; - - if (ctx_mac_tmpl->ct_generation != me->me_gen_swprov) { - KCF_PROV_REFRELE(pd); - error = CRYPTO_OLD_CTX_TEMPLATE; - goto out; - } - mops->mo_templ = ctx_mac_tmpl->ct_prov_tmpl; - } - - break; - } - case KCF_OG_DECRYPT: { - kcf_decrypt_ops_params_t *dcrops = - &(params->rp_u.decrypt_params); - - ct = (crypto_dual_data_t *)dcrops->dop_ciphertext; - /* No expected recoverable failures, so no retry list */ - pd = 
kcf_get_mech_provider(dcrops->dop_framework_mechtype, - NULL, &error, NULL, CRYPTO_FG_DECRYPT_ATOMIC, - (areq->an_reqarg.cr_flag & CRYPTO_RESTRICTED), ct->dd_len1); - - if (pd == NULL) { - error = CRYPTO_MECH_NOT_SUPPORTED; - goto out; - } - break; - } - default: - break; - } - - /* The second step uses len2 and offset2 of the dual_data */ - next_req->kr_saveoffset = ct->dd_offset1; - next_req->kr_savelen = ct->dd_len1; - ct->dd_offset1 = ct->dd_offset2; - ct->dd_len1 = ct->dd_len2; - - /* preserve if the caller is restricted */ - if (areq->an_reqarg.cr_flag & CRYPTO_RESTRICTED) { - areq->an_reqarg.cr_flag = CRYPTO_RESTRICTED; - } else { - areq->an_reqarg.cr_flag = 0; - } - - areq->an_reqarg.cr_callback_func = kcf_last_req; - areq->an_reqarg.cr_callback_arg = next_req; - areq->an_isdual = B_TRUE; - - /* - * We would like to call kcf_submit_request() here. But, - * that is not possible as that routine allocates a new - * kcf_areq_node_t request structure, while we need to - * reuse the existing request structure. - */ - switch (pd->pd_prov_type) { - case CRYPTO_SW_PROVIDER: - error = common_submit_request(pd, NULL, params, - KCF_RHNDL(KM_NOSLEEP)); - break; - - case CRYPTO_HW_PROVIDER: { - kcf_provider_desc_t *old_pd; - taskq_t *taskq = pd->pd_sched_info.ks_taskq; - - /* - * Set the params for the second step in the - * dual-ops. - */ - areq->an_params = *params; - old_pd = areq->an_provider; - KCF_PROV_REFRELE(old_pd); - KCF_PROV_REFHOLD(pd); - areq->an_provider = pd; - - /* - * Note that we have to do a taskq_dispatch() - * here as we may be in interrupt context. - */ - if (taskq_dispatch(taskq, process_req_hwp, areq, - TQ_NOSLEEP) == (taskqid_t)0) { - error = CRYPTO_HOST_MEMORY; - } else { - error = CRYPTO_QUEUED; - } - break; - } - default: - break; - } - - /* - * We have to release the holds on the request and the provider - * in all cases. 
- */ - KCF_AREQ_REFRELE(areq); - KCF_PROV_REFRELE(pd); - - if (error != CRYPTO_QUEUED) { - /* restore, clean up, and invoke the client's callback */ - - ct->dd_offset1 = next_req->kr_saveoffset; - ct->dd_len1 = next_req->kr_savelen; - areq->an_reqarg = next_req->kr_callreq; - kmem_free(next_req, sizeof (kcf_dual_req_t)); - areq->an_isdual = B_FALSE; - kcf_aop_done(areq, error); - } -} - -/* - * Last part of an emulated dual operation. - * Clean up and restore ... - */ -void -kcf_last_req(void *last_req_arg, int status) -{ - kcf_dual_req_t *last_req = (kcf_dual_req_t *)last_req_arg; - - kcf_req_params_t *params = &(last_req->kr_params); - kcf_areq_node_t *areq = last_req->kr_areq; - crypto_dual_data_t *ct = NULL; - - switch (params->rp_opgrp) { - case KCF_OG_MAC: { - kcf_mac_ops_params_t *mops = &(params->rp_u.mac_params); - - ct = (crypto_dual_data_t *)mops->mo_data; - break; - } - case KCF_OG_DECRYPT: { - kcf_decrypt_ops_params_t *dcrops = - &(params->rp_u.decrypt_params); - - ct = (crypto_dual_data_t *)dcrops->dop_ciphertext; - break; - } - default: { - panic("invalid kcf_op_group_t %d", (int)params->rp_opgrp); - return; - } - } - ct->dd_offset1 = last_req->kr_saveoffset; - ct->dd_len1 = last_req->kr_savelen; - - /* The submitter used kcf_last_req as its callback */ - - if (areq == NULL) { - crypto_call_req_t *cr = &last_req->kr_callreq; - - (*(cr->cr_callback_func))(cr->cr_callback_arg, status); - kmem_free(last_req, sizeof (kcf_dual_req_t)); - return; - } - areq->an_reqarg = last_req->kr_callreq; - KCF_AREQ_REFRELE(areq); - kmem_free(last_req, sizeof (kcf_dual_req_t)); - areq->an_isdual = B_FALSE; - kcf_aop_done(areq, status); } diff --git a/sys/contrib/openzfs/module/icp/illumos-crypto.c b/sys/contrib/openzfs/module/icp/illumos-crypto.c index 3c5ef4393940..13f05c06ed5c 100644 --- a/sys/contrib/openzfs/module/icp/illumos-crypto.c +++ b/sys/contrib/openzfs/module/icp/illumos-crypto.c @@ -7,7 +7,7 @@ * with the License. 
* * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * @@ -36,7 +36,6 @@ #include <sys/crypto/api.h> #include <sys/crypto/impl.h> #include <sys/crypto/sched_impl.h> -#include <sys/modhash_impl.h> #include <sys/crypto/icp.h> /* @@ -61,16 +60,15 @@ * the future it may make sense to have encryption algorithms that are * loadable into the ICP at runtime via separate kernel modules. * However, considering that this code will probably not see much use - * outside of zfs and zfs encryption only requires aes and sha256 + * outside of zfs and zfs encryption only requires a select few * algorithms it seemed like more trouble than it was worth to port over * Illumos's kernel module structure to a Linux kernel module. In * addition, The Illumos code related to keeping track of kernel modules - * is very much tied to the Illumos OS and proved difficult to port to - * Linux. Therefore, the structure of the ICP was simplified to work - * statically and several pieces of code responsible for keeping track - * of Illumos kernel modules were removed and simplified. All module - * initialization and destruction is now called in this file during - * Linux kernel module loading and unloading. + * is very much tied to the Illumos OS and proved difficult to port. + * Therefore, the structure of the ICP was simplified to work + * statically and all the Illumos kernel module loading subsystem was removed. + * All module initialization and destruction is now called in this file + * during kernel module loading and unloading. * * 4) Adding destructors: The Illumos Crypto Layer is built into * the Illumos kernel and is not meant to be unloaded. Some destructors @@ -106,27 +104,21 @@ * ZFS Makefiles. 
*/ -void __exit +void icp_fini(void) { skein_mod_fini(); sha2_mod_fini(); - sha1_mod_fini(); - edonr_mod_fini(); aes_mod_fini(); kcf_sched_destroy(); kcf_prov_tab_destroy(); kcf_destroy_mech_tabs(); - mod_hash_fini(); } /* roughly equivalent to kcf.c: _init() */ int __init icp_init(void) { - /* initialize the mod hash module */ - mod_hash_init(); - /* initialize the mechanisms tables supported out-of-the-box */ kcf_init_mech_tabs(); @@ -141,18 +133,13 @@ icp_init(void) /* initialize algorithms */ aes_mod_init(); - edonr_mod_init(); - sha1_mod_init(); sha2_mod_init(); skein_mod_init(); return (0); } -#if defined(_KERNEL) +#if defined(_KERNEL) && defined(__FreeBSD__) module_exit(icp_fini); module_init(icp_init); -MODULE_AUTHOR(ZFS_META_AUTHOR); -MODULE_LICENSE(ZFS_META_LICENSE); -MODULE_VERSION(ZFS_META_VERSION "-" ZFS_META_RELEASE); #endif diff --git a/sys/contrib/openzfs/module/icp/include/aes/aes_impl.h b/sys/contrib/openzfs/module/icp/include/aes/aes_impl.h index 41dccaa3848a..66eb4a6c8fb6 100644 --- a/sys/contrib/openzfs/module/icp/include/aes/aes_impl.h +++ b/sys/contrib/openzfs/module/icp/include/aes/aes_impl.h @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. 
* @@ -36,6 +36,7 @@ extern "C" { #include <sys/zfs_context.h> #include <sys/crypto/common.h> +#include <sys/asm_linkage.h> /* Similar to sysmacros.h IS_P2ALIGNED, but checks two pointers: */ #define IS_P2ALIGNED2(v, w, a) \ @@ -83,14 +84,7 @@ extern "C" { /* AES key size definitions */ #define AES_MINBITS 128 -#define AES_MINBYTES ((AES_MINBITS) >> 3) #define AES_MAXBITS 256 -#define AES_MAXBYTES ((AES_MAXBITS) >> 3) - -#define AES_MIN_KEY_BYTES ((AES_MINBITS) >> 3) -#define AES_MAX_KEY_BYTES ((AES_MAXBITS) >> 3) -#define AES_192_KEY_BYTES 24 -#define AES_IV_LEN 16 /* AES key schedule may be implemented with 32- or 64-bit elements: */ #define AES_32BIT_KS 32 @@ -197,13 +191,13 @@ extern const aes_impl_ops_t aes_generic_impl; extern const aes_impl_ops_t aes_x86_64_impl; /* These functions are used to execute amd64 instructions for AMD or Intel: */ -extern int rijndael_key_setup_enc_amd64(uint32_t rk[], +extern ASMABI int rijndael_key_setup_enc_amd64(uint32_t rk[], const uint32_t cipherKey[], int keyBits); -extern int rijndael_key_setup_dec_amd64(uint32_t rk[], +extern ASMABI int rijndael_key_setup_dec_amd64(uint32_t rk[], const uint32_t cipherKey[], int keyBits); -extern void aes_encrypt_amd64(const uint32_t rk[], int Nr, +extern ASMABI void aes_encrypt_amd64(const uint32_t rk[], int Nr, const uint32_t pt[4], uint32_t ct[4]); -extern void aes_decrypt_amd64(const uint32_t rk[], int Nr, +extern ASMABI void aes_decrypt_amd64(const uint32_t rk[], int Nr, const uint32_t ct[4], uint32_t pt[4]); #endif #if defined(__x86_64) && defined(HAVE_AES) diff --git a/sys/contrib/openzfs/module/icp/include/generic_impl.c b/sys/contrib/openzfs/module/icp/include/generic_impl.c new file mode 100644 index 000000000000..16f802cf7558 --- /dev/null +++ b/sys/contrib/openzfs/module/icp/include/generic_impl.c @@ -0,0 +1,233 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). 
+ * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or https://opensource.org/licenses/CDDL-1.0. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Copyright (c) 2003, 2010 Oracle and/or its affiliates. + * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de> + */ + +/* + * This file gets included by c files for implementing the full set + * of zfs_impl.h defines. + * + * It's ment for easier maintaining multiple implementations of + * algorithms. Look into blake3_impl.c, sha256_impl.c or sha512_impl.c + * for reference. 
+ */ + +#include <sys/zfs_context.h> +#include <sys/zio_checksum.h> +#include <sys/zfs_impl.h> + +/* Two default implementations */ +#define IMPL_FASTEST (UINT32_MAX) +#define IMPL_CYCLE (UINT32_MAX - 1) + +#define IMPL_READ(i) (*(volatile uint32_t *) &(i)) + +/* Implementation that contains the fastest method */ +static IMPL_OPS_T generic_fastest_impl = { + .name = "fastest" +}; + +/* Hold all supported implementations */ +static const IMPL_OPS_T *generic_supp_impls[ARRAY_SIZE(IMPL_ARRAY)]; +static uint32_t generic_supp_impls_cnt = 0; + +/* Currently selected implementation */ +static uint32_t generic_impl_chosen = IMPL_FASTEST; + +static struct generic_impl_selector { + const char *name; + uint32_t sel; +} generic_impl_selectors[] = { + { "cycle", IMPL_CYCLE }, + { "fastest", IMPL_FASTEST } +}; + +/* check the supported implementations */ +static void +generic_impl_init(void) +{ + int i, c; + + /* init only once */ + if (likely(generic_supp_impls_cnt != 0)) + return; + + /* Move supported implementations into generic_supp_impls */ + for (i = 0, c = 0; i < ARRAY_SIZE(IMPL_ARRAY); i++) { + const IMPL_OPS_T *impl = IMPL_ARRAY[i]; + + if (impl->is_supported && impl->is_supported()) + generic_supp_impls[c++] = impl; + } + generic_supp_impls_cnt = c; + + /* first init generic impl, may be changed via set_fastest() */ + memcpy(&generic_fastest_impl, generic_supp_impls[0], + sizeof (generic_fastest_impl)); +} + +/* get number of supported implementations */ +static uint32_t +generic_impl_getcnt(void) +{ + generic_impl_init(); + return (generic_supp_impls_cnt); +} + +/* get id of selected implementation */ +static uint32_t +generic_impl_getid(void) +{ + generic_impl_init(); + return (IMPL_READ(generic_impl_chosen)); +} + +/* get name of selected implementation */ +static const char * +generic_impl_getname(void) +{ + uint32_t impl = IMPL_READ(generic_impl_chosen); + + generic_impl_init(); + switch (impl) { + case IMPL_FASTEST: + return ("fastest"); + case IMPL_CYCLE: + 
return ("cycle"); + default: + return (generic_supp_impls[impl]->name); + } +} + +/* set implementation by id */ +static void +generic_impl_setid(uint32_t id) +{ + generic_impl_init(); + switch (id) { + case IMPL_FASTEST: + atomic_swap_32(&generic_impl_chosen, IMPL_FASTEST); + break; + case IMPL_CYCLE: + atomic_swap_32(&generic_impl_chosen, IMPL_CYCLE); + break; + default: + ASSERT3U(id, <, generic_supp_impls_cnt); + atomic_swap_32(&generic_impl_chosen, id); + break; + } +} + +/* set implementation by name */ +static int +generic_impl_setname(const char *val) +{ + uint32_t impl = IMPL_READ(generic_impl_chosen); + size_t val_len; + int i, err = -EINVAL; + + generic_impl_init(); + val_len = strlen(val); + while ((val_len > 0) && !!isspace(val[val_len-1])) /* trim '\n' */ + val_len--; + + /* check mandatory implementations */ + for (i = 0; i < ARRAY_SIZE(generic_impl_selectors); i++) { + const char *name = generic_impl_selectors[i].name; + + if (val_len == strlen(name) && + strncmp(val, name, val_len) == 0) { + impl = generic_impl_selectors[i].sel; + err = 0; + break; + } + } + + /* check all supported implementations */ + if (err != 0) { + for (i = 0; i < generic_supp_impls_cnt; i++) { + const char *name = generic_supp_impls[i]->name; + + if (val_len == strlen(name) && + strncmp(val, name, val_len) == 0) { + impl = i; + err = 0; + break; + } + } + } + + if (err == 0) { + atomic_swap_32(&generic_impl_chosen, impl); + } + + return (err); +} + +/* setup id as fastest implementation */ +static void +generic_impl_set_fastest(uint32_t id) +{ + generic_impl_init(); + memcpy(&generic_fastest_impl, generic_supp_impls[id], + sizeof (generic_fastest_impl)); +} + +/* return impl iterating functions */ +const zfs_impl_t ZFS_IMPL_OPS = { + .name = IMPL_NAME, + .getcnt = generic_impl_getcnt, + .getid = generic_impl_getid, + .getname = generic_impl_getname, + .set_fastest = generic_impl_set_fastest, + .setid = generic_impl_setid, + .setname = generic_impl_setname +}; + +/* get impl 
ops_t of selected implementation */ +const IMPL_OPS_T * +IMPL_GET_OPS(void) +{ + const IMPL_OPS_T *ops = NULL; + uint32_t idx, impl = IMPL_READ(generic_impl_chosen); + static uint32_t cycle_count = 0; + + generic_impl_init(); + switch (impl) { + case IMPL_FASTEST: + ops = &generic_fastest_impl; + break; + case IMPL_CYCLE: + idx = (++cycle_count) % generic_supp_impls_cnt; + ops = generic_supp_impls[idx]; + break; + default: + ASSERT3U(impl, <, generic_supp_impls_cnt); + ops = generic_supp_impls[impl]; + break; + } + + ASSERT3P(ops, !=, NULL); + return (ops); +} diff --git a/sys/contrib/openzfs/module/icp/include/modes/gcm_impl.h b/sys/contrib/openzfs/module/icp/include/modes/gcm_impl.h index 28c8f63a7d46..3afc9e2c6317 100644 --- a/sys/contrib/openzfs/module/icp/include/modes/gcm_impl.h +++ b/sys/contrib/openzfs/module/icp/include/modes/gcm_impl.h @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * diff --git a/sys/contrib/openzfs/module/icp/include/modes/modes.h b/sys/contrib/openzfs/module/icp/include/modes/modes.h index ab71197542eb..23bf46ab51a0 100644 --- a/sys/contrib/openzfs/module/icp/include/modes/modes.h +++ b/sys/contrib/openzfs/module/icp/include/modes/modes.h @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * @@ -207,10 +207,6 @@ typedef struct ccm_ctx { * * gcm_len_a_len_c: 64-bit representations of the bit lengths of * AAD and ciphertext. 
- * - * gcm_kmflag: Current value of kmflag. Used for allocating - * the plaintext buffer during decryption and a - * gcm_avx_chunk_size'd buffer for avx enabled encryption. */ typedef struct gcm_ctx { struct common_ctx gcm_common; @@ -231,7 +227,6 @@ typedef struct gcm_ctx { uint64_t gcm_J0[2]; uint64_t gcm_len_a_len_c[2]; uint8_t *gcm_pt_buf; - int gcm_kmflag; #ifdef CAN_USE_GCM_ASM boolean_t gcm_use_avx; #endif @@ -249,6 +244,8 @@ typedef struct gcm_ctx { #define AES_GMAC_IV_LEN 12 #define AES_GMAC_TAG_BITS 128 +void gcm_clear_ctx(gcm_ctx_t *ctx); + typedef struct aes_ctx { union { ecb_ctx_t acu_ecb; @@ -402,7 +399,6 @@ extern void *ccm_alloc_ctx(int); extern void *gcm_alloc_ctx(int); extern void *gmac_alloc_ctx(int); extern void crypto_free_mode_ctx(void *); -extern void gcm_set_kmflag(gcm_ctx_t *, int); #ifdef __cplusplus } diff --git a/sys/contrib/openzfs/module/icp/include/sha1/sha1.h b/sys/contrib/openzfs/module/icp/include/sha1/sha1.h deleted file mode 100644 index 251b64fcaeee..000000000000 --- a/sys/contrib/openzfs/module/icp/include/sha1/sha1.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2007 Sun Microsystems, Inc. 
All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SYS_SHA1_H -#define _SYS_SHA1_H - -#include <sys/types.h> /* for uint_* */ - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * NOTE: n2rng (Niagara2 RNG driver) accesses the state field of - * SHA1_CTX directly. NEVER change this structure without verifying - * compatibility with n2rng. The important thing is that the state - * must be in a field declared as uint32_t state[5]. - */ -/* SHA-1 context. */ -typedef struct { - uint32_t state[5]; /* state (ABCDE) */ - uint32_t count[2]; /* number of bits, modulo 2^64 (msb first) */ - union { - uint8_t buf8[64]; /* undigested input */ - uint32_t buf32[16]; /* realigned input */ - } buf_un; -} SHA1_CTX; - -#define SHA1_DIGEST_LENGTH 20 - -void SHA1Init(SHA1_CTX *); -void SHA1Update(SHA1_CTX *, const void *, size_t); -void SHA1Final(void *, SHA1_CTX *); - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_SHA1_H */ diff --git a/sys/contrib/openzfs/module/icp/include/sha1/sha1_consts.h b/sys/contrib/openzfs/module/icp/include/sha1/sha1_consts.h deleted file mode 100644 index 848d25ef050f..000000000000 --- a/sys/contrib/openzfs/module/icp/include/sha1/sha1_consts.h +++ /dev/null @@ -1,65 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright (c) 1998, by Sun Microsystems, Inc. - * All rights reserved. - */ - -#ifndef _SYS_SHA1_CONSTS_H -#define _SYS_SHA1_CONSTS_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * as explained in sha1.c, loading 32-bit constants on a sparc is expensive - * since it involves both a `sethi' and an `or'. thus, we instead use `ld' - * to load the constants from an array called `sha1_consts'. however, on - * intel (and perhaps other processors), it is cheaper to load the constant - * directly. thus, the c code in SHA1Transform() uses the macro SHA1_CONST() - * which either expands to a constant or an array reference, depending on - * the architecture the code is being compiled for. - */ - -#include <sys/types.h> /* uint32_t */ - -extern const uint32_t sha1_consts[]; - -#if defined(__sparc) -#define SHA1_CONST(x) (sha1_consts[x]) -#else -#define SHA1_CONST(x) (SHA1_CONST_ ## x) -#endif - -/* constants, as provided in FIPS 180-1 */ - -#define SHA1_CONST_0 0x5a827999U -#define SHA1_CONST_1 0x6ed9eba1U -#define SHA1_CONST_2 0x8f1bbcdcU -#define SHA1_CONST_3 0xca62c1d6U - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_SHA1_CONSTS_H */ diff --git a/sys/contrib/openzfs/module/icp/include/sha1/sha1_impl.h b/sys/contrib/openzfs/module/icp/include/sha1/sha1_impl.h deleted file mode 100644 index 1c1f8728f9b5..000000000000 --- a/sys/contrib/openzfs/module/icp/include/sha1/sha1_impl.h +++ /dev/null @@ -1,73 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. 
- * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SHA1_IMPL_H -#define _SHA1_IMPL_H - - -#ifdef __cplusplus -extern "C" { -#endif - -#define SHA1_HASH_SIZE 20 /* SHA_1 digest length in bytes */ -#define SHA1_DIGEST_LENGTH 20 /* SHA1 digest length in bytes */ -#define SHA1_HMAC_BLOCK_SIZE 64 /* SHA1-HMAC block size */ -#define SHA1_HMAC_MIN_KEY_LEN 1 /* SHA1-HMAC min key length in bytes */ -#define SHA1_HMAC_MAX_KEY_LEN INT_MAX /* SHA1-HMAC max key length in bytes */ -#define SHA1_HMAC_INTS_PER_BLOCK (SHA1_HMAC_BLOCK_SIZE/sizeof (uint32_t)) - -/* - * CSPI information (entry points, provider info, etc.) - */ -typedef enum sha1_mech_type { - SHA1_MECH_INFO_TYPE, /* SUN_CKM_SHA1 */ - SHA1_HMAC_MECH_INFO_TYPE, /* SUN_CKM_SHA1_HMAC */ - SHA1_HMAC_GEN_MECH_INFO_TYPE /* SUN_CKM_SHA1_HMAC_GENERAL */ -} sha1_mech_type_t; - -/* - * Context for SHA1 mechanism. - */ -typedef struct sha1_ctx { - sha1_mech_type_t sc_mech_type; /* type of context */ - SHA1_CTX sc_sha1_ctx; /* SHA1 context */ -} sha1_ctx_t; - -/* - * Context for SHA1-HMAC and SHA1-HMAC-GENERAL mechanisms. 
- */ -typedef struct sha1_hmac_ctx { - sha1_mech_type_t hc_mech_type; /* type of context */ - uint32_t hc_digest_len; /* digest len in bytes */ - SHA1_CTX hc_icontext; /* inner SHA1 context */ - SHA1_CTX hc_ocontext; /* outer SHA1 context */ -} sha1_hmac_ctx_t; - - -#ifdef __cplusplus -} -#endif - -#endif /* _SHA1_IMPL_H */ diff --git a/sys/contrib/openzfs/module/icp/include/sha2/sha2_consts.h b/sys/contrib/openzfs/module/icp/include/sha2/sha2_consts.h deleted file mode 100644 index 3a6645508fe9..000000000000 --- a/sys/contrib/openzfs/module/icp/include/sha2/sha2_consts.h +++ /dev/null @@ -1,219 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SYS_SHA2_CONSTS_H -#define _SYS_SHA2_CONSTS_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Loading 32-bit constants on a sparc is expensive since it involves both - * a `sethi' and an `or'. thus, we instead use `ld' to load the constants - * from an array called `sha2_consts'. however, on intel (and perhaps other - * processors), it is cheaper to load the constant directly. 
thus, the c - * code in SHA transform functions uses the macro SHA2_CONST() which either - * expands to a constant or an array reference, depending on - * the architecture the code is being compiled for. - * - * SHA512 constants are used for SHA384 - */ - -#include <sys/types.h> /* uint32_t */ - -extern const uint32_t sha256_consts[]; -extern const uint64_t sha512_consts[]; - -#if defined(__sparc) -#define SHA256_CONST(x) (sha256_consts[x]) -#define SHA512_CONST(x) (sha512_consts[x]) -#else -#define SHA256_CONST(x) (SHA256_CONST_ ## x) -#define SHA512_CONST(x) (SHA512_CONST_ ## x) -#endif - -/* constants, as provided in FIPS 180-2 */ - -#define SHA256_CONST_0 0x428a2f98U -#define SHA256_CONST_1 0x71374491U -#define SHA256_CONST_2 0xb5c0fbcfU -#define SHA256_CONST_3 0xe9b5dba5U -#define SHA256_CONST_4 0x3956c25bU -#define SHA256_CONST_5 0x59f111f1U -#define SHA256_CONST_6 0x923f82a4U -#define SHA256_CONST_7 0xab1c5ed5U - -#define SHA256_CONST_8 0xd807aa98U -#define SHA256_CONST_9 0x12835b01U -#define SHA256_CONST_10 0x243185beU -#define SHA256_CONST_11 0x550c7dc3U -#define SHA256_CONST_12 0x72be5d74U -#define SHA256_CONST_13 0x80deb1feU -#define SHA256_CONST_14 0x9bdc06a7U -#define SHA256_CONST_15 0xc19bf174U - -#define SHA256_CONST_16 0xe49b69c1U -#define SHA256_CONST_17 0xefbe4786U -#define SHA256_CONST_18 0x0fc19dc6U -#define SHA256_CONST_19 0x240ca1ccU -#define SHA256_CONST_20 0x2de92c6fU -#define SHA256_CONST_21 0x4a7484aaU -#define SHA256_CONST_22 0x5cb0a9dcU -#define SHA256_CONST_23 0x76f988daU - -#define SHA256_CONST_24 0x983e5152U -#define SHA256_CONST_25 0xa831c66dU -#define SHA256_CONST_26 0xb00327c8U -#define SHA256_CONST_27 0xbf597fc7U -#define SHA256_CONST_28 0xc6e00bf3U -#define SHA256_CONST_29 0xd5a79147U -#define SHA256_CONST_30 0x06ca6351U -#define SHA256_CONST_31 0x14292967U - -#define SHA256_CONST_32 0x27b70a85U -#define SHA256_CONST_33 0x2e1b2138U -#define SHA256_CONST_34 0x4d2c6dfcU -#define SHA256_CONST_35 0x53380d13U -#define SHA256_CONST_36 
0x650a7354U -#define SHA256_CONST_37 0x766a0abbU -#define SHA256_CONST_38 0x81c2c92eU -#define SHA256_CONST_39 0x92722c85U - -#define SHA256_CONST_40 0xa2bfe8a1U -#define SHA256_CONST_41 0xa81a664bU -#define SHA256_CONST_42 0xc24b8b70U -#define SHA256_CONST_43 0xc76c51a3U -#define SHA256_CONST_44 0xd192e819U -#define SHA256_CONST_45 0xd6990624U -#define SHA256_CONST_46 0xf40e3585U -#define SHA256_CONST_47 0x106aa070U - -#define SHA256_CONST_48 0x19a4c116U -#define SHA256_CONST_49 0x1e376c08U -#define SHA256_CONST_50 0x2748774cU -#define SHA256_CONST_51 0x34b0bcb5U -#define SHA256_CONST_52 0x391c0cb3U -#define SHA256_CONST_53 0x4ed8aa4aU -#define SHA256_CONST_54 0x5b9cca4fU -#define SHA256_CONST_55 0x682e6ff3U - -#define SHA256_CONST_56 0x748f82eeU -#define SHA256_CONST_57 0x78a5636fU -#define SHA256_CONST_58 0x84c87814U -#define SHA256_CONST_59 0x8cc70208U -#define SHA256_CONST_60 0x90befffaU -#define SHA256_CONST_61 0xa4506cebU -#define SHA256_CONST_62 0xbef9a3f7U -#define SHA256_CONST_63 0xc67178f2U - -#define SHA512_CONST_0 0x428a2f98d728ae22ULL -#define SHA512_CONST_1 0x7137449123ef65cdULL -#define SHA512_CONST_2 0xb5c0fbcfec4d3b2fULL -#define SHA512_CONST_3 0xe9b5dba58189dbbcULL -#define SHA512_CONST_4 0x3956c25bf348b538ULL -#define SHA512_CONST_5 0x59f111f1b605d019ULL -#define SHA512_CONST_6 0x923f82a4af194f9bULL -#define SHA512_CONST_7 0xab1c5ed5da6d8118ULL -#define SHA512_CONST_8 0xd807aa98a3030242ULL -#define SHA512_CONST_9 0x12835b0145706fbeULL -#define SHA512_CONST_10 0x243185be4ee4b28cULL -#define SHA512_CONST_11 0x550c7dc3d5ffb4e2ULL -#define SHA512_CONST_12 0x72be5d74f27b896fULL -#define SHA512_CONST_13 0x80deb1fe3b1696b1ULL -#define SHA512_CONST_14 0x9bdc06a725c71235ULL -#define SHA512_CONST_15 0xc19bf174cf692694ULL -#define SHA512_CONST_16 0xe49b69c19ef14ad2ULL -#define SHA512_CONST_17 0xefbe4786384f25e3ULL -#define SHA512_CONST_18 0x0fc19dc68b8cd5b5ULL -#define SHA512_CONST_19 0x240ca1cc77ac9c65ULL -#define SHA512_CONST_20 0x2de92c6f592b0275ULL 
-#define SHA512_CONST_21 0x4a7484aa6ea6e483ULL -#define SHA512_CONST_22 0x5cb0a9dcbd41fbd4ULL -#define SHA512_CONST_23 0x76f988da831153b5ULL -#define SHA512_CONST_24 0x983e5152ee66dfabULL -#define SHA512_CONST_25 0xa831c66d2db43210ULL -#define SHA512_CONST_26 0xb00327c898fb213fULL -#define SHA512_CONST_27 0xbf597fc7beef0ee4ULL -#define SHA512_CONST_28 0xc6e00bf33da88fc2ULL -#define SHA512_CONST_29 0xd5a79147930aa725ULL -#define SHA512_CONST_30 0x06ca6351e003826fULL -#define SHA512_CONST_31 0x142929670a0e6e70ULL -#define SHA512_CONST_32 0x27b70a8546d22ffcULL -#define SHA512_CONST_33 0x2e1b21385c26c926ULL -#define SHA512_CONST_34 0x4d2c6dfc5ac42aedULL -#define SHA512_CONST_35 0x53380d139d95b3dfULL -#define SHA512_CONST_36 0x650a73548baf63deULL -#define SHA512_CONST_37 0x766a0abb3c77b2a8ULL -#define SHA512_CONST_38 0x81c2c92e47edaee6ULL -#define SHA512_CONST_39 0x92722c851482353bULL -#define SHA512_CONST_40 0xa2bfe8a14cf10364ULL -#define SHA512_CONST_41 0xa81a664bbc423001ULL -#define SHA512_CONST_42 0xc24b8b70d0f89791ULL -#define SHA512_CONST_43 0xc76c51a30654be30ULL -#define SHA512_CONST_44 0xd192e819d6ef5218ULL -#define SHA512_CONST_45 0xd69906245565a910ULL -#define SHA512_CONST_46 0xf40e35855771202aULL -#define SHA512_CONST_47 0x106aa07032bbd1b8ULL -#define SHA512_CONST_48 0x19a4c116b8d2d0c8ULL -#define SHA512_CONST_49 0x1e376c085141ab53ULL -#define SHA512_CONST_50 0x2748774cdf8eeb99ULL -#define SHA512_CONST_51 0x34b0bcb5e19b48a8ULL -#define SHA512_CONST_52 0x391c0cb3c5c95a63ULL -#define SHA512_CONST_53 0x4ed8aa4ae3418acbULL -#define SHA512_CONST_54 0x5b9cca4f7763e373ULL -#define SHA512_CONST_55 0x682e6ff3d6b2b8a3ULL -#define SHA512_CONST_56 0x748f82ee5defb2fcULL -#define SHA512_CONST_57 0x78a5636f43172f60ULL -#define SHA512_CONST_58 0x84c87814a1f0ab72ULL -#define SHA512_CONST_59 0x8cc702081a6439ecULL -#define SHA512_CONST_60 0x90befffa23631e28ULL -#define SHA512_CONST_61 0xa4506cebde82bde9ULL -#define SHA512_CONST_62 0xbef9a3f7b2c67915ULL -#define SHA512_CONST_63 
0xc67178f2e372532bULL -#define SHA512_CONST_64 0xca273eceea26619cULL -#define SHA512_CONST_65 0xd186b8c721c0c207ULL -#define SHA512_CONST_66 0xeada7dd6cde0eb1eULL -#define SHA512_CONST_67 0xf57d4f7fee6ed178ULL -#define SHA512_CONST_68 0x06f067aa72176fbaULL -#define SHA512_CONST_69 0x0a637dc5a2c898a6ULL -#define SHA512_CONST_70 0x113f9804bef90daeULL -#define SHA512_CONST_71 0x1b710b35131c471bULL -#define SHA512_CONST_72 0x28db77f523047d84ULL -#define SHA512_CONST_73 0x32caab7b40c72493ULL -#define SHA512_CONST_74 0x3c9ebe0a15c9bebcULL -#define SHA512_CONST_75 0x431d67c49c100d4cULL -#define SHA512_CONST_76 0x4cc5d4becb3e42b6ULL -#define SHA512_CONST_77 0x597f299cfc657e2aULL -#define SHA512_CONST_78 0x5fcb6fab3ad6faecULL -#define SHA512_CONST_79 0x6c44198c4a475817ULL - - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_SHA2_CONSTS_H */ diff --git a/sys/contrib/openzfs/module/icp/include/sha2/sha2_impl.h b/sys/contrib/openzfs/module/icp/include/sha2/sha2_impl.h index b9768d344e95..9a1bd38f1a77 100644 --- a/sys/contrib/openzfs/module/icp/include/sha2/sha2_impl.h +++ b/sys/contrib/openzfs/module/icp/include/sha2/sha2_impl.h @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * @@ -18,9 +18,10 @@ * * CDDL HEADER END */ + /* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Copyright (c) 2009 Sun Microsystems, Inc. All rights reserved. 
+ * Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de> */ #ifndef _SHA2_IMPL_H @@ -32,6 +33,28 @@ extern "C" { #endif +/* transform function definition */ +typedef void (*sha256_f)(uint32_t state[8], const void *data, size_t blks); +typedef void (*sha512_f)(uint64_t state[8], const void *data, size_t blks); + +/* needed for checking valid implementations */ +typedef boolean_t (*sha2_is_supported_f)(void); + +typedef struct { + const char *name; + sha256_f transform; + sha2_is_supported_f is_supported; +} sha256_ops_t; + +typedef struct { + const char *name; + sha512_f transform; + sha2_is_supported_f is_supported; +} sha512_ops_t; + +extern const sha256_ops_t *sha256_get_ops(void); +extern const sha512_ops_t *sha512_get_ops(void); + typedef enum { SHA1_TYPE, SHA256_TYPE, diff --git a/sys/contrib/openzfs/module/icp/include/sys/asm_linkage.h b/sys/contrib/openzfs/module/icp/include/sys/asm_linkage.h deleted file mode 100644 index 49a494b46e0b..000000000000 --- a/sys/contrib/openzfs/module/icp/include/sys/asm_linkage.h +++ /dev/null @@ -1,46 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. 
- * Use is subject to license terms. - */ - -#ifndef _SYS_ASM_LINKAGE_H -#define _SYS_ASM_LINKAGE_H - -#if defined(__i386) || defined(__amd64) - -#include <sys/ia32/asm_linkage.h> /* XX64 x86/sys/asm_linkage.h */ - -#endif - -#if defined(_KERNEL) && defined(HAVE_KERNEL_OBJTOOL) - -#include <asm/frame.h> - -#else /* userspace */ -#define FRAME_BEGIN -#define FRAME_END -#endif - - -#endif /* _SYS_ASM_LINKAGE_H */ diff --git a/sys/contrib/openzfs/module/icp/include/sys/bitmap.h b/sys/contrib/openzfs/module/icp/include/sys/bitmap.h deleted file mode 100644 index 4e86ee70ed9e..000000000000 --- a/sys/contrib/openzfs/module/icp/include/sys/bitmap.h +++ /dev/null @@ -1,183 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */ -/* All Rights Reserved */ - - -#ifndef _SYS_BITMAP_H -#define _SYS_BITMAP_H - -#ifdef __cplusplus -extern "C" { -#endif - -#if defined(__GNUC__) && defined(_ASM_INLINES) && \ - (defined(__i386) || defined(__amd64)) -#include <asm/bitmap.h> -#endif - -/* - * Operations on bitmaps of arbitrary size - * A bitmap is a vector of 1 or more ulong_t's. - * The user of the package is responsible for range checks and keeping - * track of sizes. - */ - -#ifdef _LP64 -#define BT_ULSHIFT 6 /* log base 2 of BT_NBIPUL, to extract word index */ -#define BT_ULSHIFT32 5 /* log base 2 of BT_NBIPUL, to extract word index */ -#else -#define BT_ULSHIFT 5 /* log base 2 of BT_NBIPUL, to extract word index */ -#endif - -#define BT_NBIPUL (1 << BT_ULSHIFT) /* n bits per ulong_t */ -#define BT_ULMASK (BT_NBIPUL - 1) /* to extract bit index */ - -#ifdef _LP64 -#define BT_NBIPUL32 (1 << BT_ULSHIFT32) /* n bits per ulong_t */ -#define BT_ULMASK32 (BT_NBIPUL32 - 1) /* to extract bit index */ -#define BT_ULMAXMASK 0xffffffffffffffff /* used by bt_getlowbit */ -#else -#define BT_ULMAXMASK 0xffffffff -#endif - -/* - * bitmap is a ulong_t *, bitindex an index_t - * - * The macros BT_WIM and BT_BIW internal; there is no need - * for users of this package to use them. - */ - -/* - * word in map - */ -#define BT_WIM(bitmap, bitindex) \ - ((bitmap)[(bitindex) >> BT_ULSHIFT]) -/* - * bit in word - */ -#define BT_BIW(bitindex) \ - (1UL << ((bitindex) & BT_ULMASK)) - -#ifdef _LP64 -#define BT_WIM32(bitmap, bitindex) \ - ((bitmap)[(bitindex) >> BT_ULSHIFT32]) - -#define BT_BIW32(bitindex) \ - (1UL << ((bitindex) & BT_ULMASK32)) -#endif - -/* - * These are public macros - * - * BT_BITOUL == n bits to n ulong_t's - */ -#define BT_BITOUL(nbits) \ - (((nbits) + BT_NBIPUL - 1l) / BT_NBIPUL) -#define BT_SIZEOFMAP(nbits) \ - (BT_BITOUL(nbits) * sizeof (ulong_t)) -#define BT_TEST(bitmap, bitindex) \ - ((BT_WIM((bitmap), (bitindex)) & BT_BIW(bitindex)) ? 
1 : 0) -#define BT_SET(bitmap, bitindex) \ - { BT_WIM((bitmap), (bitindex)) |= BT_BIW(bitindex); } -#define BT_CLEAR(bitmap, bitindex) \ - { BT_WIM((bitmap), (bitindex)) &= ~BT_BIW(bitindex); } - -#ifdef _LP64 -#define BT_BITOUL32(nbits) \ - (((nbits) + BT_NBIPUL32 - 1l) / BT_NBIPUL32) -#define BT_SIZEOFMAP32(nbits) \ - (BT_BITOUL32(nbits) * sizeof (uint_t)) -#define BT_TEST32(bitmap, bitindex) \ - ((BT_WIM32((bitmap), (bitindex)) & BT_BIW32(bitindex)) ? 1 : 0) -#define BT_SET32(bitmap, bitindex) \ - { BT_WIM32((bitmap), (bitindex)) |= BT_BIW32(bitindex); } -#define BT_CLEAR32(bitmap, bitindex) \ - { BT_WIM32((bitmap), (bitindex)) &= ~BT_BIW32(bitindex); } -#endif /* _LP64 */ - - -/* - * BIT_ONLYONESET is a private macro not designed for bitmaps of - * arbitrary size. u must be an unsigned integer/long. It returns - * true if one and only one bit is set in u. - */ -#define BIT_ONLYONESET(u) \ - ((((u) == 0) ? 0 : ((u) & ((u) - 1)) == 0)) - -#ifndef _ASM - -/* - * return next available bit index from map with specified number of bits - */ -extern index_t bt_availbit(ulong_t *bitmap, size_t nbits); -/* - * find the highest order bit that is on, and is within or below - * the word specified by wx - */ -extern int bt_gethighbit(ulong_t *mapp, int wx); -extern int bt_range(ulong_t *bitmap, size_t *pos1, size_t *pos2, - size_t end_pos); -extern int bt_getlowbit(ulong_t *bitmap, size_t start, size_t stop); -extern void bt_copy(ulong_t *, ulong_t *, ulong_t); - -/* - * find the parity - */ -extern int odd_parity(ulong_t); - -/* - * Atomically set/clear bits - * Atomic exclusive operations will set "result" to "-1" - * if the bit is already set/cleared. "result" will be set - * to 0 otherwise. 
- */ -#define BT_ATOMIC_SET(bitmap, bitindex) \ - { atomic_or_ulong(&(BT_WIM(bitmap, bitindex)), BT_BIW(bitindex)); } -#define BT_ATOMIC_CLEAR(bitmap, bitindex) \ - { atomic_and_ulong(&(BT_WIM(bitmap, bitindex)), ~BT_BIW(bitindex)); } - -#define BT_ATOMIC_SET_EXCL(bitmap, bitindex, result) \ - { result = atomic_set_long_excl(&(BT_WIM(bitmap, bitindex)), \ - (bitindex) % BT_NBIPUL); } -#define BT_ATOMIC_CLEAR_EXCL(bitmap, bitindex, result) \ - { result = atomic_clear_long_excl(&(BT_WIM(bitmap, bitindex)), \ - (bitindex) % BT_NBIPUL); } - -/* - * Extracts bits between index h (high, inclusive) and l (low, exclusive) from - * u, which must be an unsigned integer. - */ -#define BITX(u, h, l) (((u) >> (l)) & ((1LU << ((h) - (l) + 1LU)) - 1LU)) - -#endif /* _ASM */ - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_BITMAP_H */ diff --git a/sys/contrib/openzfs/module/icp/include/sys/crypto/elfsign.h b/sys/contrib/openzfs/module/icp/include/sys/crypto/elfsign.h deleted file mode 100644 index 5432f0c8d607..000000000000 --- a/sys/contrib/openzfs/module/icp/include/sys/crypto/elfsign.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SYS_CRYPTO_ELFSIGN_H -#define _SYS_CRYPTO_ELFSIGN_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Consolidation Private Interface for elfsign/libpkcs11/kcfd - */ - -#include <sys/zfs_context.h> - -/* - * Project Private structures and types used for communication between kcfd - * and KCF over the door. - */ - -typedef enum ELFsign_status_e { - ELFSIGN_UNKNOWN, - ELFSIGN_SUCCESS, - ELFSIGN_FAILED, - ELFSIGN_NOTSIGNED, - ELFSIGN_INVALID_CERTPATH, - ELFSIGN_INVALID_ELFOBJ, - ELFSIGN_RESTRICTED -} ELFsign_status_t; - -#define KCF_KCFD_VERSION1 1 -#define SIG_MAX_LENGTH 1024 - -#define ELF_SIGNATURE_SECTION ".SUNW_signature" - -typedef struct kcf_door_arg_s { - short da_version; - boolean_t da_iskernel; - - union { - char filename[MAXPATHLEN]; /* For request */ - - struct kcf_door_result_s { /* For response */ - ELFsign_status_t status; - uint32_t siglen; - uchar_t signature[1]; - } result; - } da_u; -} kcf_door_arg_t; - -typedef uint32_t filesig_vers_t; - -/* - * File Signature Structure - * Applicable to ELF and other file formats - */ -struct filesignatures { - uint32_t filesig_cnt; /* count of signatures */ - uint32_t filesig_pad; /* unused */ - union { - char filesig_data[1]; - struct filesig { /* one of these for each signature */ - uint32_t filesig_size; - filesig_vers_t filesig_version; - union { - struct filesig_version1 { - uint32_t filesig_v1_dnsize; - uint32_t filesig_v1_sigsize; - uint32_t filesig_v1_oidsize; - char filesig_v1_data[1]; - } filesig_v1; - struct filesig_version3 { - uint64_t filesig_v3_time; - uint32_t filesig_v3_dnsize; - uint32_t filesig_v3_sigsize; - uint32_t 
filesig_v3_oidsize; - char filesig_v3_data[1]; - } filesig_v3; - } _u2; - } filesig_sig; - uint64_t filesig_align; - } _u1; -}; -#define filesig_sig _u1.filesig_sig - -#define filesig_v1_dnsize _u2.filesig_v1.filesig_v1_dnsize -#define filesig_v1_sigsize _u2.filesig_v1.filesig_v1_sigsize -#define filesig_v1_oidsize _u2.filesig_v1.filesig_v1_oidsize -#define filesig_v1_data _u2.filesig_v1.filesig_v1_data - -#define filesig_v3_time _u2.filesig_v3.filesig_v3_time -#define filesig_v3_dnsize _u2.filesig_v3.filesig_v3_dnsize -#define filesig_v3_sigsize _u2.filesig_v3.filesig_v3_sigsize -#define filesig_v3_oidsize _u2.filesig_v3.filesig_v3_oidsize -#define filesig_v3_data _u2.filesig_v3.filesig_v3_data - -#define filesig_ALIGN(s) (((s) + sizeof (uint64_t) - 1) & \ - (-sizeof (uint64_t))) -#define filesig_next(ptr) (struct filesig *)((void *)((char *)(ptr) + \ - filesig_ALIGN((ptr)->filesig_size))) - -#define FILESIG_UNKNOWN 0 /* unrecognized version */ -#define FILESIG_VERSION1 1 /* version1, all but sig section */ -#define FILESIG_VERSION2 2 /* version1 format, SHF_ALLOC only */ -#define FILESIG_VERSION3 3 /* version3, all but sig section */ -#define FILESIG_VERSION4 4 /* version3 format, SHF_ALLOC only */ - -#define _PATH_KCFD_DOOR "/etc/svc/volatile/kcfd_door" - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_CRYPTO_ELFSIGN_H */ diff --git a/sys/contrib/openzfs/module/icp/include/sys/crypto/impl.h b/sys/contrib/openzfs/module/icp/include/sys/crypto/impl.h index 0f37f3f63532..4d17221ea9a3 100644 --- a/sys/contrib/openzfs/module/icp/include/sys/crypto/impl.h +++ b/sys/contrib/openzfs/module/icp/include/sys/crypto/impl.h @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. 
* @@ -34,80 +34,17 @@ #include <sys/crypto/common.h> #include <sys/crypto/api.h> #include <sys/crypto/spi.h> -#include <sys/crypto/ioctl.h> +#include <sys/avl.h> #ifdef __cplusplus extern "C" { #endif -#define KCF_MODULE "kcf" - /* * Prefixes convention: structures internal to the kernel cryptographic * framework start with 'kcf_'. Exposed structure start with 'crypto_'. */ -/* Provider stats. Not protected. */ -typedef struct kcf_prov_stats { - kstat_named_t ps_ops_total; - kstat_named_t ps_ops_passed; - kstat_named_t ps_ops_failed; - kstat_named_t ps_ops_busy_rval; -} kcf_prov_stats_t; - -/* Various kcf stats. Not protected. */ -typedef struct kcf_stats { - kstat_named_t ks_thrs_in_pool; - kstat_named_t ks_idle_thrs; - kstat_named_t ks_minthrs; - kstat_named_t ks_maxthrs; - kstat_named_t ks_swq_njobs; - kstat_named_t ks_swq_maxjobs; - kstat_named_t ks_taskq_threads; - kstat_named_t ks_taskq_minalloc; - kstat_named_t ks_taskq_maxalloc; -} kcf_stats_t; - -/* - * Keep all the information needed by the scheduler from - * this provider. - */ -typedef struct kcf_sched_info { - /* The number of operations dispatched. */ - uint64_t ks_ndispatches; - - /* The number of operations that failed. */ - uint64_t ks_nfails; - - /* The number of operations that returned CRYPTO_BUSY. */ - uint64_t ks_nbusy_rval; - - /* taskq used to dispatch crypto requests */ - taskq_t *ks_taskq; -} kcf_sched_info_t; - -/* - * pd_irefcnt approximates the number of inflight requests to the - * provider. Though we increment this counter during registration for - * other purposes, that base value is mostly same across all providers. - * So, it is a good measure of the load on a provider when it is not - * in a busy state. Once a provider notifies it is busy, requests - * backup in the taskq. So, we use tq_nalloc in that case which gives - * the number of task entries in the task queue. 
Note that we do not - * acquire any locks here as it is not critical to get the exact number - * and the lock contention may be too costly for this code path. - */ -#define KCF_PROV_LOAD(pd) ((pd)->pd_state != KCF_PROV_BUSY ? \ - (pd)->pd_irefcnt : (pd)->pd_sched_info.ks_taskq->tq_nalloc) - -#define KCF_PROV_INCRSTATS(pd, error) { \ - (pd)->pd_sched_info.ks_ndispatches++; \ - if (error == CRYPTO_BUSY) \ - (pd)->pd_sched_info.ks_nbusy_rval++; \ - else if (error != CRYPTO_SUCCESS && error != CRYPTO_QUEUED) \ - (pd)->pd_sched_info.ks_nfails++; \ -} - /* * The following two macros should be @@ -118,7 +55,7 @@ typedef struct kcf_sched_info { * When impl.h is broken up (bug# 4703218), this will be done. For now, * we hardcode these values. */ -#define KCF_OPS_CLASSSIZE 8 +#define KCF_OPS_CLASSSIZE 4 #define KCF_MAXMECHTAB 32 /* @@ -126,21 +63,17 @@ typedef struct kcf_sched_info { * the elements is important. * * Routines which get a provider or the list of providers - * should pick only those that are either in KCF_PROV_READY state - * or in KCF_PROV_BUSY state. + * should pick only those that are in KCF_PROV_READY state. */ typedef enum { KCF_PROV_ALLOCATED = 1, - KCF_PROV_UNVERIFIED, - KCF_PROV_VERIFICATION_FAILED, /* * state < KCF_PROV_READY means the provider can not * be used at all. */ KCF_PROV_READY, - KCF_PROV_BUSY, /* - * state > KCF_PROV_BUSY means the provider can not + * state > KCF_PROV_READY means the provider can not * be used for new requests. 
*/ KCF_PROV_FAILED, @@ -153,112 +86,66 @@ typedef enum { KCF_PROV_FREED } kcf_prov_state_t; -#define KCF_IS_PROV_UNVERIFIED(pd) ((pd)->pd_state == KCF_PROV_UNVERIFIED) -#define KCF_IS_PROV_USABLE(pd) ((pd)->pd_state == KCF_PROV_READY || \ - (pd)->pd_state == KCF_PROV_BUSY) +#define KCF_IS_PROV_USABLE(pd) ((pd)->pd_state == KCF_PROV_READY) #define KCF_IS_PROV_REMOVED(pd) ((pd)->pd_state >= KCF_PROV_REMOVED) -/* Internal flags valid for pd_flags field */ -#define KCF_PROV_RESTRICTED 0x40000000 -#define KCF_LPROV_MEMBER 0x80000000 /* is member of a logical provider */ - /* * A provider descriptor structure. There is one such structure per * provider. It is allocated and initialized at registration time and * freed when the provider unregisters. * - * pd_prov_type: Provider type, hardware or software - * pd_sid: Session ID of the provider used by kernel clients. - * This is valid only for session-oriented providers. * pd_refcnt: Reference counter to this provider descriptor * pd_irefcnt: References held by the framework internal structs - * pd_lock: lock protects pd_state and pd_provider_list + * pd_lock: lock protects pd_state * pd_state: State value of the provider - * pd_provider_list: Used to cross-reference logical providers and their - * members. Not used for software providers. 
- * pd_resume_cv: cv to wait for state to change from KCF_PROV_BUSY - * pd_prov_handle: Provider handle specified by provider * pd_ops_vector: The ops vector specified by Provider * pd_mech_indx: Lookup table which maps a core framework mechanism * number to an index in pd_mechanisms array * pd_mechanisms: Array of mechanisms supported by the provider, specified * by the provider during registration - * pd_sched_info: Scheduling information associated with the provider * pd_mech_list_count: The number of entries in pi_mechanisms, specified * by the provider during registration - * pd_name: Device name or module name - * pd_instance: Device instance - * pd_module_id: Module ID returned by modload - * pd_mctlp: Pointer to modctl structure for this provider * pd_remove_cv: cv to wait on while the provider queue drains * pd_description: Provider description string - * pd_flags bitwise OR of pi_flags from crypto_provider_info_t - * and other internal flags defined above. - * pd_hash_limit Maximum data size that hash mechanisms of this provider - * can support. 
* pd_kcf_prov_handle: KCF-private handle assigned by KCF * pd_prov_id: Identification # assigned by KCF to provider - * pd_kstat: kstat associated with the provider - * pd_ks_data: kstat data */ typedef struct kcf_provider_desc { - crypto_provider_type_t pd_prov_type; - crypto_session_id_t pd_sid; uint_t pd_refcnt; uint_t pd_irefcnt; kmutex_t pd_lock; kcf_prov_state_t pd_state; - struct kcf_provider_list *pd_provider_list; - kcondvar_t pd_resume_cv; - crypto_provider_handle_t pd_prov_handle; - crypto_ops_t *pd_ops_vector; + const crypto_ops_t *pd_ops_vector; ushort_t pd_mech_indx[KCF_OPS_CLASSSIZE]\ [KCF_MAXMECHTAB]; - crypto_mech_info_t *pd_mechanisms; - kcf_sched_info_t pd_sched_info; + const crypto_mech_info_t *pd_mechanisms; uint_t pd_mech_list_count; - // char *pd_name; - // uint_t pd_instance; - // int pd_module_id; - // struct modctl *pd_mctlp; kcondvar_t pd_remove_cv; - char *pd_description; - uint_t pd_flags; - uint_t pd_hash_limit; + const char *pd_description; crypto_kcf_provider_handle_t pd_kcf_prov_handle; crypto_provider_id_t pd_prov_id; - kstat_t *pd_kstat; - kcf_prov_stats_t pd_ks_data; } kcf_provider_desc_t; -/* useful for making a list of providers */ -typedef struct kcf_provider_list { - struct kcf_provider_list *pl_next; - struct kcf_provider_desc *pl_provider; -} kcf_provider_list_t; - -/* atomic operations in linux implicitly form a memory barrier */ -#define membar_exit() - /* * If a component has a reference to a kcf_provider_desc_t, * it REFHOLD()s. A new provider descriptor which is referenced only * by the providers table has a reference counter of one. 
*/ -#define KCF_PROV_REFHOLD(desc) { \ - atomic_add_32(&(desc)->pd_refcnt, 1); \ - ASSERT((desc)->pd_refcnt != 0); \ +#define KCF_PROV_REFHOLD(desc) { \ + int newval = atomic_add_32_nv(&(desc)->pd_refcnt, 1); \ + ASSERT(newval != 0); \ } -#define KCF_PROV_IREFHOLD(desc) { \ - atomic_add_32(&(desc)->pd_irefcnt, 1); \ - ASSERT((desc)->pd_irefcnt != 0); \ +#define KCF_PROV_IREFHOLD(desc) { \ + int newval = atomic_add_32_nv(&(desc)->pd_irefcnt, 1); \ + ASSERT(newval != 0); \ } #define KCF_PROV_IREFRELE(desc) { \ - ASSERT((desc)->pd_irefcnt != 0); \ - membar_exit(); \ - if (atomic_add_32_nv(&(desc)->pd_irefcnt, -1) == 0) { \ + membar_producer(); \ + int newval = atomic_add_32_nv(&(desc)->pd_irefcnt, -1); \ + ASSERT(newval != -1); \ + if (newval == 0) { \ cv_broadcast(&(desc)->pd_remove_cv); \ } \ } @@ -266,22 +153,15 @@ typedef struct kcf_provider_list { #define KCF_PROV_REFHELD(desc) ((desc)->pd_refcnt >= 1) #define KCF_PROV_REFRELE(desc) { \ - ASSERT((desc)->pd_refcnt != 0); \ - membar_exit(); \ - if (atomic_add_32_nv(&(desc)->pd_refcnt, -1) == 0) { \ + membar_producer(); \ + int newval = atomic_add_32_nv(&(desc)->pd_refcnt, -1); \ + ASSERT(newval != -1); \ + if (newval == 0) { \ kcf_provider_zero_refcnt((desc)); \ } \ } -/* list of crypto_mech_info_t valid as the second mech in a dual operation */ - -typedef struct crypto_mech_info_list { - struct crypto_mech_info_list *ml_next; - crypto_mech_type_t ml_kcf_mechid; /* KCF's id */ - crypto_mech_info_t ml_mech_info; -} crypto_mech_info_list_t; - /* * An element in a mechanism provider descriptors chain. * The kcf_prov_mech_desc_t is duplicated in every chain the provider belongs @@ -293,15 +173,9 @@ typedef struct kcf_prov_mech_desc { struct kcf_mech_entry *pm_me; /* Back to the head */ struct kcf_prov_mech_desc *pm_next; /* Next in the chain */ crypto_mech_info_t pm_mech_info; /* Provider mech info */ - crypto_mech_info_list_t *pm_mi_list; /* list for duals */ kcf_provider_desc_t *pm_prov_desc; /* Common desc. 
*/ } kcf_prov_mech_desc_t; -/* and the notation shortcuts ... */ -#define pm_provider_type pm_prov_desc.pd_provider_type -#define pm_provider_handle pm_prov_desc.pd_provider_handle -#define pm_ops_vector pm_prov_desc.pd_ops_vector - /* * A mechanism entry in an xxx_mech_tab[]. me_pad was deemed * to be unnecessary and removed. @@ -309,55 +183,18 @@ typedef struct kcf_prov_mech_desc { typedef struct kcf_mech_entry { crypto_mech_name_t me_name; /* mechanism name */ crypto_mech_type_t me_mechid; /* Internal id for mechanism */ - kmutex_t me_mutex; /* access protection */ - kcf_prov_mech_desc_t *me_hw_prov_chain; /* list of HW providers */ - kcf_prov_mech_desc_t *me_sw_prov; /* SW provider */ - /* - * Number of HW providers in the chain. There is only one - * SW provider. So, we need only a count of HW providers. - */ - int me_num_hwprov; - /* - * When a SW provider is present, this is the generation number that - * ensures no objects from old SW providers are used in the new one - */ - uint32_t me_gen_swprov; - /* - * threshold for using hardware providers for this mech - */ - size_t me_threshold; + kcf_prov_mech_desc_t *me_sw_prov; /* provider */ + avl_node_t me_node; } kcf_mech_entry_t; /* - * A policy descriptor structure. It is allocated and initialized - * when administrative ioctls load disabled mechanisms. - * - * pd_prov_type: Provider type, hardware or software - * pd_name: Device name or module name. - * pd_instance: Device instance. - * pd_refcnt: Reference counter for this policy descriptor - * pd_mutex: Protects array and count of disabled mechanisms. - * pd_disabled_count: Count of disabled mechanisms. - * pd_disabled_mechs: Array of disabled mechanisms. 
- */ -typedef struct kcf_policy_desc { - crypto_provider_type_t pd_prov_type; - char *pd_name; - uint_t pd_instance; - uint_t pd_refcnt; - kmutex_t pd_mutex; - uint_t pd_disabled_count; - crypto_mech_name_t *pd_disabled_mechs; -} kcf_policy_desc_t; - -/* * If a component has a reference to a kcf_policy_desc_t, * it REFHOLD()s. A new policy descriptor which is referenced only * by the policy table has a reference count of one. */ -#define KCF_POLICY_REFHOLD(desc) { \ - atomic_add_32(&(desc)->pd_refcnt, 1); \ - ASSERT((desc)->pd_refcnt != 0); \ +#define KCF_POLICY_REFHOLD(desc) { \ + int newval = atomic_add_32_nv(&(desc)->pd_refcnt, 1); \ + ASSERT(newval != 0); \ } /* @@ -365,63 +202,36 @@ typedef struct kcf_policy_desc { * reference is released, the descriptor is freed. */ #define KCF_POLICY_REFRELE(desc) { \ - ASSERT((desc)->pd_refcnt != 0); \ - membar_exit(); \ - if (atomic_add_32_nv(&(desc)->pd_refcnt, -1) == 0) \ + membar_producer(); \ + int newval = atomic_add_32_nv(&(desc)->pd_refcnt, -1); \ + ASSERT(newval != -1); \ + if (newval == 0) \ kcf_policy_free_desc(desc); \ } /* - * This entry stores the name of a software module and its - * mechanisms. The mechanisms are 'hints' that are used to - * trigger loading of the module. - */ -typedef struct kcf_soft_conf_entry { - struct kcf_soft_conf_entry *ce_next; - char *ce_name; - crypto_mech_name_t *ce_mechs; - uint_t ce_count; -} kcf_soft_conf_entry_t; - -extern kmutex_t soft_config_mutex; -extern kcf_soft_conf_entry_t *soft_config_list; - -/* * Global tables. The sizes are from the predefined PKCS#11 v2.20 mechanisms, * with a margin of few extra empty entry points */ #define KCF_MAXDIGEST 16 /* Digests */ -#define KCF_MAXCIPHER 64 /* Ciphers */ +#define KCF_MAXCIPHER 32 /* Ciphers */ #define KCF_MAXMAC 40 /* Message authentication codes */ -#define KCF_MAXSIGN 24 /* Sign/Verify */ -#define KCF_MAXKEYOPS 116 /* Key generation and derivation */ -#define KCF_MAXMISC 16 /* Others ... 
*/ - -#define KCF_MAXMECHS KCF_MAXDIGEST + KCF_MAXCIPHER + KCF_MAXMAC + \ - KCF_MAXSIGN + KCF_MAXKEYOPS + \ - KCF_MAXMISC -extern kcf_mech_entry_t kcf_digest_mechs_tab[]; -extern kcf_mech_entry_t kcf_cipher_mechs_tab[]; -extern kcf_mech_entry_t kcf_mac_mechs_tab[]; -extern kcf_mech_entry_t kcf_sign_mechs_tab[]; -extern kcf_mech_entry_t kcf_keyops_mechs_tab[]; -extern kcf_mech_entry_t kcf_misc_mechs_tab[]; - -extern kmutex_t kcf_mech_tabs_lock; +_Static_assert(KCF_MAXCIPHER == KCF_MAXMECHTAB, + "KCF_MAXCIPHER != KCF_MAXMECHTAB"); /* See KCF_MAXMECHTAB comment */ typedef enum { KCF_DIGEST_CLASS = 1, KCF_CIPHER_CLASS, KCF_MAC_CLASS, - KCF_SIGN_CLASS, - KCF_KEYOPS_CLASS, - KCF_MISC_CLASS } kcf_ops_class_t; #define KCF_FIRST_OPSCLASS KCF_DIGEST_CLASS -#define KCF_LAST_OPSCLASS KCF_MISC_CLASS +#define KCF_LAST_OPSCLASS KCF_MAC_CLASS +_Static_assert( + KCF_OPS_CLASSSIZE == (KCF_LAST_OPSCLASS - KCF_FIRST_OPSCLASS + 2), + "KCF_OPS_CLASSSIZE doesn't match kcf_ops_class_t!"); /* The table of all the kcf_xxx_mech_tab[]s, indexed by kcf_ops_class */ @@ -430,14 +240,14 @@ typedef struct kcf_mech_entry_tab { kcf_mech_entry_t *met_tab; /* the table */ } kcf_mech_entry_tab_t; -extern kcf_mech_entry_tab_t kcf_mech_tabs_tab[]; +extern const kcf_mech_entry_tab_t kcf_mech_tabs_tab[]; #define KCF_MECHID(class, index) \ (((crypto_mech_type_t)(class) << 32) | (crypto_mech_type_t)(index)) #define KCF_MECH2CLASS(mech_type) ((kcf_ops_class_t)((mech_type) >> 32)) -#define KCF_MECH2INDEX(mech_type) ((int)(mech_type)) +#define KCF_MECH2INDEX(mech_type) ((int)((mech_type) & 0xFFFFFFFF)) #define KCF_TO_PROV_MECH_INDX(pd, mech_type) \ ((pd)->pd_mech_indx[KCF_MECH2CLASS(mech_type)] \ @@ -449,58 +259,6 @@ extern kcf_mech_entry_tab_t kcf_mech_tabs_tab[]; #define KCF_TO_PROV_MECHNUM(pd, mech_type) \ (KCF_TO_PROV_MECHINFO(pd, mech_type).cm_mech_number) -#define KCF_CAN_SHARE_OPSTATE(pd, mech_type) \ - ((KCF_TO_PROV_MECHINFO(pd, mech_type).cm_mech_flags) & \ - CRYPTO_CAN_SHARE_OPSTATE) - -/* ps_refcnt 
is protected by cm_lock in the crypto_minor structure */ -typedef struct crypto_provider_session { - struct crypto_provider_session *ps_next; - crypto_session_id_t ps_session; - kcf_provider_desc_t *ps_provider; - kcf_provider_desc_t *ps_real_provider; - uint_t ps_refcnt; -} crypto_provider_session_t; - -typedef struct crypto_session_data { - kmutex_t sd_lock; - kcondvar_t sd_cv; - uint32_t sd_flags; - int sd_pre_approved_amount; - crypto_ctx_t *sd_digest_ctx; - crypto_ctx_t *sd_encr_ctx; - crypto_ctx_t *sd_decr_ctx; - crypto_ctx_t *sd_sign_ctx; - crypto_ctx_t *sd_verify_ctx; - crypto_ctx_t *sd_sign_recover_ctx; - crypto_ctx_t *sd_verify_recover_ctx; - kcf_provider_desc_t *sd_provider; - void *sd_find_init_cookie; - crypto_provider_session_t *sd_provider_session; -} crypto_session_data_t; - -#define CRYPTO_SESSION_IN_USE 0x00000001 -#define CRYPTO_SESSION_IS_BUSY 0x00000002 -#define CRYPTO_SESSION_IS_CLOSED 0x00000004 - -#define KCF_MAX_PIN_LEN 1024 - -/* - * Per-minor info. - * - * cm_lock protects everything in this structure except for cm_refcnt. - */ -typedef struct crypto_minor { - uint_t cm_refcnt; - kmutex_t cm_lock; - kcondvar_t cm_cv; - crypto_session_data_t **cm_session_table; - uint_t cm_session_table_count; - kcf_provider_desc_t **cm_provider_array; - uint_t cm_provider_count; - crypto_provider_session_t *cm_provider_session; -} crypto_minor_t; - /* * Return codes for internal functions */ @@ -512,849 +270,118 @@ typedef struct crypto_minor { #define KCF_INVALID_INDX ((ushort_t)-1) /* - * kCF internal mechanism and function group for tracking RNG providers. - */ -#define SUN_RANDOM "random" -#define CRYPTO_FG_RANDOM 0x80000000 /* generate_random() */ - -/* * Wrappers for ops vectors. In the wrapper definitions below, the pd * argument always corresponds to a pointer to a provider descriptor * of type kcf_prov_desc_t. 
*/ -#define KCF_PROV_CONTROL_OPS(pd) ((pd)->pd_ops_vector->co_control_ops) -#define KCF_PROV_CTX_OPS(pd) ((pd)->pd_ops_vector->co_ctx_ops) #define KCF_PROV_DIGEST_OPS(pd) ((pd)->pd_ops_vector->co_digest_ops) #define KCF_PROV_CIPHER_OPS(pd) ((pd)->pd_ops_vector->co_cipher_ops) #define KCF_PROV_MAC_OPS(pd) ((pd)->pd_ops_vector->co_mac_ops) -#define KCF_PROV_SIGN_OPS(pd) ((pd)->pd_ops_vector->co_sign_ops) -#define KCF_PROV_VERIFY_OPS(pd) ((pd)->pd_ops_vector->co_verify_ops) -#define KCF_PROV_DUAL_OPS(pd) ((pd)->pd_ops_vector->co_dual_ops) -#define KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) \ - ((pd)->pd_ops_vector->co_dual_cipher_mac_ops) -#define KCF_PROV_RANDOM_OPS(pd) ((pd)->pd_ops_vector->co_random_ops) -#define KCF_PROV_SESSION_OPS(pd) ((pd)->pd_ops_vector->co_session_ops) -#define KCF_PROV_OBJECT_OPS(pd) ((pd)->pd_ops_vector->co_object_ops) -#define KCF_PROV_KEY_OPS(pd) ((pd)->pd_ops_vector->co_key_ops) -#define KCF_PROV_PROVIDER_OPS(pd) ((pd)->pd_ops_vector->co_provider_ops) -#define KCF_PROV_MECH_OPS(pd) ((pd)->pd_ops_vector->co_mech_ops) -#define KCF_PROV_NOSTORE_KEY_OPS(pd) \ - ((pd)->pd_ops_vector->co_nostore_key_ops) - -/* - * Wrappers for crypto_control_ops(9S) entry points. - */ - -#define KCF_PROV_STATUS(pd, status) ( \ - (KCF_PROV_CONTROL_OPS(pd) && \ - KCF_PROV_CONTROL_OPS(pd)->provider_status) ? \ - KCF_PROV_CONTROL_OPS(pd)->provider_status( \ - (pd)->pd_prov_handle, status) : \ - CRYPTO_NOT_SUPPORTED) - -/* - * Wrappers for crypto_ctx_ops(9S) entry points. - */ - -#define KCF_PROV_CREATE_CTX_TEMPLATE(pd, mech, key, template, size, req) ( \ - (KCF_PROV_CTX_OPS(pd) && KCF_PROV_CTX_OPS(pd)->create_ctx_template) ? \ - KCF_PROV_CTX_OPS(pd)->create_ctx_template( \ - (pd)->pd_prov_handle, mech, key, template, size, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_FREE_CONTEXT(pd, ctx) ( \ - (KCF_PROV_CTX_OPS(pd) && KCF_PROV_CTX_OPS(pd)->free_context) ? 
\ - KCF_PROV_CTX_OPS(pd)->free_context(ctx) : CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_COPYIN_MECH(pd, umech, kmech, errorp, mode) ( \ - (KCF_PROV_MECH_OPS(pd) && KCF_PROV_MECH_OPS(pd)->copyin_mechanism) ? \ - KCF_PROV_MECH_OPS(pd)->copyin_mechanism( \ - (pd)->pd_prov_handle, umech, kmech, errorp, mode) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_COPYOUT_MECH(pd, kmech, umech, errorp, mode) ( \ - (KCF_PROV_MECH_OPS(pd) && KCF_PROV_MECH_OPS(pd)->copyout_mechanism) ? \ - KCF_PROV_MECH_OPS(pd)->copyout_mechanism( \ - (pd)->pd_prov_handle, kmech, umech, errorp, mode) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_FREE_MECH(pd, prov_mech) ( \ - (KCF_PROV_MECH_OPS(pd) && KCF_PROV_MECH_OPS(pd)->free_mechanism) ? \ - KCF_PROV_MECH_OPS(pd)->free_mechanism( \ - (pd)->pd_prov_handle, prov_mech) : CRYPTO_NOT_SUPPORTED) +#define KCF_PROV_CTX_OPS(pd) ((pd)->pd_ops_vector->co_ctx_ops) /* * Wrappers for crypto_digest_ops(9S) entry points. */ -#define KCF_PROV_DIGEST_INIT(pd, ctx, mech, req) ( \ +#define KCF_PROV_DIGEST_INIT(pd, ctx, mech) ( \ (KCF_PROV_DIGEST_OPS(pd) && KCF_PROV_DIGEST_OPS(pd)->digest_init) ? \ - KCF_PROV_DIGEST_OPS(pd)->digest_init(ctx, mech, req) : \ - CRYPTO_NOT_SUPPORTED) - -/* - * The _ (underscore) in _digest is needed to avoid replacing the - * function digest(). - */ -#define KCF_PROV_DIGEST(pd, ctx, data, _digest, req) ( \ - (KCF_PROV_DIGEST_OPS(pd) && KCF_PROV_DIGEST_OPS(pd)->digest) ? \ - KCF_PROV_DIGEST_OPS(pd)->digest(ctx, data, _digest, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_DIGEST_UPDATE(pd, ctx, data, req) ( \ - (KCF_PROV_DIGEST_OPS(pd) && KCF_PROV_DIGEST_OPS(pd)->digest_update) ? \ - KCF_PROV_DIGEST_OPS(pd)->digest_update(ctx, data, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_DIGEST_KEY(pd, ctx, key, req) ( \ - (KCF_PROV_DIGEST_OPS(pd) && KCF_PROV_DIGEST_OPS(pd)->digest_key) ? 
\ - KCF_PROV_DIGEST_OPS(pd)->digest_key(ctx, key, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_DIGEST_FINAL(pd, ctx, digest, req) ( \ - (KCF_PROV_DIGEST_OPS(pd) && KCF_PROV_DIGEST_OPS(pd)->digest_final) ? \ - KCF_PROV_DIGEST_OPS(pd)->digest_final(ctx, digest, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_DIGEST_ATOMIC(pd, session, mech, data, digest, req) ( \ - (KCF_PROV_DIGEST_OPS(pd) && KCF_PROV_DIGEST_OPS(pd)->digest_atomic) ? \ - KCF_PROV_DIGEST_OPS(pd)->digest_atomic( \ - (pd)->pd_prov_handle, session, mech, data, digest, req) : \ + KCF_PROV_DIGEST_OPS(pd)->digest_init(ctx, mech) : \ CRYPTO_NOT_SUPPORTED) /* * Wrappers for crypto_cipher_ops(9S) entry points. */ -#define KCF_PROV_ENCRYPT_INIT(pd, ctx, mech, key, template, req) ( \ +#define KCF_PROV_ENCRYPT_INIT(pd, ctx, mech, key, template) ( \ (KCF_PROV_CIPHER_OPS(pd) && KCF_PROV_CIPHER_OPS(pd)->encrypt_init) ? \ - KCF_PROV_CIPHER_OPS(pd)->encrypt_init(ctx, mech, key, template, \ - req) : \ + KCF_PROV_CIPHER_OPS(pd)->encrypt_init(ctx, mech, key, template) : \ CRYPTO_NOT_SUPPORTED) -#define KCF_PROV_ENCRYPT(pd, ctx, plaintext, ciphertext, req) ( \ - (KCF_PROV_CIPHER_OPS(pd) && KCF_PROV_CIPHER_OPS(pd)->encrypt) ? \ - KCF_PROV_CIPHER_OPS(pd)->encrypt(ctx, plaintext, ciphertext, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_ENCRYPT_UPDATE(pd, ctx, plaintext, ciphertext, req) ( \ - (KCF_PROV_CIPHER_OPS(pd) && KCF_PROV_CIPHER_OPS(pd)->encrypt_update) ? \ - KCF_PROV_CIPHER_OPS(pd)->encrypt_update(ctx, plaintext, \ - ciphertext, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_ENCRYPT_FINAL(pd, ctx, ciphertext, req) ( \ - (KCF_PROV_CIPHER_OPS(pd) && KCF_PROV_CIPHER_OPS(pd)->encrypt_final) ? 
\ - KCF_PROV_CIPHER_OPS(pd)->encrypt_final(ctx, ciphertext, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_ENCRYPT_ATOMIC(pd, session, mech, key, plaintext, ciphertext, \ - template, req) ( \ +#define KCF_PROV_ENCRYPT_ATOMIC(pd, mech, key, plaintext, ciphertext, \ + template) ( \ (KCF_PROV_CIPHER_OPS(pd) && KCF_PROV_CIPHER_OPS(pd)->encrypt_atomic) ? \ KCF_PROV_CIPHER_OPS(pd)->encrypt_atomic( \ - (pd)->pd_prov_handle, session, mech, key, plaintext, ciphertext, \ - template, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_DECRYPT_INIT(pd, ctx, mech, key, template, req) ( \ - (KCF_PROV_CIPHER_OPS(pd) && KCF_PROV_CIPHER_OPS(pd)->decrypt_init) ? \ - KCF_PROV_CIPHER_OPS(pd)->decrypt_init(ctx, mech, key, template, \ - req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_DECRYPT(pd, ctx, ciphertext, plaintext, req) ( \ - (KCF_PROV_CIPHER_OPS(pd) && KCF_PROV_CIPHER_OPS(pd)->decrypt) ? \ - KCF_PROV_CIPHER_OPS(pd)->decrypt(ctx, ciphertext, plaintext, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_DECRYPT_UPDATE(pd, ctx, ciphertext, plaintext, req) ( \ - (KCF_PROV_CIPHER_OPS(pd) && KCF_PROV_CIPHER_OPS(pd)->decrypt_update) ? \ - KCF_PROV_CIPHER_OPS(pd)->decrypt_update(ctx, ciphertext, \ - plaintext, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_DECRYPT_FINAL(pd, ctx, plaintext, req) ( \ - (KCF_PROV_CIPHER_OPS(pd) && KCF_PROV_CIPHER_OPS(pd)->decrypt_final) ? \ - KCF_PROV_CIPHER_OPS(pd)->decrypt_final(ctx, plaintext, req) : \ + mech, key, plaintext, ciphertext, template) : \ CRYPTO_NOT_SUPPORTED) -#define KCF_PROV_DECRYPT_ATOMIC(pd, session, mech, key, ciphertext, plaintext, \ - template, req) ( \ +#define KCF_PROV_DECRYPT_ATOMIC(pd, mech, key, ciphertext, plaintext, \ + template) ( \ (KCF_PROV_CIPHER_OPS(pd) && KCF_PROV_CIPHER_OPS(pd)->decrypt_atomic) ? 
\ KCF_PROV_CIPHER_OPS(pd)->decrypt_atomic( \ - (pd)->pd_prov_handle, session, mech, key, ciphertext, plaintext, \ - template, req) : \ + mech, key, ciphertext, plaintext, template) : \ CRYPTO_NOT_SUPPORTED) /* * Wrappers for crypto_mac_ops(9S) entry points. */ -#define KCF_PROV_MAC_INIT(pd, ctx, mech, key, template, req) ( \ +#define KCF_PROV_MAC_INIT(pd, ctx, mech, key, template) ( \ (KCF_PROV_MAC_OPS(pd) && KCF_PROV_MAC_OPS(pd)->mac_init) ? \ - KCF_PROV_MAC_OPS(pd)->mac_init(ctx, mech, key, template, req) \ + KCF_PROV_MAC_OPS(pd)->mac_init(ctx, mech, key, template) \ : CRYPTO_NOT_SUPPORTED) /* * The _ (underscore) in _mac is needed to avoid replacing the * function mac(). */ -#define KCF_PROV_MAC(pd, ctx, data, _mac, req) ( \ - (KCF_PROV_MAC_OPS(pd) && KCF_PROV_MAC_OPS(pd)->mac) ? \ - KCF_PROV_MAC_OPS(pd)->mac(ctx, data, _mac, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_MAC_UPDATE(pd, ctx, data, req) ( \ +#define KCF_PROV_MAC_UPDATE(pd, ctx, data) ( \ (KCF_PROV_MAC_OPS(pd) && KCF_PROV_MAC_OPS(pd)->mac_update) ? \ - KCF_PROV_MAC_OPS(pd)->mac_update(ctx, data, req) : \ + KCF_PROV_MAC_OPS(pd)->mac_update(ctx, data) : \ CRYPTO_NOT_SUPPORTED) -#define KCF_PROV_MAC_FINAL(pd, ctx, mac, req) ( \ +#define KCF_PROV_MAC_FINAL(pd, ctx, mac) ( \ (KCF_PROV_MAC_OPS(pd) && KCF_PROV_MAC_OPS(pd)->mac_final) ? \ - KCF_PROV_MAC_OPS(pd)->mac_final(ctx, mac, req) : \ + KCF_PROV_MAC_OPS(pd)->mac_final(ctx, mac) : \ CRYPTO_NOT_SUPPORTED) -#define KCF_PROV_MAC_ATOMIC(pd, session, mech, key, data, mac, template, \ - req) ( \ +#define KCF_PROV_MAC_ATOMIC(pd, mech, key, data, mac, template) ( \ (KCF_PROV_MAC_OPS(pd) && KCF_PROV_MAC_OPS(pd)->mac_atomic) ? \ KCF_PROV_MAC_OPS(pd)->mac_atomic( \ - (pd)->pd_prov_handle, session, mech, key, data, mac, template, \ - req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_MAC_VERIFY_ATOMIC(pd, session, mech, key, data, mac, \ - template, req) ( \ - (KCF_PROV_MAC_OPS(pd) && KCF_PROV_MAC_OPS(pd)->mac_verify_atomic) ? 
\ - KCF_PROV_MAC_OPS(pd)->mac_verify_atomic( \ - (pd)->pd_prov_handle, session, mech, key, data, mac, template, \ - req) : \ - CRYPTO_NOT_SUPPORTED) - -/* - * Wrappers for crypto_sign_ops(9S) entry points. - */ - -#define KCF_PROV_SIGN_INIT(pd, ctx, mech, key, template, req) ( \ - (KCF_PROV_SIGN_OPS(pd) && KCF_PROV_SIGN_OPS(pd)->sign_init) ? \ - KCF_PROV_SIGN_OPS(pd)->sign_init( \ - ctx, mech, key, template, req) : CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_SIGN(pd, ctx, data, sig, req) ( \ - (KCF_PROV_SIGN_OPS(pd) && KCF_PROV_SIGN_OPS(pd)->sign) ? \ - KCF_PROV_SIGN_OPS(pd)->sign(ctx, data, sig, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_SIGN_UPDATE(pd, ctx, data, req) ( \ - (KCF_PROV_SIGN_OPS(pd) && KCF_PROV_SIGN_OPS(pd)->sign_update) ? \ - KCF_PROV_SIGN_OPS(pd)->sign_update(ctx, data, req) : \ + mech, key, data, mac, template) : \ CRYPTO_NOT_SUPPORTED) -#define KCF_PROV_SIGN_FINAL(pd, ctx, sig, req) ( \ - (KCF_PROV_SIGN_OPS(pd) && KCF_PROV_SIGN_OPS(pd)->sign_final) ? \ - KCF_PROV_SIGN_OPS(pd)->sign_final(ctx, sig, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_SIGN_ATOMIC(pd, session, mech, key, data, template, \ - sig, req) ( \ - (KCF_PROV_SIGN_OPS(pd) && KCF_PROV_SIGN_OPS(pd)->sign_atomic) ? \ - KCF_PROV_SIGN_OPS(pd)->sign_atomic( \ - (pd)->pd_prov_handle, session, mech, key, data, sig, template, \ - req) : CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_SIGN_RECOVER_INIT(pd, ctx, mech, key, template, \ - req) ( \ - (KCF_PROV_SIGN_OPS(pd) && KCF_PROV_SIGN_OPS(pd)->sign_recover_init) ? \ - KCF_PROV_SIGN_OPS(pd)->sign_recover_init(ctx, mech, key, template, \ - req) : CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_SIGN_RECOVER(pd, ctx, data, sig, req) ( \ - (KCF_PROV_SIGN_OPS(pd) && KCF_PROV_SIGN_OPS(pd)->sign_recover) ? 
\ - KCF_PROV_SIGN_OPS(pd)->sign_recover(ctx, data, sig, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_SIGN_RECOVER_ATOMIC(pd, session, mech, key, data, template, \ - sig, req) ( \ - (KCF_PROV_SIGN_OPS(pd) && \ - KCF_PROV_SIGN_OPS(pd)->sign_recover_atomic) ? \ - KCF_PROV_SIGN_OPS(pd)->sign_recover_atomic( \ - (pd)->pd_prov_handle, session, mech, key, data, sig, template, \ - req) : CRYPTO_NOT_SUPPORTED) - /* - * Wrappers for crypto_verify_ops(9S) entry points. - */ - -#define KCF_PROV_VERIFY_INIT(pd, ctx, mech, key, template, req) ( \ - (KCF_PROV_VERIFY_OPS(pd) && KCF_PROV_VERIFY_OPS(pd)->verify_init) ? \ - KCF_PROV_VERIFY_OPS(pd)->verify_init(ctx, mech, key, template, \ - req) : CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_VERIFY(pd, ctx, data, sig, req) ( \ - (KCF_PROV_VERIFY_OPS(pd) && KCF_PROV_VERIFY_OPS(pd)->do_verify) ? \ - KCF_PROV_VERIFY_OPS(pd)->do_verify(ctx, data, sig, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_VERIFY_UPDATE(pd, ctx, data, req) ( \ - (KCF_PROV_VERIFY_OPS(pd) && KCF_PROV_VERIFY_OPS(pd)->verify_update) ? \ - KCF_PROV_VERIFY_OPS(pd)->verify_update(ctx, data, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_VERIFY_FINAL(pd, ctx, sig, req) ( \ - (KCF_PROV_VERIFY_OPS(pd) && KCF_PROV_VERIFY_OPS(pd)->verify_final) ? \ - KCF_PROV_VERIFY_OPS(pd)->verify_final(ctx, sig, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_VERIFY_ATOMIC(pd, session, mech, key, data, template, sig, \ - req) ( \ - (KCF_PROV_VERIFY_OPS(pd) && KCF_PROV_VERIFY_OPS(pd)->verify_atomic) ? \ - KCF_PROV_VERIFY_OPS(pd)->verify_atomic( \ - (pd)->pd_prov_handle, session, mech, key, data, sig, template, \ - req) : CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_VERIFY_RECOVER_INIT(pd, ctx, mech, key, template, \ - req) ( \ - (KCF_PROV_VERIFY_OPS(pd) && \ - KCF_PROV_VERIFY_OPS(pd)->verify_recover_init) ? 
\ - KCF_PROV_VERIFY_OPS(pd)->verify_recover_init(ctx, mech, key, \ - template, req) : CRYPTO_NOT_SUPPORTED) - -/* verify_recover() CSPI routine has different argument order than verify() */ -#define KCF_PROV_VERIFY_RECOVER(pd, ctx, sig, data, req) ( \ - (KCF_PROV_VERIFY_OPS(pd) && KCF_PROV_VERIFY_OPS(pd)->verify_recover) ? \ - KCF_PROV_VERIFY_OPS(pd)->verify_recover(ctx, sig, data, req) : \ - CRYPTO_NOT_SUPPORTED) - -/* - * verify_recover_atomic() CSPI routine has different argument order - * than verify_atomic(). - */ -#define KCF_PROV_VERIFY_RECOVER_ATOMIC(pd, session, mech, key, sig, \ - template, data, req) ( \ - (KCF_PROV_VERIFY_OPS(pd) && \ - KCF_PROV_VERIFY_OPS(pd)->verify_recover_atomic) ? \ - KCF_PROV_VERIFY_OPS(pd)->verify_recover_atomic( \ - (pd)->pd_prov_handle, session, mech, key, sig, data, template, \ - req) : CRYPTO_NOT_SUPPORTED) - -/* - * Wrappers for crypto_dual_ops(9S) entry points. - */ - -#define KCF_PROV_DIGEST_ENCRYPT_UPDATE(digest_ctx, encrypt_ctx, plaintext, \ - ciphertext, req) ( \ - (KCF_PROV_DUAL_OPS(pd) && \ - KCF_PROV_DUAL_OPS(pd)->digest_encrypt_update) ? \ - KCF_PROV_DUAL_OPS(pd)->digest_encrypt_update( \ - digest_ctx, encrypt_ctx, plaintext, ciphertext, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_DECRYPT_DIGEST_UPDATE(decrypt_ctx, digest_ctx, ciphertext, \ - plaintext, req) ( \ - (KCF_PROV_DUAL_OPS(pd) && \ - KCF_PROV_DUAL_OPS(pd)->decrypt_digest_update) ? \ - KCF_PROV_DUAL_OPS(pd)->decrypt_digest_update( \ - decrypt_ctx, digest_ctx, ciphertext, plaintext, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_SIGN_ENCRYPT_UPDATE(sign_ctx, encrypt_ctx, plaintext, \ - ciphertext, req) ( \ - (KCF_PROV_DUAL_OPS(pd) && \ - KCF_PROV_DUAL_OPS(pd)->sign_encrypt_update) ? 
\ - KCF_PROV_DUAL_OPS(pd)->sign_encrypt_update( \ - sign_ctx, encrypt_ctx, plaintext, ciphertext, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_DECRYPT_VERIFY_UPDATE(decrypt_ctx, verify_ctx, ciphertext, \ - plaintext, req) ( \ - (KCF_PROV_DUAL_OPS(pd) && \ - KCF_PROV_DUAL_OPS(pd)->decrypt_verify_update) ? \ - KCF_PROV_DUAL_OPS(pd)->decrypt_verify_update( \ - decrypt_ctx, verify_ctx, ciphertext, plaintext, req) : \ - CRYPTO_NOT_SUPPORTED) - -/* - * Wrappers for crypto_dual_cipher_mac_ops(9S) entry points. - */ - -#define KCF_PROV_ENCRYPT_MAC_INIT(pd, ctx, encr_mech, encr_key, mac_mech, \ - mac_key, encr_ctx_template, mac_ctx_template, req) ( \ - (KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) && \ - KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->encrypt_mac_init) ? \ - KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->encrypt_mac_init( \ - ctx, encr_mech, encr_key, mac_mech, mac_key, encr_ctx_template, \ - mac_ctx_template, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_ENCRYPT_MAC(pd, ctx, plaintext, ciphertext, mac, req) ( \ - (KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) && \ - KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->encrypt_mac) ? \ - KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->encrypt_mac( \ - ctx, plaintext, ciphertext, mac, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_ENCRYPT_MAC_UPDATE(pd, ctx, plaintext, ciphertext, req) ( \ - (KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) && \ - KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->encrypt_mac_update) ? \ - KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->encrypt_mac_update( \ - ctx, plaintext, ciphertext, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_ENCRYPT_MAC_FINAL(pd, ctx, ciphertext, mac, req) ( \ - (KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) && \ - KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->encrypt_mac_final) ? 
\ - KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->encrypt_mac_final( \ - ctx, ciphertext, mac, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_ENCRYPT_MAC_ATOMIC(pd, session, encr_mech, encr_key, \ - mac_mech, mac_key, plaintext, ciphertext, mac, \ - encr_ctx_template, mac_ctx_template, req) ( \ - (KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) && \ - KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->encrypt_mac_atomic) ? \ - KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->encrypt_mac_atomic( \ - (pd)->pd_prov_handle, session, encr_mech, encr_key, \ - mac_mech, mac_key, plaintext, ciphertext, mac, \ - encr_ctx_template, mac_ctx_template, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_MAC_DECRYPT_INIT(pd, ctx, mac_mech, mac_key, decr_mech, \ - decr_key, mac_ctx_template, decr_ctx_template, req) ( \ - (KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) && \ - KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_decrypt_init) ? \ - KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_decrypt_init( \ - ctx, mac_mech, mac_key, decr_mech, decr_key, mac_ctx_template, \ - decr_ctx_template, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_MAC_DECRYPT(pd, ctx, ciphertext, mac, plaintext, req) ( \ - (KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) && \ - KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_decrypt) ? \ - KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_decrypt( \ - ctx, ciphertext, mac, plaintext, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_MAC_DECRYPT_UPDATE(pd, ctx, ciphertext, plaintext, req) ( \ - (KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) && \ - KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_decrypt_update) ? \ - KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_decrypt_update( \ - ctx, ciphertext, plaintext, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_MAC_DECRYPT_FINAL(pd, ctx, mac, plaintext, req) ( \ - (KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) && \ - KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_decrypt_final) ? 
\ - KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_decrypt_final( \ - ctx, mac, plaintext, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_MAC_DECRYPT_ATOMIC(pd, session, mac_mech, mac_key, \ - decr_mech, decr_key, ciphertext, mac, plaintext, \ - mac_ctx_template, decr_ctx_template, req) ( \ - (KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) && \ - KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_decrypt_atomic) ? \ - KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_decrypt_atomic( \ - (pd)->pd_prov_handle, session, mac_mech, mac_key, \ - decr_mech, decr_key, ciphertext, mac, plaintext, \ - mac_ctx_template, decr_ctx_template, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_MAC_VERIFY_DECRYPT_ATOMIC(pd, session, mac_mech, mac_key, \ - decr_mech, decr_key, ciphertext, mac, plaintext, \ - mac_ctx_template, decr_ctx_template, req) ( \ - (KCF_PROV_DUAL_CIPHER_MAC_OPS(pd) && \ - KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_verify_decrypt_atomic \ - != NULL) ? \ - KCF_PROV_DUAL_CIPHER_MAC_OPS(pd)->mac_verify_decrypt_atomic( \ - (pd)->pd_prov_handle, session, mac_mech, mac_key, \ - decr_mech, decr_key, ciphertext, mac, plaintext, \ - mac_ctx_template, decr_ctx_template, req) : \ - CRYPTO_NOT_SUPPORTED) - -/* - * Wrappers for crypto_random_number_ops(9S) entry points. - */ - -#define KCF_PROV_SEED_RANDOM(pd, session, buf, len, est, flags, req) ( \ - (KCF_PROV_RANDOM_OPS(pd) && KCF_PROV_RANDOM_OPS(pd)->seed_random) ? \ - KCF_PROV_RANDOM_OPS(pd)->seed_random((pd)->pd_prov_handle, \ - session, buf, len, est, flags, req) : CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_GENERATE_RANDOM(pd, session, buf, len, req) ( \ - (KCF_PROV_RANDOM_OPS(pd) && \ - KCF_PROV_RANDOM_OPS(pd)->generate_random) ? \ - KCF_PROV_RANDOM_OPS(pd)->generate_random((pd)->pd_prov_handle, \ - session, buf, len, req) : CRYPTO_NOT_SUPPORTED) - -/* - * Wrappers for crypto_session_ops(9S) entry points. - * - * ops_pd is the provider descriptor that supplies the ops_vector. - * pd is the descriptor that supplies the provider handle. 
- * Only session open/close needs two handles. - */ - -#define KCF_PROV_SESSION_OPEN(ops_pd, session, req, pd) ( \ - (KCF_PROV_SESSION_OPS(ops_pd) && \ - KCF_PROV_SESSION_OPS(ops_pd)->session_open) ? \ - KCF_PROV_SESSION_OPS(ops_pd)->session_open((pd)->pd_prov_handle, \ - session, req) : CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_SESSION_CLOSE(ops_pd, session, req, pd) ( \ - (KCF_PROV_SESSION_OPS(ops_pd) && \ - KCF_PROV_SESSION_OPS(ops_pd)->session_close) ? \ - KCF_PROV_SESSION_OPS(ops_pd)->session_close((pd)->pd_prov_handle, \ - session, req) : CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_SESSION_LOGIN(pd, session, user_type, pin, len, req) ( \ - (KCF_PROV_SESSION_OPS(pd) && \ - KCF_PROV_SESSION_OPS(pd)->session_login) ? \ - KCF_PROV_SESSION_OPS(pd)->session_login((pd)->pd_prov_handle, \ - session, user_type, pin, len, req) : CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_SESSION_LOGOUT(pd, session, req) ( \ - (KCF_PROV_SESSION_OPS(pd) && \ - KCF_PROV_SESSION_OPS(pd)->session_logout) ? \ - KCF_PROV_SESSION_OPS(pd)->session_logout((pd)->pd_prov_handle, \ - session, req) : CRYPTO_NOT_SUPPORTED) - -/* - * Wrappers for crypto_object_ops(9S) entry points. - */ - -#define KCF_PROV_OBJECT_CREATE(pd, session, template, count, object, req) ( \ - (KCF_PROV_OBJECT_OPS(pd) && KCF_PROV_OBJECT_OPS(pd)->object_create) ? \ - KCF_PROV_OBJECT_OPS(pd)->object_create((pd)->pd_prov_handle, \ - session, template, count, object, req) : CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_OBJECT_COPY(pd, session, object, template, count, \ - new_object, req) ( \ - (KCF_PROV_OBJECT_OPS(pd) && KCF_PROV_OBJECT_OPS(pd)->object_copy) ? \ - KCF_PROV_OBJECT_OPS(pd)->object_copy((pd)->pd_prov_handle, \ - session, object, template, count, new_object, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_OBJECT_DESTROY(pd, session, object, req) ( \ - (KCF_PROV_OBJECT_OPS(pd) && KCF_PROV_OBJECT_OPS(pd)->object_destroy) ? 
\ - KCF_PROV_OBJECT_OPS(pd)->object_destroy((pd)->pd_prov_handle, \ - session, object, req) : CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_OBJECT_GET_SIZE(pd, session, object, size, req) ( \ - (KCF_PROV_OBJECT_OPS(pd) && \ - KCF_PROV_OBJECT_OPS(pd)->object_get_size) ? \ - KCF_PROV_OBJECT_OPS(pd)->object_get_size((pd)->pd_prov_handle, \ - session, object, size, req) : CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_OBJECT_GET_ATTRIBUTE_VALUE(pd, session, object, template, \ - count, req) ( \ - (KCF_PROV_OBJECT_OPS(pd) && \ - KCF_PROV_OBJECT_OPS(pd)->object_get_attribute_value) ? \ - KCF_PROV_OBJECT_OPS(pd)->object_get_attribute_value( \ - (pd)->pd_prov_handle, session, object, template, count, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_OBJECT_SET_ATTRIBUTE_VALUE(pd, session, object, template, \ - count, req) ( \ - (KCF_PROV_OBJECT_OPS(pd) && \ - KCF_PROV_OBJECT_OPS(pd)->object_set_attribute_value) ? \ - KCF_PROV_OBJECT_OPS(pd)->object_set_attribute_value( \ - (pd)->pd_prov_handle, session, object, template, count, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_OBJECT_FIND_INIT(pd, session, template, count, ppriv, \ - req) ( \ - (KCF_PROV_OBJECT_OPS(pd) && \ - KCF_PROV_OBJECT_OPS(pd)->object_find_init) ? \ - KCF_PROV_OBJECT_OPS(pd)->object_find_init((pd)->pd_prov_handle, \ - session, template, count, ppriv, req) : CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_OBJECT_FIND(pd, ppriv, objects, max_objects, object_count, \ - req) ( \ - (KCF_PROV_OBJECT_OPS(pd) && KCF_PROV_OBJECT_OPS(pd)->object_find) ? \ - KCF_PROV_OBJECT_OPS(pd)->object_find( \ - (pd)->pd_prov_handle, ppriv, objects, max_objects, object_count, \ - req) : CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_OBJECT_FIND_FINAL(pd, ppriv, req) ( \ - (KCF_PROV_OBJECT_OPS(pd) && \ - KCF_PROV_OBJECT_OPS(pd)->object_find_final) ? \ - KCF_PROV_OBJECT_OPS(pd)->object_find_final( \ - (pd)->pd_prov_handle, ppriv, req) : CRYPTO_NOT_SUPPORTED) - -/* - * Wrappers for crypto_key_ops(9S) entry points. 
+ * Wrappers for crypto_ctx_ops(9S) entry points. */ -#define KCF_PROV_KEY_GENERATE(pd, session, mech, template, count, object, \ - req) ( \ - (KCF_PROV_KEY_OPS(pd) && KCF_PROV_KEY_OPS(pd)->key_generate) ? \ - KCF_PROV_KEY_OPS(pd)->key_generate((pd)->pd_prov_handle, \ - session, mech, template, count, object, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_KEY_GENERATE_PAIR(pd, session, mech, pub_template, \ - pub_count, priv_template, priv_count, pub_key, priv_key, req) ( \ - (KCF_PROV_KEY_OPS(pd) && KCF_PROV_KEY_OPS(pd)->key_generate_pair) ? \ - KCF_PROV_KEY_OPS(pd)->key_generate_pair((pd)->pd_prov_handle, \ - session, mech, pub_template, pub_count, priv_template, \ - priv_count, pub_key, priv_key, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_KEY_WRAP(pd, session, mech, wrapping_key, key, wrapped_key, \ - wrapped_key_len, req) ( \ - (KCF_PROV_KEY_OPS(pd) && KCF_PROV_KEY_OPS(pd)->key_wrap) ? \ - KCF_PROV_KEY_OPS(pd)->key_wrap((pd)->pd_prov_handle, \ - session, mech, wrapping_key, key, wrapped_key, wrapped_key_len, \ - req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_KEY_UNWRAP(pd, session, mech, unwrapping_key, wrapped_key, \ - wrapped_key_len, template, count, key, req) ( \ - (KCF_PROV_KEY_OPS(pd) && KCF_PROV_KEY_OPS(pd)->key_unwrap) ? \ - KCF_PROV_KEY_OPS(pd)->key_unwrap((pd)->pd_prov_handle, \ - session, mech, unwrapping_key, wrapped_key, wrapped_key_len, \ - template, count, key, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_KEY_DERIVE(pd, session, mech, base_key, template, count, \ - key, req) ( \ - (KCF_PROV_KEY_OPS(pd) && KCF_PROV_KEY_OPS(pd)->key_derive) ? \ - KCF_PROV_KEY_OPS(pd)->key_derive((pd)->pd_prov_handle, \ - session, mech, base_key, template, count, key, req) : \ - CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_KEY_CHECK(pd, mech, key) ( \ - (KCF_PROV_KEY_OPS(pd) && KCF_PROV_KEY_OPS(pd)->key_check) ? 
\ - KCF_PROV_KEY_OPS(pd)->key_check((pd)->pd_prov_handle, mech, key) : \ +#define KCF_PROV_CREATE_CTX_TEMPLATE(pd, mech, key, template, size) ( \ + (KCF_PROV_CTX_OPS(pd) && KCF_PROV_CTX_OPS(pd)->create_ctx_template) ? \ + KCF_PROV_CTX_OPS(pd)->create_ctx_template( \ + mech, key, template, size) : \ CRYPTO_NOT_SUPPORTED) -/* - * Wrappers for crypto_provider_management_ops(9S) entry points. - * - * ops_pd is the provider descriptor that supplies the ops_vector. - * pd is the descriptor that supplies the provider handle. - * Only ext_info needs two handles. - */ - -#define KCF_PROV_EXT_INFO(ops_pd, provext_info, req, pd) ( \ - (KCF_PROV_PROVIDER_OPS(ops_pd) && \ - KCF_PROV_PROVIDER_OPS(ops_pd)->ext_info) ? \ - KCF_PROV_PROVIDER_OPS(ops_pd)->ext_info((pd)->pd_prov_handle, \ - provext_info, req) : CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_INIT_TOKEN(pd, pin, pin_len, label, req) ( \ - (KCF_PROV_PROVIDER_OPS(pd) && KCF_PROV_PROVIDER_OPS(pd)->init_token) ? \ - KCF_PROV_PROVIDER_OPS(pd)->init_token((pd)->pd_prov_handle, \ - pin, pin_len, label, req) : CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_INIT_PIN(pd, session, pin, pin_len, req) ( \ - (KCF_PROV_PROVIDER_OPS(pd) && KCF_PROV_PROVIDER_OPS(pd)->init_pin) ? \ - KCF_PROV_PROVIDER_OPS(pd)->init_pin((pd)->pd_prov_handle, \ - session, pin, pin_len, req) : CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_SET_PIN(pd, session, old_pin, old_len, new_pin, new_len, \ - req) ( \ - (KCF_PROV_PROVIDER_OPS(pd) && KCF_PROV_PROVIDER_OPS(pd)->set_pin) ? \ - KCF_PROV_PROVIDER_OPS(pd)->set_pin((pd)->pd_prov_handle, \ - session, old_pin, old_len, new_pin, new_len, req) : \ - CRYPTO_NOT_SUPPORTED) - -/* - * Wrappers for crypto_nostore_key_ops(9S) entry points. - */ - -#define KCF_PROV_NOSTORE_KEY_GENERATE(pd, session, mech, template, count, \ - out_template, out_count, req) ( \ - (KCF_PROV_NOSTORE_KEY_OPS(pd) && \ - KCF_PROV_NOSTORE_KEY_OPS(pd)->nostore_key_generate) ? 
\ - KCF_PROV_NOSTORE_KEY_OPS(pd)->nostore_key_generate( \ - (pd)->pd_prov_handle, session, mech, template, count, \ - out_template, out_count, req) : CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_NOSTORE_KEY_GENERATE_PAIR(pd, session, mech, pub_template, \ - pub_count, priv_template, priv_count, out_pub_template, \ - out_pub_count, out_priv_template, out_priv_count, req) ( \ - (KCF_PROV_NOSTORE_KEY_OPS(pd) && \ - KCF_PROV_NOSTORE_KEY_OPS(pd)->nostore_key_generate_pair) ? \ - KCF_PROV_NOSTORE_KEY_OPS(pd)->nostore_key_generate_pair( \ - (pd)->pd_prov_handle, session, mech, pub_template, pub_count, \ - priv_template, priv_count, out_pub_template, out_pub_count, \ - out_priv_template, out_priv_count, req) : CRYPTO_NOT_SUPPORTED) - -#define KCF_PROV_NOSTORE_KEY_DERIVE(pd, session, mech, base_key, template, \ - count, out_template, out_count, req) ( \ - (KCF_PROV_NOSTORE_KEY_OPS(pd) && \ - KCF_PROV_NOSTORE_KEY_OPS(pd)->nostore_key_derive) ? \ - KCF_PROV_NOSTORE_KEY_OPS(pd)->nostore_key_derive( \ - (pd)->pd_prov_handle, session, mech, base_key, template, count, \ - out_template, out_count, req) : CRYPTO_NOT_SUPPORTED) - -/* - * The following routines are exported by the kcf module (/kernel/misc/kcf) - * to the crypto and cryptoadmin modules. 
- */ - -/* Digest/mac/cipher entry points that take a provider descriptor and session */ -extern int crypto_digest_single(crypto_context_t, crypto_data_t *, - crypto_data_t *, crypto_call_req_t *); - -extern int crypto_mac_single(crypto_context_t, crypto_data_t *, - crypto_data_t *, crypto_call_req_t *); - -extern int crypto_encrypt_single(crypto_context_t, crypto_data_t *, - crypto_data_t *, crypto_call_req_t *); - -extern int crypto_decrypt_single(crypto_context_t, crypto_data_t *, - crypto_data_t *, crypto_call_req_t *); - - -/* Other private digest/mac/cipher entry points not exported through k-API */ -extern int crypto_digest_key_prov(crypto_context_t, crypto_key_t *, - crypto_call_req_t *); - -/* Private sign entry points exported by KCF */ -extern int crypto_sign_single(crypto_context_t, crypto_data_t *, - crypto_data_t *, crypto_call_req_t *); - -extern int crypto_sign_recover_single(crypto_context_t, crypto_data_t *, - crypto_data_t *, crypto_call_req_t *); - -/* Private verify entry points exported by KCF */ -extern int crypto_verify_single(crypto_context_t, crypto_data_t *, - crypto_data_t *, crypto_call_req_t *); - -extern int crypto_verify_recover_single(crypto_context_t, crypto_data_t *, - crypto_data_t *, crypto_call_req_t *); - -/* Private dual operations entry points exported by KCF */ -extern int crypto_digest_encrypt_update(crypto_context_t, crypto_context_t, - crypto_data_t *, crypto_data_t *, crypto_call_req_t *); -extern int crypto_decrypt_digest_update(crypto_context_t, crypto_context_t, - crypto_data_t *, crypto_data_t *, crypto_call_req_t *); -extern int crypto_sign_encrypt_update(crypto_context_t, crypto_context_t, - crypto_data_t *, crypto_data_t *, crypto_call_req_t *); -extern int crypto_decrypt_verify_update(crypto_context_t, crypto_context_t, - crypto_data_t *, crypto_data_t *, crypto_call_req_t *); - -/* Random Number Generation */ -int crypto_seed_random(crypto_provider_handle_t provider, uchar_t *buf, - size_t len, 
crypto_call_req_t *req); -int crypto_generate_random(crypto_provider_handle_t provider, uchar_t *buf, - size_t len, crypto_call_req_t *req); - -/* Provider Management */ -int crypto_get_provider_info(crypto_provider_id_t id, - crypto_provider_info_t **info, crypto_call_req_t *req); -int crypto_get_provider_mechanisms(crypto_minor_t *, crypto_provider_id_t id, - uint_t *count, crypto_mech_name_t **list); -int crypto_init_token(crypto_provider_handle_t provider, char *pin, - size_t pin_len, char *label, crypto_call_req_t *); -int crypto_init_pin(crypto_provider_handle_t provider, char *pin, - size_t pin_len, crypto_call_req_t *req); -int crypto_set_pin(crypto_provider_handle_t provider, char *old_pin, - size_t old_len, char *new_pin, size_t new_len, crypto_call_req_t *req); -void crypto_free_provider_list(crypto_provider_entry_t *list, uint_t count); -void crypto_free_provider_info(crypto_provider_info_t *info); +#define KCF_PROV_FREE_CONTEXT(pd, ctx) ( \ + (KCF_PROV_CTX_OPS(pd) && KCF_PROV_CTX_OPS(pd)->free_context) ? 
\ + KCF_PROV_CTX_OPS(pd)->free_context(ctx) : CRYPTO_NOT_SUPPORTED) -/* Administrative */ -int crypto_get_dev_list(uint_t *count, crypto_dev_list_entry_t **list); -int crypto_get_soft_list(uint_t *count, char **list, size_t *len); -int crypto_get_dev_info(char *name, uint_t instance, uint_t *count, - crypto_mech_name_t **list); -int crypto_get_soft_info(caddr_t name, uint_t *count, - crypto_mech_name_t **list); -int crypto_load_dev_disabled(char *name, uint_t instance, uint_t count, - crypto_mech_name_t *list); -int crypto_load_soft_disabled(caddr_t name, uint_t count, - crypto_mech_name_t *list); -int crypto_unload_soft_module(caddr_t path); -int crypto_load_soft_config(caddr_t name, uint_t count, - crypto_mech_name_t *list); -int crypto_load_door(uint_t did); -void crypto_free_mech_list(crypto_mech_name_t *list, uint_t count); -void crypto_free_dev_list(crypto_dev_list_entry_t *list, uint_t count); /* Miscellaneous */ -int crypto_get_mechanism_number(caddr_t name, crypto_mech_type_t *number); -int crypto_get_function_list(crypto_provider_id_t id, - crypto_function_list_t **list, int kmflag); -void crypto_free_function_list(crypto_function_list_t *list); -int crypto_build_permitted_mech_names(kcf_provider_desc_t *, - crypto_mech_name_t **, uint_t *, int); extern void kcf_destroy_mech_tabs(void); extern void kcf_init_mech_tabs(void); extern int kcf_add_mech_provider(short, kcf_provider_desc_t *, kcf_prov_mech_desc_t **); -extern void kcf_remove_mech_provider(char *, kcf_provider_desc_t *); +extern void kcf_remove_mech_provider(const char *, kcf_provider_desc_t *); extern int kcf_get_mech_entry(crypto_mech_type_t, kcf_mech_entry_t **); -extern kcf_provider_desc_t *kcf_alloc_provider_desc(crypto_provider_info_t *); +extern kcf_provider_desc_t *kcf_alloc_provider_desc(void); extern void kcf_provider_zero_refcnt(kcf_provider_desc_t *); extern void kcf_free_provider_desc(kcf_provider_desc_t *); -extern void kcf_soft_config_init(void); -extern int 
get_sw_provider_for_mech(crypto_mech_name_t, char **); -extern crypto_mech_type_t crypto_mech2id_common(char *, boolean_t); extern void undo_register_provider(kcf_provider_desc_t *, boolean_t); -extern void redo_register_provider(kcf_provider_desc_t *); -extern void kcf_rnd_init(void); -extern boolean_t kcf_rngprov_check(void); -extern int kcf_rnd_get_pseudo_bytes(uint8_t *, size_t); -extern int kcf_rnd_get_bytes(uint8_t *, size_t, boolean_t, boolean_t); -extern int random_add_pseudo_entropy(uint8_t *, size_t, uint_t); -extern void kcf_rnd_schedule_timeout(boolean_t); -extern int crypto_uio_data(crypto_data_t *, uchar_t *, int, cmd_type_t, - void *, void (*update)(void)); -extern int crypto_mblk_data(crypto_data_t *, uchar_t *, int, cmd_type_t, - void *, void (*update)(void)); extern int crypto_put_output_data(uchar_t *, crypto_data_t *, int); -extern int crypto_get_input_data(crypto_data_t *, uchar_t **, uchar_t *); -extern int crypto_copy_key_to_ctx(crypto_key_t *, crypto_key_t **, size_t *, - int kmflag); -extern int crypto_digest_data(crypto_data_t *, void *, uchar_t *, - void (*update)(void), void (*final)(void), uchar_t); extern int crypto_update_iov(void *, crypto_data_t *, crypto_data_t *, - int (*cipher)(void *, caddr_t, size_t, crypto_data_t *), - void (*copy_block)(uint8_t *, uint64_t *)); + int (*cipher)(void *, caddr_t, size_t, crypto_data_t *)); extern int crypto_update_uio(void *, crypto_data_t *, crypto_data_t *, - int (*cipher)(void *, caddr_t, size_t, crypto_data_t *), - void (*copy_block)(uint8_t *, uint64_t *)); -extern int crypto_update_mp(void *, crypto_data_t *, crypto_data_t *, - int (*cipher)(void *, caddr_t, size_t, crypto_data_t *), - void (*copy_block)(uint8_t *, uint64_t *)); -extern int crypto_get_key_attr(crypto_key_t *, crypto_attr_type_t, uchar_t **, - ssize_t *); + int (*cipher)(void *, caddr_t, size_t, crypto_data_t *)); /* Access to the provider's table */ extern void kcf_prov_tab_destroy(void); extern void 
kcf_prov_tab_init(void); extern int kcf_prov_tab_add_provider(kcf_provider_desc_t *); extern int kcf_prov_tab_rem_provider(crypto_provider_id_t); -extern kcf_provider_desc_t *kcf_prov_tab_lookup_by_name(char *); -extern kcf_provider_desc_t *kcf_prov_tab_lookup_by_dev(char *, uint_t); -extern int kcf_get_hw_prov_tab(uint_t *, kcf_provider_desc_t ***, int, - char *, uint_t, boolean_t); -extern int kcf_get_slot_list(uint_t *, kcf_provider_desc_t ***, boolean_t); -extern void kcf_free_provider_tab(uint_t, kcf_provider_desc_t **); extern kcf_provider_desc_t *kcf_prov_tab_lookup(crypto_provider_id_t); extern int kcf_get_sw_prov(crypto_mech_type_t, kcf_provider_desc_t **, kcf_mech_entry_t **, boolean_t); -/* Access to the policy table */ -extern boolean_t is_mech_disabled(kcf_provider_desc_t *, crypto_mech_name_t); -extern boolean_t is_mech_disabled_byname(crypto_provider_type_t, char *, - uint_t, crypto_mech_name_t); -extern void kcf_policy_tab_init(void); -extern void kcf_policy_free_desc(kcf_policy_desc_t *); -extern void kcf_policy_remove_by_name(char *, uint_t *, crypto_mech_name_t **); -extern void kcf_policy_remove_by_dev(char *, uint_t, uint_t *, - crypto_mech_name_t **); -extern kcf_policy_desc_t *kcf_policy_lookup_by_name(char *); -extern kcf_policy_desc_t *kcf_policy_lookup_by_dev(char *, uint_t); -extern int kcf_policy_load_soft_disabled(char *, uint_t, crypto_mech_name_t *, - uint_t *, crypto_mech_name_t **); -extern int kcf_policy_load_dev_disabled(char *, uint_t, uint_t, - crypto_mech_name_t *, uint_t *, crypto_mech_name_t **); -extern boolean_t in_soft_config_list(char *); - #ifdef __cplusplus } diff --git a/sys/contrib/openzfs/module/icp/include/sys/crypto/ioctl.h b/sys/contrib/openzfs/module/icp/include/sys/crypto/ioctl.h deleted file mode 100644 index 6e371e343945..000000000000 --- a/sys/contrib/openzfs/module/icp/include/sys/crypto/ioctl.h +++ /dev/null @@ -1,1480 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the 
terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SYS_CRYPTO_IOCTL_H -#define _SYS_CRYPTO_IOCTL_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include <sys/zfs_context.h> -#include <sys/crypto/api.h> -#include <sys/crypto/spi.h> -#include <sys/crypto/common.h> - -#define CRYPTO_MAX_ATTRIBUTE_COUNT 128 - -#define CRYPTO_IOFLAGS_RW_SESSION 0x00000001 - -#define CRYPTO(x) (('y' << 8) | (x)) - -#define MAX_NUM_THRESHOLD 7 - -/* the PKCS11 Mechanisms */ -#define CKM_RC4 0x00000111 -#define CKM_DES3_ECB 0x00000132 -#define CKM_DES3_CBC 0x00000133 -#define CKM_MD5 0x00000210 -#define CKM_SHA_1 0x00000220 -#define CKM_AES_ECB 0x00001081 -#define CKM_AES_CBC 0x00001082 - -/* - * General Purpose Ioctls - */ - -typedef struct fl_mechs_threshold { - int mech_type; - uint32_t mech_threshold; -} fl_mechs_threshold_t; - -typedef struct crypto_function_list { - boolean_t fl_digest_init; - boolean_t fl_digest; - boolean_t fl_digest_update; - boolean_t fl_digest_key; - boolean_t fl_digest_final; - - boolean_t fl_encrypt_init; - boolean_t fl_encrypt; - boolean_t fl_encrypt_update; - boolean_t fl_encrypt_final; - - boolean_t fl_decrypt_init; - boolean_t 
fl_decrypt; - boolean_t fl_decrypt_update; - boolean_t fl_decrypt_final; - - boolean_t fl_mac_init; - boolean_t fl_mac; - boolean_t fl_mac_update; - boolean_t fl_mac_final; - - boolean_t fl_sign_init; - boolean_t fl_sign; - boolean_t fl_sign_update; - boolean_t fl_sign_final; - boolean_t fl_sign_recover_init; - boolean_t fl_sign_recover; - - boolean_t fl_verify_init; - boolean_t fl_verify; - boolean_t fl_verify_update; - boolean_t fl_verify_final; - boolean_t fl_verify_recover_init; - boolean_t fl_verify_recover; - - boolean_t fl_digest_encrypt_update; - boolean_t fl_decrypt_digest_update; - boolean_t fl_sign_encrypt_update; - boolean_t fl_decrypt_verify_update; - - boolean_t fl_seed_random; - boolean_t fl_generate_random; - - boolean_t fl_session_open; - boolean_t fl_session_close; - boolean_t fl_session_login; - boolean_t fl_session_logout; - - boolean_t fl_object_create; - boolean_t fl_object_copy; - boolean_t fl_object_destroy; - boolean_t fl_object_get_size; - boolean_t fl_object_get_attribute_value; - boolean_t fl_object_set_attribute_value; - boolean_t fl_object_find_init; - boolean_t fl_object_find; - boolean_t fl_object_find_final; - - boolean_t fl_key_generate; - boolean_t fl_key_generate_pair; - boolean_t fl_key_wrap; - boolean_t fl_key_unwrap; - boolean_t fl_key_derive; - - boolean_t fl_init_token; - boolean_t fl_init_pin; - boolean_t fl_set_pin; - - boolean_t prov_is_limited; - uint32_t prov_hash_threshold; - uint32_t prov_hash_limit; - - int total_threshold_count; - fl_mechs_threshold_t fl_threshold[MAX_NUM_THRESHOLD]; -} crypto_function_list_t; - -typedef struct crypto_get_function_list { - uint_t fl_return_value; - crypto_provider_id_t fl_provider_id; - crypto_function_list_t fl_list; -} crypto_get_function_list_t; - -typedef struct crypto_get_mechanism_number { - uint_t pn_return_value; - caddr_t pn_mechanism_string; - size_t pn_mechanism_len; - crypto_mech_type_t pn_internal_number; -} crypto_get_mechanism_number_t; - -#ifdef _KERNEL -#ifdef 
_SYSCALL32 - -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack(4) -#endif - -typedef struct crypto_get_mechanism_number32 { - uint32_t pn_return_value; - caddr32_t pn_mechanism_string; - size32_t pn_mechanism_len; - crypto_mech_type_t pn_internal_number; -} crypto_get_mechanism_number32_t; - -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack() -#endif - -#endif /* _SYSCALL32 */ -#endif /* _KERNEL */ - -#define CRYPTO_GET_FUNCTION_LIST CRYPTO(20) -#define CRYPTO_GET_MECHANISM_NUMBER CRYPTO(21) - -/* - * Session Ioctls - */ - -typedef uint32_t crypto_flags_t; - -typedef struct crypto_open_session { - uint_t os_return_value; - crypto_session_id_t os_session; - crypto_flags_t os_flags; - crypto_provider_id_t os_provider_id; -} crypto_open_session_t; - -typedef struct crypto_close_session { - uint_t cs_return_value; - crypto_session_id_t cs_session; -} crypto_close_session_t; - -typedef struct crypto_close_all_sessions { - uint_t as_return_value; - crypto_provider_id_t as_provider_id; -} crypto_close_all_sessions_t; - -#define CRYPTO_OPEN_SESSION CRYPTO(30) -#define CRYPTO_CLOSE_SESSION CRYPTO(31) -#define CRYPTO_CLOSE_ALL_SESSIONS CRYPTO(32) - -/* - * Login Ioctls - */ -typedef struct crypto_login { - uint_t co_return_value; - crypto_session_id_t co_session; - uint_t co_user_type; - uint_t co_pin_len; - caddr_t co_pin; -} crypto_login_t; - -typedef struct crypto_logout { - uint_t cl_return_value; - crypto_session_id_t cl_session; -} crypto_logout_t; - -#ifdef _KERNEL -#ifdef _SYSCALL32 - -typedef struct crypto_login32 { - uint32_t co_return_value; - crypto_session_id_t co_session; - uint32_t co_user_type; - uint32_t co_pin_len; - caddr32_t co_pin; -} crypto_login32_t; - -typedef struct crypto_logout32 { - uint32_t cl_return_value; - crypto_session_id_t cl_session; -} crypto_logout32_t; - -#endif /* _SYSCALL32 */ -#endif /* _KERNEL */ - -#define CRYPTO_LOGIN CRYPTO(40) -#define CRYPTO_LOGOUT CRYPTO(41) - -/* - * 
Cryptographic Ioctls - */ -typedef struct crypto_encrypt { - uint_t ce_return_value; - crypto_session_id_t ce_session; - size_t ce_datalen; - caddr_t ce_databuf; - size_t ce_encrlen; - caddr_t ce_encrbuf; - uint_t ce_flags; -} crypto_encrypt_t; - -typedef struct crypto_encrypt_init { - uint_t ei_return_value; - crypto_session_id_t ei_session; - crypto_mechanism_t ei_mech; - crypto_key_t ei_key; -} crypto_encrypt_init_t; - -typedef struct crypto_encrypt_update { - uint_t eu_return_value; - crypto_session_id_t eu_session; - size_t eu_datalen; - caddr_t eu_databuf; - size_t eu_encrlen; - caddr_t eu_encrbuf; -} crypto_encrypt_update_t; - -typedef struct crypto_encrypt_final { - uint_t ef_return_value; - crypto_session_id_t ef_session; - size_t ef_encrlen; - caddr_t ef_encrbuf; -} crypto_encrypt_final_t; - -typedef struct crypto_decrypt { - uint_t cd_return_value; - crypto_session_id_t cd_session; - size_t cd_encrlen; - caddr_t cd_encrbuf; - size_t cd_datalen; - caddr_t cd_databuf; - uint_t cd_flags; -} crypto_decrypt_t; - -typedef struct crypto_decrypt_init { - uint_t di_return_value; - crypto_session_id_t di_session; - crypto_mechanism_t di_mech; - crypto_key_t di_key; -} crypto_decrypt_init_t; - -typedef struct crypto_decrypt_update { - uint_t du_return_value; - crypto_session_id_t du_session; - size_t du_encrlen; - caddr_t du_encrbuf; - size_t du_datalen; - caddr_t du_databuf; -} crypto_decrypt_update_t; - -typedef struct crypto_decrypt_final { - uint_t df_return_value; - crypto_session_id_t df_session; - size_t df_datalen; - caddr_t df_databuf; -} crypto_decrypt_final_t; - -typedef struct crypto_digest { - uint_t cd_return_value; - crypto_session_id_t cd_session; - size_t cd_datalen; - caddr_t cd_databuf; - size_t cd_digestlen; - caddr_t cd_digestbuf; -} crypto_digest_t; - -typedef struct crypto_digest_init { - uint_t di_return_value; - crypto_session_id_t di_session; - crypto_mechanism_t di_mech; -} crypto_digest_init_t; - -typedef struct crypto_digest_update { - 
uint_t du_return_value; - crypto_session_id_t du_session; - size_t du_datalen; - caddr_t du_databuf; -} crypto_digest_update_t; - -typedef struct crypto_digest_key { - uint_t dk_return_value; - crypto_session_id_t dk_session; - crypto_key_t dk_key; -} crypto_digest_key_t; - -typedef struct crypto_digest_final { - uint_t df_return_value; - crypto_session_id_t df_session; - size_t df_digestlen; - caddr_t df_digestbuf; -} crypto_digest_final_t; - -typedef struct crypto_mac { - uint_t cm_return_value; - crypto_session_id_t cm_session; - size_t cm_datalen; - caddr_t cm_databuf; - size_t cm_maclen; - caddr_t cm_macbuf; -} crypto_mac_t; - -typedef struct crypto_mac_init { - uint_t mi_return_value; - crypto_session_id_t mi_session; - crypto_mechanism_t mi_mech; - crypto_key_t mi_key; -} crypto_mac_init_t; - -typedef struct crypto_mac_update { - uint_t mu_return_value; - crypto_session_id_t mu_session; - size_t mu_datalen; - caddr_t mu_databuf; -} crypto_mac_update_t; - -typedef struct crypto_mac_final { - uint_t mf_return_value; - crypto_session_id_t mf_session; - size_t mf_maclen; - caddr_t mf_macbuf; -} crypto_mac_final_t; - -typedef struct crypto_sign { - uint_t cs_return_value; - crypto_session_id_t cs_session; - size_t cs_datalen; - caddr_t cs_databuf; - size_t cs_signlen; - caddr_t cs_signbuf; -} crypto_sign_t; - -typedef struct crypto_sign_init { - uint_t si_return_value; - crypto_session_id_t si_session; - crypto_mechanism_t si_mech; - crypto_key_t si_key; -} crypto_sign_init_t; - -typedef struct crypto_sign_update { - uint_t su_return_value; - crypto_session_id_t su_session; - size_t su_datalen; - caddr_t su_databuf; -} crypto_sign_update_t; - -typedef struct crypto_sign_final { - uint_t sf_return_value; - crypto_session_id_t sf_session; - size_t sf_signlen; - caddr_t sf_signbuf; -} crypto_sign_final_t; - -typedef struct crypto_sign_recover_init { - uint_t ri_return_value; - crypto_session_id_t ri_session; - crypto_mechanism_t ri_mech; - crypto_key_t ri_key; -} 
crypto_sign_recover_init_t; - -typedef struct crypto_sign_recover { - uint_t sr_return_value; - crypto_session_id_t sr_session; - size_t sr_datalen; - caddr_t sr_databuf; - size_t sr_signlen; - caddr_t sr_signbuf; -} crypto_sign_recover_t; - -typedef struct crypto_verify { - uint_t cv_return_value; - crypto_session_id_t cv_session; - size_t cv_datalen; - caddr_t cv_databuf; - size_t cv_signlen; - caddr_t cv_signbuf; -} crypto_verify_t; - -typedef struct crypto_verify_init { - uint_t vi_return_value; - crypto_session_id_t vi_session; - crypto_mechanism_t vi_mech; - crypto_key_t vi_key; -} crypto_verify_init_t; - -typedef struct crypto_verify_update { - uint_t vu_return_value; - crypto_session_id_t vu_session; - size_t vu_datalen; - caddr_t vu_databuf; -} crypto_verify_update_t; - -typedef struct crypto_verify_final { - uint_t vf_return_value; - crypto_session_id_t vf_session; - size_t vf_signlen; - caddr_t vf_signbuf; -} crypto_verify_final_t; - -typedef struct crypto_verify_recover_init { - uint_t ri_return_value; - crypto_session_id_t ri_session; - crypto_mechanism_t ri_mech; - crypto_key_t ri_key; -} crypto_verify_recover_init_t; - -typedef struct crypto_verify_recover { - uint_t vr_return_value; - crypto_session_id_t vr_session; - size_t vr_signlen; - caddr_t vr_signbuf; - size_t vr_datalen; - caddr_t vr_databuf; -} crypto_verify_recover_t; - -typedef struct crypto_digest_encrypt_update { - uint_t eu_return_value; - crypto_session_id_t eu_session; - size_t eu_datalen; - caddr_t eu_databuf; - size_t eu_encrlen; - caddr_t eu_encrbuf; -} crypto_digest_encrypt_update_t; - -typedef struct crypto_decrypt_digest_update { - uint_t du_return_value; - crypto_session_id_t du_session; - size_t du_encrlen; - caddr_t du_encrbuf; - size_t du_datalen; - caddr_t du_databuf; -} crypto_decrypt_digest_update_t; - -typedef struct crypto_sign_encrypt_update { - uint_t eu_return_value; - crypto_session_id_t eu_session; - size_t eu_datalen; - caddr_t eu_databuf; - size_t eu_encrlen; - 
caddr_t eu_encrbuf; -} crypto_sign_encrypt_update_t; - -typedef struct crypto_decrypt_verify_update { - uint_t vu_return_value; - crypto_session_id_t vu_session; - size_t vu_encrlen; - caddr_t vu_encrbuf; - size_t vu_datalen; - caddr_t vu_databuf; -} crypto_decrypt_verify_update_t; - -#ifdef _KERNEL -#ifdef _SYSCALL32 - -typedef struct crypto_encrypt32 { - uint32_t ce_return_value; - crypto_session_id_t ce_session; - size32_t ce_datalen; - caddr32_t ce_databuf; - size32_t ce_encrlen; - caddr32_t ce_encrbuf; - uint32_t ce_flags; -} crypto_encrypt32_t; - -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack(4) -#endif - -typedef struct crypto_encrypt_init32 { - uint32_t ei_return_value; - crypto_session_id_t ei_session; - crypto_mechanism32_t ei_mech; - crypto_key32_t ei_key; -} crypto_encrypt_init32_t; - -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack() -#endif - -typedef struct crypto_encrypt_update32 { - uint32_t eu_return_value; - crypto_session_id_t eu_session; - size32_t eu_datalen; - caddr32_t eu_databuf; - size32_t eu_encrlen; - caddr32_t eu_encrbuf; -} crypto_encrypt_update32_t; - -typedef struct crypto_encrypt_final32 { - uint32_t ef_return_value; - crypto_session_id_t ef_session; - size32_t ef_encrlen; - caddr32_t ef_encrbuf; -} crypto_encrypt_final32_t; - -typedef struct crypto_decrypt32 { - uint32_t cd_return_value; - crypto_session_id_t cd_session; - size32_t cd_encrlen; - caddr32_t cd_encrbuf; - size32_t cd_datalen; - caddr32_t cd_databuf; - uint32_t cd_flags; -} crypto_decrypt32_t; - -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack(4) -#endif - -typedef struct crypto_decrypt_init32 { - uint32_t di_return_value; - crypto_session_id_t di_session; - crypto_mechanism32_t di_mech; - crypto_key32_t di_key; -} crypto_decrypt_init32_t; - -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack() -#endif - -typedef struct crypto_decrypt_update32 { - uint32_t 
du_return_value; - crypto_session_id_t du_session; - size32_t du_encrlen; - caddr32_t du_encrbuf; - size32_t du_datalen; - caddr32_t du_databuf; -} crypto_decrypt_update32_t; - -typedef struct crypto_decrypt_final32 { - uint32_t df_return_value; - crypto_session_id_t df_session; - size32_t df_datalen; - caddr32_t df_databuf; -} crypto_decrypt_final32_t; - -typedef struct crypto_digest32 { - uint32_t cd_return_value; - crypto_session_id_t cd_session; - size32_t cd_datalen; - caddr32_t cd_databuf; - size32_t cd_digestlen; - caddr32_t cd_digestbuf; -} crypto_digest32_t; - -typedef struct crypto_digest_init32 { - uint32_t di_return_value; - crypto_session_id_t di_session; - crypto_mechanism32_t di_mech; -} crypto_digest_init32_t; - -typedef struct crypto_digest_update32 { - uint32_t du_return_value; - crypto_session_id_t du_session; - size32_t du_datalen; - caddr32_t du_databuf; -} crypto_digest_update32_t; - -typedef struct crypto_digest_key32 { - uint32_t dk_return_value; - crypto_session_id_t dk_session; - crypto_key32_t dk_key; -} crypto_digest_key32_t; - -typedef struct crypto_digest_final32 { - uint32_t df_return_value; - crypto_session_id_t df_session; - size32_t df_digestlen; - caddr32_t df_digestbuf; -} crypto_digest_final32_t; - -typedef struct crypto_mac32 { - uint32_t cm_return_value; - crypto_session_id_t cm_session; - size32_t cm_datalen; - caddr32_t cm_databuf; - size32_t cm_maclen; - caddr32_t cm_macbuf; -} crypto_mac32_t; - -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack(4) -#endif - -typedef struct crypto_mac_init32 { - uint32_t mi_return_value; - crypto_session_id_t mi_session; - crypto_mechanism32_t mi_mech; - crypto_key32_t mi_key; -} crypto_mac_init32_t; - -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack() -#endif - -typedef struct crypto_mac_update32 { - uint32_t mu_return_value; - crypto_session_id_t mu_session; - size32_t mu_datalen; - caddr32_t mu_databuf; -} crypto_mac_update32_t; - 
-typedef struct crypto_mac_final32 { - uint32_t mf_return_value; - crypto_session_id_t mf_session; - size32_t mf_maclen; - caddr32_t mf_macbuf; -} crypto_mac_final32_t; - -typedef struct crypto_sign32 { - uint32_t cs_return_value; - crypto_session_id_t cs_session; - size32_t cs_datalen; - caddr32_t cs_databuf; - size32_t cs_signlen; - caddr32_t cs_signbuf; -} crypto_sign32_t; - -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack(4) -#endif - -typedef struct crypto_sign_init32 { - uint32_t si_return_value; - crypto_session_id_t si_session; - crypto_mechanism32_t si_mech; - crypto_key32_t si_key; -} crypto_sign_init32_t; - -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack() -#endif - -typedef struct crypto_sign_update32 { - uint32_t su_return_value; - crypto_session_id_t su_session; - size32_t su_datalen; - caddr32_t su_databuf; -} crypto_sign_update32_t; - -typedef struct crypto_sign_final32 { - uint32_t sf_return_value; - crypto_session_id_t sf_session; - size32_t sf_signlen; - caddr32_t sf_signbuf; -} crypto_sign_final32_t; - -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack(4) -#endif - -typedef struct crypto_sign_recover_init32 { - uint32_t ri_return_value; - crypto_session_id_t ri_session; - crypto_mechanism32_t ri_mech; - crypto_key32_t ri_key; -} crypto_sign_recover_init32_t; - -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack() -#endif - -typedef struct crypto_sign_recover32 { - uint32_t sr_return_value; - crypto_session_id_t sr_session; - size32_t sr_datalen; - caddr32_t sr_databuf; - size32_t sr_signlen; - caddr32_t sr_signbuf; -} crypto_sign_recover32_t; - -typedef struct crypto_verify32 { - uint32_t cv_return_value; - crypto_session_id_t cv_session; - size32_t cv_datalen; - caddr32_t cv_databuf; - size32_t cv_signlen; - caddr32_t cv_signbuf; -} crypto_verify32_t; - -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack(4) 
-#endif - -typedef struct crypto_verify_init32 { - uint32_t vi_return_value; - crypto_session_id_t vi_session; - crypto_mechanism32_t vi_mech; - crypto_key32_t vi_key; -} crypto_verify_init32_t; - -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack() -#endif - -typedef struct crypto_verify_update32 { - uint32_t vu_return_value; - crypto_session_id_t vu_session; - size32_t vu_datalen; - caddr32_t vu_databuf; -} crypto_verify_update32_t; - -typedef struct crypto_verify_final32 { - uint32_t vf_return_value; - crypto_session_id_t vf_session; - size32_t vf_signlen; - caddr32_t vf_signbuf; -} crypto_verify_final32_t; - -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack(4) -#endif - -typedef struct crypto_verify_recover_init32 { - uint32_t ri_return_value; - crypto_session_id_t ri_session; - crypto_mechanism32_t ri_mech; - crypto_key32_t ri_key; -} crypto_verify_recover_init32_t; - -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack() -#endif - -typedef struct crypto_verify_recover32 { - uint32_t vr_return_value; - crypto_session_id_t vr_session; - size32_t vr_signlen; - caddr32_t vr_signbuf; - size32_t vr_datalen; - caddr32_t vr_databuf; -} crypto_verify_recover32_t; - -typedef struct crypto_digest_encrypt_update32 { - uint32_t eu_return_value; - crypto_session_id_t eu_session; - size32_t eu_datalen; - caddr32_t eu_databuf; - size32_t eu_encrlen; - caddr32_t eu_encrbuf; -} crypto_digest_encrypt_update32_t; - -typedef struct crypto_decrypt_digest_update32 { - uint32_t du_return_value; - crypto_session_id_t du_session; - size32_t du_encrlen; - caddr32_t du_encrbuf; - size32_t du_datalen; - caddr32_t du_databuf; -} crypto_decrypt_digest_update32_t; - -typedef struct crypto_sign_encrypt_update32 { - uint32_t eu_return_value; - crypto_session_id_t eu_session; - size32_t eu_datalen; - caddr32_t eu_databuf; - size32_t eu_encrlen; - caddr32_t eu_encrbuf; -} crypto_sign_encrypt_update32_t; - -typedef 
struct crypto_decrypt_verify_update32 { - uint32_t vu_return_value; - crypto_session_id_t vu_session; - size32_t vu_encrlen; - caddr32_t vu_encrbuf; - size32_t vu_datalen; - caddr32_t vu_databuf; -} crypto_decrypt_verify_update32_t; - -#endif /* _SYSCALL32 */ -#endif /* _KERNEL */ - -#define CRYPTO_ENCRYPT CRYPTO(50) -#define CRYPTO_ENCRYPT_INIT CRYPTO(51) -#define CRYPTO_ENCRYPT_UPDATE CRYPTO(52) -#define CRYPTO_ENCRYPT_FINAL CRYPTO(53) -#define CRYPTO_DECRYPT CRYPTO(54) -#define CRYPTO_DECRYPT_INIT CRYPTO(55) -#define CRYPTO_DECRYPT_UPDATE CRYPTO(56) -#define CRYPTO_DECRYPT_FINAL CRYPTO(57) - -#define CRYPTO_DIGEST CRYPTO(58) -#define CRYPTO_DIGEST_INIT CRYPTO(59) -#define CRYPTO_DIGEST_UPDATE CRYPTO(60) -#define CRYPTO_DIGEST_KEY CRYPTO(61) -#define CRYPTO_DIGEST_FINAL CRYPTO(62) -#define CRYPTO_MAC CRYPTO(63) -#define CRYPTO_MAC_INIT CRYPTO(64) -#define CRYPTO_MAC_UPDATE CRYPTO(65) -#define CRYPTO_MAC_FINAL CRYPTO(66) - -#define CRYPTO_SIGN CRYPTO(67) -#define CRYPTO_SIGN_INIT CRYPTO(68) -#define CRYPTO_SIGN_UPDATE CRYPTO(69) -#define CRYPTO_SIGN_FINAL CRYPTO(70) -#define CRYPTO_SIGN_RECOVER_INIT CRYPTO(71) -#define CRYPTO_SIGN_RECOVER CRYPTO(72) -#define CRYPTO_VERIFY CRYPTO(73) -#define CRYPTO_VERIFY_INIT CRYPTO(74) -#define CRYPTO_VERIFY_UPDATE CRYPTO(75) -#define CRYPTO_VERIFY_FINAL CRYPTO(76) -#define CRYPTO_VERIFY_RECOVER_INIT CRYPTO(77) -#define CRYPTO_VERIFY_RECOVER CRYPTO(78) - -#define CRYPTO_DIGEST_ENCRYPT_UPDATE CRYPTO(79) -#define CRYPTO_DECRYPT_DIGEST_UPDATE CRYPTO(80) -#define CRYPTO_SIGN_ENCRYPT_UPDATE CRYPTO(81) -#define CRYPTO_DECRYPT_VERIFY_UPDATE CRYPTO(82) - -/* - * Random Number Ioctls - */ -typedef struct crypto_seed_random { - uint_t sr_return_value; - crypto_session_id_t sr_session; - size_t sr_seedlen; - caddr_t sr_seedbuf; -} crypto_seed_random_t; - -typedef struct crypto_generate_random { - uint_t gr_return_value; - crypto_session_id_t gr_session; - caddr_t gr_buf; - size_t gr_buflen; -} crypto_generate_random_t; - -#ifdef _KERNEL 
-#ifdef _SYSCALL32 - -typedef struct crypto_seed_random32 { - uint32_t sr_return_value; - crypto_session_id_t sr_session; - size32_t sr_seedlen; - caddr32_t sr_seedbuf; -} crypto_seed_random32_t; - -typedef struct crypto_generate_random32 { - uint32_t gr_return_value; - crypto_session_id_t gr_session; - caddr32_t gr_buf; - size32_t gr_buflen; -} crypto_generate_random32_t; - -#endif /* _SYSCALL32 */ -#endif /* _KERNEL */ - -#define CRYPTO_SEED_RANDOM CRYPTO(90) -#define CRYPTO_GENERATE_RANDOM CRYPTO(91) - -/* - * Object Management Ioctls - */ -typedef struct crypto_object_create { - uint_t oc_return_value; - crypto_session_id_t oc_session; - crypto_object_id_t oc_handle; - uint_t oc_count; - caddr_t oc_attributes; -} crypto_object_create_t; - -typedef struct crypto_object_copy { - uint_t oc_return_value; - crypto_session_id_t oc_session; - crypto_object_id_t oc_handle; - crypto_object_id_t oc_new_handle; - uint_t oc_count; - caddr_t oc_new_attributes; -} crypto_object_copy_t; - -typedef struct crypto_object_destroy { - uint_t od_return_value; - crypto_session_id_t od_session; - crypto_object_id_t od_handle; -} crypto_object_destroy_t; - -typedef struct crypto_object_get_attribute_value { - uint_t og_return_value; - crypto_session_id_t og_session; - crypto_object_id_t og_handle; - uint_t og_count; - caddr_t og_attributes; -} crypto_object_get_attribute_value_t; - -typedef struct crypto_object_get_size { - uint_t gs_return_value; - crypto_session_id_t gs_session; - crypto_object_id_t gs_handle; - size_t gs_size; -} crypto_object_get_size_t; - -typedef struct crypto_object_set_attribute_value { - uint_t sa_return_value; - crypto_session_id_t sa_session; - crypto_object_id_t sa_handle; - uint_t sa_count; - caddr_t sa_attributes; -} crypto_object_set_attribute_value_t; - -typedef struct crypto_object_find_init { - uint_t fi_return_value; - crypto_session_id_t fi_session; - uint_t fi_count; - caddr_t fi_attributes; -} crypto_object_find_init_t; - -typedef struct 
crypto_object_find_update { - uint_t fu_return_value; - crypto_session_id_t fu_session; - uint_t fu_max_count; - uint_t fu_count; - caddr_t fu_handles; -} crypto_object_find_update_t; - -typedef struct crypto_object_find_final { - uint_t ff_return_value; - crypto_session_id_t ff_session; -} crypto_object_find_final_t; - -#ifdef _KERNEL -#ifdef _SYSCALL32 - -typedef struct crypto_object_create32 { - uint32_t oc_return_value; - crypto_session_id_t oc_session; - crypto_object_id_t oc_handle; - uint32_t oc_count; - caddr32_t oc_attributes; -} crypto_object_create32_t; - -typedef struct crypto_object_copy32 { - uint32_t oc_return_value; - crypto_session_id_t oc_session; - crypto_object_id_t oc_handle; - crypto_object_id_t oc_new_handle; - uint32_t oc_count; - caddr32_t oc_new_attributes; -} crypto_object_copy32_t; - -typedef struct crypto_object_destroy32 { - uint32_t od_return_value; - crypto_session_id_t od_session; - crypto_object_id_t od_handle; -} crypto_object_destroy32_t; - -typedef struct crypto_object_get_attribute_value32 { - uint32_t og_return_value; - crypto_session_id_t og_session; - crypto_object_id_t og_handle; - uint32_t og_count; - caddr32_t og_attributes; -} crypto_object_get_attribute_value32_t; - -typedef struct crypto_object_get_size32 { - uint32_t gs_return_value; - crypto_session_id_t gs_session; - crypto_object_id_t gs_handle; - size32_t gs_size; -} crypto_object_get_size32_t; - -typedef struct crypto_object_set_attribute_value32 { - uint32_t sa_return_value; - crypto_session_id_t sa_session; - crypto_object_id_t sa_handle; - uint32_t sa_count; - caddr32_t sa_attributes; -} crypto_object_set_attribute_value32_t; - -typedef struct crypto_object_find_init32 { - uint32_t fi_return_value; - crypto_session_id_t fi_session; - uint32_t fi_count; - caddr32_t fi_attributes; -} crypto_object_find_init32_t; - -typedef struct crypto_object_find_update32 { - uint32_t fu_return_value; - crypto_session_id_t fu_session; - uint32_t fu_max_count; - uint32_t 
fu_count; - caddr32_t fu_handles; -} crypto_object_find_update32_t; - -typedef struct crypto_object_find_final32 { - uint32_t ff_return_value; - crypto_session_id_t ff_session; -} crypto_object_find_final32_t; - -#endif /* _SYSCALL32 */ -#endif /* _KERNEL */ - -#define CRYPTO_OBJECT_CREATE CRYPTO(100) -#define CRYPTO_OBJECT_COPY CRYPTO(101) -#define CRYPTO_OBJECT_DESTROY CRYPTO(102) -#define CRYPTO_OBJECT_GET_ATTRIBUTE_VALUE CRYPTO(103) -#define CRYPTO_OBJECT_GET_SIZE CRYPTO(104) -#define CRYPTO_OBJECT_SET_ATTRIBUTE_VALUE CRYPTO(105) -#define CRYPTO_OBJECT_FIND_INIT CRYPTO(106) -#define CRYPTO_OBJECT_FIND_UPDATE CRYPTO(107) -#define CRYPTO_OBJECT_FIND_FINAL CRYPTO(108) - -/* - * Key Generation Ioctls - */ -typedef struct crypto_object_generate_key { - uint_t gk_return_value; - crypto_session_id_t gk_session; - crypto_object_id_t gk_handle; - crypto_mechanism_t gk_mechanism; - uint_t gk_count; - caddr_t gk_attributes; -} crypto_object_generate_key_t; - -typedef struct crypto_object_generate_key_pair { - uint_t kp_return_value; - crypto_session_id_t kp_session; - crypto_object_id_t kp_public_handle; - crypto_object_id_t kp_private_handle; - uint_t kp_public_count; - uint_t kp_private_count; - caddr_t kp_public_attributes; - caddr_t kp_private_attributes; - crypto_mechanism_t kp_mechanism; -} crypto_object_generate_key_pair_t; - -typedef struct crypto_object_wrap_key { - uint_t wk_return_value; - crypto_session_id_t wk_session; - crypto_mechanism_t wk_mechanism; - crypto_key_t wk_wrapping_key; - crypto_object_id_t wk_object_handle; - size_t wk_wrapped_key_len; - caddr_t wk_wrapped_key; -} crypto_object_wrap_key_t; - -typedef struct crypto_object_unwrap_key { - uint_t uk_return_value; - crypto_session_id_t uk_session; - crypto_mechanism_t uk_mechanism; - crypto_key_t uk_unwrapping_key; - crypto_object_id_t uk_object_handle; - size_t uk_wrapped_key_len; - caddr_t uk_wrapped_key; - uint_t uk_count; - caddr_t uk_attributes; -} crypto_object_unwrap_key_t; - -typedef struct 
crypto_derive_key { - uint_t dk_return_value; - crypto_session_id_t dk_session; - crypto_mechanism_t dk_mechanism; - crypto_key_t dk_base_key; - crypto_object_id_t dk_object_handle; - uint_t dk_count; - caddr_t dk_attributes; -} crypto_derive_key_t; - -#ifdef _KERNEL -#ifdef _SYSCALL32 - -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack(4) -#endif - -typedef struct crypto_object_generate_key32 { - uint32_t gk_return_value; - crypto_session_id_t gk_session; - crypto_object_id_t gk_handle; - crypto_mechanism32_t gk_mechanism; - uint32_t gk_count; - caddr32_t gk_attributes; -} crypto_object_generate_key32_t; - -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack() -#endif - -typedef struct crypto_object_generate_key_pair32 { - uint32_t kp_return_value; - crypto_session_id_t kp_session; - crypto_object_id_t kp_public_handle; - crypto_object_id_t kp_private_handle; - uint32_t kp_public_count; - uint32_t kp_private_count; - caddr32_t kp_public_attributes; - caddr32_t kp_private_attributes; - crypto_mechanism32_t kp_mechanism; -} crypto_object_generate_key_pair32_t; - -typedef struct crypto_object_wrap_key32 { - uint32_t wk_return_value; - crypto_session_id_t wk_session; - crypto_mechanism32_t wk_mechanism; - crypto_key32_t wk_wrapping_key; - crypto_object_id_t wk_object_handle; - size32_t wk_wrapped_key_len; - caddr32_t wk_wrapped_key; -} crypto_object_wrap_key32_t; - -typedef struct crypto_object_unwrap_key32 { - uint32_t uk_return_value; - crypto_session_id_t uk_session; - crypto_mechanism32_t uk_mechanism; - crypto_key32_t uk_unwrapping_key; - crypto_object_id_t uk_object_handle; - size32_t uk_wrapped_key_len; - caddr32_t uk_wrapped_key; - uint32_t uk_count; - caddr32_t uk_attributes; -} crypto_object_unwrap_key32_t; - -typedef struct crypto_derive_key32 { - uint32_t dk_return_value; - crypto_session_id_t dk_session; - crypto_mechanism32_t dk_mechanism; - crypto_key32_t dk_base_key; - crypto_object_id_t 
dk_object_handle; - uint32_t dk_count; - caddr32_t dk_attributes; -} crypto_derive_key32_t; - -#endif /* _SYSCALL32 */ -#endif /* _KERNEL */ - -#define CRYPTO_GENERATE_KEY CRYPTO(110) -#define CRYPTO_GENERATE_KEY_PAIR CRYPTO(111) -#define CRYPTO_WRAP_KEY CRYPTO(112) -#define CRYPTO_UNWRAP_KEY CRYPTO(113) -#define CRYPTO_DERIVE_KEY CRYPTO(114) - -/* - * Provider Management Ioctls - */ - -typedef struct crypto_get_provider_list { - uint_t pl_return_value; - uint_t pl_count; - crypto_provider_entry_t pl_list[1]; -} crypto_get_provider_list_t; - -typedef struct crypto_provider_data { - uchar_t pd_prov_desc[CRYPTO_PROVIDER_DESCR_MAX_LEN]; - uchar_t pd_label[CRYPTO_EXT_SIZE_LABEL]; - uchar_t pd_manufacturerID[CRYPTO_EXT_SIZE_MANUF]; - uchar_t pd_model[CRYPTO_EXT_SIZE_MODEL]; - uchar_t pd_serial_number[CRYPTO_EXT_SIZE_SERIAL]; - ulong_t pd_flags; - ulong_t pd_max_session_count; - ulong_t pd_session_count; - ulong_t pd_max_rw_session_count; - ulong_t pd_rw_session_count; - ulong_t pd_max_pin_len; - ulong_t pd_min_pin_len; - ulong_t pd_total_public_memory; - ulong_t pd_free_public_memory; - ulong_t pd_total_private_memory; - ulong_t pd_free_private_memory; - crypto_version_t pd_hardware_version; - crypto_version_t pd_firmware_version; - uchar_t pd_time[CRYPTO_EXT_SIZE_TIME]; -} crypto_provider_data_t; - -typedef struct crypto_get_provider_info { - uint_t gi_return_value; - crypto_provider_id_t gi_provider_id; - crypto_provider_data_t gi_provider_data; -} crypto_get_provider_info_t; - -typedef struct crypto_get_provider_mechanisms { - uint_t pm_return_value; - crypto_provider_id_t pm_provider_id; - uint_t pm_count; - crypto_mech_name_t pm_list[1]; -} crypto_get_provider_mechanisms_t; - -typedef struct crypto_get_provider_mechanism_info { - uint_t mi_return_value; - crypto_provider_id_t mi_provider_id; - crypto_mech_name_t mi_mechanism_name; - uint32_t mi_min_key_size; - uint32_t mi_max_key_size; - uint32_t mi_flags; -} crypto_get_provider_mechanism_info_t; - -typedef struct 
crypto_init_token { - uint_t it_return_value; - crypto_provider_id_t it_provider_id; - caddr_t it_pin; - size_t it_pin_len; - caddr_t it_label; -} crypto_init_token_t; - -typedef struct crypto_init_pin { - uint_t ip_return_value; - crypto_session_id_t ip_session; - caddr_t ip_pin; - size_t ip_pin_len; -} crypto_init_pin_t; - -typedef struct crypto_set_pin { - uint_t sp_return_value; - crypto_session_id_t sp_session; - caddr_t sp_old_pin; - size_t sp_old_len; - caddr_t sp_new_pin; - size_t sp_new_len; -} crypto_set_pin_t; - -#ifdef _KERNEL -#ifdef _SYSCALL32 - -typedef struct crypto_get_provider_list32 { - uint32_t pl_return_value; - uint32_t pl_count; - crypto_provider_entry_t pl_list[1]; -} crypto_get_provider_list32_t; - -typedef struct crypto_version32 { - uchar_t cv_major; - uchar_t cv_minor; -} crypto_version32_t; - -typedef struct crypto_provider_data32 { - uchar_t pd_prov_desc[CRYPTO_PROVIDER_DESCR_MAX_LEN]; - uchar_t pd_label[CRYPTO_EXT_SIZE_LABEL]; - uchar_t pd_manufacturerID[CRYPTO_EXT_SIZE_MANUF]; - uchar_t pd_model[CRYPTO_EXT_SIZE_MODEL]; - uchar_t pd_serial_number[CRYPTO_EXT_SIZE_SERIAL]; - uint32_t pd_flags; - uint32_t pd_max_session_count; - uint32_t pd_session_count; - uint32_t pd_max_rw_session_count; - uint32_t pd_rw_session_count; - uint32_t pd_max_pin_len; - uint32_t pd_min_pin_len; - uint32_t pd_total_public_memory; - uint32_t pd_free_public_memory; - uint32_t pd_total_private_memory; - uint32_t pd_free_private_memory; - crypto_version32_t pd_hardware_version; - crypto_version32_t pd_firmware_version; - uchar_t pd_time[CRYPTO_EXT_SIZE_TIME]; -} crypto_provider_data32_t; - -typedef struct crypto_get_provider_info32 { - uint32_t gi_return_value; - crypto_provider_id_t gi_provider_id; - crypto_provider_data32_t gi_provider_data; -} crypto_get_provider_info32_t; - -typedef struct crypto_get_provider_mechanisms32 { - uint32_t pm_return_value; - crypto_provider_id_t pm_provider_id; - uint32_t pm_count; - crypto_mech_name_t pm_list[1]; -} 
crypto_get_provider_mechanisms32_t; - -typedef struct crypto_init_token32 { - uint32_t it_return_value; - crypto_provider_id_t it_provider_id; - caddr32_t it_pin; - size32_t it_pin_len; - caddr32_t it_label; -} crypto_init_token32_t; - -typedef struct crypto_init_pin32 { - uint32_t ip_return_value; - crypto_session_id_t ip_session; - caddr32_t ip_pin; - size32_t ip_pin_len; -} crypto_init_pin32_t; - -typedef struct crypto_set_pin32 { - uint32_t sp_return_value; - crypto_session_id_t sp_session; - caddr32_t sp_old_pin; - size32_t sp_old_len; - caddr32_t sp_new_pin; - size32_t sp_new_len; -} crypto_set_pin32_t; - -#endif /* _SYSCALL32 */ -#endif /* _KERNEL */ - -#define CRYPTO_GET_PROVIDER_LIST CRYPTO(120) -#define CRYPTO_GET_PROVIDER_INFO CRYPTO(121) -#define CRYPTO_GET_PROVIDER_MECHANISMS CRYPTO(122) -#define CRYPTO_GET_PROVIDER_MECHANISM_INFO CRYPTO(123) -#define CRYPTO_INIT_TOKEN CRYPTO(124) -#define CRYPTO_INIT_PIN CRYPTO(125) -#define CRYPTO_SET_PIN CRYPTO(126) - -/* - * No (Key) Store Key Generation Ioctls - */ -typedef struct crypto_nostore_generate_key { - uint_t ngk_return_value; - crypto_session_id_t ngk_session; - crypto_mechanism_t ngk_mechanism; - uint_t ngk_in_count; - uint_t ngk_out_count; - caddr_t ngk_in_attributes; - caddr_t ngk_out_attributes; -} crypto_nostore_generate_key_t; - -typedef struct crypto_nostore_generate_key_pair { - uint_t nkp_return_value; - crypto_session_id_t nkp_session; - uint_t nkp_in_public_count; - uint_t nkp_in_private_count; - uint_t nkp_out_public_count; - uint_t nkp_out_private_count; - caddr_t nkp_in_public_attributes; - caddr_t nkp_in_private_attributes; - caddr_t nkp_out_public_attributes; - caddr_t nkp_out_private_attributes; - crypto_mechanism_t nkp_mechanism; -} crypto_nostore_generate_key_pair_t; - -typedef struct crypto_nostore_derive_key { - uint_t ndk_return_value; - crypto_session_id_t ndk_session; - crypto_mechanism_t ndk_mechanism; - crypto_key_t ndk_base_key; - uint_t ndk_in_count; - uint_t ndk_out_count; - 
caddr_t ndk_in_attributes; - caddr_t ndk_out_attributes; -} crypto_nostore_derive_key_t; - -#ifdef _KERNEL -#ifdef _SYSCALL32 - -typedef struct crypto_nostore_generate_key32 { - uint32_t ngk_return_value; - crypto_session_id_t ngk_session; - crypto_mechanism32_t ngk_mechanism; - uint32_t ngk_in_count; - uint32_t ngk_out_count; - caddr32_t ngk_in_attributes; - caddr32_t ngk_out_attributes; -} crypto_nostore_generate_key32_t; - -typedef struct crypto_nostore_generate_key_pair32 { - uint32_t nkp_return_value; - crypto_session_id_t nkp_session; - uint32_t nkp_in_public_count; - uint32_t nkp_in_private_count; - uint32_t nkp_out_public_count; - uint32_t nkp_out_private_count; - caddr32_t nkp_in_public_attributes; - caddr32_t nkp_in_private_attributes; - caddr32_t nkp_out_public_attributes; - caddr32_t nkp_out_private_attributes; - crypto_mechanism32_t nkp_mechanism; -} crypto_nostore_generate_key_pair32_t; - -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack(4) -#endif - -typedef struct crypto_nostore_derive_key32 { - uint32_t ndk_return_value; - crypto_session_id_t ndk_session; - crypto_mechanism32_t ndk_mechanism; - crypto_key32_t ndk_base_key; - uint32_t ndk_in_count; - uint32_t ndk_out_count; - caddr32_t ndk_in_attributes; - caddr32_t ndk_out_attributes; -} crypto_nostore_derive_key32_t; - -#if _LONG_LONG_ALIGNMENT == 8 && _LONG_LONG_ALIGNMENT_32 == 4 -#pragma pack() -#endif - -#endif /* _SYSCALL32 */ -#endif /* _KERNEL */ - -#define CRYPTO_NOSTORE_GENERATE_KEY CRYPTO(127) -#define CRYPTO_NOSTORE_GENERATE_KEY_PAIR CRYPTO(128) -#define CRYPTO_NOSTORE_DERIVE_KEY CRYPTO(129) - -/* - * Mechanism Ioctls - */ - -typedef struct crypto_get_mechanism_list { - uint_t ml_return_value; - uint_t ml_count; - crypto_mech_name_t ml_list[1]; -} crypto_get_mechanism_list_t; - -typedef struct crypto_get_all_mechanism_info { - uint_t mi_return_value; - crypto_mech_name_t mi_mechanism_name; - uint_t mi_count; - crypto_mechanism_info_t mi_list[1]; -} 
crypto_get_all_mechanism_info_t; - -#ifdef _KERNEL -#ifdef _SYSCALL32 - -typedef struct crypto_get_mechanism_list32 { - uint32_t ml_return_value; - uint32_t ml_count; - crypto_mech_name_t ml_list[1]; -} crypto_get_mechanism_list32_t; - -typedef struct crypto_get_all_mechanism_info32 { - uint32_t mi_return_value; - crypto_mech_name_t mi_mechanism_name; - uint32_t mi_count; - crypto_mechanism_info32_t mi_list[1]; -} crypto_get_all_mechanism_info32_t; - -#endif /* _SYSCALL32 */ -#endif /* _KERNEL */ - -#define CRYPTO_GET_MECHANISM_LIST CRYPTO(140) -#define CRYPTO_GET_ALL_MECHANISM_INFO CRYPTO(141) - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_CRYPTO_IOCTL_H */ diff --git a/sys/contrib/openzfs/module/icp/include/sys/crypto/ioctladmin.h b/sys/contrib/openzfs/module/icp/include/sys/crypto/ioctladmin.h deleted file mode 100644 index 24babd7755cc..000000000000 --- a/sys/contrib/openzfs/module/icp/include/sys/crypto/ioctladmin.h +++ /dev/null @@ -1,136 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2005 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#ifndef _SYS_CRYPTO_IOCTLADMIN_H -#define _SYS_CRYPTO_IOCTLADMIN_H - -#ifdef __cplusplus -extern "C" { -#endif - -#include <sys/zfs_context.h> -#include <sys/crypto/common.h> - -#define ADMIN_IOCTL_DEVICE "/dev/cryptoadm" - -#define CRYPTOADMIN(x) (('y' << 8) | (x)) - -/* - * Administrative IOCTLs - */ - -typedef struct crypto_get_dev_list { - uint_t dl_return_value; - uint_t dl_dev_count; - crypto_dev_list_entry_t dl_devs[1]; -} crypto_get_dev_list_t; - -typedef struct crypto_get_soft_list { - uint_t sl_return_value; - uint_t sl_soft_count; - size_t sl_soft_len; - caddr_t sl_soft_names; -} crypto_get_soft_list_t; - -typedef struct crypto_get_dev_info { - uint_t di_return_value; - char di_dev_name[MAXNAMELEN]; - uint_t di_dev_instance; - uint_t di_count; - crypto_mech_name_t di_list[1]; -} crypto_get_dev_info_t; - -typedef struct crypto_get_soft_info { - uint_t si_return_value; - char si_name[MAXNAMELEN]; - uint_t si_count; - crypto_mech_name_t si_list[1]; -} crypto_get_soft_info_t; - -typedef struct crypto_load_dev_disabled { - uint_t dd_return_value; - char dd_dev_name[MAXNAMELEN]; - uint_t dd_dev_instance; - uint_t dd_count; - crypto_mech_name_t dd_list[1]; -} crypto_load_dev_disabled_t; - -typedef struct crypto_load_soft_disabled { - uint_t sd_return_value; - char sd_name[MAXNAMELEN]; - uint_t sd_count; - crypto_mech_name_t sd_list[1]; -} crypto_load_soft_disabled_t; - -typedef struct crypto_unload_soft_module { - uint_t sm_return_value; - char sm_name[MAXNAMELEN]; -} crypto_unload_soft_module_t; - -typedef struct crypto_load_soft_config { - uint_t sc_return_value; - char sc_name[MAXNAMELEN]; - uint_t sc_count; - crypto_mech_name_t sc_list[1]; -} crypto_load_soft_config_t; - -typedef struct crypto_load_door { - uint_t ld_return_value; - uint_t ld_did; -} crypto_load_door_t; - -#ifdef _KERNEL -#ifdef _SYSCALL32 - -typedef struct crypto_get_soft_list32 { - uint32_t sl_return_value; - uint32_t sl_soft_count; - size32_t sl_soft_len; - caddr32_t 
sl_soft_names; -} crypto_get_soft_list32_t; - -#endif /* _SYSCALL32 */ -#endif /* _KERNEL */ - -#define CRYPTO_GET_VERSION CRYPTOADMIN(1) -#define CRYPTO_GET_DEV_LIST CRYPTOADMIN(2) -#define CRYPTO_GET_SOFT_LIST CRYPTOADMIN(3) -#define CRYPTO_GET_DEV_INFO CRYPTOADMIN(4) -#define CRYPTO_GET_SOFT_INFO CRYPTOADMIN(5) -#define CRYPTO_LOAD_DEV_DISABLED CRYPTOADMIN(8) -#define CRYPTO_LOAD_SOFT_DISABLED CRYPTOADMIN(9) -#define CRYPTO_UNLOAD_SOFT_MODULE CRYPTOADMIN(10) -#define CRYPTO_LOAD_SOFT_CONFIG CRYPTOADMIN(11) -#define CRYPTO_POOL_CREATE CRYPTOADMIN(12) -#define CRYPTO_POOL_WAIT CRYPTOADMIN(13) -#define CRYPTO_POOL_RUN CRYPTOADMIN(14) -#define CRYPTO_LOAD_DOOR CRYPTOADMIN(15) - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_CRYPTO_IOCTLADMIN_H */ diff --git a/sys/contrib/openzfs/module/icp/include/sys/crypto/ops_impl.h b/sys/contrib/openzfs/module/icp/include/sys/crypto/ops_impl.h deleted file mode 100644 index 230d74b063fc..000000000000 --- a/sys/contrib/openzfs/module/icp/include/sys/crypto/ops_impl.h +++ /dev/null @@ -1,630 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#ifndef _SYS_CRYPTO_OPS_IMPL_H -#define _SYS_CRYPTO_OPS_IMPL_H - -/* - * Scheduler internal structures. - */ - -#ifdef __cplusplus -extern "C" { -#endif - -#include <sys/zfs_context.h> -#include <sys/crypto/api.h> -#include <sys/crypto/spi.h> -#include <sys/crypto/impl.h> -#include <sys/crypto/common.h> - -/* - * The parameters needed for each function group are batched - * in one structure. This is much simpler than having a - * separate structure for each function. - * - * In some cases, a field is generically named to keep the - * structure small. The comments indicate these cases. - */ -typedef struct kcf_digest_ops_params { - crypto_session_id_t do_sid; - crypto_mech_type_t do_framework_mechtype; - crypto_mechanism_t do_mech; - crypto_data_t *do_data; - crypto_data_t *do_digest; - crypto_key_t *do_digest_key; /* Argument for digest_key() */ -} kcf_digest_ops_params_t; - -typedef struct kcf_mac_ops_params { - crypto_session_id_t mo_sid; - crypto_mech_type_t mo_framework_mechtype; - crypto_mechanism_t mo_mech; - crypto_key_t *mo_key; - crypto_data_t *mo_data; - crypto_data_t *mo_mac; - crypto_spi_ctx_template_t mo_templ; -} kcf_mac_ops_params_t; - -typedef struct kcf_encrypt_ops_params { - crypto_session_id_t eo_sid; - crypto_mech_type_t eo_framework_mechtype; - crypto_mechanism_t eo_mech; - crypto_key_t *eo_key; - crypto_data_t *eo_plaintext; - crypto_data_t *eo_ciphertext; - crypto_spi_ctx_template_t eo_templ; -} kcf_encrypt_ops_params_t; - -typedef struct kcf_decrypt_ops_params { - crypto_session_id_t dop_sid; - crypto_mech_type_t dop_framework_mechtype; - crypto_mechanism_t dop_mech; - crypto_key_t *dop_key; - crypto_data_t *dop_ciphertext; - crypto_data_t *dop_plaintext; - crypto_spi_ctx_template_t dop_templ; -} kcf_decrypt_ops_params_t; - -typedef struct kcf_sign_ops_params { - crypto_session_id_t so_sid; - crypto_mech_type_t so_framework_mechtype; - crypto_mechanism_t so_mech; - crypto_key_t *so_key; - crypto_data_t *so_data; - crypto_data_t 
*so_signature; - crypto_spi_ctx_template_t so_templ; -} kcf_sign_ops_params_t; - -typedef struct kcf_verify_ops_params { - crypto_session_id_t vo_sid; - crypto_mech_type_t vo_framework_mechtype; - crypto_mechanism_t vo_mech; - crypto_key_t *vo_key; - crypto_data_t *vo_data; - crypto_data_t *vo_signature; - crypto_spi_ctx_template_t vo_templ; -} kcf_verify_ops_params_t; - -typedef struct kcf_encrypt_mac_ops_params { - crypto_session_id_t em_sid; - crypto_mech_type_t em_framework_encr_mechtype; - crypto_mechanism_t em_encr_mech; - crypto_key_t *em_encr_key; - crypto_mech_type_t em_framework_mac_mechtype; - crypto_mechanism_t em_mac_mech; - crypto_key_t *em_mac_key; - crypto_data_t *em_plaintext; - crypto_dual_data_t *em_ciphertext; - crypto_data_t *em_mac; - crypto_spi_ctx_template_t em_encr_templ; - crypto_spi_ctx_template_t em_mac_templ; -} kcf_encrypt_mac_ops_params_t; - -typedef struct kcf_mac_decrypt_ops_params { - crypto_session_id_t md_sid; - crypto_mech_type_t md_framework_mac_mechtype; - crypto_mechanism_t md_mac_mech; - crypto_key_t *md_mac_key; - crypto_mech_type_t md_framework_decr_mechtype; - crypto_mechanism_t md_decr_mech; - crypto_key_t *md_decr_key; - crypto_dual_data_t *md_ciphertext; - crypto_data_t *md_mac; - crypto_data_t *md_plaintext; - crypto_spi_ctx_template_t md_mac_templ; - crypto_spi_ctx_template_t md_decr_templ; -} kcf_mac_decrypt_ops_params_t; - -typedef struct kcf_random_number_ops_params { - crypto_session_id_t rn_sid; - uchar_t *rn_buf; - size_t rn_buflen; - uint_t rn_entropy_est; - uint32_t rn_flags; -} kcf_random_number_ops_params_t; - -/* - * so_pd is useful when the provider descriptor (pd) supplying the - * provider handle is different from the pd supplying the ops vector. - * This is the case for session open/close where so_pd can be the pd - * of a logical provider. The pd supplying the ops vector is passed - * as an argument to kcf_submit_request(). 
- */ -typedef struct kcf_session_ops_params { - crypto_session_id_t *so_sid_ptr; - crypto_session_id_t so_sid; - crypto_user_type_t so_user_type; - char *so_pin; - size_t so_pin_len; - kcf_provider_desc_t *so_pd; -} kcf_session_ops_params_t; - -typedef struct kcf_object_ops_params { - crypto_session_id_t oo_sid; - crypto_object_id_t oo_object_id; - crypto_object_attribute_t *oo_template; - uint_t oo_attribute_count; - crypto_object_id_t *oo_object_id_ptr; - size_t *oo_object_size; - void **oo_find_init_pp_ptr; - void *oo_find_pp; - uint_t oo_max_object_count; - uint_t *oo_object_count_ptr; -} kcf_object_ops_params_t; - -/* - * ko_key is used to encode wrapping key in key_wrap() and - * unwrapping key in key_unwrap(). ko_key_template and - * ko_key_attribute_count are used to encode public template - * and public template attr count in key_generate_pair(). - * kops->ko_key_object_id_ptr is used to encode public key - * in key_generate_pair(). - */ -typedef struct kcf_key_ops_params { - crypto_session_id_t ko_sid; - crypto_mech_type_t ko_framework_mechtype; - crypto_mechanism_t ko_mech; - crypto_object_attribute_t *ko_key_template; - uint_t ko_key_attribute_count; - crypto_object_id_t *ko_key_object_id_ptr; - crypto_object_attribute_t *ko_private_key_template; - uint_t ko_private_key_attribute_count; - crypto_object_id_t *ko_private_key_object_id_ptr; - crypto_key_t *ko_key; - uchar_t *ko_wrapped_key; - size_t *ko_wrapped_key_len_ptr; - crypto_object_attribute_t *ko_out_template1; - crypto_object_attribute_t *ko_out_template2; - uint_t ko_out_attribute_count1; - uint_t ko_out_attribute_count2; -} kcf_key_ops_params_t; - -/* - * po_pin and po_pin_len are used to encode new_pin and new_pin_len - * when wrapping set_pin() function parameters. - * - * po_pd is useful when the provider descriptor (pd) supplying the - * provider handle is different from the pd supplying the ops vector. 
- * This is true for the ext_info provider entry point where po_pd - * can be the pd of a logical provider. The pd supplying the ops vector - * is passed as an argument to kcf_submit_request(). - */ -typedef struct kcf_provmgmt_ops_params { - crypto_session_id_t po_sid; - char *po_pin; - size_t po_pin_len; - char *po_old_pin; - size_t po_old_pin_len; - char *po_label; - crypto_provider_ext_info_t *po_ext_info; - kcf_provider_desc_t *po_pd; -} kcf_provmgmt_ops_params_t; - -/* - * The operation type within a function group. - */ -typedef enum kcf_op_type { - /* common ops for all mechanisms */ - KCF_OP_INIT = 1, - KCF_OP_SINGLE, /* pkcs11 sense. So, INIT is already done */ - KCF_OP_UPDATE, - KCF_OP_FINAL, - KCF_OP_ATOMIC, - - /* digest_key op */ - KCF_OP_DIGEST_KEY, - - /* mac specific op */ - KCF_OP_MAC_VERIFY_ATOMIC, - - /* mac/cipher specific op */ - KCF_OP_MAC_VERIFY_DECRYPT_ATOMIC, - - /* sign_recover ops */ - KCF_OP_SIGN_RECOVER_INIT, - KCF_OP_SIGN_RECOVER, - KCF_OP_SIGN_RECOVER_ATOMIC, - - /* verify_recover ops */ - KCF_OP_VERIFY_RECOVER_INIT, - KCF_OP_VERIFY_RECOVER, - KCF_OP_VERIFY_RECOVER_ATOMIC, - - /* random number ops */ - KCF_OP_RANDOM_SEED, - KCF_OP_RANDOM_GENERATE, - - /* session management ops */ - KCF_OP_SESSION_OPEN, - KCF_OP_SESSION_CLOSE, - KCF_OP_SESSION_LOGIN, - KCF_OP_SESSION_LOGOUT, - - /* object management ops */ - KCF_OP_OBJECT_CREATE, - KCF_OP_OBJECT_COPY, - KCF_OP_OBJECT_DESTROY, - KCF_OP_OBJECT_GET_SIZE, - KCF_OP_OBJECT_GET_ATTRIBUTE_VALUE, - KCF_OP_OBJECT_SET_ATTRIBUTE_VALUE, - KCF_OP_OBJECT_FIND_INIT, - KCF_OP_OBJECT_FIND, - KCF_OP_OBJECT_FIND_FINAL, - - /* key management ops */ - KCF_OP_KEY_GENERATE, - KCF_OP_KEY_GENERATE_PAIR, - KCF_OP_KEY_WRAP, - KCF_OP_KEY_UNWRAP, - KCF_OP_KEY_DERIVE, - KCF_OP_KEY_CHECK, - - /* provider management ops */ - KCF_OP_MGMT_EXTINFO, - KCF_OP_MGMT_INITTOKEN, - KCF_OP_MGMT_INITPIN, - KCF_OP_MGMT_SETPIN -} kcf_op_type_t; - -/* - * The operation groups that need wrapping of parameters. 
This is somewhat - * similar to the function group type in spi.h except that this also includes - * all the functions that don't have a mechanism. - * - * The wrapper macros should never take these enum values as an argument. - * Rather, they are assigned in the macro itself since they are known - * from the macro name. - */ -typedef enum kcf_op_group { - KCF_OG_DIGEST = 1, - KCF_OG_MAC, - KCF_OG_ENCRYPT, - KCF_OG_DECRYPT, - KCF_OG_SIGN, - KCF_OG_VERIFY, - KCF_OG_ENCRYPT_MAC, - KCF_OG_MAC_DECRYPT, - KCF_OG_RANDOM, - KCF_OG_SESSION, - KCF_OG_OBJECT, - KCF_OG_KEY, - KCF_OG_PROVMGMT, - KCF_OG_NOSTORE_KEY -} kcf_op_group_t; - -/* - * The kcf_op_type_t enum values used here should be only for those - * operations for which there is a k-api routine in sys/crypto/api.h. - */ -#define IS_INIT_OP(ftype) ((ftype) == KCF_OP_INIT) -#define IS_SINGLE_OP(ftype) ((ftype) == KCF_OP_SINGLE) -#define IS_UPDATE_OP(ftype) ((ftype) == KCF_OP_UPDATE) -#define IS_FINAL_OP(ftype) ((ftype) == KCF_OP_FINAL) -#define IS_ATOMIC_OP(ftype) ( \ - (ftype) == KCF_OP_ATOMIC || (ftype) == KCF_OP_MAC_VERIFY_ATOMIC || \ - (ftype) == KCF_OP_MAC_VERIFY_DECRYPT_ATOMIC || \ - (ftype) == KCF_OP_SIGN_RECOVER_ATOMIC || \ - (ftype) == KCF_OP_VERIFY_RECOVER_ATOMIC) - -/* - * Keep the parameters associated with a request around. - * We need to pass them to the SPI. 
- */ -typedef struct kcf_req_params { - kcf_op_group_t rp_opgrp; - kcf_op_type_t rp_optype; - - union { - kcf_digest_ops_params_t digest_params; - kcf_mac_ops_params_t mac_params; - kcf_encrypt_ops_params_t encrypt_params; - kcf_decrypt_ops_params_t decrypt_params; - kcf_sign_ops_params_t sign_params; - kcf_verify_ops_params_t verify_params; - kcf_encrypt_mac_ops_params_t encrypt_mac_params; - kcf_mac_decrypt_ops_params_t mac_decrypt_params; - kcf_random_number_ops_params_t random_number_params; - kcf_session_ops_params_t session_params; - kcf_object_ops_params_t object_params; - kcf_key_ops_params_t key_params; - kcf_provmgmt_ops_params_t provmgmt_params; - } rp_u; -} kcf_req_params_t; - - -/* - * The ioctl/k-api code should bundle the parameters into a kcf_req_params_t - * structure before calling a scheduler routine. The following macros are - * available for that purpose. - * - * For the most part, the macro arguments closely correspond to the - * function parameters. In some cases, we use generic names. The comments - * for the structure should indicate these cases. 
- */ -#define KCF_WRAP_DIGEST_OPS_PARAMS(req, ftype, _sid, _mech, _key, \ - _data, _digest) { \ - kcf_digest_ops_params_t *dops = &(req)->rp_u.digest_params; \ - crypto_mechanism_t *mechp = _mech; \ - \ - (req)->rp_opgrp = KCF_OG_DIGEST; \ - (req)->rp_optype = ftype; \ - dops->do_sid = _sid; \ - if (mechp != NULL) { \ - dops->do_mech = *mechp; \ - dops->do_framework_mechtype = mechp->cm_type; \ - } \ - dops->do_digest_key = _key; \ - dops->do_data = _data; \ - dops->do_digest = _digest; \ -} - -#define KCF_WRAP_MAC_OPS_PARAMS(req, ftype, _sid, _mech, _key, \ - _data, _mac, _templ) { \ - kcf_mac_ops_params_t *mops = &(req)->rp_u.mac_params; \ - crypto_mechanism_t *mechp = _mech; \ - \ - (req)->rp_opgrp = KCF_OG_MAC; \ - (req)->rp_optype = ftype; \ - mops->mo_sid = _sid; \ - if (mechp != NULL) { \ - mops->mo_mech = *mechp; \ - mops->mo_framework_mechtype = mechp->cm_type; \ - } \ - mops->mo_key = _key; \ - mops->mo_data = _data; \ - mops->mo_mac = _mac; \ - mops->mo_templ = _templ; \ -} - -#define KCF_WRAP_ENCRYPT_OPS_PARAMS(req, ftype, _sid, _mech, _key, \ - _plaintext, _ciphertext, _templ) { \ - kcf_encrypt_ops_params_t *cops = &(req)->rp_u.encrypt_params; \ - crypto_mechanism_t *mechp = _mech; \ - \ - (req)->rp_opgrp = KCF_OG_ENCRYPT; \ - (req)->rp_optype = ftype; \ - cops->eo_sid = _sid; \ - if (mechp != NULL) { \ - cops->eo_mech = *mechp; \ - cops->eo_framework_mechtype = mechp->cm_type; \ - } \ - cops->eo_key = _key; \ - cops->eo_plaintext = _plaintext; \ - cops->eo_ciphertext = _ciphertext; \ - cops->eo_templ = _templ; \ -} - -#define KCF_WRAP_DECRYPT_OPS_PARAMS(req, ftype, _sid, _mech, _key, \ - _ciphertext, _plaintext, _templ) { \ - kcf_decrypt_ops_params_t *cops = &(req)->rp_u.decrypt_params; \ - crypto_mechanism_t *mechp = _mech; \ - \ - (req)->rp_opgrp = KCF_OG_DECRYPT; \ - (req)->rp_optype = ftype; \ - cops->dop_sid = _sid; \ - if (mechp != NULL) { \ - cops->dop_mech = *mechp; \ - cops->dop_framework_mechtype = mechp->cm_type; \ - } \ - cops->dop_key = 
_key; \ - cops->dop_ciphertext = _ciphertext; \ - cops->dop_plaintext = _plaintext; \ - cops->dop_templ = _templ; \ -} - -#define KCF_WRAP_SIGN_OPS_PARAMS(req, ftype, _sid, _mech, _key, \ - _data, _signature, _templ) { \ - kcf_sign_ops_params_t *sops = &(req)->rp_u.sign_params; \ - crypto_mechanism_t *mechp = _mech; \ - \ - (req)->rp_opgrp = KCF_OG_SIGN; \ - (req)->rp_optype = ftype; \ - sops->so_sid = _sid; \ - if (mechp != NULL) { \ - sops->so_mech = *mechp; \ - sops->so_framework_mechtype = mechp->cm_type; \ - } \ - sops->so_key = _key; \ - sops->so_data = _data; \ - sops->so_signature = _signature; \ - sops->so_templ = _templ; \ -} - -#define KCF_WRAP_VERIFY_OPS_PARAMS(req, ftype, _sid, _mech, _key, \ - _data, _signature, _templ) { \ - kcf_verify_ops_params_t *vops = &(req)->rp_u.verify_params; \ - crypto_mechanism_t *mechp = _mech; \ - \ - (req)->rp_opgrp = KCF_OG_VERIFY; \ - (req)->rp_optype = ftype; \ - vops->vo_sid = _sid; \ - if (mechp != NULL) { \ - vops->vo_mech = *mechp; \ - vops->vo_framework_mechtype = mechp->cm_type; \ - } \ - vops->vo_key = _key; \ - vops->vo_data = _data; \ - vops->vo_signature = _signature; \ - vops->vo_templ = _templ; \ -} - -#define KCF_WRAP_ENCRYPT_MAC_OPS_PARAMS(req, ftype, _sid, _encr_key, \ - _mac_key, _plaintext, _ciphertext, _mac, _encr_templ, _mac_templ) { \ - kcf_encrypt_mac_ops_params_t *cmops = &(req)->rp_u.encrypt_mac_params; \ - \ - (req)->rp_opgrp = KCF_OG_ENCRYPT_MAC; \ - (req)->rp_optype = ftype; \ - cmops->em_sid = _sid; \ - cmops->em_encr_key = _encr_key; \ - cmops->em_mac_key = _mac_key; \ - cmops->em_plaintext = _plaintext; \ - cmops->em_ciphertext = _ciphertext; \ - cmops->em_mac = _mac; \ - cmops->em_encr_templ = _encr_templ; \ - cmops->em_mac_templ = _mac_templ; \ -} - -#define KCF_WRAP_MAC_DECRYPT_OPS_PARAMS(req, ftype, _sid, _mac_key, \ - _decr_key, _ciphertext, _mac, _plaintext, _mac_templ, _decr_templ) { \ - kcf_mac_decrypt_ops_params_t *cmops = &(req)->rp_u.mac_decrypt_params; \ - \ - (req)->rp_opgrp = 
KCF_OG_MAC_DECRYPT; \ - (req)->rp_optype = ftype; \ - cmops->md_sid = _sid; \ - cmops->md_mac_key = _mac_key; \ - cmops->md_decr_key = _decr_key; \ - cmops->md_ciphertext = _ciphertext; \ - cmops->md_mac = _mac; \ - cmops->md_plaintext = _plaintext; \ - cmops->md_mac_templ = _mac_templ; \ - cmops->md_decr_templ = _decr_templ; \ -} - -#define KCF_WRAP_RANDOM_OPS_PARAMS(req, ftype, _sid, _buf, _buflen, \ - _est, _flags) { \ - kcf_random_number_ops_params_t *rops = \ - &(req)->rp_u.random_number_params; \ - \ - (req)->rp_opgrp = KCF_OG_RANDOM; \ - (req)->rp_optype = ftype; \ - rops->rn_sid = _sid; \ - rops->rn_buf = _buf; \ - rops->rn_buflen = _buflen; \ - rops->rn_entropy_est = _est; \ - rops->rn_flags = _flags; \ -} - -#define KCF_WRAP_SESSION_OPS_PARAMS(req, ftype, _sid_ptr, _sid, \ - _user_type, _pin, _pin_len, _pd) { \ - kcf_session_ops_params_t *sops = &(req)->rp_u.session_params; \ - \ - (req)->rp_opgrp = KCF_OG_SESSION; \ - (req)->rp_optype = ftype; \ - sops->so_sid_ptr = _sid_ptr; \ - sops->so_sid = _sid; \ - sops->so_user_type = _user_type; \ - sops->so_pin = _pin; \ - sops->so_pin_len = _pin_len; \ - sops->so_pd = _pd; \ -} - -#define KCF_WRAP_OBJECT_OPS_PARAMS(req, ftype, _sid, _object_id, \ - _template, _attribute_count, _object_id_ptr, _object_size, \ - _find_init_pp_ptr, _find_pp, _max_object_count, _object_count_ptr) { \ - kcf_object_ops_params_t *jops = &(req)->rp_u.object_params; \ - \ - (req)->rp_opgrp = KCF_OG_OBJECT; \ - (req)->rp_optype = ftype; \ - jops->oo_sid = _sid; \ - jops->oo_object_id = _object_id; \ - jops->oo_template = _template; \ - jops->oo_attribute_count = _attribute_count; \ - jops->oo_object_id_ptr = _object_id_ptr; \ - jops->oo_object_size = _object_size; \ - jops->oo_find_init_pp_ptr = _find_init_pp_ptr; \ - jops->oo_find_pp = _find_pp; \ - jops->oo_max_object_count = _max_object_count; \ - jops->oo_object_count_ptr = _object_count_ptr; \ -} - -#define KCF_WRAP_KEY_OPS_PARAMS(req, ftype, _sid, _mech, _key_template, \ - 
_key_attribute_count, _key_object_id_ptr, _private_key_template, \ - _private_key_attribute_count, _private_key_object_id_ptr, \ - _key, _wrapped_key, _wrapped_key_len_ptr) { \ - kcf_key_ops_params_t *kops = &(req)->rp_u.key_params; \ - crypto_mechanism_t *mechp = _mech; \ - \ - (req)->rp_opgrp = KCF_OG_KEY; \ - (req)->rp_optype = ftype; \ - kops->ko_sid = _sid; \ - if (mechp != NULL) { \ - kops->ko_mech = *mechp; \ - kops->ko_framework_mechtype = mechp->cm_type; \ - } \ - kops->ko_key_template = _key_template; \ - kops->ko_key_attribute_count = _key_attribute_count; \ - kops->ko_key_object_id_ptr = _key_object_id_ptr; \ - kops->ko_private_key_template = _private_key_template; \ - kops->ko_private_key_attribute_count = _private_key_attribute_count; \ - kops->ko_private_key_object_id_ptr = _private_key_object_id_ptr; \ - kops->ko_key = _key; \ - kops->ko_wrapped_key = _wrapped_key; \ - kops->ko_wrapped_key_len_ptr = _wrapped_key_len_ptr; \ -} - -#define KCF_WRAP_PROVMGMT_OPS_PARAMS(req, ftype, _sid, _old_pin, \ - _old_pin_len, _pin, _pin_len, _label, _ext_info, _pd) { \ - kcf_provmgmt_ops_params_t *pops = &(req)->rp_u.provmgmt_params; \ - \ - (req)->rp_opgrp = KCF_OG_PROVMGMT; \ - (req)->rp_optype = ftype; \ - pops->po_sid = _sid; \ - pops->po_pin = _pin; \ - pops->po_pin_len = _pin_len; \ - pops->po_old_pin = _old_pin; \ - pops->po_old_pin_len = _old_pin_len; \ - pops->po_label = _label; \ - pops->po_ext_info = _ext_info; \ - pops->po_pd = _pd; \ -} - -#define KCF_WRAP_NOSTORE_KEY_OPS_PARAMS(req, ftype, _sid, _mech, \ - _key_template, _key_attribute_count, _private_key_template, \ - _private_key_attribute_count, _key, _out_template1, \ - _out_attribute_count1, _out_template2, _out_attribute_count2) { \ - kcf_key_ops_params_t *kops = &(req)->rp_u.key_params; \ - crypto_mechanism_t *mechp = _mech; \ - \ - (req)->rp_opgrp = KCF_OG_NOSTORE_KEY; \ - (req)->rp_optype = ftype; \ - kops->ko_sid = _sid; \ - if (mechp != NULL) { \ - kops->ko_mech = *mechp; \ - 
kops->ko_framework_mechtype = mechp->cm_type; \ - } \ - kops->ko_key_template = _key_template; \ - kops->ko_key_attribute_count = _key_attribute_count; \ - kops->ko_key_object_id_ptr = NULL; \ - kops->ko_private_key_template = _private_key_template; \ - kops->ko_private_key_attribute_count = _private_key_attribute_count; \ - kops->ko_private_key_object_id_ptr = NULL; \ - kops->ko_key = _key; \ - kops->ko_wrapped_key = NULL; \ - kops->ko_wrapped_key_len_ptr = 0; \ - kops->ko_out_template1 = _out_template1; \ - kops->ko_out_template2 = _out_template2; \ - kops->ko_out_attribute_count1 = _out_attribute_count1; \ - kops->ko_out_attribute_count2 = _out_attribute_count2; \ -} - -#define KCF_SET_PROVIDER_MECHNUM(fmtype, pd, mechp) \ - (mechp)->cm_type = \ - KCF_TO_PROV_MECHNUM(pd, fmtype); - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_CRYPTO_OPS_IMPL_H */ diff --git a/sys/contrib/openzfs/module/icp/include/sys/crypto/sched_impl.h b/sys/contrib/openzfs/module/icp/include/sys/crypto/sched_impl.h index 85ea0ba1d092..355c1a87faa4 100644 --- a/sys/contrib/openzfs/module/icp/include/sys/crypto/sched_impl.h +++ b/sys/contrib/openzfs/module/icp/include/sys/crypto/sched_impl.h @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. 
* @@ -39,60 +39,6 @@ extern "C" { #include <sys/crypto/spi.h> #include <sys/crypto/impl.h> #include <sys/crypto/common.h> -#include <sys/crypto/ops_impl.h> - -typedef void (kcf_func_t)(void *, int); - -typedef enum kcf_req_status { - REQ_ALLOCATED = 1, - REQ_WAITING, /* At the framework level */ - REQ_INPROGRESS, /* At the provider level */ - REQ_DONE, - REQ_CANCELED -} kcf_req_status_t; - -typedef enum kcf_call_type { - CRYPTO_SYNCH = 1, - CRYPTO_ASYNCH -} kcf_call_type_t; - -#define CHECK_RESTRICT(crq) (crq != NULL && \ - ((crq)->cr_flag & CRYPTO_RESTRICTED)) - -#define CHECK_RESTRICT_FALSE B_FALSE - -#define CHECK_FASTPATH(crq, pd) ((crq) == NULL || \ - !((crq)->cr_flag & CRYPTO_ALWAYS_QUEUE)) && \ - (pd)->pd_prov_type == CRYPTO_SW_PROVIDER - -#define KCF_KMFLAG(crq) (((crq) == NULL) ? KM_SLEEP : KM_NOSLEEP) - -/* - * The framework keeps an internal handle to use in the adaptive - * asynchronous case. This is the case when a client has the - * CRYPTO_ALWAYS_QUEUE bit clear and a software provider is used for - * the request. The request is completed in the context of the calling - * thread and kernel memory must be allocated with KM_NOSLEEP. - * - * The framework passes a pointer to the handle in crypto_req_handle_t - * argument when it calls the SPI of the software provider. The macros - * KCF_RHNDL() and KCF_SWFP_RHNDL() are used to do this. - * - * When a provider asks the framework for kmflag value via - * crypto_kmflag(9S) we use REQHNDL2_KMFLAG() macro. - */ -extern ulong_t kcf_swprov_hndl; -#define KCF_RHNDL(kmflag) (((kmflag) == KM_SLEEP) ? NULL : &kcf_swprov_hndl) -#define KCF_SWFP_RHNDL(crq) (((crq) == NULL) ? NULL : &kcf_swprov_hndl) -#define REQHNDL2_KMFLAG(rhndl) \ - ((rhndl == &kcf_swprov_hndl) ? KM_NOSLEEP : KM_SLEEP) - -/* Internal call_req flags. They start after the public ones in api.h */ - -#define CRYPTO_SETDUAL 0x00001000 /* Set the 'cont' boolean before */ - /* submitting the request */ -#define KCF_ISDUALREQ(crq) \ - (((crq) == NULL) ? 
B_FALSE : (crq->cr_flag & CRYPTO_SETDUAL)) typedef struct kcf_prov_tried { kcf_provider_desc_t *pt_pd; @@ -106,178 +52,8 @@ typedef struct kcf_prov_tried { (tlist != NULL && is_in_triedlist(pd, tlist)) #define IS_RECOVERABLE(error) \ - (error == CRYPTO_BUFFER_TOO_BIG || \ - error == CRYPTO_BUSY || \ - error == CRYPTO_DEVICE_ERROR || \ - error == CRYPTO_DEVICE_MEMORY || \ - error == CRYPTO_KEY_SIZE_RANGE || \ - error == CRYPTO_NO_PERMISSION) - -#define KCF_ATOMIC_INCR(x) atomic_add_32(&(x), 1) -#define KCF_ATOMIC_DECR(x) atomic_add_32(&(x), -1) - -/* - * Node structure for synchronous requests. - */ -typedef struct kcf_sreq_node { - /* Should always be the first field in this structure */ - kcf_call_type_t sn_type; - /* - * sn_cv and sr_lock are used to wait for the - * operation to complete. sn_lock also protects - * the sn_state field. - */ - kcondvar_t sn_cv; - kmutex_t sn_lock; - kcf_req_status_t sn_state; - - /* - * Return value from the operation. This will be - * one of the CRYPTO_* errors defined in common.h. - */ - int sn_rv; - - /* - * parameters to call the SPI with. This can be - * a pointer as we know the caller context/stack stays. - */ - struct kcf_req_params *sn_params; - - /* Internal context for this request */ - struct kcf_context *sn_context; - - /* Provider handling this request */ - kcf_provider_desc_t *sn_provider; -} kcf_sreq_node_t; - -/* - * Node structure for asynchronous requests. A node can be on - * on a chain of requests hanging of the internal context - * structure and can be in the global software provider queue. - */ -typedef struct kcf_areq_node { - /* Should always be the first field in this structure */ - kcf_call_type_t an_type; - - /* an_lock protects the field an_state */ - kmutex_t an_lock; - kcf_req_status_t an_state; - crypto_call_req_t an_reqarg; - - /* - * parameters to call the SPI with. We need to - * save the params since the caller stack can go away. 
- */ - struct kcf_req_params an_params; - - /* - * The next two fields should be NULL for operations that - * don't need a context. - */ - /* Internal context for this request */ - struct kcf_context *an_context; - - /* next in chain of requests for context */ - struct kcf_areq_node *an_ctxchain_next; - - kcondvar_t an_turn_cv; - boolean_t an_is_my_turn; - boolean_t an_isdual; /* for internal reuse */ - - /* - * Next and previous nodes in the global software - * queue. These fields are NULL for a hardware - * provider since we use a taskq there. - */ - struct kcf_areq_node *an_next; - struct kcf_areq_node *an_prev; - - /* Provider handling this request */ - kcf_provider_desc_t *an_provider; - kcf_prov_tried_t *an_tried_plist; - - struct kcf_areq_node *an_idnext; /* Next in ID hash */ - struct kcf_areq_node *an_idprev; /* Prev in ID hash */ - kcondvar_t an_done; /* Signal request completion */ - uint_t an_refcnt; -} kcf_areq_node_t; - -#define KCF_AREQ_REFHOLD(areq) { \ - atomic_add_32(&(areq)->an_refcnt, 1); \ - ASSERT((areq)->an_refcnt != 0); \ -} - -#define KCF_AREQ_REFRELE(areq) { \ - ASSERT((areq)->an_refcnt != 0); \ - membar_exit(); \ - if (atomic_add_32_nv(&(areq)->an_refcnt, -1) == 0) \ - kcf_free_req(areq); \ -} - -#define GET_REQ_TYPE(arg) *((kcf_call_type_t *)(arg)) - -#define NOTIFY_CLIENT(areq, err) (*(areq)->an_reqarg.cr_callback_func)(\ - (areq)->an_reqarg.cr_callback_arg, err); - -/* For internally generated call requests for dual operations */ -typedef struct kcf_call_req { - crypto_call_req_t kr_callreq; /* external client call req */ - kcf_req_params_t kr_params; /* Params saved for next call */ - kcf_areq_node_t *kr_areq; /* Use this areq */ - off_t kr_saveoffset; - size_t kr_savelen; -} kcf_dual_req_t; - -/* - * The following are some what similar to macros in callo.h, which implement - * callout tables. - * - * The lower four bits of the ID are used to encode the table ID to - * index in to. 
The REQID_COUNTER_HIGH bit is used to avoid any check for - * wrap around when generating ID. We assume that there won't be a request - * which takes more time than 2^^(sizeof (long) - 5) other requests submitted - * after it. This ensures there won't be any ID collision. - */ -#define REQID_COUNTER_HIGH (1UL << (8 * sizeof (long) - 1)) -#define REQID_COUNTER_SHIFT 4 -#define REQID_COUNTER_LOW (1 << REQID_COUNTER_SHIFT) -#define REQID_TABLES 16 -#define REQID_TABLE_MASK (REQID_TABLES - 1) - -#define REQID_BUCKETS 512 -#define REQID_BUCKET_MASK (REQID_BUCKETS - 1) -#define REQID_HASH(id) (((id) >> REQID_COUNTER_SHIFT) & REQID_BUCKET_MASK) - -#define GET_REQID(areq) (areq)->an_reqarg.cr_reqid -#define SET_REQID(areq, val) GET_REQID(areq) = val - -/* - * Hash table for async requests. - */ -typedef struct kcf_reqid_table { - kmutex_t rt_lock; - crypto_req_id_t rt_curid; - kcf_areq_node_t *rt_idhash[REQID_BUCKETS]; -} kcf_reqid_table_t; - -/* - * Global software provider queue structure. Requests to be - * handled by a SW provider and have the ALWAYS_QUEUE flag set - * get queued here. - */ -typedef struct kcf_global_swq { - /* - * gs_cv and gs_lock are used to wait for new requests. - * gs_lock protects the changes to the queue. - */ - kcondvar_t gs_cv; - kmutex_t gs_lock; - uint_t gs_njobs; - uint_t gs_maxjobs; - kcf_areq_node_t *gs_first; - kcf_areq_node_t *gs_last; -} kcf_global_swq_t; - + (error == CRYPTO_BUSY || \ + error == CRYPTO_KEY_SIZE_RANGE) /* * Internal representation of a canonical context. We contain crypto_ctx_t @@ -287,47 +63,27 @@ typedef struct kcf_global_swq { typedef struct kcf_context { crypto_ctx_t kc_glbl_ctx; uint_t kc_refcnt; - kmutex_t kc_in_use_lock; - /* - * kc_req_chain_first and kc_req_chain_last are used to chain - * multiple async requests using the same context. They should be - * NULL for sync requests. - */ - kcf_areq_node_t *kc_req_chain_first; - kcf_areq_node_t *kc_req_chain_last; kcf_provider_desc_t *kc_prov_desc; /* Prov. 
descriptor */ kcf_provider_desc_t *kc_sw_prov_desc; /* Prov. descriptor */ - kcf_mech_entry_t *kc_mech; - struct kcf_context *kc_secondctx; /* for dual contexts */ } kcf_context_t; /* - * Bump up the reference count on the framework private context. A - * global context or a request that references this structure should - * do a hold. - */ -#define KCF_CONTEXT_REFHOLD(ictx) { \ - atomic_add_32(&(ictx)->kc_refcnt, 1); \ - ASSERT((ictx)->kc_refcnt != 0); \ -} - -/* * Decrement the reference count on the framework private context. * When the last reference is released, the framework private * context structure is freed along with the global context. */ #define KCF_CONTEXT_REFRELE(ictx) { \ - ASSERT((ictx)->kc_refcnt != 0); \ - membar_exit(); \ - if (atomic_add_32_nv(&(ictx)->kc_refcnt, -1) == 0) \ + membar_producer(); \ + int newval = atomic_add_32_nv(&(ictx)->kc_refcnt, -1); \ + ASSERT(newval != -1); \ + if (newval == 0) \ kcf_free_context(ictx); \ } /* - * Check if we can release the context now. In case of CRYPTO_QUEUED - * we do not release it as we can do it only after the provider notified - * us. In case of CRYPTO_BUSY, the client can retry the request using - * the context, so we do not release the context. + * Check if we can release the context now. In case of CRYPTO_BUSY, + * the client can retry the request using the context, + * so we do not release the context. * * This macro should be called only from the final routine in * an init/update/final sequence. We do not release the context in case @@ -345,185 +101,33 @@ typedef struct kcf_context { * This macro determines whether we're done with a context. 
*/ #define KCF_CONTEXT_DONE(rv) \ - ((rv) != CRYPTO_QUEUED && (rv) != CRYPTO_BUSY && \ - (rv) != CRYPTO_BUFFER_TOO_SMALL) + ((rv) != CRYPTO_BUSY && (rv) != CRYPTO_BUFFER_TOO_SMALL) + + +#define KCF_SET_PROVIDER_MECHNUM(fmtype, pd, mechp) \ + (mechp)->cm_type = \ + KCF_TO_PROV_MECHNUM(pd, fmtype); /* * A crypto_ctx_template_t is internally a pointer to this struct */ typedef struct kcf_ctx_template { - crypto_kcf_provider_handle_t ct_prov_handle; /* provider handle */ - uint_t ct_generation; /* generation # */ size_t ct_size; /* for freeing */ crypto_spi_ctx_template_t ct_prov_tmpl; /* context template */ - /* from the SW prov */ + /* from the provider */ } kcf_ctx_template_t; -/* - * Structure for pool of threads working on global software queue. - */ -typedef struct kcf_pool { - uint32_t kp_threads; /* Number of threads in pool */ - uint32_t kp_idlethreads; /* Idle threads in pool */ - uint32_t kp_blockedthreads; /* Blocked threads in pool */ - - /* - * cv & lock to monitor the condition when no threads - * are around. In this case the failover thread kicks in. - */ - kcondvar_t kp_nothr_cv; - kmutex_t kp_thread_lock; - - /* Userspace thread creator variables. */ - boolean_t kp_signal_create_thread; /* Create requested flag */ - int kp_nthrs; /* # of threads to create */ - boolean_t kp_user_waiting; /* Thread waiting for work */ - - /* - * cv & lock for the condition where more threads need to be - * created. kp_user_lock also protects the three fields above. - */ - kcondvar_t kp_user_cv; /* Creator cond. variable */ - kmutex_t kp_user_lock; /* Creator lock */ -} kcf_pool_t; - - -/* - * State of a crypto bufcall element. - */ -typedef enum cbuf_state { - CBUF_FREE = 1, - CBUF_WAITING, - CBUF_RUNNING -} cbuf_state_t; - -/* - * Structure of a crypto bufcall element. - */ -typedef struct kcf_cbuf_elem { - /* - * lock and cv to wait for CBUF_RUNNING to be done - * kc_lock also protects kc_state. 
- */ - kmutex_t kc_lock; - kcondvar_t kc_cv; - cbuf_state_t kc_state; - - struct kcf_cbuf_elem *kc_next; - struct kcf_cbuf_elem *kc_prev; - void (*kc_func)(void *arg); - void *kc_arg; -} kcf_cbuf_elem_t; - -/* - * State of a notify element. - */ -typedef enum ntfy_elem_state { - NTFY_WAITING = 1, - NTFY_RUNNING -} ntfy_elem_state_t; - -/* - * Structure of a notify list element. - */ -typedef struct kcf_ntfy_elem { - /* - * lock and cv to wait for NTFY_RUNNING to be done. - * kn_lock also protects kn_state. - */ - kmutex_t kn_lock; - kcondvar_t kn_cv; - ntfy_elem_state_t kn_state; - - struct kcf_ntfy_elem *kn_next; - struct kcf_ntfy_elem *kn_prev; - - crypto_notify_callback_t kn_func; - uint32_t kn_event_mask; -} kcf_ntfy_elem_t; - - -/* - * The following values are based on the assumption that it would - * take around eight cpus to load a hardware provider (This is true for - * at least one product) and a kernel client may come from different - * low-priority interrupt levels. We will have CRYPTO_TASKQ_MIN number - * of cached taskq entries. The CRYPTO_TASKQ_MAX number is based on - * a throughput of 1GB/s using 512-byte buffers. These are just - * reasonable estimates and might need to change in future. - */ -#define CRYPTO_TASKQ_THREADS 8 -#define CRYPTO_TASKQ_MIN 64 -#define CRYPTO_TASKQ_MAX 2 * 1024 * 1024 - -extern int crypto_taskq_threads; -extern int crypto_taskq_minalloc; -extern int crypto_taskq_maxalloc; -extern kcf_global_swq_t *gswq; -extern int kcf_maxthreads; -extern int kcf_minthreads; - -/* - * All pending crypto bufcalls are put on a list. cbuf_list_lock - * protects changes to this list. - */ -extern kmutex_t cbuf_list_lock; -extern kcondvar_t cbuf_list_cv; - -/* - * All event subscribers are put on a list. kcf_notify_list_lock - * protects changes to this list. 
- */ -extern kmutex_t ntfy_list_lock; -extern kcondvar_t ntfy_list_cv; - -boolean_t kcf_get_next_logical_provider_member(kcf_provider_desc_t *, - kcf_provider_desc_t *, kcf_provider_desc_t **); -extern int kcf_get_hardware_provider(crypto_mech_type_t, crypto_mech_type_t, - boolean_t, kcf_provider_desc_t *, kcf_provider_desc_t **, - crypto_func_group_t); -extern int kcf_get_hardware_provider_nomech(offset_t, offset_t, - boolean_t, kcf_provider_desc_t *, kcf_provider_desc_t **); extern void kcf_free_triedlist(kcf_prov_tried_t *); extern kcf_prov_tried_t *kcf_insert_triedlist(kcf_prov_tried_t **, kcf_provider_desc_t *, int); extern kcf_provider_desc_t *kcf_get_mech_provider(crypto_mech_type_t, - kcf_mech_entry_t **, int *, kcf_prov_tried_t *, crypto_func_group_t, - boolean_t, size_t); -extern kcf_provider_desc_t *kcf_get_dual_provider(crypto_mechanism_t *, - crypto_mechanism_t *, kcf_mech_entry_t **, crypto_mech_type_t *, - crypto_mech_type_t *, int *, kcf_prov_tried_t *, - crypto_func_group_t, crypto_func_group_t, boolean_t, size_t); -extern crypto_ctx_t *kcf_new_ctx(crypto_call_req_t *, kcf_provider_desc_t *, - crypto_session_id_t); -extern int kcf_submit_request(kcf_provider_desc_t *, crypto_ctx_t *, - crypto_call_req_t *, kcf_req_params_t *, boolean_t); + kcf_mech_entry_t **, int *, kcf_prov_tried_t *, crypto_func_group_t); +extern crypto_ctx_t *kcf_new_ctx(kcf_provider_desc_t *); extern void kcf_sched_destroy(void); extern void kcf_sched_init(void); -extern void kcf_sched_start(void); -extern void kcf_sop_done(kcf_sreq_node_t *, int); -extern void kcf_aop_done(kcf_areq_node_t *, int); -extern int common_submit_request(kcf_provider_desc_t *, - crypto_ctx_t *, kcf_req_params_t *, crypto_req_handle_t); extern void kcf_free_context(kcf_context_t *); -extern int kcf_svc_wait(int *); -extern int kcf_svc_do_run(void); -extern int kcf_need_signature_verification(kcf_provider_desc_t *); -extern void kcf_verify_signature(void *); -extern struct modctl 
*kcf_get_modctl(crypto_provider_info_t *); -extern void verify_unverified_providers(void); -extern void kcf_free_req(kcf_areq_node_t *areq); -extern void crypto_bufcall_service(void); - -extern void kcf_walk_ntfylist(uint32_t, void *); -extern void kcf_do_notify(kcf_provider_desc_t *, boolean_t); - -extern kcf_dual_req_t *kcf_alloc_req(crypto_call_req_t *); -extern void kcf_next_req(void *, int); -extern void kcf_last_req(void *, int); - #ifdef __cplusplus } #endif diff --git a/sys/contrib/openzfs/module/icp/include/sys/crypto/spi.h b/sys/contrib/openzfs/module/icp/include/sys/crypto/spi.h index 2c62b5706651..63dfce7957a8 100644 --- a/sys/contrib/openzfs/module/icp/include/sys/crypto/spi.h +++ b/sys/contrib/openzfs/module/icp/include/sys/crypto/spi.h @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * @@ -43,39 +43,15 @@ extern "C" { #define __no_const #endif /* CONSTIFY_PLUGIN */ -#define CRYPTO_SPI_VERSION_1 1 -#define CRYPTO_SPI_VERSION_2 2 -#define CRYPTO_SPI_VERSION_3 3 - -/* - * Provider-private handle. This handle is specified by a provider - * when it registers by means of the pi_provider_handle field of - * the crypto_provider_info structure, and passed to the provider - * when its entry points are invoked. - */ -typedef void *crypto_provider_handle_t; - /* - * Context templates can be used to by software providers to pre-process + * Context templates can be used to by providers to pre-process * keying material, such as key schedules. They are allocated by - * a software provider create_ctx_template(9E) entry point, and passed + * a provider create_ctx_template(9E) entry point, and passed * as argument to initialization and atomic provider entry points. 
*/ typedef void *crypto_spi_ctx_template_t; /* - * Request handles are used by the kernel to identify an asynchronous - * request being processed by a provider. It is passed by the kernel - * to a hardware provider when submitting a request, and must be - * specified by a provider when calling crypto_op_notification(9F) - */ -typedef void *crypto_req_handle_t; - -/* Values for cc_flags field */ -#define CRYPTO_INIT_OPSTATE 0x00000001 /* allocate and init cc_opstate */ -#define CRYPTO_USE_OPSTATE 0x00000002 /* .. start using it as context */ - -/* * The context structure is passed from the kernel to a provider. * It contains the information needed to process a multi-part or * single part operation. The context structure is not used @@ -86,81 +62,24 @@ typedef void *crypto_req_handle_t; * as separate arguments to Provider routines. */ typedef struct crypto_ctx { - crypto_provider_handle_t cc_provider; - crypto_session_id_t cc_session; void *cc_provider_private; /* owned by provider */ void *cc_framework_private; /* owned by framework */ - uint32_t cc_flags; /* flags */ - void *cc_opstate; /* state */ } crypto_ctx_t; /* - * Extended provider information. - */ - -/* - * valid values for ei_flags field of extended info structure - * They match the RSA Security, Inc PKCS#11 tokenInfo flags. 
- */ -#define CRYPTO_EXTF_RNG 0x00000001 -#define CRYPTO_EXTF_WRITE_PROTECTED 0x00000002 -#define CRYPTO_EXTF_LOGIN_REQUIRED 0x00000004 -#define CRYPTO_EXTF_USER_PIN_INITIALIZED 0x00000008 -#define CRYPTO_EXTF_CLOCK_ON_TOKEN 0x00000040 -#define CRYPTO_EXTF_PROTECTED_AUTHENTICATION_PATH 0x00000100 -#define CRYPTO_EXTF_DUAL_CRYPTO_OPERATIONS 0x00000200 -#define CRYPTO_EXTF_TOKEN_INITIALIZED 0x00000400 -#define CRYPTO_EXTF_USER_PIN_COUNT_LOW 0x00010000 -#define CRYPTO_EXTF_USER_PIN_FINAL_TRY 0x00020000 -#define CRYPTO_EXTF_USER_PIN_LOCKED 0x00040000 -#define CRYPTO_EXTF_USER_PIN_TO_BE_CHANGED 0x00080000 -#define CRYPTO_EXTF_SO_PIN_COUNT_LOW 0x00100000 -#define CRYPTO_EXTF_SO_PIN_FINAL_TRY 0x00200000 -#define CRYPTO_EXTF_SO_PIN_LOCKED 0x00400000 -#define CRYPTO_EXTF_SO_PIN_TO_BE_CHANGED 0x00800000 - -/* - * The crypto_control_ops structure contains pointers to control - * operations for cryptographic providers. It is passed through - * the crypto_ops(9S) structure when providers register with the - * kernel using crypto_register_provider(9F). - */ -typedef struct crypto_control_ops { - void (*provider_status)(crypto_provider_handle_t, uint_t *); -} __no_const crypto_control_ops_t; - -/* - * The crypto_ctx_ops structure contains points to context and context - * templates management operations for cryptographic providers. It is - * passed through the crypto_ops(9S) structure when providers register - * with the kernel using crypto_register_provider(9F). - */ -typedef struct crypto_ctx_ops { - int (*create_ctx_template)(crypto_provider_handle_t, - crypto_mechanism_t *, crypto_key_t *, - crypto_spi_ctx_template_t *, size_t *, crypto_req_handle_t); - int (*free_context)(crypto_ctx_t *); -} __no_const crypto_ctx_ops_t; - -/* * The crypto_digest_ops structure contains pointers to digest * operations for cryptographic providers. It is passed through * the crypto_ops(9S) structure when providers register with the * kernel using crypto_register_provider(9F). 
*/ typedef struct crypto_digest_ops { - int (*digest_init)(crypto_ctx_t *, crypto_mechanism_t *, - crypto_req_handle_t); - int (*digest)(crypto_ctx_t *, crypto_data_t *, crypto_data_t *, - crypto_req_handle_t); - int (*digest_update)(crypto_ctx_t *, crypto_data_t *, - crypto_req_handle_t); - int (*digest_key)(crypto_ctx_t *, crypto_key_t *, crypto_req_handle_t); - int (*digest_final)(crypto_ctx_t *, crypto_data_t *, - crypto_req_handle_t); - int (*digest_atomic)(crypto_provider_handle_t, crypto_session_id_t, - crypto_mechanism_t *, crypto_data_t *, - crypto_data_t *, crypto_req_handle_t); + int (*digest_init)(crypto_ctx_t *, crypto_mechanism_t *); + int (*digest)(crypto_ctx_t *, crypto_data_t *, crypto_data_t *); + int (*digest_update)(crypto_ctx_t *, crypto_data_t *); + int (*digest_key)(crypto_ctx_t *, crypto_key_t *); + int (*digest_final)(crypto_ctx_t *, crypto_data_t *); + int (*digest_atomic)(crypto_mechanism_t *, crypto_data_t *, + crypto_data_t *); } __no_const crypto_digest_ops_t; /* @@ -172,29 +91,27 @@ typedef struct crypto_digest_ops { typedef struct crypto_cipher_ops { int (*encrypt_init)(crypto_ctx_t *, crypto_mechanism_t *, crypto_key_t *, - crypto_spi_ctx_template_t, crypto_req_handle_t); + crypto_spi_ctx_template_t); int (*encrypt)(crypto_ctx_t *, - crypto_data_t *, crypto_data_t *, crypto_req_handle_t); + crypto_data_t *, crypto_data_t *); int (*encrypt_update)(crypto_ctx_t *, - crypto_data_t *, crypto_data_t *, crypto_req_handle_t); + crypto_data_t *, crypto_data_t *); int (*encrypt_final)(crypto_ctx_t *, - crypto_data_t *, crypto_req_handle_t); - int (*encrypt_atomic)(crypto_provider_handle_t, crypto_session_id_t, - crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, - crypto_data_t *, crypto_spi_ctx_template_t, crypto_req_handle_t); + crypto_data_t *); + int (*encrypt_atomic)(crypto_mechanism_t *, crypto_key_t *, + crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t); int (*decrypt_init)(crypto_ctx_t *, crypto_mechanism_t *, 
crypto_key_t *, - crypto_spi_ctx_template_t, crypto_req_handle_t); + crypto_spi_ctx_template_t); int (*decrypt)(crypto_ctx_t *, - crypto_data_t *, crypto_data_t *, crypto_req_handle_t); + crypto_data_t *, crypto_data_t *); int (*decrypt_update)(crypto_ctx_t *, - crypto_data_t *, crypto_data_t *, crypto_req_handle_t); + crypto_data_t *, crypto_data_t *); int (*decrypt_final)(crypto_ctx_t *, - crypto_data_t *, crypto_req_handle_t); - int (*decrypt_atomic)(crypto_provider_handle_t, crypto_session_id_t, - crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, - crypto_data_t *, crypto_spi_ctx_template_t, crypto_req_handle_t); + crypto_data_t *); + int (*decrypt_atomic)(crypto_mechanism_t *, crypto_key_t *, + crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t); } __no_const crypto_cipher_ops_t; /* @@ -206,289 +123,30 @@ typedef struct crypto_cipher_ops { typedef struct crypto_mac_ops { int (*mac_init)(crypto_ctx_t *, crypto_mechanism_t *, crypto_key_t *, - crypto_spi_ctx_template_t, crypto_req_handle_t); + crypto_spi_ctx_template_t); int (*mac)(crypto_ctx_t *, - crypto_data_t *, crypto_data_t *, crypto_req_handle_t); + crypto_data_t *, crypto_data_t *); int (*mac_update)(crypto_ctx_t *, - crypto_data_t *, crypto_req_handle_t); + crypto_data_t *); int (*mac_final)(crypto_ctx_t *, - crypto_data_t *, crypto_req_handle_t); - int (*mac_atomic)(crypto_provider_handle_t, crypto_session_id_t, - crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, - crypto_data_t *, crypto_spi_ctx_template_t, - crypto_req_handle_t); - int (*mac_verify_atomic)(crypto_provider_handle_t, crypto_session_id_t, - crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, - crypto_data_t *, crypto_spi_ctx_template_t, - crypto_req_handle_t); + crypto_data_t *); + int (*mac_atomic)(crypto_mechanism_t *, crypto_key_t *, + crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t); + int (*mac_verify_atomic)(crypto_mechanism_t *, crypto_key_t *, + crypto_data_t *, crypto_data_t *, 
crypto_spi_ctx_template_t); } __no_const crypto_mac_ops_t; /* - * The crypto_sign_ops structure contains pointers to signing - * operations for cryptographic providers. It is passed through - * the crypto_ops(9S) structure when providers register with the - * kernel using crypto_register_provider(9F). - */ -typedef struct crypto_sign_ops { - int (*sign_init)(crypto_ctx_t *, - crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t, - crypto_req_handle_t); - int (*sign)(crypto_ctx_t *, - crypto_data_t *, crypto_data_t *, crypto_req_handle_t); - int (*sign_update)(crypto_ctx_t *, - crypto_data_t *, crypto_req_handle_t); - int (*sign_final)(crypto_ctx_t *, - crypto_data_t *, crypto_req_handle_t); - int (*sign_atomic)(crypto_provider_handle_t, crypto_session_id_t, - crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, - crypto_data_t *, crypto_spi_ctx_template_t, - crypto_req_handle_t); - int (*sign_recover_init)(crypto_ctx_t *, crypto_mechanism_t *, - crypto_key_t *, crypto_spi_ctx_template_t, - crypto_req_handle_t); - int (*sign_recover)(crypto_ctx_t *, - crypto_data_t *, crypto_data_t *, crypto_req_handle_t); - int (*sign_recover_atomic)(crypto_provider_handle_t, - crypto_session_id_t, crypto_mechanism_t *, crypto_key_t *, - crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t, - crypto_req_handle_t); -} __no_const crypto_sign_ops_t; - -/* - * The crypto_verify_ops structure contains pointers to verify - * operations for cryptographic providers. It is passed through - * the crypto_ops(9S) structure when providers register with the - * kernel using crypto_register_provider(9F). 
- */ -typedef struct crypto_verify_ops { - int (*verify_init)(crypto_ctx_t *, - crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t, - crypto_req_handle_t); - int (*do_verify)(crypto_ctx_t *, - crypto_data_t *, crypto_data_t *, crypto_req_handle_t); - int (*verify_update)(crypto_ctx_t *, - crypto_data_t *, crypto_req_handle_t); - int (*verify_final)(crypto_ctx_t *, - crypto_data_t *, crypto_req_handle_t); - int (*verify_atomic)(crypto_provider_handle_t, crypto_session_id_t, - crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, - crypto_data_t *, crypto_spi_ctx_template_t, - crypto_req_handle_t); - int (*verify_recover_init)(crypto_ctx_t *, crypto_mechanism_t *, - crypto_key_t *, crypto_spi_ctx_template_t, - crypto_req_handle_t); - int (*verify_recover)(crypto_ctx_t *, - crypto_data_t *, crypto_data_t *, crypto_req_handle_t); - int (*verify_recover_atomic)(crypto_provider_handle_t, - crypto_session_id_t, crypto_mechanism_t *, crypto_key_t *, - crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t, - crypto_req_handle_t); -} __no_const crypto_verify_ops_t; - -/* - * The crypto_dual_ops structure contains pointers to dual - * cipher and sign/verify operations for cryptographic providers. - * It is passed through the crypto_ops(9S) structure when - * providers register with the kernel using - * crypto_register_provider(9F). 
- */ -typedef struct crypto_dual_ops { - int (*digest_encrypt_update)( - crypto_ctx_t *, crypto_ctx_t *, crypto_data_t *, - crypto_data_t *, crypto_req_handle_t); - int (*decrypt_digest_update)( - crypto_ctx_t *, crypto_ctx_t *, crypto_data_t *, - crypto_data_t *, crypto_req_handle_t); - int (*sign_encrypt_update)( - crypto_ctx_t *, crypto_ctx_t *, crypto_data_t *, - crypto_data_t *, crypto_req_handle_t); - int (*decrypt_verify_update)( - crypto_ctx_t *, crypto_ctx_t *, crypto_data_t *, - crypto_data_t *, crypto_req_handle_t); -} __no_const crypto_dual_ops_t; - -/* - * The crypto_dual_cipher_mac_ops structure contains pointers to dual - * cipher and MAC operations for cryptographic providers. - * It is passed through the crypto_ops(9S) structure when - * providers register with the kernel using - * crypto_register_provider(9F). - */ -typedef struct crypto_dual_cipher_mac_ops { - int (*encrypt_mac_init)(crypto_ctx_t *, - crypto_mechanism_t *, crypto_key_t *, crypto_mechanism_t *, - crypto_key_t *, crypto_spi_ctx_template_t, - crypto_spi_ctx_template_t, crypto_req_handle_t); - int (*encrypt_mac)(crypto_ctx_t *, - crypto_data_t *, crypto_dual_data_t *, crypto_data_t *, - crypto_req_handle_t); - int (*encrypt_mac_update)(crypto_ctx_t *, - crypto_data_t *, crypto_dual_data_t *, crypto_req_handle_t); - int (*encrypt_mac_final)(crypto_ctx_t *, - crypto_dual_data_t *, crypto_data_t *, crypto_req_handle_t); - int (*encrypt_mac_atomic)(crypto_provider_handle_t, crypto_session_id_t, - crypto_mechanism_t *, crypto_key_t *, crypto_mechanism_t *, - crypto_key_t *, crypto_data_t *, crypto_dual_data_t *, - crypto_data_t *, crypto_spi_ctx_template_t, - crypto_spi_ctx_template_t, crypto_req_handle_t); - - int (*mac_decrypt_init)(crypto_ctx_t *, - crypto_mechanism_t *, crypto_key_t *, crypto_mechanism_t *, - crypto_key_t *, crypto_spi_ctx_template_t, - crypto_spi_ctx_template_t, crypto_req_handle_t); - int (*mac_decrypt)(crypto_ctx_t *, - crypto_dual_data_t *, crypto_data_t *, 
crypto_data_t *, - crypto_req_handle_t); - int (*mac_decrypt_update)(crypto_ctx_t *, - crypto_dual_data_t *, crypto_data_t *, crypto_req_handle_t); - int (*mac_decrypt_final)(crypto_ctx_t *, - crypto_data_t *, crypto_data_t *, crypto_req_handle_t); - int (*mac_decrypt_atomic)(crypto_provider_handle_t, - crypto_session_id_t, crypto_mechanism_t *, crypto_key_t *, - crypto_mechanism_t *, crypto_key_t *, crypto_dual_data_t *, - crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t, - crypto_spi_ctx_template_t, crypto_req_handle_t); - int (*mac_verify_decrypt_atomic)(crypto_provider_handle_t, - crypto_session_id_t, crypto_mechanism_t *, crypto_key_t *, - crypto_mechanism_t *, crypto_key_t *, crypto_dual_data_t *, - crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t, - crypto_spi_ctx_template_t, crypto_req_handle_t); -} __no_const crypto_dual_cipher_mac_ops_t; - -/* - * The crypto_random_number_ops structure contains pointers to random - * number operations for cryptographic providers. It is passed through - * the crypto_ops(9S) structure when providers register with the - * kernel using crypto_register_provider(9F). - */ -typedef struct crypto_random_number_ops { - int (*seed_random)(crypto_provider_handle_t, crypto_session_id_t, - uchar_t *, size_t, uint_t, uint32_t, crypto_req_handle_t); - int (*generate_random)(crypto_provider_handle_t, crypto_session_id_t, - uchar_t *, size_t, crypto_req_handle_t); -} __no_const crypto_random_number_ops_t; - -/* - * Flag values for seed_random. - */ -#define CRYPTO_SEED_NOW 0x00000001 - -/* - * The crypto_session_ops structure contains pointers to session - * operations for cryptographic providers. It is passed through - * the crypto_ops(9S) structure when providers register with the - * kernel using crypto_register_provider(9F). 
- */ -typedef struct crypto_session_ops { - int (*session_open)(crypto_provider_handle_t, crypto_session_id_t *, - crypto_req_handle_t); - int (*session_close)(crypto_provider_handle_t, crypto_session_id_t, - crypto_req_handle_t); - int (*session_login)(crypto_provider_handle_t, crypto_session_id_t, - crypto_user_type_t, char *, size_t, crypto_req_handle_t); - int (*session_logout)(crypto_provider_handle_t, crypto_session_id_t, - crypto_req_handle_t); -} __no_const crypto_session_ops_t; - -/* - * The crypto_object_ops structure contains pointers to object - * operations for cryptographic providers. It is passed through - * the crypto_ops(9S) structure when providers register with the - * kernel using crypto_register_provider(9F). - */ -typedef struct crypto_object_ops { - int (*object_create)(crypto_provider_handle_t, crypto_session_id_t, - crypto_object_attribute_t *, uint_t, crypto_object_id_t *, - crypto_req_handle_t); - int (*object_copy)(crypto_provider_handle_t, crypto_session_id_t, - crypto_object_id_t, crypto_object_attribute_t *, uint_t, - crypto_object_id_t *, crypto_req_handle_t); - int (*object_destroy)(crypto_provider_handle_t, crypto_session_id_t, - crypto_object_id_t, crypto_req_handle_t); - int (*object_get_size)(crypto_provider_handle_t, crypto_session_id_t, - crypto_object_id_t, size_t *, crypto_req_handle_t); - int (*object_get_attribute_value)(crypto_provider_handle_t, - crypto_session_id_t, crypto_object_id_t, - crypto_object_attribute_t *, uint_t, crypto_req_handle_t); - int (*object_set_attribute_value)(crypto_provider_handle_t, - crypto_session_id_t, crypto_object_id_t, - crypto_object_attribute_t *, uint_t, crypto_req_handle_t); - int (*object_find_init)(crypto_provider_handle_t, crypto_session_id_t, - crypto_object_attribute_t *, uint_t, void **, - crypto_req_handle_t); - int (*object_find)(crypto_provider_handle_t, void *, - crypto_object_id_t *, uint_t, uint_t *, crypto_req_handle_t); - int (*object_find_final)(crypto_provider_handle_t, 
void *, - crypto_req_handle_t); -} __no_const crypto_object_ops_t; - -/* - * The crypto_key_ops structure contains pointers to key - * operations for cryptographic providers. It is passed through - * the crypto_ops(9S) structure when providers register with the - * kernel using crypto_register_provider(9F). - */ -typedef struct crypto_key_ops { - int (*key_generate)(crypto_provider_handle_t, crypto_session_id_t, - crypto_mechanism_t *, crypto_object_attribute_t *, uint_t, - crypto_object_id_t *, crypto_req_handle_t); - int (*key_generate_pair)(crypto_provider_handle_t, crypto_session_id_t, - crypto_mechanism_t *, crypto_object_attribute_t *, uint_t, - crypto_object_attribute_t *, uint_t, crypto_object_id_t *, - crypto_object_id_t *, crypto_req_handle_t); - int (*key_wrap)(crypto_provider_handle_t, crypto_session_id_t, - crypto_mechanism_t *, crypto_key_t *, crypto_object_id_t *, - uchar_t *, size_t *, crypto_req_handle_t); - int (*key_unwrap)(crypto_provider_handle_t, crypto_session_id_t, - crypto_mechanism_t *, crypto_key_t *, uchar_t *, size_t *, - crypto_object_attribute_t *, uint_t, - crypto_object_id_t *, crypto_req_handle_t); - int (*key_derive)(crypto_provider_handle_t, crypto_session_id_t, - crypto_mechanism_t *, crypto_key_t *, crypto_object_attribute_t *, - uint_t, crypto_object_id_t *, crypto_req_handle_t); - int (*key_check)(crypto_provider_handle_t, crypto_mechanism_t *, - crypto_key_t *); -} __no_const crypto_key_ops_t; - -/* - * The crypto_provider_management_ops structure contains pointers - * to management operations for cryptographic providers. It is passed - * through the crypto_ops(9S) structure when providers register with the - * kernel using crypto_register_provider(9F). + * The crypto_ctx_ops structure contains points to context and context + * templates management operations for cryptographic providers. It is + * passed through the crypto_ops(9S) structure when providers register + * with the kernel using crypto_register_provider(9F). 
*/ -typedef struct crypto_provider_management_ops { - int (*ext_info)(crypto_provider_handle_t, - crypto_provider_ext_info_t *, crypto_req_handle_t); - int (*init_token)(crypto_provider_handle_t, char *, size_t, - char *, crypto_req_handle_t); - int (*init_pin)(crypto_provider_handle_t, crypto_session_id_t, - char *, size_t, crypto_req_handle_t); - int (*set_pin)(crypto_provider_handle_t, crypto_session_id_t, - char *, size_t, char *, size_t, crypto_req_handle_t); -} __no_const crypto_provider_management_ops_t; - -typedef struct crypto_mech_ops { - int (*copyin_mechanism)(crypto_provider_handle_t, - crypto_mechanism_t *, crypto_mechanism_t *, int *, int); - int (*copyout_mechanism)(crypto_provider_handle_t, - crypto_mechanism_t *, crypto_mechanism_t *, int *, int); - int (*free_mechanism)(crypto_provider_handle_t, crypto_mechanism_t *); -} __no_const crypto_mech_ops_t; - -typedef struct crypto_nostore_key_ops { - int (*nostore_key_generate)(crypto_provider_handle_t, - crypto_session_id_t, crypto_mechanism_t *, - crypto_object_attribute_t *, uint_t, crypto_object_attribute_t *, - uint_t, crypto_req_handle_t); - int (*nostore_key_generate_pair)(crypto_provider_handle_t, - crypto_session_id_t, crypto_mechanism_t *, - crypto_object_attribute_t *, uint_t, crypto_object_attribute_t *, - uint_t, crypto_object_attribute_t *, uint_t, - crypto_object_attribute_t *, uint_t, crypto_req_handle_t); - int (*nostore_key_derive)(crypto_provider_handle_t, crypto_session_id_t, - crypto_mechanism_t *, crypto_key_t *, crypto_object_attribute_t *, - uint_t, crypto_object_attribute_t *, uint_t, crypto_req_handle_t); -} __no_const crypto_nostore_key_ops_t; +typedef struct crypto_ctx_ops { + int (*create_ctx_template)(crypto_mechanism_t *, crypto_key_t *, + crypto_spi_ctx_template_t *, size_t *); + int (*free_context)(crypto_ctx_t *); +} __no_const crypto_ctx_ops_t; /* * The crypto_ops(9S) structure contains the structures containing @@ -497,58 +155,13 @@ typedef struct 
crypto_nostore_key_ops { * supplied by a provider when it registers with the kernel * by calling crypto_register_provider(9F). */ -typedef struct crypto_ops_v1 { - crypto_control_ops_t *co_control_ops; - crypto_digest_ops_t *co_digest_ops; - crypto_cipher_ops_t *co_cipher_ops; - crypto_mac_ops_t *co_mac_ops; - crypto_sign_ops_t *co_sign_ops; - crypto_verify_ops_t *co_verify_ops; - crypto_dual_ops_t *co_dual_ops; - crypto_dual_cipher_mac_ops_t *co_dual_cipher_mac_ops; - crypto_random_number_ops_t *co_random_ops; - crypto_session_ops_t *co_session_ops; - crypto_object_ops_t *co_object_ops; - crypto_key_ops_t *co_key_ops; - crypto_provider_management_ops_t *co_provider_ops; - crypto_ctx_ops_t *co_ctx_ops; -} crypto_ops_v1_t; - -typedef struct crypto_ops_v2 { - crypto_ops_v1_t v1_ops; - crypto_mech_ops_t *co_mech_ops; -} crypto_ops_v2_t; - -typedef struct crypto_ops_v3 { - crypto_ops_v2_t v2_ops; - crypto_nostore_key_ops_t *co_nostore_key_ops; -} crypto_ops_v3_t; - typedef struct crypto_ops { - union { - crypto_ops_v3_t cou_v3; - crypto_ops_v2_t cou_v2; - crypto_ops_v1_t cou_v1; - } cou; + const crypto_digest_ops_t *co_digest_ops; + const crypto_cipher_ops_t *co_cipher_ops; + const crypto_mac_ops_t *co_mac_ops; + const crypto_ctx_ops_t *co_ctx_ops; } crypto_ops_t; -#define co_control_ops cou.cou_v1.co_control_ops -#define co_digest_ops cou.cou_v1.co_digest_ops -#define co_cipher_ops cou.cou_v1.co_cipher_ops -#define co_mac_ops cou.cou_v1.co_mac_ops -#define co_sign_ops cou.cou_v1.co_sign_ops -#define co_verify_ops cou.cou_v1.co_verify_ops -#define co_dual_ops cou.cou_v1.co_dual_ops -#define co_dual_cipher_mac_ops cou.cou_v1.co_dual_cipher_mac_ops -#define co_random_ops cou.cou_v1.co_random_ops -#define co_session_ops cou.cou_v1.co_session_ops -#define co_object_ops cou.cou_v1.co_object_ops -#define co_key_ops cou.cou_v1.co_key_ops -#define co_provider_ops cou.cou_v1.co_provider_ops -#define co_ctx_ops cou.cou_v1.co_ctx_ops -#define co_mech_ops cou.cou_v2.co_mech_ops 
-#define co_nostore_key_ops cou.cou_v3.co_nostore_key_ops - /* * The mechanism info structure crypto_mech_info_t contains a function group * bit mask cm_func_group_mask. This field, of type crypto_func_group_t, @@ -562,29 +175,11 @@ typedef uint32_t crypto_func_group_t; #define CRYPTO_FG_ENCRYPT 0x00000001 /* encrypt_init() */ #define CRYPTO_FG_DECRYPT 0x00000002 /* decrypt_init() */ #define CRYPTO_FG_DIGEST 0x00000004 /* digest_init() */ -#define CRYPTO_FG_SIGN 0x00000008 /* sign_init() */ -#define CRYPTO_FG_SIGN_RECOVER 0x00000010 /* sign_recover_init() */ -#define CRYPTO_FG_VERIFY 0x00000020 /* verify_init() */ -#define CRYPTO_FG_VERIFY_RECOVER 0x00000040 /* verify_recover_init() */ -#define CRYPTO_FG_GENERATE 0x00000080 /* key_generate() */ -#define CRYPTO_FG_GENERATE_KEY_PAIR 0x00000100 /* key_generate_pair() */ -#define CRYPTO_FG_WRAP 0x00000200 /* key_wrap() */ -#define CRYPTO_FG_UNWRAP 0x00000400 /* key_unwrap() */ -#define CRYPTO_FG_DERIVE 0x00000800 /* key_derive() */ #define CRYPTO_FG_MAC 0x00001000 /* mac_init() */ -#define CRYPTO_FG_ENCRYPT_MAC 0x00002000 /* encrypt_mac_init() */ -#define CRYPTO_FG_MAC_DECRYPT 0x00004000 /* decrypt_mac_init() */ #define CRYPTO_FG_ENCRYPT_ATOMIC 0x00008000 /* encrypt_atomic() */ #define CRYPTO_FG_DECRYPT_ATOMIC 0x00010000 /* decrypt_atomic() */ #define CRYPTO_FG_MAC_ATOMIC 0x00020000 /* mac_atomic() */ #define CRYPTO_FG_DIGEST_ATOMIC 0x00040000 /* digest_atomic() */ -#define CRYPTO_FG_SIGN_ATOMIC 0x00080000 /* sign_atomic() */ -#define CRYPTO_FG_SIGN_RECOVER_ATOMIC 0x00100000 /* sign_recover_atomic() */ -#define CRYPTO_FG_VERIFY_ATOMIC 0x00200000 /* verify_atomic() */ -#define CRYPTO_FG_VERIFY_RECOVER_ATOMIC 0x00400000 /* verify_recover_atomic() */ -#define CRYPTO_FG_ENCRYPT_MAC_ATOMIC 0x00800000 /* encrypt_mac_atomic() */ -#define CRYPTO_FG_MAC_DECRYPT_ATOMIC 0x01000000 /* mac_decrypt_atomic() */ -#define CRYPTO_FG_RESERVED 0x80000000 /* * Maximum length of the pi_provider_description field of the @@ -593,21 +188,6 @@ 
typedef uint32_t crypto_func_group_t; #define CRYPTO_PROVIDER_DESCR_MAX_LEN 64 -/* Bit mask for all the simple operations */ -#define CRYPTO_FG_SIMPLEOP_MASK (CRYPTO_FG_ENCRYPT | CRYPTO_FG_DECRYPT | \ - CRYPTO_FG_DIGEST | CRYPTO_FG_SIGN | CRYPTO_FG_VERIFY | CRYPTO_FG_MAC | \ - CRYPTO_FG_ENCRYPT_ATOMIC | CRYPTO_FG_DECRYPT_ATOMIC | \ - CRYPTO_FG_MAC_ATOMIC | CRYPTO_FG_DIGEST_ATOMIC | CRYPTO_FG_SIGN_ATOMIC | \ - CRYPTO_FG_VERIFY_ATOMIC) - -/* Bit mask for all the dual operations */ -#define CRYPTO_FG_MAC_CIPHER_MASK (CRYPTO_FG_ENCRYPT_MAC | \ - CRYPTO_FG_MAC_DECRYPT | CRYPTO_FG_ENCRYPT_MAC_ATOMIC | \ - CRYPTO_FG_MAC_DECRYPT_ATOMIC) - -/* Add other combos to CRYPTO_FG_DUAL_MASK */ -#define CRYPTO_FG_DUAL_MASK CRYPTO_FG_MAC_CIPHER_MASK - /* * The crypto_mech_info structure specifies one of the mechanisms * supported by a cryptographic provider. The pi_mechanisms field of @@ -618,21 +198,8 @@ typedef struct crypto_mech_info { crypto_mech_name_t cm_mech_name; crypto_mech_type_t cm_mech_number; crypto_func_group_t cm_func_group_mask; - ssize_t cm_min_key_length; - ssize_t cm_max_key_length; - uint32_t cm_mech_flags; } crypto_mech_info_t; -/* Alias the old name to the new name for compatibility. */ -#define cm_keysize_unit cm_mech_flags - -/* - * The following is used by a provider that sets - * CRYPTO_HASH_NO_UPDATE. It needs to specify the maximum - * input data size it can digest in this field. - */ -#define cm_max_input_length cm_max_key_length - /* * crypto_kcf_provider_handle_t is a handle allocated by the kernel. * It is returned after the provider registers with @@ -644,79 +211,24 @@ typedef uint_t crypto_kcf_provider_handle_t; /* * Provider information. Passed as argument to crypto_register_provider(9F). - * Describes the provider and its capabilities. Multiple providers can - * register for the same device instance. In this case, the same - * pi_provider_dev must be specified with a different pi_provider_handle. + * Describes the provider and its capabilities. 
*/ -typedef struct crypto_provider_info_v1 { - uint_t pi_interface_version; - char *pi_provider_description; - crypto_provider_type_t pi_provider_type; - crypto_provider_handle_t pi_provider_handle; - crypto_ops_t *pi_ops_vector; - uint_t pi_mech_list_count; - crypto_mech_info_t *pi_mechanisms; - uint_t pi_logical_provider_count; - crypto_kcf_provider_handle_t *pi_logical_providers; -} crypto_provider_info_v1_t; - -typedef struct crypto_provider_info_v2 { - crypto_provider_info_v1_t v1_info; - uint_t pi_flags; -} crypto_provider_info_v2_t; - typedef struct crypto_provider_info { - union { - crypto_provider_info_v2_t piu_v2; - crypto_provider_info_v1_t piu_v1; - } piu; + const char *pi_provider_description; + const crypto_ops_t *pi_ops_vector; + uint_t pi_mech_list_count; + const crypto_mech_info_t *pi_mechanisms; } crypto_provider_info_t; -#define pi_interface_version piu.piu_v1.pi_interface_version -#define pi_provider_description piu.piu_v1.pi_provider_description -#define pi_provider_type piu.piu_v1.pi_provider_type -#define pi_provider_handle piu.piu_v1.pi_provider_handle -#define pi_ops_vector piu.piu_v1.pi_ops_vector -#define pi_mech_list_count piu.piu_v1.pi_mech_list_count -#define pi_mechanisms piu.piu_v1.pi_mechanisms -#define pi_logical_provider_count piu.piu_v1.pi_logical_provider_count -#define pi_logical_providers piu.piu_v1.pi_logical_providers -#define pi_flags piu.piu_v2.pi_flags - -/* hidden providers can only be accessed via a logical provider */ -#define CRYPTO_HIDE_PROVIDER 0x00000001 -/* - * provider can not do multi-part digest (updates) and has a limit - * on maximum input data that it can digest. 
- */ -#define CRYPTO_HASH_NO_UPDATE 0x00000002 - -/* provider can handle the request without returning a CRYPTO_QUEUED */ -#define CRYPTO_SYNCHRONOUS 0x00000004 - -#define CRYPTO_PIFLAGS_RESERVED2 0x40000000 -#define CRYPTO_PIFLAGS_RESERVED1 0x80000000 - -/* - * Provider status passed by a provider to crypto_provider_notification(9F) - * and returned by the provider_status(9E) entry point. - */ -#define CRYPTO_PROVIDER_READY 0 -#define CRYPTO_PROVIDER_BUSY 1 -#define CRYPTO_PROVIDER_FAILED 2 - /* * Functions exported by Solaris to cryptographic providers. Providers * call these functions to register and unregister, notify the kernel * of state changes, and notify the kernel when a asynchronous request * completed. */ -extern int crypto_register_provider(crypto_provider_info_t *, +extern int crypto_register_provider(const crypto_provider_info_t *, crypto_kcf_provider_handle_t *); extern int crypto_unregister_provider(crypto_kcf_provider_handle_t); -extern void crypto_provider_notification(crypto_kcf_provider_handle_t, uint_t); -extern void crypto_op_notification(crypto_req_handle_t, int); -extern int crypto_kmflag(crypto_req_handle_t); #ifdef __cplusplus diff --git a/sys/contrib/openzfs/module/icp/include/sys/ia32/asm_linkage.h b/sys/contrib/openzfs/module/icp/include/sys/ia32/asm_linkage.h deleted file mode 100644 index f2dae7093b94..000000000000 --- a/sys/contrib/openzfs/module/icp/include/sys/ia32/asm_linkage.h +++ /dev/null @@ -1,307 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. 
- * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _IA32_SYS_ASM_LINKAGE_H -#define _IA32_SYS_ASM_LINKAGE_H - -#include <sys/stack.h> -#include <sys/trap.h> - -#ifdef __cplusplus -extern "C" { -#endif - -#ifdef _ASM /* The remainder of this file is only for assembly files */ - -/* - * make annoying differences in assembler syntax go away - */ - -/* - * D16 and A16 are used to insert instructions prefixes; the - * macros help the assembler code be slightly more portable. - */ -#if !defined(__GNUC_AS__) -/* - * /usr/ccs/bin/as prefixes are parsed as separate instructions - */ -#define D16 data16; -#define A16 addr16; - -/* - * (There are some weird constructs in constant expressions) - */ -#define _CONST(const) [const] -#define _BITNOT(const) -1!_CONST(const) -#define _MUL(a, b) _CONST(a \* b) - -#else -/* - * Why not use the 'data16' and 'addr16' prefixes .. well, the - * assembler doesn't quite believe in real mode, and thus argues with - * us about what we're trying to do. - */ -#define D16 .byte 0x66; -#define A16 .byte 0x67; - -#define _CONST(const) (const) -#define _BITNOT(const) ~_CONST(const) -#define _MUL(a, b) _CONST(a * b) - -#endif - -/* - * C pointers are different sizes between i386 and amd64. - * These constants can be used to compute offsets into pointer arrays. - */ -#if defined(__amd64) -#define CLONGSHIFT 3 -#define CLONGSIZE 8 -#define CLONGMASK 7 -#elif defined(__i386) -#define CLONGSHIFT 2 -#define CLONGSIZE 4 -#define CLONGMASK 3 -#endif - -/* - * Since we know we're either ILP32 or LP64 .. 
- */ -#define CPTRSHIFT CLONGSHIFT -#define CPTRSIZE CLONGSIZE -#define CPTRMASK CLONGMASK - -#if CPTRSIZE != (1 << CPTRSHIFT) || CLONGSIZE != (1 << CLONGSHIFT) -#error "inconsistent shift constants" -#endif - -#if CPTRMASK != (CPTRSIZE - 1) || CLONGMASK != (CLONGSIZE - 1) -#error "inconsistent mask constants" -#endif - -#define ASM_ENTRY_ALIGN 16 - -/* - * SSE register alignment and save areas - */ - -#define XMM_SIZE 16 -#define XMM_ALIGN 16 - -#if defined(__amd64) - -#define SAVE_XMM_PROLOG(sreg, nreg) \ - subq $_CONST(_MUL(XMM_SIZE, nreg)), %rsp; \ - movq %rsp, sreg - -#define RSTOR_XMM_EPILOG(sreg, nreg) \ - addq $_CONST(_MUL(XMM_SIZE, nreg)), %rsp - -#elif defined(__i386) - -#define SAVE_XMM_PROLOG(sreg, nreg) \ - subl $_CONST(_MUL(XMM_SIZE, nreg) + XMM_ALIGN), %esp; \ - movl %esp, sreg; \ - addl $XMM_ALIGN, sreg; \ - andl $_BITNOT(XMM_ALIGN-1), sreg - -#define RSTOR_XMM_EPILOG(sreg, nreg) \ - addl $_CONST(_MUL(XMM_SIZE, nreg) + XMM_ALIGN), %esp; - -#endif /* __i386 */ - -/* - * profiling causes definitions of the MCOUNT and RTMCOUNT - * particular to the type - */ -#ifdef GPROF - -#define MCOUNT(x) \ - pushl %ebp; \ - movl %esp, %ebp; \ - call _mcount; \ - popl %ebp - -#endif /* GPROF */ - -#ifdef PROF - -#define MCOUNT(x) \ -/* CSTYLED */ \ - .lcomm .L_/**/x/**/1, 4, 4; \ - pushl %ebp; \ - movl %esp, %ebp; \ -/* CSTYLED */ \ - movl $.L_/**/x/**/1, %edx; \ - call _mcount; \ - popl %ebp - -#endif /* PROF */ - -/* - * if we are not profiling, MCOUNT should be defined to nothing - */ -#if !defined(PROF) && !defined(GPROF) -#define MCOUNT(x) -#endif /* !defined(PROF) && !defined(GPROF) */ - -#define RTMCOUNT(x) MCOUNT(x) - -/* - * Macro to define weak symbol aliases. These are similar to the ANSI-C - * #pragma weak _name = name - * except a compiler can determine type. The assembler must be told. Hence, - * the second parameter must be the type of the symbol (i.e.: function,...) 
- */ -#define ANSI_PRAGMA_WEAK(sym, stype) \ -/* CSTYLED */ \ - .weak _/**/sym; \ -/* CSTYLED */ \ - .type _/**/sym, @stype; \ -/* CSTYLED */ \ -_/**/sym = sym - -/* - * Like ANSI_PRAGMA_WEAK(), but for unrelated names, as in: - * #pragma weak sym1 = sym2 - */ -#define ANSI_PRAGMA_WEAK2(sym1, sym2, stype) \ - .weak sym1; \ - .type sym1, @stype; \ -sym1 = sym2 - -/* - * ENTRY provides the standard procedure entry code and an easy way to - * insert the calls to mcount for profiling. ENTRY_NP is identical, but - * never calls mcount. - */ -#define ENTRY(x) \ - .text; \ - .align ASM_ENTRY_ALIGN; \ - .globl x; \ - .type x, @function; \ -x: MCOUNT(x) - -#define ENTRY_NP(x) \ - .text; \ - .align ASM_ENTRY_ALIGN; \ - .globl x; \ - .type x, @function; \ -x: - -#define RTENTRY(x) \ - .text; \ - .align ASM_ENTRY_ALIGN; \ - .globl x; \ - .type x, @function; \ -x: RTMCOUNT(x) - -/* - * ENTRY2 is identical to ENTRY but provides two labels for the entry point. - */ -#define ENTRY2(x, y) \ - .text; \ - .align ASM_ENTRY_ALIGN; \ - .globl x, y; \ - .type x, @function; \ - .type y, @function; \ -/* CSTYLED */ \ -x: ; \ -y: MCOUNT(x) - -#define ENTRY_NP2(x, y) \ - .text; \ - .align ASM_ENTRY_ALIGN; \ - .globl x, y; \ - .type x, @function; \ - .type y, @function; \ -/* CSTYLED */ \ -x: ; \ -y: - - -/* - * ALTENTRY provides for additional entry points. - */ -#define ALTENTRY(x) \ - .globl x; \ - .type x, @function; \ -x: - -/* - * DGDEF and DGDEF2 provide global data declarations. - * - * DGDEF provides a word aligned word of storage. - * - * DGDEF2 allocates "sz" bytes of storage with **NO** alignment. This - * implies this macro is best used for byte arrays. - * - * DGDEF3 allocates "sz" bytes of storage with "algn" alignment. 
- */ -#define DGDEF2(name, sz) \ - .data; \ - .globl name; \ - .type name, @object; \ - .size name, sz; \ -name: - -#define DGDEF3(name, sz, algn) \ - .data; \ - .align algn; \ - .globl name; \ - .type name, @object; \ - .size name, sz; \ -name: - -#define DGDEF(name) DGDEF3(name, 4, 4) - -/* - * SET_SIZE trails a function and set the size for the ELF symbol table. - */ -#define SET_SIZE(x) \ - .size x, [.-x] - -/* - * NWORD provides native word value. - */ -#if defined(__amd64) - -/*CSTYLED*/ -#define NWORD quad - -#elif defined(__i386) - -#define NWORD long - -#endif /* __i386 */ - -#endif /* _ASM */ - -#ifdef __cplusplus -} -#endif - -#endif /* _IA32_SYS_ASM_LINKAGE_H */ diff --git a/sys/contrib/openzfs/module/icp/include/sys/ia32/stack.h b/sys/contrib/openzfs/module/icp/include/sys/ia32/stack.h deleted file mode 100644 index 9e7c089e1182..000000000000 --- a/sys/contrib/openzfs/module/icp/include/sys/ia32/stack.h +++ /dev/null @@ -1,160 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License, Version 1.0 only - * (the "License"). You may not use this file except in compliance - * with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2004 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. 
- */ - -#ifndef _IA32_SYS_STACK_H -#define _IA32_SYS_STACK_H - -#if !defined(_ASM) - -#include <sys/types.h> - -#endif - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * In the x86 world, a stack frame looks like this: - * - * |--------------------------| - * 4n+8(%ebp) ->| argument word n | - * | ... | (Previous frame) - * 8(%ebp) ->| argument word 0 | - * |--------------------------|-------------------- - * 4(%ebp) ->| return address | - * |--------------------------| - * 0(%ebp) ->| previous %ebp (optional) | - * |--------------------------| - * -4(%ebp) ->| unspecified | (Current frame) - * | ... | - * 0(%esp) ->| variable size | - * |--------------------------| - */ - -/* - * Stack alignment macros. - */ - -#define STACK_ALIGN32 4 -#define STACK_ENTRY_ALIGN32 4 -#define STACK_BIAS32 0 -#define SA32(x) (((x)+(STACK_ALIGN32-1)) & ~(STACK_ALIGN32-1)) -#define STACK_RESERVE32 0 -#define MINFRAME32 0 - -#if defined(__amd64) - -/* - * In the amd64 world, a stack frame looks like this: - * - * |--------------------------| - * 8n+16(%rbp)->| argument word n | - * | ... | (Previous frame) - * 16(%rbp) ->| argument word 0 | - * |--------------------------|-------------------- - * 8(%rbp) ->| return address | - * |--------------------------| - * 0(%rbp) ->| previous %rbp | - * |--------------------------| - * -8(%rbp) ->| unspecified | (Current frame) - * | ... | - * 0(%rsp) ->| variable size | - * |--------------------------| - * -128(%rsp) ->| reserved for function | - * |--------------------------| - * - * The end of the input argument area must be aligned on a 16-byte - * boundary; i.e. (%rsp - 8) % 16 == 0 at function entry. - * - * The 128-byte location beyond %rsp is considered to be reserved for - * functions and is NOT modified by signal handlers. It can be used - * to store temporary data that is not needed across function calls. - */ - -/* - * Stack alignment macros. 
- */ - -#define STACK_ALIGN64 16 -#define STACK_ENTRY_ALIGN64 8 -#define STACK_BIAS64 0 -#define SA64(x) (((x)+(STACK_ALIGN64-1)) & ~(STACK_ALIGN64-1)) -#define STACK_RESERVE64 128 -#define MINFRAME64 0 - -#define STACK_ALIGN STACK_ALIGN64 -#define STACK_ENTRY_ALIGN STACK_ENTRY_ALIGN64 -#define STACK_BIAS STACK_BIAS64 -#define SA(x) SA64(x) -#define STACK_RESERVE STACK_RESERVE64 -#define MINFRAME MINFRAME64 - -#elif defined(__i386) - -#define STACK_ALIGN STACK_ALIGN32 -#define STACK_ENTRY_ALIGN STACK_ENTRY_ALIGN32 -#define STACK_BIAS STACK_BIAS32 -#define SA(x) SA32(x) -#define STACK_RESERVE STACK_RESERVE32 -#define MINFRAME MINFRAME32 - -#endif /* __i386 */ - -#if defined(_KERNEL) && !defined(_ASM) - -#if defined(ZFS_DEBUG) -#if STACK_ALIGN == 4 -#define ASSERT_STACK_ALIGNED() \ - { \ - uint32_t __tmp; \ - ASSERT((((uintptr_t)&__tmp) & (STACK_ALIGN - 1)) == 0); \ - } -#elif (STACK_ALIGN == 16) && (_LONG_DOUBLE_ALIGNMENT == 16) -#define ASSERT_STACK_ALIGNED() \ - { \ - long double __tmp; \ - ASSERT((((uintptr_t)&__tmp) & (STACK_ALIGN - 1)) == 0); \ - } -#endif -#else /* DEBUG */ -#define ASSERT_STACK_ALIGNED() -#endif /* DEBUG */ - -struct regs; - -void traceregs(struct regs *); -void traceback(caddr_t); - -#endif /* defined(_KERNEL) && !defined(_ASM) */ - -#define STACK_GROWTH_DOWN /* stacks grow from high to low addresses */ - -#ifdef __cplusplus -} -#endif - -#endif /* _IA32_SYS_STACK_H */ diff --git a/sys/contrib/openzfs/module/icp/include/sys/ia32/trap.h b/sys/contrib/openzfs/module/icp/include/sys/ia32/trap.h deleted file mode 100644 index 55b94969b80b..000000000000 --- a/sys/contrib/openzfs/module/icp/include/sys/ia32/trap.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. 
- * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */ -/* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */ -/* All Rights Reserved */ - -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _IA32_SYS_TRAP_H -#define _IA32_SYS_TRAP_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* - * Trap type values - */ - -#define T_ZERODIV 0x0 /* #de divide by 0 error */ -#define T_SGLSTP 0x1 /* #db single step */ -#define T_NMIFLT 0x2 /* NMI */ -#define T_BPTFLT 0x3 /* #bp breakpoint fault, INT3 insn */ -#define T_OVFLW 0x4 /* #of INTO overflow fault */ -#define T_BOUNDFLT 0x5 /* #br BOUND insn fault */ -#define T_ILLINST 0x6 /* #ud invalid opcode fault */ -#define T_NOEXTFLT 0x7 /* #nm device not available: x87 */ -#define T_DBLFLT 0x8 /* #df double fault */ -#define T_EXTOVRFLT 0x9 /* [not generated: 386 only] */ -#define T_TSSFLT 0xa /* #ts invalid TSS fault */ -#define T_SEGFLT 0xb /* #np segment not present fault */ -#define T_STKFLT 0xc /* #ss stack fault */ -#define T_GPFLT 0xd /* #gp general protection fault */ -#define T_PGFLT 0xe /* #pf page fault */ -#define T_EXTERRFLT 0x10 /* #mf x87 FPU error fault */ -#define T_ALIGNMENT 0x11 /* #ac alignment check error */ -#define T_MCE 0x12 /* #mc machine check exception */ -#define T_SIMDFPE 0x13 /* #xm SSE/SSE exception */ -#define 
T_DBGENTR 0x14 /* debugger entry */ -#define T_ENDPERR 0x21 /* emulated extension error flt */ -#define T_ENOEXTFLT 0x20 /* emulated ext not present */ -#define T_FASTTRAP 0xd2 /* fast system call */ -#define T_SYSCALLINT 0x91 /* general system call */ -#define T_DTRACE_RET 0x7f /* DTrace pid return */ -#define T_INT80 0x80 /* int80 handler for linux emulation */ -#define T_SOFTINT 0x50fd /* pseudo softint trap type */ - -/* - * Pseudo traps. - */ -#define T_INTERRUPT 0x100 -#define T_FAULT 0x200 -#define T_AST 0x400 -#define T_SYSCALL 0x180 - - -/* - * Values of error code on stack in case of page fault - */ - -#define PF_ERR_MASK 0x01 /* Mask for error bit */ -#define PF_ERR_PAGE 0x00 /* page not present */ -#define PF_ERR_PROT 0x01 /* protection error */ -#define PF_ERR_WRITE 0x02 /* fault caused by write (else read) */ -#define PF_ERR_USER 0x04 /* processor was in user mode */ - /* (else supervisor) */ -#define PF_ERR_EXEC 0x10 /* attempt to execute a No eXec page (AMD) */ - -/* - * Definitions for fast system call subfunctions - */ -#define T_FNULL 0 /* Null trap for testing */ -#define T_FGETFP 1 /* Get emulated FP context */ -#define T_FSETFP 2 /* Set emulated FP context */ -#define T_GETHRTIME 3 /* Get high resolution time */ -#define T_GETHRVTIME 4 /* Get high resolution virtual time */ -#define T_GETHRESTIME 5 /* Get high resolution time */ -#define T_GETLGRP 6 /* Get home lgrpid */ - -#define T_LASTFAST 6 /* Last valid subfunction */ - -#ifdef __cplusplus -} -#endif - -#endif /* _IA32_SYS_TRAP_H */ diff --git a/sys/contrib/openzfs/module/icp/include/sys/modctl.h b/sys/contrib/openzfs/module/icp/include/sys/modctl.h deleted file mode 100644 index 6c26ad618c93..000000000000 --- a/sys/contrib/openzfs/module/icp/include/sys/modctl.h +++ /dev/null @@ -1,477 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). 
- * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2009 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SYS_MODCTL_H -#define _SYS_MODCTL_H - -/* - * loadable module support. - */ - -#include <sys/zfs_context.h> - -#ifdef __cplusplus -extern "C" { -#endif - -struct modlmisc; -struct modlinkage; - -/* - * The following structure defines the operations used by modctl - * to load and unload modules. Each supported loadable module type - * requires a set of mod_ops. 
- */ -struct mod_ops { - int (*modm_install)(struct modlmisc *, struct modlinkage *); - int (*modm_remove)(struct modlmisc *, struct modlinkage *); - int (*modm_info)(void *, struct modlinkage *, int *); -}; - -/* - * The defined set of mod_ops structures for each loadable module type - * Defined in modctl.c - */ -extern struct mod_ops mod_brandops; -#if defined(__i386) || defined(__amd64) -extern struct mod_ops mod_cpuops; -#endif -extern struct mod_ops mod_cryptoops; -extern struct mod_ops mod_driverops; -extern struct mod_ops mod_execops; -extern struct mod_ops mod_fsops; -extern struct mod_ops mod_miscops; -extern struct mod_ops mod_schedops; -extern struct mod_ops mod_strmodops; -extern struct mod_ops mod_syscallops; -extern struct mod_ops mod_sockmodops; -#ifdef _SYSCALL32_IMPL -extern struct mod_ops mod_syscallops32; -#endif -extern struct mod_ops mod_dacfops; -extern struct mod_ops mod_ippops; -extern struct mod_ops mod_pcbeops; -extern struct mod_ops mod_devfsops; -extern struct mod_ops mod_kiconvops; - -/* - * Definitions for the module specific linkage structures. - * The first two fields are the same in all of the structures. - * The linkinfo is for informational purposes only and is returned by - * modctl with the MODINFO cmd. - */ - -/* For cryptographic providers */ -struct modlcrypto { - struct mod_ops *crypto_modops; - char *crypto_linkinfo; -}; - -/* For misc */ -struct modlmisc { - struct mod_ops *misc_modops; - char *misc_linkinfo; -}; - -/* - * Revision number of loadable modules support. This is the value - * that must be used in the modlinkage structure. - */ -#define MODREV_1 1 - -/* - * The modlinkage structure is the structure that the module writer - * provides to the routines to install, remove, and stat a module. - * The ml_linkage element is an array of pointers to linkage structures. - * For most modules there is only one linkage structure. 
We allocate - * enough space for 3 linkage structures which happens to be the most - * we have in any sun supplied module. For those modules with more - * than 3 linkage structures (which is very unlikely), a modlinkage - * structure must be kmem_alloc'd in the module wrapper to be big enough - * for all of the linkage structures. - */ -struct modlinkage { - int ml_rev; /* rev of loadable modules system */ -#ifdef _LP64 - void *ml_linkage[7]; /* more space in 64-bit OS */ -#else - void *ml_linkage[4]; /* NULL terminated list of */ - /* linkage structures */ -#endif -}; - -/* - * commands. These are the commands supported by the modctl system call. - */ -#define MODLOAD 0 -#define MODUNLOAD 1 -#define MODINFO 2 -#define MODRESERVED 3 -#define MODSETMINIROOT 4 -#define MODADDMAJBIND 5 -#define MODGETPATH 6 -#define MODREADSYSBIND 7 -#define MODGETMAJBIND 8 -#define MODGETNAME 9 -#define MODSIZEOF_DEVID 10 -#define MODGETDEVID 11 -#define MODSIZEOF_MINORNAME 12 -#define MODGETMINORNAME 13 -#define MODGETPATHLEN 14 -#define MODEVENTS 15 -#define MODGETFBNAME 16 -#define MODREREADDACF 17 -#define MODLOADDRVCONF 18 -#define MODUNLOADDRVCONF 19 -#define MODREMMAJBIND 20 -#define MODDEVT2INSTANCE 21 -#define MODGETDEVFSPATH_LEN 22 -#define MODGETDEVFSPATH 23 -#define MODDEVID2PATHS 24 -#define MODSETDEVPOLICY 26 -#define MODGETDEVPOLICY 27 -#define MODALLOCPRIV 28 -#define MODGETDEVPOLICYBYNAME 29 -#define MODLOADMINORPERM 31 -#define MODADDMINORPERM 32 -#define MODREMMINORPERM 33 -#define MODREMDRVCLEANUP 34 -#define MODDEVEXISTS 35 -#define MODDEVREADDIR 36 -#define MODDEVNAME 37 -#define MODGETDEVFSPATH_MI_LEN 38 -#define MODGETDEVFSPATH_MI 39 -#define MODRETIRE 40 -#define MODUNRETIRE 41 -#define MODISRETIRED 42 -#define MODDEVEMPTYDIR 43 -#define MODREMDRVALIAS 44 - -/* - * sub cmds for MODEVENTS - */ -#define MODEVENTS_FLUSH 0 -#define MODEVENTS_FLUSH_DUMP 1 -#define MODEVENTS_SET_DOOR_UPCALL_FILENAME 2 -#define MODEVENTS_GETDATA 3 -#define MODEVENTS_FREEDATA 4 
-#define MODEVENTS_POST_EVENT 5 -#define MODEVENTS_REGISTER_EVENT 6 - -/* - * devname subcmds for MODDEVNAME - */ -#define MODDEVNAME_LOOKUPDOOR 0 -#define MODDEVNAME_DEVFSADMNODE 1 -#define MODDEVNAME_NSMAPS 2 -#define MODDEVNAME_PROFILE 3 -#define MODDEVNAME_RECONFIG 4 -#define MODDEVNAME_SYSAVAIL 5 - - -/* - * Data structure passed to modconfig command in kernel to build devfs tree - */ - -struct aliases { - struct aliases *a_next; - char *a_name; - int a_len; -}; - -#define MAXMODCONFNAME 256 - -struct modconfig { - char drvname[MAXMODCONFNAME]; - char drvclass[MAXMODCONFNAME]; - int major; - int flags; - int num_aliases; - struct aliases *ap; -}; - -#if defined(_SYSCALL32) - -struct aliases32 { - caddr32_t a_next; - caddr32_t a_name; - int32_t a_len; -}; - -struct modconfig32 { - char drvname[MAXMODCONFNAME]; - char drvclass[MAXMODCONFNAME]; - int32_t major; - int32_t flags; - int32_t num_aliases; - caddr32_t ap; -}; - -#endif /* _SYSCALL32 */ - -/* flags for modconfig */ -#define MOD_UNBIND_OVERRIDE 0x01 /* fail unbind if in use */ - -/* - * Max module path length - */ -#define MOD_MAXPATH 256 - -/* - * Default search path for modules ADDITIONAL to the directory - * where the kernel components we booted from are. - * - * Most often, this will be "/platform/{platform}/kernel /kernel /usr/kernel", - * but we don't wire it down here. - */ -#define MOD_DEFPATH "/kernel /usr/kernel" - -/* - * Default file name extension for autoloading modules. - */ -#define MOD_DEFEXT "" - -/* - * Parameters for modinfo - */ -#define MODMAXNAMELEN 32 /* max module name length */ -#define MODMAXLINKINFOLEN 32 /* max link info length */ - -/* - * Module specific information. - */ -struct modspecific_info { - char msi_linkinfo[MODMAXLINKINFOLEN]; /* name in linkage struct */ - int msi_p0; /* module specific information */ -}; - -/* - * Structure returned by modctl with MODINFO command. 
- */ -#define MODMAXLINK 10 /* max linkages modinfo can handle */ - -struct modinfo { - int mi_info; /* Flags for info wanted */ - int mi_state; /* Flags for module state */ - int mi_id; /* id of this loaded module */ - int mi_nextid; /* id of next module or -1 */ - caddr_t mi_base; /* virtual addr of text */ - size_t mi_size; /* size of module in bytes */ - int mi_rev; /* loadable modules rev */ - int mi_loadcnt; /* # of times loaded */ - char mi_name[MODMAXNAMELEN]; /* name of module */ - struct modspecific_info mi_msinfo[MODMAXLINK]; - /* mod specific info */ -}; - - -#if defined(_SYSCALL32) - -#define MODMAXNAMELEN32 32 /* max module name length */ -#define MODMAXLINKINFOLEN32 32 /* max link info length */ -#define MODMAXLINK32 10 /* max linkages modinfo can handle */ - -struct modspecific_info32 { - char msi_linkinfo[MODMAXLINKINFOLEN32]; /* name in linkage struct */ - int32_t msi_p0; /* module specific information */ -}; - -struct modinfo32 { - int32_t mi_info; /* Flags for info wanted */ - int32_t mi_state; /* Flags for module state */ - int32_t mi_id; /* id of this loaded module */ - int32_t mi_nextid; /* id of next module or -1 */ - caddr32_t mi_base; /* virtual addr of text */ - uint32_t mi_size; /* size of module in bytes */ - int32_t mi_rev; /* loadable modules rev */ - int32_t mi_loadcnt; /* # of times loaded */ - char mi_name[MODMAXNAMELEN32]; /* name of module */ - struct modspecific_info32 mi_msinfo[MODMAXLINK32]; - /* mod specific info */ -}; - -#endif /* _SYSCALL32 */ - -/* Values for mi_info flags */ -#define MI_INFO_ONE 1 -#define MI_INFO_ALL 2 -#define MI_INFO_CNT 4 -#define MI_INFO_LINKAGE 8 /* used internally to extract modlinkage */ -/* - * MI_INFO_NOBASE indicates caller does not need mi_base. Failure to use this - * flag may lead 32-bit apps to receive an EOVERFLOW error from modctl(MODINFO) - * when used with a 64-bit kernel. 
- */ -#define MI_INFO_NOBASE 16 - -/* Values for mi_state */ -#define MI_LOADED 1 -#define MI_INSTALLED 2 - -/* - * Macros to vector to the appropriate module specific routine. - */ -#define MODL_INSTALL(MODL, MODLP) \ - (*(MODL)->misc_modops->modm_install)(MODL, MODLP) -#define MODL_REMOVE(MODL, MODLP) \ - (*(MODL)->misc_modops->modm_remove)(MODL, MODLP) -#define MODL_INFO(MODL, MODLP, P0) \ - (*(MODL)->misc_modops->modm_info)(MODL, MODLP, P0) - -/* - * Definitions for stubs - */ -struct mod_stub_info { - uintptr_t mods_func_adr; - struct mod_modinfo *mods_modinfo; - uintptr_t mods_stub_adr; - int (*mods_errfcn)(void); - int mods_flag; /* flags defined below */ -}; - -/* - * Definitions for mods_flag. - */ -#define MODS_WEAK 0x01 /* weak stub (not loaded if called) */ -#define MODS_NOUNLOAD 0x02 /* module not unloadable (no _fini()) */ -#define MODS_INSTALLED 0x10 /* module installed */ - -struct mod_modinfo { - char *modm_module_name; - struct modctl *mp; - struct mod_stub_info modm_stubs[1]; -}; - -struct modctl_list { - struct modctl_list *modl_next; - struct modctl *modl_modp; -}; - -/* - * Structure to manage a loadable module. - * Note: the module (mod_mp) structure's "text" and "text_size" information - * are replicated in the modctl structure so that mod_containing_pc() - * doesn't have to grab any locks (modctls are persistent; modules are not.) 
- */ -typedef struct modctl { - struct modctl *mod_next; /* &modules based list */ - struct modctl *mod_prev; - int mod_id; - void *mod_mp; - kthread_t *mod_inprogress_thread; - struct mod_modinfo *mod_modinfo; - struct modlinkage *mod_linkage; - char *mod_filename; - char *mod_modname; - - char mod_busy; /* inprogress_thread has locked */ - char mod_want; /* someone waiting for unlock */ - char mod_prim; /* primary module */ - - int mod_ref; /* ref count - from dependent or stub */ - - char mod_loaded; /* module in memory */ - char mod_installed; /* post _init pre _fini */ - char mod_loadflags; - char mod_delay_unload; /* deferred unload */ - - struct modctl_list *mod_requisites; /* mods this one depends on. */ - void *____unused; /* NOTE: reuse (same size) is OK, */ - /* deletion causes mdb.vs.core issues */ - int mod_loadcnt; /* number of times mod was loaded */ - int mod_nenabled; /* # of enabled DTrace probes in mod */ - char *mod_text; - size_t mod_text_size; - - int mod_gencount; /* # times loaded/unloaded */ - struct modctl *mod_requisite_loading; /* mod circular dependency */ -} modctl_t; - -/* - * mod_loadflags - */ - -#define MOD_NOAUTOUNLOAD 0x1 /* Auto mod-unloader skips this mod */ -#define MOD_NONOTIFY 0x2 /* No krtld notifications on (un)load */ -#define MOD_NOUNLOAD 0x4 /* Assume EBUSY for all _fini's */ - -#define MOD_BIND_HASHSIZE 64 -#define MOD_BIND_HASHMASK (MOD_BIND_HASHSIZE-1) - -typedef int modid_t; - -/* - * global function and data declarations - */ -extern kmutex_t mod_lock; - -extern char *systemfile; -extern char **syscallnames; -extern int moddebug; - -/* - * this is the head of a doubly linked list. Only the next and prev - * pointers are used - */ -extern modctl_t modules; - -/* - * Only the following are part of the DDI/DKI - */ -extern int mod_install(struct modlinkage *); -extern int mod_remove(struct modlinkage *); -extern int mod_info(struct modlinkage *, struct modinfo *); - -/* - * bit definitions for moddebug. 
- */ -#define MODDEBUG_LOADMSG 0x80000000 /* print "[un]loading..." msg */ -#define MODDEBUG_ERRMSG 0x40000000 /* print detailed error msgs */ -#define MODDEBUG_LOADMSG2 0x20000000 /* print 2nd level msgs */ -#define MODDEBUG_RETIRE 0x10000000 /* print retire msgs */ -#define MODDEBUG_BINDING 0x00040000 /* driver/alias binding */ -#define MODDEBUG_FINI_EBUSY 0x00020000 /* pretend fini returns EBUSY */ -#define MODDEBUG_NOAUL_IPP 0x00010000 /* no Autounloading ipp mods */ -#define MODDEBUG_NOAUL_DACF 0x00008000 /* no Autounloading dacf mods */ -#define MODDEBUG_KEEPTEXT 0x00004000 /* keep text after unloading */ -#define MODDEBUG_NOAUL_DRV 0x00001000 /* no Autounloading Drivers */ -#define MODDEBUG_NOAUL_EXEC 0x00000800 /* no Autounloading Execs */ -#define MODDEBUG_NOAUL_FS 0x00000400 /* no Autounloading File sys */ -#define MODDEBUG_NOAUL_MISC 0x00000200 /* no Autounloading misc */ -#define MODDEBUG_NOAUL_SCHED 0x00000100 /* no Autounloading scheds */ -#define MODDEBUG_NOAUL_STR 0x00000080 /* no Autounloading streams */ -#define MODDEBUG_NOAUL_SYS 0x00000040 /* no Autounloading syscalls */ -#define MODDEBUG_NOCTF 0x00000020 /* do not load CTF debug data */ -#define MODDEBUG_NOAUTOUNLOAD 0x00000010 /* no autounloading at all */ -#define MODDEBUG_DDI_MOD 0x00000008 /* ddi_mod{open,sym,close} */ -#define MODDEBUG_MP_MATCH 0x00000004 /* dev_minorperm */ -#define MODDEBUG_MINORPERM 0x00000002 /* minor perm modctls */ -#define MODDEBUG_USERDEBUG 0x00000001 /* bpt after init_module() */ - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_MODCTL_H */ diff --git a/sys/contrib/openzfs/module/icp/include/sys/modhash.h b/sys/contrib/openzfs/module/icp/include/sys/modhash.h deleted file mode 100644 index 06b52ff02604..000000000000 --- a/sys/contrib/openzfs/module/icp/include/sys/modhash.h +++ /dev/null @@ -1,147 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). 
- * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SYS_MODHASH_H -#define _SYS_MODHASH_H - -/* - * Generic hash implementation for the kernel. - */ - -#ifdef __cplusplus -extern "C" { -#endif - -#include <sys/zfs_context.h> - -/* - * Opaque data types for storing keys and values - */ -typedef void *mod_hash_val_t; -typedef void *mod_hash_key_t; - -/* - * Opaque data type for reservation - */ -typedef void *mod_hash_hndl_t; - -/* - * Opaque type for hash itself. 
- */ -struct mod_hash; -typedef struct mod_hash mod_hash_t; - -/* - * String hash table - */ -mod_hash_t *mod_hash_create_strhash_nodtr(char *, size_t, - void (*)(mod_hash_val_t)); -mod_hash_t *mod_hash_create_strhash(char *, size_t, void (*)(mod_hash_val_t)); -void mod_hash_destroy_strhash(mod_hash_t *); -int mod_hash_strkey_cmp(mod_hash_key_t, mod_hash_key_t); -void mod_hash_strkey_dtor(mod_hash_key_t); -void mod_hash_strval_dtor(mod_hash_val_t); -uint_t mod_hash_bystr(void *, mod_hash_key_t); - -/* - * Pointer hash table - */ -mod_hash_t *mod_hash_create_ptrhash(char *, size_t, void (*)(mod_hash_val_t), - size_t); -void mod_hash_destroy_ptrhash(mod_hash_t *); -int mod_hash_ptrkey_cmp(mod_hash_key_t, mod_hash_key_t); -uint_t mod_hash_byptr(void *, mod_hash_key_t); - -/* - * ID hash table - */ -mod_hash_t *mod_hash_create_idhash(char *, size_t, void (*)(mod_hash_val_t)); -void mod_hash_destroy_idhash(mod_hash_t *); -int mod_hash_idkey_cmp(mod_hash_key_t, mod_hash_key_t); -uint_t mod_hash_byid(void *, mod_hash_key_t); -uint_t mod_hash_iddata_gen(size_t); - -/* - * Hash management functions - */ -mod_hash_t *mod_hash_create_extended(char *, size_t, void (*)(mod_hash_key_t), - void (*)(mod_hash_val_t), uint_t (*)(void *, mod_hash_key_t), void *, - int (*)(mod_hash_key_t, mod_hash_key_t), int); - -void mod_hash_destroy_hash(mod_hash_t *); -void mod_hash_clear(mod_hash_t *); - -/* - * Null key and value destructors - */ -void mod_hash_null_keydtor(mod_hash_key_t); -void mod_hash_null_valdtor(mod_hash_val_t); - -/* - * Basic hash operations - */ - -/* - * Error codes for insert, remove, find, destroy. 
- */ -#define MH_ERR_NOMEM -1 -#define MH_ERR_DUPLICATE -2 -#define MH_ERR_NOTFOUND -3 - -/* - * Return codes for hash walkers - */ -#define MH_WALK_CONTINUE 0 -#define MH_WALK_TERMINATE 1 - -/* - * Basic hash operations - */ -int mod_hash_insert(mod_hash_t *, mod_hash_key_t, mod_hash_val_t); -int mod_hash_replace(mod_hash_t *, mod_hash_key_t, mod_hash_val_t); -int mod_hash_remove(mod_hash_t *, mod_hash_key_t, mod_hash_val_t *); -int mod_hash_destroy(mod_hash_t *, mod_hash_key_t); -int mod_hash_find(mod_hash_t *, mod_hash_key_t, mod_hash_val_t *); -int mod_hash_find_cb(mod_hash_t *, mod_hash_key_t, mod_hash_val_t *, - void (*)(mod_hash_key_t, mod_hash_val_t)); -int mod_hash_find_cb_rval(mod_hash_t *, mod_hash_key_t, mod_hash_val_t *, - int (*)(mod_hash_key_t, mod_hash_val_t), int *); -void mod_hash_walk(mod_hash_t *, - uint_t (*)(mod_hash_key_t, mod_hash_val_t *, void *), void *); - -/* - * Reserving hash operations - */ -int mod_hash_reserve(mod_hash_t *, mod_hash_hndl_t *); -int mod_hash_reserve_nosleep(mod_hash_t *, mod_hash_hndl_t *); -void mod_hash_cancel(mod_hash_t *, mod_hash_hndl_t *); -int mod_hash_insert_reserve(mod_hash_t *, mod_hash_key_t, mod_hash_val_t, - mod_hash_hndl_t); - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_MODHASH_H */ diff --git a/sys/contrib/openzfs/module/icp/include/sys/modhash_impl.h b/sys/contrib/openzfs/module/icp/include/sys/modhash_impl.h deleted file mode 100644 index 3130773aa196..000000000000 --- a/sys/contrib/openzfs/module/icp/include/sys/modhash_impl.h +++ /dev/null @@ -1,108 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. 
- * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#ifndef _SYS_MODHASH_IMPL_H -#define _SYS_MODHASH_IMPL_H - -/* - * Internal details for the kernel's generic hash implementation. - */ - -#ifdef __cplusplus -extern "C" { -#endif - -#include <sys/zfs_context.h> -#include <sys/modhash.h> - -struct mod_hash_entry { - mod_hash_key_t mhe_key; /* stored hash key */ - mod_hash_val_t mhe_val; /* stored hash value */ - struct mod_hash_entry *mhe_next; /* next item in chain */ -}; - -struct mod_hash_stat { - ulong_t mhs_hit; /* tried a 'find' and it succeeded */ - ulong_t mhs_miss; /* tried a 'find' but it failed */ - ulong_t mhs_coll; /* occur when insert fails because of dup's */ - ulong_t mhs_nelems; /* total number of stored key/value pairs */ - ulong_t mhs_nomem; /* number of times kmem_alloc failed */ -}; - -struct mod_hash { - krwlock_t mh_contents; /* lock protecting contents */ - char *mh_name; /* hash name */ - int mh_sleep; /* kmem_alloc flag */ - size_t mh_nchains; /* # of elements in mh_entries */ - - /* key and val destructor */ - void (*mh_kdtor)(mod_hash_key_t); - void (*mh_vdtor)(mod_hash_val_t); - - /* key comparator */ - int (*mh_keycmp)(mod_hash_key_t, mod_hash_key_t); - - /* hash algorithm, and algorithm-private data */ - uint_t (*mh_hashalg)(void *, mod_hash_key_t); - void *mh_hashalg_data; - - struct mod_hash *mh_next; /* next hash in list */ - - struct mod_hash_stat mh_stat; - - struct mod_hash_entry 
*mh_entries[1]; -}; - -/* - * MH_SIZE() - * Compute the size of a mod_hash_t, in bytes, given the number of - * elements it contains. - */ -#define MH_SIZE(n) \ - (sizeof (mod_hash_t) + ((n) - 1) * (sizeof (struct mod_hash_entry *))) - -/* - * Module initialization; called once. - */ -void mod_hash_fini(void); -void mod_hash_init(void); - -/* - * Internal routines. Use directly with care. - */ -uint_t i_mod_hash(mod_hash_t *, mod_hash_key_t); -int i_mod_hash_insert_nosync(mod_hash_t *, mod_hash_key_t, mod_hash_val_t, - mod_hash_hndl_t); -int i_mod_hash_remove_nosync(mod_hash_t *, mod_hash_key_t, mod_hash_val_t *); -int i_mod_hash_find_nosync(mod_hash_t *, mod_hash_key_t, mod_hash_val_t *); -void i_mod_hash_walk_nosync(mod_hash_t *, uint_t (*)(mod_hash_key_t, - mod_hash_val_t *, void *), void *); -void i_mod_hash_clear_nosync(mod_hash_t *hash); - -#ifdef __cplusplus -} -#endif - -#endif /* _SYS_MODHASH_IMPL_H */ diff --git a/sys/contrib/openzfs/module/icp/include/sys/stack.h b/sys/contrib/openzfs/module/icp/include/sys/stack.h index 64fecf409b5c..0bace018b5ab 100644 --- a/sys/contrib/openzfs/module/icp/include/sys/stack.h +++ b/sys/contrib/openzfs/module/icp/include/sys/stack.h @@ -7,7 +7,7 @@ * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * diff --git a/sys/contrib/openzfs/module/icp/include/sys/trap.h b/sys/contrib/openzfs/module/icp/include/sys/trap.h index 7f9fd375805f..2f47d43939c1 100644 --- a/sys/contrib/openzfs/module/icp/include/sys/trap.h +++ b/sys/contrib/openzfs/module/icp/include/sys/trap.h @@ -7,7 +7,7 @@ * with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. 
* See the License for the specific language governing permissions * and limitations under the License. * diff --git a/sys/contrib/openzfs/module/icp/io/aes.c b/sys/contrib/openzfs/module/icp/io/aes.c index c47c7567b900..d6f01304f56b 100644 --- a/sys/contrib/openzfs/module/icp/io/aes.c +++ b/sys/contrib/openzfs/module/icp/io/aes.c @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * @@ -32,102 +32,65 @@ #include <sys/crypto/spi.h> #include <sys/crypto/icp.h> #include <modes/modes.h> -#include <sys/modctl.h> #define _AES_IMPL #include <aes/aes_impl.h> #include <modes/gcm_impl.h> -#define CRYPTO_PROVIDER_NAME "aes" - -extern struct mod_ops mod_cryptoops; - -/* - * Module linkage information for the kernel. - */ -static struct modlcrypto modlcrypto = { - &mod_cryptoops, - "AES Kernel SW Provider" -}; - -static struct modlinkage modlinkage = { - MODREV_1, { (void *)&modlcrypto, NULL } -}; - /* * Mechanism info structure passed to KCF during registration. 
*/ -static crypto_mech_info_t aes_mech_info_tab[] = { +static const crypto_mech_info_t aes_mech_info_tab[] = { /* AES_ECB */ {SUN_CKM_AES_ECB, AES_ECB_MECH_INFO_TYPE, CRYPTO_FG_ENCRYPT | CRYPTO_FG_ENCRYPT_ATOMIC | - CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC, - AES_MIN_KEY_BYTES, AES_MAX_KEY_BYTES, CRYPTO_KEYSIZE_UNIT_IN_BYTES}, + CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC}, /* AES_CBC */ {SUN_CKM_AES_CBC, AES_CBC_MECH_INFO_TYPE, CRYPTO_FG_ENCRYPT | CRYPTO_FG_ENCRYPT_ATOMIC | - CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC, - AES_MIN_KEY_BYTES, AES_MAX_KEY_BYTES, CRYPTO_KEYSIZE_UNIT_IN_BYTES}, + CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC}, /* AES_CTR */ {SUN_CKM_AES_CTR, AES_CTR_MECH_INFO_TYPE, CRYPTO_FG_ENCRYPT | CRYPTO_FG_ENCRYPT_ATOMIC | - CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC, - AES_MIN_KEY_BYTES, AES_MAX_KEY_BYTES, CRYPTO_KEYSIZE_UNIT_IN_BYTES}, + CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC}, /* AES_CCM */ {SUN_CKM_AES_CCM, AES_CCM_MECH_INFO_TYPE, CRYPTO_FG_ENCRYPT | CRYPTO_FG_ENCRYPT_ATOMIC | - CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC, - AES_MIN_KEY_BYTES, AES_MAX_KEY_BYTES, CRYPTO_KEYSIZE_UNIT_IN_BYTES}, + CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC}, /* AES_GCM */ {SUN_CKM_AES_GCM, AES_GCM_MECH_INFO_TYPE, CRYPTO_FG_ENCRYPT | CRYPTO_FG_ENCRYPT_ATOMIC | - CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC, - AES_MIN_KEY_BYTES, AES_MAX_KEY_BYTES, CRYPTO_KEYSIZE_UNIT_IN_BYTES}, + CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC}, /* AES_GMAC */ {SUN_CKM_AES_GMAC, AES_GMAC_MECH_INFO_TYPE, CRYPTO_FG_ENCRYPT | CRYPTO_FG_ENCRYPT_ATOMIC | CRYPTO_FG_DECRYPT | CRYPTO_FG_DECRYPT_ATOMIC | - CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC | - CRYPTO_FG_SIGN | CRYPTO_FG_SIGN_ATOMIC | - CRYPTO_FG_VERIFY | CRYPTO_FG_VERIFY_ATOMIC, - AES_MIN_KEY_BYTES, AES_MAX_KEY_BYTES, CRYPTO_KEYSIZE_UNIT_IN_BYTES} -}; - -static void aes_provider_status(crypto_provider_handle_t, uint_t *); - -static crypto_control_ops_t aes_control_ops = { - aes_provider_status + CRYPTO_FG_MAC | 
CRYPTO_FG_MAC_ATOMIC}, }; static int aes_encrypt_init(crypto_ctx_t *, crypto_mechanism_t *, - crypto_key_t *, crypto_spi_ctx_template_t, crypto_req_handle_t); + crypto_key_t *, crypto_spi_ctx_template_t); static int aes_decrypt_init(crypto_ctx_t *, crypto_mechanism_t *, - crypto_key_t *, crypto_spi_ctx_template_t, crypto_req_handle_t); + crypto_key_t *, crypto_spi_ctx_template_t); static int aes_common_init(crypto_ctx_t *, crypto_mechanism_t *, - crypto_key_t *, crypto_spi_ctx_template_t, crypto_req_handle_t, boolean_t); + crypto_key_t *, crypto_spi_ctx_template_t, boolean_t); static int aes_common_init_ctx(aes_ctx_t *, crypto_spi_ctx_template_t *, crypto_mechanism_t *, crypto_key_t *, int, boolean_t); -static int aes_encrypt_final(crypto_ctx_t *, crypto_data_t *, - crypto_req_handle_t); -static int aes_decrypt_final(crypto_ctx_t *, crypto_data_t *, - crypto_req_handle_t); +static int aes_encrypt_final(crypto_ctx_t *, crypto_data_t *); +static int aes_decrypt_final(crypto_ctx_t *, crypto_data_t *); -static int aes_encrypt(crypto_ctx_t *, crypto_data_t *, crypto_data_t *, - crypto_req_handle_t); +static int aes_encrypt(crypto_ctx_t *, crypto_data_t *, crypto_data_t *); static int aes_encrypt_update(crypto_ctx_t *, crypto_data_t *, - crypto_data_t *, crypto_req_handle_t); -static int aes_encrypt_atomic(crypto_provider_handle_t, crypto_session_id_t, - crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, - crypto_data_t *, crypto_spi_ctx_template_t, crypto_req_handle_t); + crypto_data_t *); +static int aes_encrypt_atomic(crypto_mechanism_t *, crypto_key_t *, + crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t); -static int aes_decrypt(crypto_ctx_t *, crypto_data_t *, crypto_data_t *, - crypto_req_handle_t); +static int aes_decrypt(crypto_ctx_t *, crypto_data_t *, crypto_data_t *); static int aes_decrypt_update(crypto_ctx_t *, crypto_data_t *, - crypto_data_t *, crypto_req_handle_t); -static int aes_decrypt_atomic(crypto_provider_handle_t, 
crypto_session_id_t, - crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, - crypto_data_t *, crypto_spi_ctx_template_t, crypto_req_handle_t); + crypto_data_t *); +static int aes_decrypt_atomic(crypto_mechanism_t *, crypto_key_t *, + crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t); -static crypto_cipher_ops_t aes_cipher_ops = { +static const crypto_cipher_ops_t aes_cipher_ops = { .encrypt_init = aes_encrypt_init, .encrypt = aes_encrypt, .encrypt_update = aes_encrypt_update, @@ -140,14 +103,12 @@ static crypto_cipher_ops_t aes_cipher_ops = { .decrypt_atomic = aes_decrypt_atomic }; -static int aes_mac_atomic(crypto_provider_handle_t, crypto_session_id_t, - crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *, - crypto_spi_ctx_template_t, crypto_req_handle_t); -static int aes_mac_verify_atomic(crypto_provider_handle_t, crypto_session_id_t, - crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *, - crypto_spi_ctx_template_t, crypto_req_handle_t); +static int aes_mac_atomic(crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, + crypto_data_t *, crypto_spi_ctx_template_t); +static int aes_mac_verify_atomic(crypto_mechanism_t *, crypto_key_t *, + crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t); -static crypto_mac_ops_t aes_mac_ops = { +static const crypto_mac_ops_t aes_mac_ops = { .mac_init = NULL, .mac = NULL, .mac_update = NULL, @@ -156,42 +117,28 @@ static crypto_mac_ops_t aes_mac_ops = { .mac_verify_atomic = aes_mac_verify_atomic }; -static int aes_create_ctx_template(crypto_provider_handle_t, - crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t *, - size_t *, crypto_req_handle_t); +static int aes_create_ctx_template(crypto_mechanism_t *, crypto_key_t *, + crypto_spi_ctx_template_t *, size_t *); static int aes_free_context(crypto_ctx_t *); -static crypto_ctx_ops_t aes_ctx_ops = { +static const crypto_ctx_ops_t aes_ctx_ops = { .create_ctx_template = aes_create_ctx_template, .free_context = 
aes_free_context }; -static crypto_ops_t aes_crypto_ops = {{{{{ - &aes_control_ops, +static const crypto_ops_t aes_crypto_ops = { NULL, &aes_cipher_ops, &aes_mac_ops, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - &aes_ctx_ops -}}}}}; + &aes_ctx_ops, +}; -static crypto_provider_info_t aes_prov_info = {{{{ - CRYPTO_SPI_VERSION_1, +static const crypto_provider_info_t aes_prov_info = { "AES Software Provider", - CRYPTO_SW_PROVIDER, - NULL, &aes_crypto_ops, - sizeof (aes_mech_info_tab)/sizeof (crypto_mech_info_t), + sizeof (aes_mech_info_tab) / sizeof (crypto_mech_info_t), aes_mech_info_tab -}}}}; +}; static crypto_kcf_provider_handle_t aes_prov_handle = 0; static crypto_data_t null_crypto_data = { CRYPTO_DATA_RAW }; @@ -199,20 +146,13 @@ static crypto_data_t null_crypto_data = { CRYPTO_DATA_RAW }; int aes_mod_init(void) { - int ret; - /* Determine the fastest available implementation. */ aes_impl_init(); gcm_impl_init(); - if ((ret = mod_install(&modlinkage)) != 0) - return (ret); - /* Register with KCF. If the registration fails, remove the module. 
*/ - if (crypto_register_provider(&aes_prov_info, &aes_prov_handle)) { - (void) mod_remove(&modlinkage); + if (crypto_register_provider(&aes_prov_info, &aes_prov_handle)) return (EACCES); - } return (0); } @@ -228,11 +168,11 @@ aes_mod_fini(void) aes_prov_handle = 0; } - return (mod_remove(&modlinkage)); + return (0); } static int -aes_check_mech_param(crypto_mechanism_t *mechanism, aes_ctx_t **ctx, int kmflag) +aes_check_mech_param(crypto_mechanism_t *mechanism, aes_ctx_t **ctx) { void *p = NULL; boolean_t param_required = B_TRUE; @@ -274,7 +214,7 @@ aes_check_mech_param(crypto_mechanism_t *mechanism, aes_ctx_t **ctx, int kmflag) rv = CRYPTO_MECHANISM_PARAM_INVALID; } if (ctx != NULL) { - p = (alloc_fun)(kmflag); + p = (alloc_fun)(KM_SLEEP); *ctx = p; } return (rv); @@ -286,52 +226,31 @@ aes_check_mech_param(crypto_mechanism_t *mechanism, aes_ctx_t **ctx, int kmflag) static int init_keysched(crypto_key_t *key, void *newbie) { - /* - * Only keys by value are supported by this module. - */ - switch (key->ck_format) { - case CRYPTO_KEY_RAW: - if (key->ck_length < AES_MINBITS || - key->ck_length > AES_MAXBITS) { - return (CRYPTO_KEY_SIZE_RANGE); - } - - /* key length must be either 128, 192, or 256 */ - if ((key->ck_length & 63) != 0) - return (CRYPTO_KEY_SIZE_RANGE); - break; - default: - return (CRYPTO_KEY_TYPE_INCONSISTENT); + if (key->ck_length < AES_MINBITS || + key->ck_length > AES_MAXBITS) { + return (CRYPTO_KEY_SIZE_RANGE); } + /* key length must be either 128, 192, or 256 */ + if ((key->ck_length & 63) != 0) + return (CRYPTO_KEY_SIZE_RANGE); + aes_init_keysched(key->ck_data, key->ck_length, newbie); return (CRYPTO_SUCCESS); } -/* - * KCF software provider control entry points. 
- */ -/* ARGSUSED */ -static void -aes_provider_status(crypto_provider_handle_t provider, uint_t *status) -{ - *status = CRYPTO_PROVIDER_READY; -} - static int aes_encrypt_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism, - crypto_key_t *key, crypto_spi_ctx_template_t template, - crypto_req_handle_t req) + crypto_key_t *key, crypto_spi_ctx_template_t template) { - return (aes_common_init(ctx, mechanism, key, template, req, B_TRUE)); + return (aes_common_init(ctx, mechanism, key, template, B_TRUE)); } static int aes_decrypt_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism, - crypto_key_t *key, crypto_spi_ctx_template_t template, - crypto_req_handle_t req) + crypto_key_t *key, crypto_spi_ctx_template_t template) { - return (aes_common_init(ctx, mechanism, key, template, req, B_FALSE)); + return (aes_common_init(ctx, mechanism, key, template, B_FALSE)); } @@ -342,25 +261,16 @@ aes_decrypt_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism, static int aes_common_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_spi_ctx_template_t template, - crypto_req_handle_t req, boolean_t is_encrypt_init) + boolean_t is_encrypt_init) { aes_ctx_t *aes_ctx; int rv; - int kmflag; - - /* - * Only keys by value are supported by this module. 
- */ - if (key->ck_format != CRYPTO_KEY_RAW) { - return (CRYPTO_KEY_TYPE_INCONSISTENT); - } - kmflag = crypto_kmflag(req); - if ((rv = aes_check_mech_param(mechanism, &aes_ctx, kmflag)) + if ((rv = aes_check_mech_param(mechanism, &aes_ctx)) != CRYPTO_SUCCESS) return (rv); - rv = aes_common_init_ctx(aes_ctx, template, mechanism, key, kmflag, + rv = aes_common_init_ctx(aes_ctx, template, mechanism, key, KM_SLEEP, is_encrypt_init); if (rv != CRYPTO_SUCCESS) { crypto_free_mode_ctx(aes_ctx); @@ -390,7 +300,7 @@ aes_copy_block64(uint8_t *in, uint64_t *out) static int aes_encrypt(crypto_ctx_t *ctx, crypto_data_t *plaintext, - crypto_data_t *ciphertext, crypto_req_handle_t req) + crypto_data_t *ciphertext) { int ret = CRYPTO_FAILED; @@ -442,7 +352,7 @@ aes_encrypt(crypto_ctx_t *ctx, crypto_data_t *plaintext, /* * Do an update on the specified input data. */ - ret = aes_encrypt_update(ctx, plaintext, ciphertext, req); + ret = aes_encrypt_update(ctx, plaintext, ciphertext); if (ret != CRYPTO_SUCCESS) { return (ret); } @@ -505,7 +415,7 @@ aes_encrypt(crypto_ctx_t *ctx, crypto_data_t *plaintext, static int aes_decrypt(crypto_ctx_t *ctx, crypto_data_t *ciphertext, - crypto_data_t *plaintext, crypto_req_handle_t req) + crypto_data_t *plaintext) { int ret = CRYPTO_FAILED; @@ -563,7 +473,7 @@ aes_decrypt(crypto_ctx_t *ctx, crypto_data_t *ciphertext, /* * Do an update on the specified input data. 
*/ - ret = aes_decrypt_update(ctx, ciphertext, plaintext, req); + ret = aes_decrypt_update(ctx, ciphertext, plaintext); if (ret != CRYPTO_SUCCESS) { goto cleanup; } @@ -617,10 +527,9 @@ cleanup: } -/* ARGSUSED */ static int aes_encrypt_update(crypto_ctx_t *ctx, crypto_data_t *plaintext, - crypto_data_t *ciphertext, crypto_req_handle_t req) + crypto_data_t *ciphertext) { off_t saved_offset; size_t saved_length, out_len; @@ -652,13 +561,11 @@ aes_encrypt_update(crypto_ctx_t *ctx, crypto_data_t *plaintext, switch (plaintext->cd_format) { case CRYPTO_DATA_RAW: ret = crypto_update_iov(ctx->cc_provider_private, - plaintext, ciphertext, aes_encrypt_contiguous_blocks, - aes_copy_block64); + plaintext, ciphertext, aes_encrypt_contiguous_blocks); break; case CRYPTO_DATA_UIO: ret = crypto_update_uio(ctx->cc_provider_private, - plaintext, ciphertext, aes_encrypt_contiguous_blocks, - aes_copy_block64); + plaintext, ciphertext, aes_encrypt_contiguous_blocks); break; default: ret = CRYPTO_ARGUMENTS_BAD; @@ -690,7 +597,7 @@ aes_encrypt_update(crypto_ctx_t *ctx, crypto_data_t *plaintext, static int aes_decrypt_update(crypto_ctx_t *ctx, crypto_data_t *ciphertext, - crypto_data_t *plaintext, crypto_req_handle_t req) + crypto_data_t *plaintext) { off_t saved_offset; size_t saved_length, out_len; @@ -722,22 +629,17 @@ aes_decrypt_update(crypto_ctx_t *ctx, crypto_data_t *ciphertext, saved_offset = plaintext->cd_offset; saved_length = plaintext->cd_length; - if (aes_ctx->ac_flags & (GCM_MODE|GMAC_MODE)) - gcm_set_kmflag((gcm_ctx_t *)aes_ctx, crypto_kmflag(req)); - /* * Do the AES update on the specified input data. 
*/ switch (ciphertext->cd_format) { case CRYPTO_DATA_RAW: ret = crypto_update_iov(ctx->cc_provider_private, - ciphertext, plaintext, aes_decrypt_contiguous_blocks, - aes_copy_block64); + ciphertext, plaintext, aes_decrypt_contiguous_blocks); break; case CRYPTO_DATA_UIO: ret = crypto_update_uio(ctx->cc_provider_private, - ciphertext, plaintext, aes_decrypt_contiguous_blocks, - aes_copy_block64); + ciphertext, plaintext, aes_decrypt_contiguous_blocks); break; default: ret = CRYPTO_ARGUMENTS_BAD; @@ -769,10 +671,8 @@ aes_decrypt_update(crypto_ctx_t *ctx, crypto_data_t *ciphertext, return (ret); } -/* ARGSUSED */ static int -aes_encrypt_final(crypto_ctx_t *ctx, crypto_data_t *data, - crypto_req_handle_t req) +aes_encrypt_final(crypto_ctx_t *ctx, crypto_data_t *data) { aes_ctx_t *aes_ctx; int ret; @@ -826,10 +726,8 @@ aes_encrypt_final(crypto_ctx_t *ctx, crypto_data_t *data, return (CRYPTO_SUCCESS); } -/* ARGSUSED */ static int -aes_decrypt_final(crypto_ctx_t *ctx, crypto_data_t *data, - crypto_req_handle_t req) +aes_decrypt_final(crypto_ctx_t *ctx, crypto_data_t *data) { aes_ctx_t *aes_ctx; int ret; @@ -929,14 +827,12 @@ aes_decrypt_final(crypto_ctx_t *ctx, crypto_data_t *data, return (CRYPTO_SUCCESS); } -/* ARGSUSED */ static int -aes_encrypt_atomic(crypto_provider_handle_t provider, - crypto_session_id_t session_id, crypto_mechanism_t *mechanism, +aes_encrypt_atomic(crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_data_t *plaintext, crypto_data_t *ciphertext, - crypto_spi_ctx_template_t template, crypto_req_handle_t req) + crypto_spi_ctx_template_t template) { - aes_ctx_t aes_ctx; /* on the stack */ + aes_ctx_t aes_ctx = {{{{0}}}}; off_t saved_offset; size_t saved_length; size_t length_needed; @@ -959,13 +855,11 @@ aes_encrypt_atomic(crypto_provider_handle_t provider, return (CRYPTO_DATA_LEN_RANGE); } - if ((ret = aes_check_mech_param(mechanism, NULL, 0)) != CRYPTO_SUCCESS) + if ((ret = aes_check_mech_param(mechanism, NULL)) != CRYPTO_SUCCESS) return (ret); - 
bzero(&aes_ctx, sizeof (aes_ctx_t)); - ret = aes_common_init_ctx(&aes_ctx, template, mechanism, key, - crypto_kmflag(req), B_TRUE); + KM_SLEEP, B_TRUE); if (ret != CRYPTO_SUCCESS) return (ret); @@ -976,7 +870,7 @@ aes_encrypt_atomic(crypto_provider_handle_t provider, case AES_GMAC_MECH_INFO_TYPE: if (plaintext->cd_length != 0) return (CRYPTO_ARGUMENTS_BAD); - fallthrough; + zfs_fallthrough; case AES_GCM_MECH_INFO_TYPE: length_needed = plaintext->cd_length + aes_ctx.ac_tag_len; break; @@ -1000,11 +894,11 @@ aes_encrypt_atomic(crypto_provider_handle_t provider, switch (plaintext->cd_format) { case CRYPTO_DATA_RAW: ret = crypto_update_iov(&aes_ctx, plaintext, ciphertext, - aes_encrypt_contiguous_blocks, aes_copy_block64); + aes_encrypt_contiguous_blocks); break; case CRYPTO_DATA_UIO: ret = crypto_update_uio(&aes_ctx, plaintext, ciphertext, - aes_encrypt_contiguous_blocks, aes_copy_block64); + aes_encrypt_contiguous_blocks); break; default: ret = CRYPTO_ARGUMENTS_BAD; @@ -1048,31 +942,21 @@ aes_encrypt_atomic(crypto_provider_handle_t provider, out: if (aes_ctx.ac_flags & PROVIDER_OWNS_KEY_SCHEDULE) { - bzero(aes_ctx.ac_keysched, aes_ctx.ac_keysched_len); + memset(aes_ctx.ac_keysched, 0, aes_ctx.ac_keysched_len); kmem_free(aes_ctx.ac_keysched, aes_ctx.ac_keysched_len); } -#ifdef CAN_USE_GCM_ASM - if (aes_ctx.ac_flags & (GCM_MODE|GMAC_MODE) && - ((gcm_ctx_t *)&aes_ctx)->gcm_Htable != NULL) { - - gcm_ctx_t *ctx = (gcm_ctx_t *)&aes_ctx; - - bzero(ctx->gcm_Htable, ctx->gcm_htab_len); - kmem_free(ctx->gcm_Htable, ctx->gcm_htab_len); + if (aes_ctx.ac_flags & (GCM_MODE|GMAC_MODE)) { + gcm_clear_ctx((gcm_ctx_t *)&aes_ctx); } -#endif - return (ret); } -/* ARGSUSED */ static int -aes_decrypt_atomic(crypto_provider_handle_t provider, - crypto_session_id_t session_id, crypto_mechanism_t *mechanism, +aes_decrypt_atomic(crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_data_t *ciphertext, crypto_data_t *plaintext, - crypto_spi_ctx_template_t template, crypto_req_handle_t req) 
+ crypto_spi_ctx_template_t template) { - aes_ctx_t aes_ctx; /* on the stack */ + aes_ctx_t aes_ctx = {{{{0}}}}; off_t saved_offset; size_t saved_length; size_t length_needed; @@ -1095,13 +979,11 @@ aes_decrypt_atomic(crypto_provider_handle_t provider, return (CRYPTO_ENCRYPTED_DATA_LEN_RANGE); } - if ((ret = aes_check_mech_param(mechanism, NULL, 0)) != CRYPTO_SUCCESS) + if ((ret = aes_check_mech_param(mechanism, NULL)) != CRYPTO_SUCCESS) return (ret); - bzero(&aes_ctx, sizeof (aes_ctx_t)); - ret = aes_common_init_ctx(&aes_ctx, template, mechanism, key, - crypto_kmflag(req), B_FALSE); + KM_SLEEP, B_FALSE); if (ret != CRYPTO_SUCCESS) return (ret); @@ -1131,21 +1013,17 @@ aes_decrypt_atomic(crypto_provider_handle_t provider, saved_offset = plaintext->cd_offset; saved_length = plaintext->cd_length; - if (mechanism->cm_type == AES_GCM_MECH_INFO_TYPE || - mechanism->cm_type == AES_GMAC_MECH_INFO_TYPE) - gcm_set_kmflag((gcm_ctx_t *)&aes_ctx, crypto_kmflag(req)); - /* * Do an update on the specified input data. 
*/ switch (ciphertext->cd_format) { case CRYPTO_DATA_RAW: ret = crypto_update_iov(&aes_ctx, ciphertext, plaintext, - aes_decrypt_contiguous_blocks, aes_copy_block64); + aes_decrypt_contiguous_blocks); break; case CRYPTO_DATA_UIO: ret = crypto_update_uio(&aes_ctx, ciphertext, plaintext, - aes_decrypt_contiguous_blocks, aes_copy_block64); + aes_decrypt_contiguous_blocks); break; default: ret = CRYPTO_ARGUMENTS_BAD; @@ -1206,7 +1084,7 @@ aes_decrypt_atomic(crypto_provider_handle_t provider, out: if (aes_ctx.ac_flags & PROVIDER_OWNS_KEY_SCHEDULE) { - bzero(aes_ctx.ac_keysched, aes_ctx.ac_keysched_len); + memset(aes_ctx.ac_keysched, 0, aes_ctx.ac_keysched_len); kmem_free(aes_ctx.ac_keysched, aes_ctx.ac_keysched_len); } @@ -1215,18 +1093,7 @@ out: vmem_free(aes_ctx.ac_pt_buf, aes_ctx.ac_data_len); } } else if (aes_ctx.ac_flags & (GCM_MODE|GMAC_MODE)) { - if (((gcm_ctx_t *)&aes_ctx)->gcm_pt_buf != NULL) { - vmem_free(((gcm_ctx_t *)&aes_ctx)->gcm_pt_buf, - ((gcm_ctx_t *)&aes_ctx)->gcm_pt_buf_len); - } -#ifdef CAN_USE_GCM_ASM - if (((gcm_ctx_t *)&aes_ctx)->gcm_Htable != NULL) { - gcm_ctx_t *ctx = (gcm_ctx_t *)&aes_ctx; - - bzero(ctx->gcm_Htable, ctx->gcm_htab_len); - kmem_free(ctx->gcm_Htable, ctx->gcm_htab_len); - } -#endif + gcm_clear_ctx((gcm_ctx_t *)&aes_ctx); } return (ret); @@ -1235,11 +1102,9 @@ out: /* * KCF software provider context template entry points. 
*/ -/* ARGSUSED */ static int -aes_create_ctx_template(crypto_provider_handle_t provider, - crypto_mechanism_t *mechanism, crypto_key_t *key, - crypto_spi_ctx_template_t *tmpl, size_t *tmpl_size, crypto_req_handle_t req) +aes_create_ctx_template(crypto_mechanism_t *mechanism, crypto_key_t *key, + crypto_spi_ctx_template_t *tmpl, size_t *tmpl_size) { void *keysched; size_t size; @@ -1253,8 +1118,7 @@ aes_create_ctx_template(crypto_provider_handle_t provider, mechanism->cm_type != AES_GMAC_MECH_INFO_TYPE) return (CRYPTO_MECHANISM_INVALID); - if ((keysched = aes_alloc_keysched(&size, - crypto_kmflag(req))) == NULL) { + if ((keysched = aes_alloc_keysched(&size, KM_SLEEP)) == NULL) { return (CRYPTO_HOST_MEMORY); } @@ -1263,7 +1127,7 @@ aes_create_ctx_template(crypto_provider_handle_t provider, * in the key. */ if ((rv = init_keysched(key, keysched)) != CRYPTO_SUCCESS) { - bzero(keysched, size); + memset(keysched, 0, size); kmem_free(keysched, size); return (rv); } @@ -1283,7 +1147,8 @@ aes_free_context(crypto_ctx_t *ctx) if (aes_ctx != NULL) { if (aes_ctx->ac_flags & PROVIDER_OWNS_KEY_SCHEDULE) { ASSERT(aes_ctx->ac_keysched_len != 0); - bzero(aes_ctx->ac_keysched, aes_ctx->ac_keysched_len); + memset(aes_ctx->ac_keysched, 0, + aes_ctx->ac_keysched_len); kmem_free(aes_ctx->ac_keysched, aes_ctx->ac_keysched_len); } @@ -1373,7 +1238,7 @@ aes_common_init_ctx(aes_ctx_t *aes_ctx, crypto_spi_ctx_template_t *template, if (rv != CRYPTO_SUCCESS) { if (aes_ctx->ac_flags & PROVIDER_OWNS_KEY_SCHEDULE) { - bzero(keysched, size); + memset(keysched, 0, size); kmem_free(keysched, size); } } @@ -1413,10 +1278,9 @@ process_gmac_mech(crypto_mechanism_t *mech, crypto_data_t *data, } static int -aes_mac_atomic(crypto_provider_handle_t provider, - crypto_session_id_t session_id, crypto_mechanism_t *mechanism, +aes_mac_atomic(crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac, - crypto_spi_ctx_template_t template, crypto_req_handle_t req) + 
crypto_spi_ctx_template_t template) { CK_AES_GCM_PARAMS gcm_params; crypto_mechanism_t gcm_mech; @@ -1430,15 +1294,13 @@ aes_mac_atomic(crypto_provider_handle_t provider, gcm_mech.cm_param_len = sizeof (CK_AES_GCM_PARAMS); gcm_mech.cm_param = (char *)&gcm_params; - return (aes_encrypt_atomic(provider, session_id, &gcm_mech, - key, &null_crypto_data, mac, template, req)); + return (aes_encrypt_atomic(&gcm_mech, + key, &null_crypto_data, mac, template)); } static int -aes_mac_verify_atomic(crypto_provider_handle_t provider, - crypto_session_id_t session_id, crypto_mechanism_t *mechanism, - crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac, - crypto_spi_ctx_template_t template, crypto_req_handle_t req) +aes_mac_verify_atomic(crypto_mechanism_t *mechanism, crypto_key_t *key, + crypto_data_t *data, crypto_data_t *mac, crypto_spi_ctx_template_t template) { CK_AES_GCM_PARAMS gcm_params; crypto_mechanism_t gcm_mech; @@ -1452,6 +1314,6 @@ aes_mac_verify_atomic(crypto_provider_handle_t provider, gcm_mech.cm_param_len = sizeof (CK_AES_GCM_PARAMS); gcm_mech.cm_param = (char *)&gcm_params; - return (aes_decrypt_atomic(provider, session_id, &gcm_mech, - key, mac, &null_crypto_data, template, req)); + return (aes_decrypt_atomic(&gcm_mech, + key, mac, &null_crypto_data, template)); } diff --git a/sys/contrib/openzfs/module/icp/io/edonr_mod.c b/sys/contrib/openzfs/module/icp/io/edonr_mod.c deleted file mode 100644 index a806af610629..000000000000 --- a/sys/contrib/openzfs/module/icp/io/edonr_mod.c +++ /dev/null @@ -1,63 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://opensource.org/licenses/CDDL-1.0. 
- * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2013 Saso Kiselkov. All rights reserved. - */ - -#include <sys/modctl.h> -#include <sys/crypto/common.h> -#include <sys/crypto/icp.h> -#include <sys/crypto/spi.h> -#include <sys/sysmacros.h> -#include <sys/edonr.h> - -/* - * Unlike sha2 or skein, we won't expose edonr via the Kernel Cryptographic - * Framework (KCF), because Edon-R is *NOT* suitable for general-purpose - * cryptographic use. Users of Edon-R must interface directly to this module. - */ - -static struct modlmisc modlmisc = { - &mod_cryptoops, - "Edon-R Message-Digest Algorithm" -}; - -static struct modlinkage modlinkage = { - MODREV_1, {&modlmisc, NULL} -}; - -int -edonr_mod_init(void) -{ - int error; - - if ((error = mod_install(&modlinkage)) != 0) - return (error); - - return (0); -} - -int -edonr_mod_fini(void) -{ - return (mod_remove(&modlinkage)); -} diff --git a/sys/contrib/openzfs/module/icp/io/sha1_mod.c b/sys/contrib/openzfs/module/icp/io/sha1_mod.c deleted file mode 100644 index 6dcee6b2ecf2..000000000000 --- a/sys/contrib/openzfs/module/icp/io/sha1_mod.c +++ /dev/null @@ -1,1230 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. 
- * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ - -/* - * Copyright 2010 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include <sys/zfs_context.h> -#include <sys/modctl.h> -#include <sys/crypto/common.h> -#include <sys/crypto/icp.h> -#include <sys/crypto/spi.h> - -#include <sha1/sha1.h> -#include <sha1/sha1_impl.h> - -/* - * The sha1 module is created with two modlinkages: - * - a modlmisc that allows consumers to directly call the entry points - * SHA1Init, SHA1Update, and SHA1Final. - * - a modlcrypto that allows the module to register with the Kernel - * Cryptographic Framework (KCF) as a software provider for the SHA1 - * mechanisms. - */ - -static struct modlcrypto modlcrypto = { - &mod_cryptoops, - "SHA1 Kernel SW Provider 1.1" -}; - -static struct modlinkage modlinkage = { - MODREV_1, { &modlcrypto, NULL } -}; - - -/* - * Macros to access the SHA1 or SHA1-HMAC contexts from a context passed - * by KCF to one of the entry points. 
- */ - -#define PROV_SHA1_CTX(ctx) ((sha1_ctx_t *)(ctx)->cc_provider_private) -#define PROV_SHA1_HMAC_CTX(ctx) ((sha1_hmac_ctx_t *)(ctx)->cc_provider_private) - -/* to extract the digest length passed as mechanism parameter */ -#define PROV_SHA1_GET_DIGEST_LEN(m, len) { \ - if (IS_P2ALIGNED((m)->cm_param, sizeof (ulong_t))) \ - (len) = (uint32_t)*((ulong_t *)(void *)mechanism->cm_param); \ - else { \ - ulong_t tmp_ulong; \ - bcopy((m)->cm_param, &tmp_ulong, sizeof (ulong_t)); \ - (len) = (uint32_t)tmp_ulong; \ - } \ -} - -#define PROV_SHA1_DIGEST_KEY(ctx, key, len, digest) { \ - SHA1Init(ctx); \ - SHA1Update(ctx, key, len); \ - SHA1Final(digest, ctx); \ -} - -/* - * Mechanism info structure passed to KCF during registration. - */ -static crypto_mech_info_t sha1_mech_info_tab[] = { - /* SHA1 */ - {SUN_CKM_SHA1, SHA1_MECH_INFO_TYPE, - CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC, - 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS}, - /* SHA1-HMAC */ - {SUN_CKM_SHA1_HMAC, SHA1_HMAC_MECH_INFO_TYPE, - CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, - SHA1_HMAC_MIN_KEY_LEN, SHA1_HMAC_MAX_KEY_LEN, - CRYPTO_KEYSIZE_UNIT_IN_BYTES}, - /* SHA1-HMAC GENERAL */ - {SUN_CKM_SHA1_HMAC_GENERAL, SHA1_HMAC_GEN_MECH_INFO_TYPE, - CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, - SHA1_HMAC_MIN_KEY_LEN, SHA1_HMAC_MAX_KEY_LEN, - CRYPTO_KEYSIZE_UNIT_IN_BYTES} -}; - -static void sha1_provider_status(crypto_provider_handle_t, uint_t *); - -static crypto_control_ops_t sha1_control_ops = { - sha1_provider_status -}; - -static int sha1_digest_init(crypto_ctx_t *, crypto_mechanism_t *, - crypto_req_handle_t); -static int sha1_digest(crypto_ctx_t *, crypto_data_t *, crypto_data_t *, - crypto_req_handle_t); -static int sha1_digest_update(crypto_ctx_t *, crypto_data_t *, - crypto_req_handle_t); -static int sha1_digest_final(crypto_ctx_t *, crypto_data_t *, - crypto_req_handle_t); -static int sha1_digest_atomic(crypto_provider_handle_t, crypto_session_id_t, - crypto_mechanism_t *, crypto_data_t *, crypto_data_t *, - 
crypto_req_handle_t); - -static crypto_digest_ops_t sha1_digest_ops = { - .digest_init = sha1_digest_init, - .digest = sha1_digest, - .digest_update = sha1_digest_update, - .digest_key = NULL, - .digest_final = sha1_digest_final, - .digest_atomic = sha1_digest_atomic -}; - -static int sha1_mac_init(crypto_ctx_t *, crypto_mechanism_t *, crypto_key_t *, - crypto_spi_ctx_template_t, crypto_req_handle_t); -static int sha1_mac_update(crypto_ctx_t *, crypto_data_t *, - crypto_req_handle_t); -static int sha1_mac_final(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t); -static int sha1_mac_atomic(crypto_provider_handle_t, crypto_session_id_t, - crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *, - crypto_spi_ctx_template_t, crypto_req_handle_t); -static int sha1_mac_verify_atomic(crypto_provider_handle_t, crypto_session_id_t, - crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *, - crypto_spi_ctx_template_t, crypto_req_handle_t); - -static crypto_mac_ops_t sha1_mac_ops = { - .mac_init = sha1_mac_init, - .mac = NULL, - .mac_update = sha1_mac_update, - .mac_final = sha1_mac_final, - .mac_atomic = sha1_mac_atomic, - .mac_verify_atomic = sha1_mac_verify_atomic -}; - -static int sha1_create_ctx_template(crypto_provider_handle_t, - crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t *, - size_t *, crypto_req_handle_t); -static int sha1_free_context(crypto_ctx_t *); - -static crypto_ctx_ops_t sha1_ctx_ops = { - .create_ctx_template = sha1_create_ctx_template, - .free_context = sha1_free_context -}; - -static crypto_ops_t sha1_crypto_ops = {{{{{ - &sha1_control_ops, - &sha1_digest_ops, - NULL, - &sha1_mac_ops, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - &sha1_ctx_ops, -}}}}}; - -static crypto_provider_info_t sha1_prov_info = {{{{ - CRYPTO_SPI_VERSION_1, - "SHA1 Software Provider", - CRYPTO_SW_PROVIDER, - NULL, - &sha1_crypto_ops, - sizeof (sha1_mech_info_tab)/sizeof (crypto_mech_info_t), - 
sha1_mech_info_tab -}}}}; - -static crypto_kcf_provider_handle_t sha1_prov_handle = 0; - -int -sha1_mod_init(void) -{ - int ret; - - if ((ret = mod_install(&modlinkage)) != 0) - return (ret); - - /* - * Register with KCF. If the registration fails, log an - * error but do not uninstall the module, since the functionality - * provided by misc/sha1 should still be available. - */ - if ((ret = crypto_register_provider(&sha1_prov_info, - &sha1_prov_handle)) != CRYPTO_SUCCESS) - cmn_err(CE_WARN, "sha1 _init: " - "crypto_register_provider() failed (0x%x)", ret); - - return (0); -} - -int -sha1_mod_fini(void) -{ - int ret; - - if (sha1_prov_handle != 0) { - if ((ret = crypto_unregister_provider(sha1_prov_handle)) != - CRYPTO_SUCCESS) { - cmn_err(CE_WARN, - "sha1 _fini: crypto_unregister_provider() " - "failed (0x%x)", ret); - return (EBUSY); - } - sha1_prov_handle = 0; - } - - return (mod_remove(&modlinkage)); -} - -/* - * KCF software provider control entry points. - */ -/* ARGSUSED */ -static void -sha1_provider_status(crypto_provider_handle_t provider, uint_t *status) -{ - *status = CRYPTO_PROVIDER_READY; -} - -/* - * KCF software provider digest entry points. - */ - -static int -sha1_digest_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism, - crypto_req_handle_t req) -{ - if (mechanism->cm_type != SHA1_MECH_INFO_TYPE) - return (CRYPTO_MECHANISM_INVALID); - - /* - * Allocate and initialize SHA1 context. - */ - ctx->cc_provider_private = kmem_alloc(sizeof (sha1_ctx_t), - crypto_kmflag(req)); - if (ctx->cc_provider_private == NULL) - return (CRYPTO_HOST_MEMORY); - - PROV_SHA1_CTX(ctx)->sc_mech_type = SHA1_MECH_INFO_TYPE; - SHA1Init(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx); - - return (CRYPTO_SUCCESS); -} - -/* - * Helper SHA1 digest update function for uio data. 
- */ -static int -sha1_digest_update_uio(SHA1_CTX *sha1_ctx, crypto_data_t *data) -{ - off_t offset = data->cd_offset; - size_t length = data->cd_length; - uint_t vec_idx = 0; - size_t cur_len; - - /* we support only kernel buffer */ - if (zfs_uio_segflg(data->cd_uio) != UIO_SYSSPACE) - return (CRYPTO_ARGUMENTS_BAD); - - /* - * Jump to the first iovec containing data to be - * digested. - */ - offset = zfs_uio_index_at_offset(data->cd_uio, offset, &vec_idx); - if (vec_idx == zfs_uio_iovcnt(data->cd_uio)) { - /* - * The caller specified an offset that is larger than the - * total size of the buffers it provided. - */ - return (CRYPTO_DATA_LEN_RANGE); - } - - /* - * Now do the digesting on the iovecs. - */ - while (vec_idx < zfs_uio_iovcnt(data->cd_uio) && length > 0) { - cur_len = MIN(zfs_uio_iovlen(data->cd_uio, vec_idx) - - offset, length); - - SHA1Update(sha1_ctx, - (uint8_t *)zfs_uio_iovbase(data->cd_uio, vec_idx) + offset, - cur_len); - - length -= cur_len; - vec_idx++; - offset = 0; - } - - if (vec_idx == zfs_uio_iovcnt(data->cd_uio) && length > 0) { - /* - * The end of the specified iovec's was reached but - * the length requested could not be processed, i.e. - * The caller requested to digest more data than it provided. - */ - return (CRYPTO_DATA_LEN_RANGE); - } - - return (CRYPTO_SUCCESS); -} - -/* - * Helper SHA1 digest final function for uio data. - * digest_len is the length of the desired digest. If digest_len - * is smaller than the default SHA1 digest length, the caller - * must pass a scratch buffer, digest_scratch, which must - * be at least SHA1_DIGEST_LENGTH bytes. 
- */ -static int -sha1_digest_final_uio(SHA1_CTX *sha1_ctx, crypto_data_t *digest, - ulong_t digest_len, uchar_t *digest_scratch) -{ - off_t offset = digest->cd_offset; - uint_t vec_idx = 0; - - /* we support only kernel buffer */ - if (zfs_uio_segflg(digest->cd_uio) != UIO_SYSSPACE) - return (CRYPTO_ARGUMENTS_BAD); - - /* - * Jump to the first iovec containing ptr to the digest to - * be returned. - */ - offset = zfs_uio_index_at_offset(digest->cd_uio, offset, &vec_idx); - if (vec_idx == zfs_uio_iovcnt(digest->cd_uio)) { - /* - * The caller specified an offset that is - * larger than the total size of the buffers - * it provided. - */ - return (CRYPTO_DATA_LEN_RANGE); - } - - if (offset + digest_len <= - zfs_uio_iovlen(digest->cd_uio, vec_idx)) { - /* - * The computed SHA1 digest will fit in the current - * iovec. - */ - if (digest_len != SHA1_DIGEST_LENGTH) { - /* - * The caller requested a short digest. Digest - * into a scratch buffer and return to - * the user only what was requested. - */ - SHA1Final(digest_scratch, sha1_ctx); - bcopy(digest_scratch, (uchar_t *) - zfs_uio_iovbase(digest->cd_uio, vec_idx) + offset, - digest_len); - } else { - SHA1Final((uchar_t *)zfs_uio_iovbase(digest-> - cd_uio, vec_idx) + offset, - sha1_ctx); - } - } else { - /* - * The computed digest will be crossing one or more iovec's. - * This is bad performance-wise but we need to support it. - * Allocate a small scratch buffer on the stack and - * copy it piece meal to the specified digest iovec's. 
- */ - uchar_t digest_tmp[SHA1_DIGEST_LENGTH]; - off_t scratch_offset = 0; - size_t length = digest_len; - size_t cur_len; - - SHA1Final(digest_tmp, sha1_ctx); - - while (vec_idx < zfs_uio_iovcnt(digest->cd_uio) && length > 0) { - cur_len = MIN(zfs_uio_iovlen(digest->cd_uio, vec_idx) - - offset, length); - bcopy(digest_tmp + scratch_offset, - zfs_uio_iovbase(digest->cd_uio, vec_idx) + offset, - cur_len); - - length -= cur_len; - vec_idx++; - scratch_offset += cur_len; - offset = 0; - } - - if (vec_idx == zfs_uio_iovcnt(digest->cd_uio) && length > 0) { - /* - * The end of the specified iovec's was reached but - * the length requested could not be processed, i.e. - * The caller requested to digest more data than it - * provided. - */ - return (CRYPTO_DATA_LEN_RANGE); - } - } - - return (CRYPTO_SUCCESS); -} - -/* ARGSUSED */ -static int -sha1_digest(crypto_ctx_t *ctx, crypto_data_t *data, crypto_data_t *digest, - crypto_req_handle_t req) -{ - int ret = CRYPTO_SUCCESS; - - ASSERT(ctx->cc_provider_private != NULL); - - /* - * We need to just return the length needed to store the output. - * We should not destroy the context for the following cases. - */ - if ((digest->cd_length == 0) || - (digest->cd_length < SHA1_DIGEST_LENGTH)) { - digest->cd_length = SHA1_DIGEST_LENGTH; - return (CRYPTO_BUFFER_TOO_SMALL); - } - - /* - * Do the SHA1 update on the specified input data. 
- */ - switch (data->cd_format) { - case CRYPTO_DATA_RAW: - SHA1Update(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx, - (uint8_t *)data->cd_raw.iov_base + data->cd_offset, - data->cd_length); - break; - case CRYPTO_DATA_UIO: - ret = sha1_digest_update_uio(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx, - data); - break; - default: - ret = CRYPTO_ARGUMENTS_BAD; - } - - if (ret != CRYPTO_SUCCESS) { - /* the update failed, free context and bail */ - kmem_free(ctx->cc_provider_private, sizeof (sha1_ctx_t)); - ctx->cc_provider_private = NULL; - digest->cd_length = 0; - return (ret); - } - - /* - * Do a SHA1 final, must be done separately since the digest - * type can be different than the input data type. - */ - switch (digest->cd_format) { - case CRYPTO_DATA_RAW: - SHA1Final((unsigned char *)digest->cd_raw.iov_base + - digest->cd_offset, &PROV_SHA1_CTX(ctx)->sc_sha1_ctx); - break; - case CRYPTO_DATA_UIO: - ret = sha1_digest_final_uio(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx, - digest, SHA1_DIGEST_LENGTH, NULL); - break; - default: - ret = CRYPTO_ARGUMENTS_BAD; - } - - /* all done, free context and return */ - - if (ret == CRYPTO_SUCCESS) { - digest->cd_length = SHA1_DIGEST_LENGTH; - } else { - digest->cd_length = 0; - } - - kmem_free(ctx->cc_provider_private, sizeof (sha1_ctx_t)); - ctx->cc_provider_private = NULL; - return (ret); -} - -/* ARGSUSED */ -static int -sha1_digest_update(crypto_ctx_t *ctx, crypto_data_t *data, - crypto_req_handle_t req) -{ - int ret = CRYPTO_SUCCESS; - - ASSERT(ctx->cc_provider_private != NULL); - - /* - * Do the SHA1 update on the specified input data. 
- */ - switch (data->cd_format) { - case CRYPTO_DATA_RAW: - SHA1Update(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx, - (uint8_t *)data->cd_raw.iov_base + data->cd_offset, - data->cd_length); - break; - case CRYPTO_DATA_UIO: - ret = sha1_digest_update_uio(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx, - data); - break; - default: - ret = CRYPTO_ARGUMENTS_BAD; - } - - return (ret); -} - -/* ARGSUSED */ -static int -sha1_digest_final(crypto_ctx_t *ctx, crypto_data_t *digest, - crypto_req_handle_t req) -{ - int ret = CRYPTO_SUCCESS; - - ASSERT(ctx->cc_provider_private != NULL); - - /* - * We need to just return the length needed to store the output. - * We should not destroy the context for the following cases. - */ - if ((digest->cd_length == 0) || - (digest->cd_length < SHA1_DIGEST_LENGTH)) { - digest->cd_length = SHA1_DIGEST_LENGTH; - return (CRYPTO_BUFFER_TOO_SMALL); - } - - /* - * Do a SHA1 final. - */ - switch (digest->cd_format) { - case CRYPTO_DATA_RAW: - SHA1Final((unsigned char *)digest->cd_raw.iov_base + - digest->cd_offset, &PROV_SHA1_CTX(ctx)->sc_sha1_ctx); - break; - case CRYPTO_DATA_UIO: - ret = sha1_digest_final_uio(&PROV_SHA1_CTX(ctx)->sc_sha1_ctx, - digest, SHA1_DIGEST_LENGTH, NULL); - break; - default: - ret = CRYPTO_ARGUMENTS_BAD; - } - - /* all done, free context and return */ - - if (ret == CRYPTO_SUCCESS) { - digest->cd_length = SHA1_DIGEST_LENGTH; - } else { - digest->cd_length = 0; - } - - kmem_free(ctx->cc_provider_private, sizeof (sha1_ctx_t)); - ctx->cc_provider_private = NULL; - - return (ret); -} - -/* ARGSUSED */ -static int -sha1_digest_atomic(crypto_provider_handle_t provider, - crypto_session_id_t session_id, crypto_mechanism_t *mechanism, - crypto_data_t *data, crypto_data_t *digest, - crypto_req_handle_t req) -{ - int ret = CRYPTO_SUCCESS; - SHA1_CTX sha1_ctx; - - if (mechanism->cm_type != SHA1_MECH_INFO_TYPE) - return (CRYPTO_MECHANISM_INVALID); - - /* - * Do the SHA1 init. 
- */ - SHA1Init(&sha1_ctx); - - /* - * Do the SHA1 update on the specified input data. - */ - switch (data->cd_format) { - case CRYPTO_DATA_RAW: - SHA1Update(&sha1_ctx, - (uint8_t *)data->cd_raw.iov_base + data->cd_offset, - data->cd_length); - break; - case CRYPTO_DATA_UIO: - ret = sha1_digest_update_uio(&sha1_ctx, data); - break; - default: - ret = CRYPTO_ARGUMENTS_BAD; - } - - if (ret != CRYPTO_SUCCESS) { - /* the update failed, bail */ - digest->cd_length = 0; - return (ret); - } - - /* - * Do a SHA1 final, must be done separately since the digest - * type can be different than the input data type. - */ - switch (digest->cd_format) { - case CRYPTO_DATA_RAW: - SHA1Final((unsigned char *)digest->cd_raw.iov_base + - digest->cd_offset, &sha1_ctx); - break; - case CRYPTO_DATA_UIO: - ret = sha1_digest_final_uio(&sha1_ctx, digest, - SHA1_DIGEST_LENGTH, NULL); - break; - default: - ret = CRYPTO_ARGUMENTS_BAD; - } - - if (ret == CRYPTO_SUCCESS) { - digest->cd_length = SHA1_DIGEST_LENGTH; - } else { - digest->cd_length = 0; - } - - return (ret); -} - -/* - * KCF software provider mac entry points. - * - * SHA1 HMAC is: SHA1(key XOR opad, SHA1(key XOR ipad, text)) - * - * Init: - * The initialization routine initializes what we denote - * as the inner and outer contexts by doing - * - for inner context: SHA1(key XOR ipad) - * - for outer context: SHA1(key XOR opad) - * - * Update: - * Each subsequent SHA1 HMAC update will result in an - * update of the inner context with the specified data. - * - * Final: - * The SHA1 HMAC final will do a SHA1 final operation on the - * inner context, and the resulting digest will be used - * as the data for an update on the outer context. Last - * but not least, a SHA1 final on the outer context will - * be performed to obtain the SHA1 HMAC digest to return - * to the user. - */ - -/* - * Initialize a SHA1-HMAC context. 
- */ -static void -sha1_mac_init_ctx(sha1_hmac_ctx_t *ctx, void *keyval, uint_t length_in_bytes) -{ - uint32_t ipad[SHA1_HMAC_INTS_PER_BLOCK]; - uint32_t opad[SHA1_HMAC_INTS_PER_BLOCK]; - uint_t i; - - bzero(ipad, SHA1_HMAC_BLOCK_SIZE); - bzero(opad, SHA1_HMAC_BLOCK_SIZE); - - bcopy(keyval, ipad, length_in_bytes); - bcopy(keyval, opad, length_in_bytes); - - /* XOR key with ipad (0x36) and opad (0x5c) */ - for (i = 0; i < SHA1_HMAC_INTS_PER_BLOCK; i++) { - ipad[i] ^= 0x36363636; - opad[i] ^= 0x5c5c5c5c; - } - - /* perform SHA1 on ipad */ - SHA1Init(&ctx->hc_icontext); - SHA1Update(&ctx->hc_icontext, (uint8_t *)ipad, SHA1_HMAC_BLOCK_SIZE); - - /* perform SHA1 on opad */ - SHA1Init(&ctx->hc_ocontext); - SHA1Update(&ctx->hc_ocontext, (uint8_t *)opad, SHA1_HMAC_BLOCK_SIZE); -} - -/* - */ -static int -sha1_mac_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism, - crypto_key_t *key, crypto_spi_ctx_template_t ctx_template, - crypto_req_handle_t req) -{ - int ret = CRYPTO_SUCCESS; - uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length); - - if (mechanism->cm_type != SHA1_HMAC_MECH_INFO_TYPE && - mechanism->cm_type != SHA1_HMAC_GEN_MECH_INFO_TYPE) - return (CRYPTO_MECHANISM_INVALID); - - /* Add support for key by attributes (RFE 4706552) */ - if (key->ck_format != CRYPTO_KEY_RAW) - return (CRYPTO_ARGUMENTS_BAD); - - ctx->cc_provider_private = kmem_alloc(sizeof (sha1_hmac_ctx_t), - crypto_kmflag(req)); - if (ctx->cc_provider_private == NULL) - return (CRYPTO_HOST_MEMORY); - - if (ctx_template != NULL) { - /* reuse context template */ - bcopy(ctx_template, PROV_SHA1_HMAC_CTX(ctx), - sizeof (sha1_hmac_ctx_t)); - } else { - /* no context template, compute context */ - if (keylen_in_bytes > SHA1_HMAC_BLOCK_SIZE) { - uchar_t digested_key[SHA1_DIGEST_LENGTH]; - sha1_hmac_ctx_t *hmac_ctx = ctx->cc_provider_private; - - /* - * Hash the passed-in key to get a smaller key. - * The inner context is used since it hasn't been - * initialized yet. 
- */ - PROV_SHA1_DIGEST_KEY(&hmac_ctx->hc_icontext, - key->ck_data, keylen_in_bytes, digested_key); - sha1_mac_init_ctx(PROV_SHA1_HMAC_CTX(ctx), - digested_key, SHA1_DIGEST_LENGTH); - } else { - sha1_mac_init_ctx(PROV_SHA1_HMAC_CTX(ctx), - key->ck_data, keylen_in_bytes); - } - } - - /* - * Get the mechanism parameters, if applicable. - */ - PROV_SHA1_HMAC_CTX(ctx)->hc_mech_type = mechanism->cm_type; - if (mechanism->cm_type == SHA1_HMAC_GEN_MECH_INFO_TYPE) { - if (mechanism->cm_param == NULL || - mechanism->cm_param_len != sizeof (ulong_t)) - ret = CRYPTO_MECHANISM_PARAM_INVALID; - PROV_SHA1_GET_DIGEST_LEN(mechanism, - PROV_SHA1_HMAC_CTX(ctx)->hc_digest_len); - if (PROV_SHA1_HMAC_CTX(ctx)->hc_digest_len > - SHA1_DIGEST_LENGTH) - ret = CRYPTO_MECHANISM_PARAM_INVALID; - } - - if (ret != CRYPTO_SUCCESS) { - bzero(ctx->cc_provider_private, sizeof (sha1_hmac_ctx_t)); - kmem_free(ctx->cc_provider_private, sizeof (sha1_hmac_ctx_t)); - ctx->cc_provider_private = NULL; - } - - return (ret); -} - -/* ARGSUSED */ -static int -sha1_mac_update(crypto_ctx_t *ctx, crypto_data_t *data, crypto_req_handle_t req) -{ - int ret = CRYPTO_SUCCESS; - - ASSERT(ctx->cc_provider_private != NULL); - - /* - * Do a SHA1 update of the inner context using the specified - * data. 
- */ - switch (data->cd_format) { - case CRYPTO_DATA_RAW: - SHA1Update(&PROV_SHA1_HMAC_CTX(ctx)->hc_icontext, - (uint8_t *)data->cd_raw.iov_base + data->cd_offset, - data->cd_length); - break; - case CRYPTO_DATA_UIO: - ret = sha1_digest_update_uio( - &PROV_SHA1_HMAC_CTX(ctx)->hc_icontext, data); - break; - default: - ret = CRYPTO_ARGUMENTS_BAD; - } - - return (ret); -} - -/* ARGSUSED */ -static int -sha1_mac_final(crypto_ctx_t *ctx, crypto_data_t *mac, crypto_req_handle_t req) -{ - int ret = CRYPTO_SUCCESS; - uchar_t digest[SHA1_DIGEST_LENGTH]; - uint32_t digest_len = SHA1_DIGEST_LENGTH; - - ASSERT(ctx->cc_provider_private != NULL); - - if (PROV_SHA1_HMAC_CTX(ctx)->hc_mech_type == - SHA1_HMAC_GEN_MECH_INFO_TYPE) - digest_len = PROV_SHA1_HMAC_CTX(ctx)->hc_digest_len; - - /* - * We need to just return the length needed to store the output. - * We should not destroy the context for the following cases. - */ - if ((mac->cd_length == 0) || (mac->cd_length < digest_len)) { - mac->cd_length = digest_len; - return (CRYPTO_BUFFER_TOO_SMALL); - } - - /* - * Do a SHA1 final on the inner context. - */ - SHA1Final(digest, &PROV_SHA1_HMAC_CTX(ctx)->hc_icontext); - - /* - * Do a SHA1 update on the outer context, feeding the inner - * digest as data. - */ - SHA1Update(&PROV_SHA1_HMAC_CTX(ctx)->hc_ocontext, digest, - SHA1_DIGEST_LENGTH); - - /* - * Do a SHA1 final on the outer context, storing the computing - * digest in the users buffer. - */ - switch (mac->cd_format) { - case CRYPTO_DATA_RAW: - if (digest_len != SHA1_DIGEST_LENGTH) { - /* - * The caller requested a short digest. Digest - * into a scratch buffer and return to - * the user only what was requested. 
- */ - SHA1Final(digest, - &PROV_SHA1_HMAC_CTX(ctx)->hc_ocontext); - bcopy(digest, (unsigned char *)mac->cd_raw.iov_base + - mac->cd_offset, digest_len); - } else { - SHA1Final((unsigned char *)mac->cd_raw.iov_base + - mac->cd_offset, - &PROV_SHA1_HMAC_CTX(ctx)->hc_ocontext); - } - break; - case CRYPTO_DATA_UIO: - ret = sha1_digest_final_uio( - &PROV_SHA1_HMAC_CTX(ctx)->hc_ocontext, mac, - digest_len, digest); - break; - default: - ret = CRYPTO_ARGUMENTS_BAD; - } - - if (ret == CRYPTO_SUCCESS) { - mac->cd_length = digest_len; - } else { - mac->cd_length = 0; - } - - bzero(ctx->cc_provider_private, sizeof (sha1_hmac_ctx_t)); - kmem_free(ctx->cc_provider_private, sizeof (sha1_hmac_ctx_t)); - ctx->cc_provider_private = NULL; - - return (ret); -} - -#define SHA1_MAC_UPDATE(data, ctx, ret) { \ - switch (data->cd_format) { \ - case CRYPTO_DATA_RAW: \ - SHA1Update(&(ctx).hc_icontext, \ - (uint8_t *)data->cd_raw.iov_base + \ - data->cd_offset, data->cd_length); \ - break; \ - case CRYPTO_DATA_UIO: \ - ret = sha1_digest_update_uio(&(ctx).hc_icontext, data); \ - break; \ - default: \ - ret = CRYPTO_ARGUMENTS_BAD; \ - } \ -} - -/* ARGSUSED */ -static int -sha1_mac_atomic(crypto_provider_handle_t provider, - crypto_session_id_t session_id, crypto_mechanism_t *mechanism, - crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac, - crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req) -{ - int ret = CRYPTO_SUCCESS; - uchar_t digest[SHA1_DIGEST_LENGTH]; - sha1_hmac_ctx_t sha1_hmac_ctx; - uint32_t digest_len = SHA1_DIGEST_LENGTH; - uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length); - - if (mechanism->cm_type != SHA1_HMAC_MECH_INFO_TYPE && - mechanism->cm_type != SHA1_HMAC_GEN_MECH_INFO_TYPE) - return (CRYPTO_MECHANISM_INVALID); - - /* Add support for key by attributes (RFE 4706552) */ - if (key->ck_format != CRYPTO_KEY_RAW) - return (CRYPTO_ARGUMENTS_BAD); - - if (ctx_template != NULL) { - /* reuse context template */ - bcopy(ctx_template, &sha1_hmac_ctx, 
sizeof (sha1_hmac_ctx_t)); - } else { - /* no context template, initialize context */ - if (keylen_in_bytes > SHA1_HMAC_BLOCK_SIZE) { - /* - * Hash the passed-in key to get a smaller key. - * The inner context is used since it hasn't been - * initialized yet. - */ - PROV_SHA1_DIGEST_KEY(&sha1_hmac_ctx.hc_icontext, - key->ck_data, keylen_in_bytes, digest); - sha1_mac_init_ctx(&sha1_hmac_ctx, digest, - SHA1_DIGEST_LENGTH); - } else { - sha1_mac_init_ctx(&sha1_hmac_ctx, key->ck_data, - keylen_in_bytes); - } - } - - /* get the mechanism parameters, if applicable */ - if (mechanism->cm_type == SHA1_HMAC_GEN_MECH_INFO_TYPE) { - if (mechanism->cm_param == NULL || - mechanism->cm_param_len != sizeof (ulong_t)) { - ret = CRYPTO_MECHANISM_PARAM_INVALID; - goto bail; - } - PROV_SHA1_GET_DIGEST_LEN(mechanism, digest_len); - if (digest_len > SHA1_DIGEST_LENGTH) { - ret = CRYPTO_MECHANISM_PARAM_INVALID; - goto bail; - } - } - - /* do a SHA1 update of the inner context using the specified data */ - SHA1_MAC_UPDATE(data, sha1_hmac_ctx, ret); - if (ret != CRYPTO_SUCCESS) - /* the update failed, free context and bail */ - goto bail; - - /* - * Do a SHA1 final on the inner context. - */ - SHA1Final(digest, &sha1_hmac_ctx.hc_icontext); - - /* - * Do an SHA1 update on the outer context, feeding the inner - * digest as data. - */ - SHA1Update(&sha1_hmac_ctx.hc_ocontext, digest, SHA1_DIGEST_LENGTH); - - /* - * Do a SHA1 final on the outer context, storing the computed - * digest in the users buffer. - */ - switch (mac->cd_format) { - case CRYPTO_DATA_RAW: - if (digest_len != SHA1_DIGEST_LENGTH) { - /* - * The caller requested a short digest. Digest - * into a scratch buffer and return to - * the user only what was requested. 
- */ - SHA1Final(digest, &sha1_hmac_ctx.hc_ocontext); - bcopy(digest, (unsigned char *)mac->cd_raw.iov_base + - mac->cd_offset, digest_len); - } else { - SHA1Final((unsigned char *)mac->cd_raw.iov_base + - mac->cd_offset, &sha1_hmac_ctx.hc_ocontext); - } - break; - case CRYPTO_DATA_UIO: - ret = sha1_digest_final_uio(&sha1_hmac_ctx.hc_ocontext, mac, - digest_len, digest); - break; - default: - ret = CRYPTO_ARGUMENTS_BAD; - } - - if (ret == CRYPTO_SUCCESS) { - mac->cd_length = digest_len; - } else { - mac->cd_length = 0; - } - /* Extra paranoia: zeroize the context on the stack */ - bzero(&sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t)); - - return (ret); -bail: - bzero(&sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t)); - mac->cd_length = 0; - return (ret); -} - -/* ARGSUSED */ -static int -sha1_mac_verify_atomic(crypto_provider_handle_t provider, - crypto_session_id_t session_id, crypto_mechanism_t *mechanism, - crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac, - crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req) -{ - int ret = CRYPTO_SUCCESS; - uchar_t digest[SHA1_DIGEST_LENGTH]; - sha1_hmac_ctx_t sha1_hmac_ctx; - uint32_t digest_len = SHA1_DIGEST_LENGTH; - uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length); - - if (mechanism->cm_type != SHA1_HMAC_MECH_INFO_TYPE && - mechanism->cm_type != SHA1_HMAC_GEN_MECH_INFO_TYPE) - return (CRYPTO_MECHANISM_INVALID); - - /* Add support for key by attributes (RFE 4706552) */ - if (key->ck_format != CRYPTO_KEY_RAW) - return (CRYPTO_ARGUMENTS_BAD); - - if (ctx_template != NULL) { - /* reuse context template */ - bcopy(ctx_template, &sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t)); - } else { - /* no context template, initialize context */ - if (keylen_in_bytes > SHA1_HMAC_BLOCK_SIZE) { - /* - * Hash the passed-in key to get a smaller key. - * The inner context is used since it hasn't been - * initialized yet. 
- */ - PROV_SHA1_DIGEST_KEY(&sha1_hmac_ctx.hc_icontext, - key->ck_data, keylen_in_bytes, digest); - sha1_mac_init_ctx(&sha1_hmac_ctx, digest, - SHA1_DIGEST_LENGTH); - } else { - sha1_mac_init_ctx(&sha1_hmac_ctx, key->ck_data, - keylen_in_bytes); - } - } - - /* get the mechanism parameters, if applicable */ - if (mechanism->cm_type == SHA1_HMAC_GEN_MECH_INFO_TYPE) { - if (mechanism->cm_param == NULL || - mechanism->cm_param_len != sizeof (ulong_t)) { - ret = CRYPTO_MECHANISM_PARAM_INVALID; - goto bail; - } - PROV_SHA1_GET_DIGEST_LEN(mechanism, digest_len); - if (digest_len > SHA1_DIGEST_LENGTH) { - ret = CRYPTO_MECHANISM_PARAM_INVALID; - goto bail; - } - } - - if (mac->cd_length != digest_len) { - ret = CRYPTO_INVALID_MAC; - goto bail; - } - - /* do a SHA1 update of the inner context using the specified data */ - SHA1_MAC_UPDATE(data, sha1_hmac_ctx, ret); - if (ret != CRYPTO_SUCCESS) - /* the update failed, free context and bail */ - goto bail; - - /* do a SHA1 final on the inner context */ - SHA1Final(digest, &sha1_hmac_ctx.hc_icontext); - - /* - * Do an SHA1 update on the outer context, feeding the inner - * digest as data. - */ - SHA1Update(&sha1_hmac_ctx.hc_ocontext, digest, SHA1_DIGEST_LENGTH); - - /* - * Do a SHA1 final on the outer context, storing the computed - * digest in the users buffer. - */ - SHA1Final(digest, &sha1_hmac_ctx.hc_ocontext); - - /* - * Compare the computed digest against the expected digest passed - * as argument. 
- */ - - switch (mac->cd_format) { - - case CRYPTO_DATA_RAW: - if (bcmp(digest, (unsigned char *)mac->cd_raw.iov_base + - mac->cd_offset, digest_len) != 0) - ret = CRYPTO_INVALID_MAC; - break; - - case CRYPTO_DATA_UIO: { - off_t offset = mac->cd_offset; - uint_t vec_idx = 0; - off_t scratch_offset = 0; - size_t length = digest_len; - size_t cur_len; - - /* we support only kernel buffer */ - if (zfs_uio_segflg(mac->cd_uio) != UIO_SYSSPACE) - return (CRYPTO_ARGUMENTS_BAD); - - /* jump to the first iovec containing the expected digest */ - offset = zfs_uio_index_at_offset(mac->cd_uio, offset, &vec_idx); - if (vec_idx == zfs_uio_iovcnt(mac->cd_uio)) { - /* - * The caller specified an offset that is - * larger than the total size of the buffers - * it provided. - */ - ret = CRYPTO_DATA_LEN_RANGE; - break; - } - - /* do the comparison of computed digest vs specified one */ - while (vec_idx < zfs_uio_iovcnt(mac->cd_uio) && length > 0) { - cur_len = MIN(zfs_uio_iovlen(mac->cd_uio, vec_idx) - - offset, length); - - if (bcmp(digest + scratch_offset, - zfs_uio_iovbase(mac->cd_uio, vec_idx) + offset, - cur_len) != 0) { - ret = CRYPTO_INVALID_MAC; - break; - } - - length -= cur_len; - vec_idx++; - scratch_offset += cur_len; - offset = 0; - } - break; - } - - default: - ret = CRYPTO_ARGUMENTS_BAD; - } - - bzero(&sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t)); - return (ret); -bail: - bzero(&sha1_hmac_ctx, sizeof (sha1_hmac_ctx_t)); - mac->cd_length = 0; - return (ret); -} - -/* - * KCF software provider context management entry points. 
- */ - -/* ARGSUSED */ -static int -sha1_create_ctx_template(crypto_provider_handle_t provider, - crypto_mechanism_t *mechanism, crypto_key_t *key, - crypto_spi_ctx_template_t *ctx_template, size_t *ctx_template_size, - crypto_req_handle_t req) -{ - sha1_hmac_ctx_t *sha1_hmac_ctx_tmpl; - uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length); - - if ((mechanism->cm_type != SHA1_HMAC_MECH_INFO_TYPE) && - (mechanism->cm_type != SHA1_HMAC_GEN_MECH_INFO_TYPE)) { - return (CRYPTO_MECHANISM_INVALID); - } - - /* Add support for key by attributes (RFE 4706552) */ - if (key->ck_format != CRYPTO_KEY_RAW) - return (CRYPTO_ARGUMENTS_BAD); - - /* - * Allocate and initialize SHA1 context. - */ - sha1_hmac_ctx_tmpl = kmem_alloc(sizeof (sha1_hmac_ctx_t), - crypto_kmflag(req)); - if (sha1_hmac_ctx_tmpl == NULL) - return (CRYPTO_HOST_MEMORY); - - if (keylen_in_bytes > SHA1_HMAC_BLOCK_SIZE) { - uchar_t digested_key[SHA1_DIGEST_LENGTH]; - - /* - * Hash the passed-in key to get a smaller key. - * The inner context is used since it hasn't been - * initialized yet. - */ - PROV_SHA1_DIGEST_KEY(&sha1_hmac_ctx_tmpl->hc_icontext, - key->ck_data, keylen_in_bytes, digested_key); - sha1_mac_init_ctx(sha1_hmac_ctx_tmpl, digested_key, - SHA1_DIGEST_LENGTH); - } else { - sha1_mac_init_ctx(sha1_hmac_ctx_tmpl, key->ck_data, - keylen_in_bytes); - } - - sha1_hmac_ctx_tmpl->hc_mech_type = mechanism->cm_type; - *ctx_template = (crypto_spi_ctx_template_t)sha1_hmac_ctx_tmpl; - *ctx_template_size = sizeof (sha1_hmac_ctx_t); - - - return (CRYPTO_SUCCESS); -} - -static int -sha1_free_context(crypto_ctx_t *ctx) -{ - uint_t ctx_len; - sha1_mech_type_t mech_type; - - if (ctx->cc_provider_private == NULL) - return (CRYPTO_SUCCESS); - - /* - * We have to free either SHA1 or SHA1-HMAC contexts, which - * have different lengths. 
- */ - - mech_type = PROV_SHA1_CTX(ctx)->sc_mech_type; - if (mech_type == SHA1_MECH_INFO_TYPE) - ctx_len = sizeof (sha1_ctx_t); - else { - ASSERT(mech_type == SHA1_HMAC_MECH_INFO_TYPE || - mech_type == SHA1_HMAC_GEN_MECH_INFO_TYPE); - ctx_len = sizeof (sha1_hmac_ctx_t); - } - - bzero(ctx->cc_provider_private, ctx_len); - kmem_free(ctx->cc_provider_private, ctx_len); - ctx->cc_provider_private = NULL; - - return (CRYPTO_SUCCESS); -} diff --git a/sys/contrib/openzfs/module/icp/io/sha2_mod.c b/sys/contrib/openzfs/module/icp/io/sha2_mod.c index d690cd0bcb05..f068951b07f5 100644 --- a/sys/contrib/openzfs/module/icp/io/sha2_mod.c +++ b/sys/contrib/openzfs/module/icp/io/sha2_mod.c @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. * @@ -25,33 +25,13 @@ */ #include <sys/zfs_context.h> -#include <sys/modctl.h> #include <sys/crypto/common.h> #include <sys/crypto/spi.h> #include <sys/crypto/icp.h> -#define _SHA2_IMPL #include <sys/sha2.h> #include <sha2/sha2_impl.h> /* - * The sha2 module is created with two modlinkages: - * - a modlmisc that allows consumers to directly call the entry points - * SHA2Init, SHA2Update, and SHA2Final. - * - a modlcrypto that allows the module to register with the Kernel - * Cryptographic Framework (KCF) as a software provider for the SHA2 - * mechanisms. - */ - -static struct modlcrypto modlcrypto = { - &mod_cryptoops, - "SHA2 Kernel SW Provider" -}; - -static struct modlinkage modlinkage = { - MODREV_1, {&modlcrypto, NULL} -}; - -/* * Macros to access the SHA2 or SHA2-HMAC contexts from a context passed * by KCF to one of the entry points. 
*/ @@ -65,7 +45,7 @@ static struct modlinkage modlinkage = { (len) = (uint32_t)*((ulong_t *)(m)->cm_param); \ else { \ ulong_t tmp_ulong; \ - bcopy((m)->cm_param, &tmp_ulong, sizeof (ulong_t)); \ + memcpy(&tmp_ulong, (m)->cm_param, sizeof (ulong_t)); \ (len) = (uint32_t)tmp_ulong; \ } \ } @@ -79,91 +59,61 @@ static struct modlinkage modlinkage = { /* * Mechanism info structure passed to KCF during registration. */ -static crypto_mech_info_t sha2_mech_info_tab[] = { +static const crypto_mech_info_t sha2_mech_info_tab[] = { /* SHA256 */ {SUN_CKM_SHA256, SHA256_MECH_INFO_TYPE, - CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC, - 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS}, + CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC}, /* SHA256-HMAC */ {SUN_CKM_SHA256_HMAC, SHA256_HMAC_MECH_INFO_TYPE, - CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, - SHA2_HMAC_MIN_KEY_LEN, SHA2_HMAC_MAX_KEY_LEN, - CRYPTO_KEYSIZE_UNIT_IN_BYTES}, + CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC}, /* SHA256-HMAC GENERAL */ {SUN_CKM_SHA256_HMAC_GENERAL, SHA256_HMAC_GEN_MECH_INFO_TYPE, - CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, - SHA2_HMAC_MIN_KEY_LEN, SHA2_HMAC_MAX_KEY_LEN, - CRYPTO_KEYSIZE_UNIT_IN_BYTES}, + CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC}, /* SHA384 */ {SUN_CKM_SHA384, SHA384_MECH_INFO_TYPE, - CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC, - 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS}, + CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC}, /* SHA384-HMAC */ {SUN_CKM_SHA384_HMAC, SHA384_HMAC_MECH_INFO_TYPE, - CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, - SHA2_HMAC_MIN_KEY_LEN, SHA2_HMAC_MAX_KEY_LEN, - CRYPTO_KEYSIZE_UNIT_IN_BYTES}, + CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC}, /* SHA384-HMAC GENERAL */ {SUN_CKM_SHA384_HMAC_GENERAL, SHA384_HMAC_GEN_MECH_INFO_TYPE, - CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, - SHA2_HMAC_MIN_KEY_LEN, SHA2_HMAC_MAX_KEY_LEN, - CRYPTO_KEYSIZE_UNIT_IN_BYTES}, + CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC}, /* SHA512 */ {SUN_CKM_SHA512, SHA512_MECH_INFO_TYPE, - CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC, - 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS}, + 
CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC}, /* SHA512-HMAC */ {SUN_CKM_SHA512_HMAC, SHA512_HMAC_MECH_INFO_TYPE, - CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, - SHA2_HMAC_MIN_KEY_LEN, SHA2_HMAC_MAX_KEY_LEN, - CRYPTO_KEYSIZE_UNIT_IN_BYTES}, + CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC}, /* SHA512-HMAC GENERAL */ {SUN_CKM_SHA512_HMAC_GENERAL, SHA512_HMAC_GEN_MECH_INFO_TYPE, - CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, - SHA2_HMAC_MIN_KEY_LEN, SHA2_HMAC_MAX_KEY_LEN, - CRYPTO_KEYSIZE_UNIT_IN_BYTES} + CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC}, }; -static void sha2_provider_status(crypto_provider_handle_t, uint_t *); - -static crypto_control_ops_t sha2_control_ops = { - sha2_provider_status -}; +static int sha2_digest_init(crypto_ctx_t *, crypto_mechanism_t *); +static int sha2_digest(crypto_ctx_t *, crypto_data_t *, crypto_data_t *); +static int sha2_digest_update(crypto_ctx_t *, crypto_data_t *); +static int sha2_digest_final(crypto_ctx_t *, crypto_data_t *); +static int sha2_digest_atomic(crypto_mechanism_t *, crypto_data_t *, + crypto_data_t *); -static int sha2_digest_init(crypto_ctx_t *, crypto_mechanism_t *, - crypto_req_handle_t); -static int sha2_digest(crypto_ctx_t *, crypto_data_t *, crypto_data_t *, - crypto_req_handle_t); -static int sha2_digest_update(crypto_ctx_t *, crypto_data_t *, - crypto_req_handle_t); -static int sha2_digest_final(crypto_ctx_t *, crypto_data_t *, - crypto_req_handle_t); -static int sha2_digest_atomic(crypto_provider_handle_t, crypto_session_id_t, - crypto_mechanism_t *, crypto_data_t *, crypto_data_t *, - crypto_req_handle_t); - -static crypto_digest_ops_t sha2_digest_ops = { +static const crypto_digest_ops_t sha2_digest_ops = { .digest_init = sha2_digest_init, .digest = sha2_digest, .digest_update = sha2_digest_update, - .digest_key = NULL, .digest_final = sha2_digest_final, .digest_atomic = sha2_digest_atomic }; static int sha2_mac_init(crypto_ctx_t *, crypto_mechanism_t *, crypto_key_t *, - crypto_spi_ctx_template_t, crypto_req_handle_t); -static int 
sha2_mac_update(crypto_ctx_t *, crypto_data_t *, - crypto_req_handle_t); -static int sha2_mac_final(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t); -static int sha2_mac_atomic(crypto_provider_handle_t, crypto_session_id_t, - crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *, - crypto_spi_ctx_template_t, crypto_req_handle_t); -static int sha2_mac_verify_atomic(crypto_provider_handle_t, crypto_session_id_t, - crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *, - crypto_spi_ctx_template_t, crypto_req_handle_t); - -static crypto_mac_ops_t sha2_mac_ops = { + crypto_spi_ctx_template_t); +static int sha2_mac_update(crypto_ctx_t *, crypto_data_t *); +static int sha2_mac_final(crypto_ctx_t *, crypto_data_t *); +static int sha2_mac_atomic(crypto_mechanism_t *, crypto_key_t *, + crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t); +static int sha2_mac_verify_atomic(crypto_mechanism_t *, crypto_key_t *, + crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t); + +static const crypto_mac_ops_t sha2_mac_ops = { .mac_init = sha2_mac_init, .mac = NULL, .mac_update = sha2_mac_update, @@ -172,42 +122,28 @@ static crypto_mac_ops_t sha2_mac_ops = { .mac_verify_atomic = sha2_mac_verify_atomic }; -static int sha2_create_ctx_template(crypto_provider_handle_t, - crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t *, - size_t *, crypto_req_handle_t); +static int sha2_create_ctx_template(crypto_mechanism_t *, crypto_key_t *, + crypto_spi_ctx_template_t *, size_t *); static int sha2_free_context(crypto_ctx_t *); -static crypto_ctx_ops_t sha2_ctx_ops = { +static const crypto_ctx_ops_t sha2_ctx_ops = { .create_ctx_template = sha2_create_ctx_template, .free_context = sha2_free_context }; -static crypto_ops_t sha2_crypto_ops = {{{{{ - &sha2_control_ops, +static const crypto_ops_t sha2_crypto_ops = { &sha2_digest_ops, NULL, &sha2_mac_ops, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - 
&sha2_ctx_ops -}}}}}; + &sha2_ctx_ops, +}; -static crypto_provider_info_t sha2_prov_info = {{{{ - CRYPTO_SPI_VERSION_1, +static const crypto_provider_info_t sha2_prov_info = { "SHA2 Software Provider", - CRYPTO_SW_PROVIDER, - NULL, &sha2_crypto_ops, - sizeof (sha2_mech_info_tab)/sizeof (crypto_mech_info_t), + sizeof (sha2_mech_info_tab) / sizeof (crypto_mech_info_t), sha2_mech_info_tab -}}}}; +}; static crypto_kcf_provider_handle_t sha2_prov_handle = 0; @@ -216,9 +152,6 @@ sha2_mod_init(void) { int ret; - if ((ret = mod_install(&modlinkage)) != 0) - return (ret); - /* * Register with KCF. If the registration fails, log an * error but do not uninstall the module, since the functionality @@ -235,7 +168,7 @@ sha2_mod_init(void) int sha2_mod_fini(void) { - int ret; + int ret = 0; if (sha2_prov_handle != 0) { if ((ret = crypto_unregister_provider(sha2_prov_handle)) != @@ -248,17 +181,7 @@ sha2_mod_fini(void) sha2_prov_handle = 0; } - return (mod_remove(&modlinkage)); -} - -/* - * KCF software provider control entry points. - */ -/* ARGSUSED */ -static void -sha2_provider_status(crypto_provider_handle_t provider, uint_t *status) -{ - *status = CRYPTO_PROVIDER_READY; + return (ret); } /* @@ -266,15 +189,13 @@ sha2_provider_status(crypto_provider_handle_t provider, uint_t *status) */ static int -sha2_digest_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism, - crypto_req_handle_t req) +sha2_digest_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism) { /* * Allocate and initialize SHA2 context. 
*/ - ctx->cc_provider_private = kmem_alloc(sizeof (sha2_ctx_t), - crypto_kmflag(req)); + ctx->cc_provider_private = kmem_alloc(sizeof (sha2_ctx_t), KM_SLEEP); if (ctx->cc_provider_private == NULL) return (CRYPTO_HOST_MEMORY); @@ -387,9 +308,9 @@ sha2_digest_final_uio(SHA2_CTX *sha2_ctx, crypto_data_t *digest, */ SHA2Final(digest_scratch, sha2_ctx); - bcopy(digest_scratch, (uchar_t *) + memcpy((uchar_t *) zfs_uio_iovbase(digest->cd_uio, vec_idx) + offset, - digest_len); + digest_scratch, digest_len); } else { SHA2Final((uchar_t *)zfs_uio_iovbase(digest-> cd_uio, vec_idx) + offset, @@ -414,8 +335,9 @@ sha2_digest_final_uio(SHA2_CTX *sha2_ctx, crypto_data_t *digest, cur_len = MIN(zfs_uio_iovlen(digest->cd_uio, vec_idx) - offset, length); - bcopy(digest_tmp + scratch_offset, + memcpy( zfs_uio_iovbase(digest->cd_uio, vec_idx) + offset, + digest_tmp + scratch_offset, cur_len); length -= cur_len; @@ -438,10 +360,8 @@ sha2_digest_final_uio(SHA2_CTX *sha2_ctx, crypto_data_t *digest, return (CRYPTO_SUCCESS); } -/* ARGSUSED */ static int -sha2_digest(crypto_ctx_t *ctx, crypto_data_t *data, crypto_data_t *digest, - crypto_req_handle_t req) +sha2_digest(crypto_ctx_t *ctx, crypto_data_t *data, crypto_data_t *digest) { int ret = CRYPTO_SUCCESS; uint_t sha_digest_len; @@ -526,10 +446,8 @@ sha2_digest(crypto_ctx_t *ctx, crypto_data_t *data, crypto_data_t *digest, return (ret); } -/* ARGSUSED */ static int -sha2_digest_update(crypto_ctx_t *ctx, crypto_data_t *data, - crypto_req_handle_t req) +sha2_digest_update(crypto_ctx_t *ctx, crypto_data_t *data) { int ret = CRYPTO_SUCCESS; @@ -555,10 +473,8 @@ sha2_digest_update(crypto_ctx_t *ctx, crypto_data_t *data, return (ret); } -/* ARGSUSED */ static int -sha2_digest_final(crypto_ctx_t *ctx, crypto_data_t *digest, - crypto_req_handle_t req) +sha2_digest_final(crypto_ctx_t *ctx, crypto_data_t *digest) { int ret = CRYPTO_SUCCESS; uint_t sha_digest_len; @@ -618,12 +534,9 @@ sha2_digest_final(crypto_ctx_t *ctx, crypto_data_t *digest, return 
(ret); } -/* ARGSUSED */ static int -sha2_digest_atomic(crypto_provider_handle_t provider, - crypto_session_id_t session_id, crypto_mechanism_t *mechanism, - crypto_data_t *data, crypto_data_t *digest, - crypto_req_handle_t req) +sha2_digest_atomic(crypto_mechanism_t *mechanism, crypto_data_t *data, + crypto_data_t *digest) { int ret = CRYPTO_SUCCESS; SHA2_CTX sha2_ctx; @@ -717,8 +630,8 @@ sha2_digest_atomic(crypto_provider_handle_t provider, static void sha2_mac_init_ctx(sha2_hmac_ctx_t *ctx, void *keyval, uint_t length_in_bytes) { - uint64_t ipad[SHA512_HMAC_BLOCK_SIZE / sizeof (uint64_t)]; - uint64_t opad[SHA512_HMAC_BLOCK_SIZE / sizeof (uint64_t)]; + uint64_t ipad[SHA512_HMAC_BLOCK_SIZE / sizeof (uint64_t)] = {0}; + uint64_t opad[SHA512_HMAC_BLOCK_SIZE / sizeof (uint64_t)] = {0}; int i, block_size, blocks_per_int64; /* Determine the block size */ @@ -730,10 +643,15 @@ sha2_mac_init_ctx(sha2_hmac_ctx_t *ctx, void *keyval, uint_t length_in_bytes) blocks_per_int64 = SHA512_HMAC_BLOCK_SIZE / sizeof (uint64_t); } - (void) bzero(ipad, block_size); - (void) bzero(opad, block_size); - (void) bcopy(keyval, ipad, length_in_bytes); - (void) bcopy(keyval, opad, length_in_bytes); + (void) memset(ipad, 0, block_size); + (void) memset(opad, 0, block_size); + + if (keyval != NULL) { + (void) memcpy(ipad, keyval, length_in_bytes); + (void) memcpy(opad, keyval, length_in_bytes); + } else { + ASSERT0(length_in_bytes); + } /* XOR key with ipad (0x36) and opad (0x5c) */ for (i = 0; i < blocks_per_int64; i ++) { @@ -748,15 +666,13 @@ sha2_mac_init_ctx(sha2_hmac_ctx_t *ctx, void *keyval, uint_t length_in_bytes) /* perform SHA2 on opad */ SHA2Init(ctx->hc_mech_type, &ctx->hc_ocontext); SHA2Update(&ctx->hc_ocontext, (uint8_t *)opad, block_size); - } /* */ static int sha2_mac_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism, - crypto_key_t *key, crypto_spi_ctx_template_t ctx_template, - crypto_req_handle_t req) + crypto_key_t *key, crypto_spi_ctx_template_t ctx_template) { int ret 
= CRYPTO_SUCCESS; uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length); @@ -783,18 +699,15 @@ sha2_mac_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism, return (CRYPTO_MECHANISM_INVALID); } - if (key->ck_format != CRYPTO_KEY_RAW) - return (CRYPTO_ARGUMENTS_BAD); - - ctx->cc_provider_private = kmem_alloc(sizeof (sha2_hmac_ctx_t), - crypto_kmflag(req)); + ctx->cc_provider_private = + kmem_alloc(sizeof (sha2_hmac_ctx_t), KM_SLEEP); if (ctx->cc_provider_private == NULL) return (CRYPTO_HOST_MEMORY); PROV_SHA2_HMAC_CTX(ctx)->hc_mech_type = mechanism->cm_type; if (ctx_template != NULL) { /* reuse context template */ - bcopy(ctx_template, PROV_SHA2_HMAC_CTX(ctx), + memcpy(PROV_SHA2_HMAC_CTX(ctx), ctx_template, sizeof (sha2_hmac_ctx_t)); } else { /* no context template, compute context */ @@ -823,16 +736,19 @@ sha2_mac_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism, */ if (mechanism->cm_type % 3 == 2) { if (mechanism->cm_param == NULL || - mechanism->cm_param_len != sizeof (ulong_t)) - ret = CRYPTO_MECHANISM_PARAM_INVALID; - PROV_SHA2_GET_DIGEST_LEN(mechanism, - PROV_SHA2_HMAC_CTX(ctx)->hc_digest_len); - if (PROV_SHA2_HMAC_CTX(ctx)->hc_digest_len > sha_digest_len) + mechanism->cm_param_len != sizeof (ulong_t)) { ret = CRYPTO_MECHANISM_PARAM_INVALID; + } else { + PROV_SHA2_GET_DIGEST_LEN(mechanism, + PROV_SHA2_HMAC_CTX(ctx)->hc_digest_len); + if (PROV_SHA2_HMAC_CTX(ctx)->hc_digest_len > + sha_digest_len) + ret = CRYPTO_MECHANISM_PARAM_INVALID; + } } if (ret != CRYPTO_SUCCESS) { - bzero(ctx->cc_provider_private, sizeof (sha2_hmac_ctx_t)); + memset(ctx->cc_provider_private, 0, sizeof (sha2_hmac_ctx_t)); kmem_free(ctx->cc_provider_private, sizeof (sha2_hmac_ctx_t)); ctx->cc_provider_private = NULL; } @@ -840,10 +756,8 @@ sha2_mac_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism, return (ret); } -/* ARGSUSED */ static int -sha2_mac_update(crypto_ctx_t *ctx, crypto_data_t *data, - crypto_req_handle_t req) +sha2_mac_update(crypto_ctx_t *ctx, crypto_data_t 
*data) { int ret = CRYPTO_SUCCESS; @@ -870,9 +784,8 @@ sha2_mac_update(crypto_ctx_t *ctx, crypto_data_t *data, return (ret); } -/* ARGSUSED */ static int -sha2_mac_final(crypto_ctx_t *ctx, crypto_data_t *mac, crypto_req_handle_t req) +sha2_mac_final(crypto_ctx_t *ctx, crypto_data_t *mac) { int ret = CRYPTO_SUCCESS; uchar_t digest[SHA512_DIGEST_LENGTH]; @@ -939,8 +852,8 @@ sha2_mac_final(crypto_ctx_t *ctx, crypto_data_t *mac, crypto_req_handle_t req) */ SHA2Final(digest, &PROV_SHA2_HMAC_CTX(ctx)->hc_ocontext); - bcopy(digest, (unsigned char *)mac->cd_raw.iov_base + - mac->cd_offset, digest_len); + memcpy((unsigned char *)mac->cd_raw.iov_base + + mac->cd_offset, digest, digest_len); } else { SHA2Final((unsigned char *)mac->cd_raw.iov_base + mac->cd_offset, @@ -961,7 +874,7 @@ sha2_mac_final(crypto_ctx_t *ctx, crypto_data_t *mac, crypto_req_handle_t req) else mac->cd_length = 0; - bzero(ctx->cc_provider_private, sizeof (sha2_hmac_ctx_t)); + memset(ctx->cc_provider_private, 0, sizeof (sha2_hmac_ctx_t)); kmem_free(ctx->cc_provider_private, sizeof (sha2_hmac_ctx_t)); ctx->cc_provider_private = NULL; @@ -983,12 +896,10 @@ sha2_mac_final(crypto_ctx_t *ctx, crypto_data_t *mac, crypto_req_handle_t req) } \ } -/* ARGSUSED */ static int -sha2_mac_atomic(crypto_provider_handle_t provider, - crypto_session_id_t session_id, crypto_mechanism_t *mechanism, +sha2_mac_atomic(crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac, - crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req) + crypto_spi_ctx_template_t ctx_template) { int ret = CRYPTO_SUCCESS; uchar_t digest[SHA512_DIGEST_LENGTH]; @@ -1017,13 +928,9 @@ sha2_mac_atomic(crypto_provider_handle_t provider, return (CRYPTO_MECHANISM_INVALID); } - /* Add support for key by attributes (RFE 4706552) */ - if (key->ck_format != CRYPTO_KEY_RAW) - return (CRYPTO_ARGUMENTS_BAD); - if (ctx_template != NULL) { /* reuse context template */ - bcopy(ctx_template, &sha2_hmac_ctx, sizeof 
(sha2_hmac_ctx_t)); + memcpy(&sha2_hmac_ctx, ctx_template, sizeof (sha2_hmac_ctx_t)); } else { sha2_hmac_ctx.hc_mech_type = mechanism->cm_type; /* no context template, initialize context */ @@ -1096,8 +1003,8 @@ sha2_mac_atomic(crypto_provider_handle_t provider, * the user only what was requested. */ SHA2Final(digest, &sha2_hmac_ctx.hc_ocontext); - bcopy(digest, (unsigned char *)mac->cd_raw.iov_base + - mac->cd_offset, digest_len); + memcpy((unsigned char *)mac->cd_raw.iov_base + + mac->cd_offset, digest, digest_len); } else { SHA2Final((unsigned char *)mac->cd_raw.iov_base + mac->cd_offset, &sha2_hmac_ctx.hc_ocontext); @@ -1116,17 +1023,15 @@ sha2_mac_atomic(crypto_provider_handle_t provider, return (CRYPTO_SUCCESS); } bail: - bzero(&sha2_hmac_ctx, sizeof (sha2_hmac_ctx_t)); + memset(&sha2_hmac_ctx, 0, sizeof (sha2_hmac_ctx_t)); mac->cd_length = 0; return (ret); } -/* ARGSUSED */ static int -sha2_mac_verify_atomic(crypto_provider_handle_t provider, - crypto_session_id_t session_id, crypto_mechanism_t *mechanism, +sha2_mac_verify_atomic(crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac, - crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req) + crypto_spi_ctx_template_t ctx_template) { int ret = CRYPTO_SUCCESS; uchar_t digest[SHA512_DIGEST_LENGTH]; @@ -1155,13 +1060,9 @@ sha2_mac_verify_atomic(crypto_provider_handle_t provider, return (CRYPTO_MECHANISM_INVALID); } - /* Add support for key by attributes (RFE 4706552) */ - if (key->ck_format != CRYPTO_KEY_RAW) - return (CRYPTO_ARGUMENTS_BAD); - if (ctx_template != NULL) { /* reuse context template */ - bcopy(ctx_template, &sha2_hmac_ctx, sizeof (sha2_hmac_ctx_t)); + memcpy(&sha2_hmac_ctx, ctx_template, sizeof (sha2_hmac_ctx_t)); } else { sha2_hmac_ctx.hc_mech_type = mechanism->cm_type; /* no context template, initialize context */ @@ -1238,7 +1139,7 @@ sha2_mac_verify_atomic(crypto_provider_handle_t provider, switch (mac->cd_format) { case CRYPTO_DATA_RAW: - if 
(bcmp(digest, (unsigned char *)mac->cd_raw.iov_base + + if (memcmp(digest, (unsigned char *)mac->cd_raw.iov_base + mac->cd_offset, digest_len) != 0) ret = CRYPTO_INVALID_MAC; break; @@ -1271,7 +1172,7 @@ sha2_mac_verify_atomic(crypto_provider_handle_t provider, cur_len = MIN(zfs_uio_iovlen(mac->cd_uio, vec_idx) - offset, length); - if (bcmp(digest + scratch_offset, + if (memcmp(digest + scratch_offset, zfs_uio_iovbase(mac->cd_uio, vec_idx) + offset, cur_len) != 0) { ret = CRYPTO_INVALID_MAC; @@ -1292,7 +1193,7 @@ sha2_mac_verify_atomic(crypto_provider_handle_t provider, return (ret); bail: - bzero(&sha2_hmac_ctx, sizeof (sha2_hmac_ctx_t)); + memset(&sha2_hmac_ctx, 0, sizeof (sha2_hmac_ctx_t)); mac->cd_length = 0; return (ret); } @@ -1301,12 +1202,9 @@ bail: * KCF software provider context management entry points. */ -/* ARGSUSED */ static int -sha2_create_ctx_template(crypto_provider_handle_t provider, - crypto_mechanism_t *mechanism, crypto_key_t *key, - crypto_spi_ctx_template_t *ctx_template, size_t *ctx_template_size, - crypto_req_handle_t req) +sha2_create_ctx_template(crypto_mechanism_t *mechanism, crypto_key_t *key, + crypto_spi_ctx_template_t *ctx_template, size_t *ctx_template_size) { sha2_hmac_ctx_t *sha2_hmac_ctx_tmpl; uint_t keylen_in_bytes = CRYPTO_BITS2BYTES(key->ck_length); @@ -1333,15 +1231,10 @@ sha2_create_ctx_template(crypto_provider_handle_t provider, return (CRYPTO_MECHANISM_INVALID); } - /* Add support for key by attributes (RFE 4706552) */ - if (key->ck_format != CRYPTO_KEY_RAW) - return (CRYPTO_ARGUMENTS_BAD); - /* * Allocate and initialize SHA2 context. 
*/ - sha2_hmac_ctx_tmpl = kmem_alloc(sizeof (sha2_hmac_ctx_t), - crypto_kmflag(req)); + sha2_hmac_ctx_tmpl = kmem_alloc(sizeof (sha2_hmac_ctx_t), KM_SLEEP); if (sha2_hmac_ctx_tmpl == NULL) return (CRYPTO_HOST_MEMORY); @@ -1391,7 +1284,7 @@ sha2_free_context(crypto_ctx_t *ctx) else ctx_len = sizeof (sha2_hmac_ctx_t); - bzero(ctx->cc_provider_private, ctx_len); + memset(ctx->cc_provider_private, 0, ctx_len); kmem_free(ctx->cc_provider_private, ctx_len); ctx->cc_provider_private = NULL; diff --git a/sys/contrib/openzfs/module/icp/io/skein_mod.c b/sys/contrib/openzfs/module/icp/io/skein_mod.c index ac7d201eb708..221e1debd45b 100644 --- a/sys/contrib/openzfs/module/icp/io/skein_mod.c +++ b/sys/contrib/openzfs/module/icp/io/skein_mod.c @@ -23,7 +23,6 @@ * Copyright 2013 Saso Kiselkov. All rights reserved. */ -#include <sys/modctl.h> #include <sys/crypto/common.h> #include <sys/crypto/icp.h> #include <sys/crypto/spi.h> @@ -31,78 +30,42 @@ #define SKEIN_MODULE_IMPL #include <sys/skein.h> -/* - * Like the sha2 module, we create the skein module with two modlinkages: - * - modlmisc to allow direct calls to Skein_* API functions. - * - modlcrypto to integrate well into the Kernel Crypto Framework (KCF). 
- */ -static struct modlmisc modlmisc = { - &mod_cryptoops, - "Skein Message-Digest Algorithm" -}; - -static struct modlcrypto modlcrypto = { - &mod_cryptoops, - "Skein Kernel SW Provider" -}; - -static struct modlinkage modlinkage = { - MODREV_1, {&modlmisc, &modlcrypto, NULL} -}; - -static crypto_mech_info_t skein_mech_info_tab[] = { +static const crypto_mech_info_t skein_mech_info_tab[] = { {CKM_SKEIN_256, SKEIN_256_MECH_INFO_TYPE, - CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC, - 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS}, + CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC}, {CKM_SKEIN_256_MAC, SKEIN_256_MAC_MECH_INFO_TYPE, - CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, 1, INT_MAX, - CRYPTO_KEYSIZE_UNIT_IN_BYTES}, + CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC}, {CKM_SKEIN_512, SKEIN_512_MECH_INFO_TYPE, - CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC, - 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS}, + CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC}, {CKM_SKEIN_512_MAC, SKEIN_512_MAC_MECH_INFO_TYPE, - CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, 1, INT_MAX, - CRYPTO_KEYSIZE_UNIT_IN_BYTES}, + CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC}, {CKM_SKEIN1024, SKEIN1024_MECH_INFO_TYPE, - CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC, - 0, 0, CRYPTO_KEYSIZE_UNIT_IN_BITS}, + CRYPTO_FG_DIGEST | CRYPTO_FG_DIGEST_ATOMIC}, {CKM_SKEIN1024_MAC, SKEIN1024_MAC_MECH_INFO_TYPE, - CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC, 1, INT_MAX, - CRYPTO_KEYSIZE_UNIT_IN_BYTES} + CRYPTO_FG_MAC | CRYPTO_FG_MAC_ATOMIC}, }; -static void skein_provider_status(crypto_provider_handle_t, uint_t *); +static int skein_digest_init(crypto_ctx_t *, crypto_mechanism_t *); +static int skein_digest(crypto_ctx_t *, crypto_data_t *, crypto_data_t *); +static int skein_update(crypto_ctx_t *, crypto_data_t *); +static int skein_final(crypto_ctx_t *, crypto_data_t *); +static int skein_digest_atomic(crypto_mechanism_t *, crypto_data_t *, + crypto_data_t *); -static crypto_control_ops_t skein_control_ops = { - skein_provider_status -}; - -static int skein_digest_init(crypto_ctx_t *, 
crypto_mechanism_t *, - crypto_req_handle_t); -static int skein_digest(crypto_ctx_t *, crypto_data_t *, crypto_data_t *, - crypto_req_handle_t); -static int skein_update(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t); -static int skein_final(crypto_ctx_t *, crypto_data_t *, crypto_req_handle_t); -static int skein_digest_atomic(crypto_provider_handle_t, crypto_session_id_t, - crypto_mechanism_t *, crypto_data_t *, crypto_data_t *, - crypto_req_handle_t); - -static crypto_digest_ops_t skein_digest_ops = { +static const crypto_digest_ops_t skein_digest_ops = { .digest_init = skein_digest_init, .digest = skein_digest, .digest_update = skein_update, - .digest_key = NULL, .digest_final = skein_final, .digest_atomic = skein_digest_atomic }; static int skein_mac_init(crypto_ctx_t *, crypto_mechanism_t *, crypto_key_t *, - crypto_spi_ctx_template_t, crypto_req_handle_t); -static int skein_mac_atomic(crypto_provider_handle_t, crypto_session_id_t, - crypto_mechanism_t *, crypto_key_t *, crypto_data_t *, crypto_data_t *, - crypto_spi_ctx_template_t, crypto_req_handle_t); + crypto_spi_ctx_template_t); +static int skein_mac_atomic(crypto_mechanism_t *, crypto_key_t *, + crypto_data_t *, crypto_data_t *, crypto_spi_ctx_template_t); -static crypto_mac_ops_t skein_mac_ops = { +static const crypto_mac_ops_t skein_mac_ops = { .mac_init = skein_mac_init, .mac = NULL, .mac_update = skein_update, /* using regular digest update is OK here */ @@ -111,42 +74,28 @@ static crypto_mac_ops_t skein_mac_ops = { .mac_verify_atomic = NULL }; -static int skein_create_ctx_template(crypto_provider_handle_t, - crypto_mechanism_t *, crypto_key_t *, crypto_spi_ctx_template_t *, - size_t *, crypto_req_handle_t); +static int skein_create_ctx_template(crypto_mechanism_t *, crypto_key_t *, + crypto_spi_ctx_template_t *, size_t *); static int skein_free_context(crypto_ctx_t *); -static crypto_ctx_ops_t skein_ctx_ops = { +static const crypto_ctx_ops_t skein_ctx_ops = { .create_ctx_template = 
skein_create_ctx_template, .free_context = skein_free_context }; -static crypto_ops_t skein_crypto_ops = {{{{{ - &skein_control_ops, +static const crypto_ops_t skein_crypto_ops = { &skein_digest_ops, NULL, &skein_mac_ops, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, - NULL, &skein_ctx_ops, -}}}}}; +}; -static crypto_provider_info_t skein_prov_info = {{{{ - CRYPTO_SPI_VERSION_1, +static const crypto_provider_info_t skein_prov_info = { "Skein Software Provider", - CRYPTO_SW_PROVIDER, - NULL, &skein_crypto_ops, sizeof (skein_mech_info_tab) / sizeof (crypto_mech_info_t), skein_mech_info_tab -}}}}; +}; static crypto_kcf_provider_handle_t skein_prov_handle = 0; @@ -214,11 +163,6 @@ skein_get_digest_bitlen(const crypto_mechanism_t *mechanism, size_t *result) int skein_mod_init(void) { - int error; - - if ((error = mod_install(&modlinkage)) != 0) - return (error); - /* * Try to register with KCF - failure shouldn't unload us, since we * still may want to continue providing misc/skein functionality. @@ -231,7 +175,7 @@ skein_mod_init(void) int skein_mod_fini(void) { - int ret; + int ret = 0; if (skein_prov_handle != 0) { if ((ret = crypto_unregister_provider(skein_prov_handle)) != @@ -244,17 +188,7 @@ skein_mod_fini(void) skein_prov_handle = 0; } - return (mod_remove(&modlinkage)); -} - -/* - * KCF software provider control entry points. - */ -/* ARGSUSED */ -static void -skein_provider_status(crypto_provider_handle_t provider, uint_t *status) -{ - *status = CRYPTO_PROVIDER_READY; + return (0); } /* @@ -318,8 +252,7 @@ skein_digest_update_uio(skein_ctx_t *ctx, const crypto_data_t *data) * Performs a Final on a context and writes to a uio digest output. 
*/ static int -skein_digest_final_uio(skein_ctx_t *ctx, crypto_data_t *digest, - crypto_req_handle_t req) +skein_digest_final_uio(skein_ctx_t *ctx, crypto_data_t *digest) { off_t offset = digest->cd_offset; uint_t vec_idx = 0; @@ -352,15 +285,15 @@ skein_digest_final_uio(skein_ctx_t *ctx, crypto_data_t *digest, size_t cur_len; digest_tmp = kmem_alloc(CRYPTO_BITS2BYTES( - ctx->sc_digest_bitlen), crypto_kmflag(req)); + ctx->sc_digest_bitlen), KM_SLEEP); if (digest_tmp == NULL) return (CRYPTO_HOST_MEMORY); SKEIN_OP(ctx, Final, digest_tmp); while (vec_idx < zfs_uio_iovcnt(uio) && length > 0) { cur_len = MIN(zfs_uio_iovlen(uio, vec_idx) - offset, length); - bcopy(digest_tmp + scratch_offset, - zfs_uio_iovbase(uio, vec_idx) + offset, cur_len); + memcpy(zfs_uio_iovbase(uio, vec_idx) + offset, + digest_tmp + scratch_offset, cur_len); length -= cur_len; vec_idx++; @@ -396,16 +329,14 @@ skein_digest_final_uio(skein_ctx_t *ctx, crypto_data_t *digest, * for Skein-1024). */ static int -skein_digest_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism, - crypto_req_handle_t req) +skein_digest_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism) { int error = CRYPTO_SUCCESS; if (!VALID_SKEIN_DIGEST_MECH(mechanism->cm_type)) return (CRYPTO_MECHANISM_INVALID); - SKEIN_CTX_LVALUE(ctx) = kmem_alloc(sizeof (*SKEIN_CTX(ctx)), - crypto_kmflag(req)); + SKEIN_CTX_LVALUE(ctx) = kmem_alloc(sizeof (*SKEIN_CTX(ctx)), KM_SLEEP); if (SKEIN_CTX(ctx) == NULL) return (CRYPTO_HOST_MEMORY); @@ -418,7 +349,7 @@ skein_digest_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism, return (CRYPTO_SUCCESS); errout: - bzero(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx))); + memset(SKEIN_CTX(ctx), 0, sizeof (*SKEIN_CTX(ctx))); kmem_free(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx))); SKEIN_CTX_LVALUE(ctx) = NULL; return (error); @@ -430,8 +361,7 @@ errout: * see what to pass here. 
*/ static int -skein_digest(crypto_ctx_t *ctx, crypto_data_t *data, crypto_data_t *digest, - crypto_req_handle_t req) +skein_digest(crypto_ctx_t *ctx, crypto_data_t *data, crypto_data_t *digest) { int error = CRYPTO_SUCCESS; @@ -444,15 +374,15 @@ skein_digest(crypto_ctx_t *ctx, crypto_data_t *data, crypto_data_t *digest, return (CRYPTO_BUFFER_TOO_SMALL); } - error = skein_update(ctx, data, req); + error = skein_update(ctx, data); if (error != CRYPTO_SUCCESS) { - bzero(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx))); + memset(SKEIN_CTX(ctx), 0, sizeof (*SKEIN_CTX(ctx))); kmem_free(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx))); SKEIN_CTX_LVALUE(ctx) = NULL; digest->cd_length = 0; return (error); } - error = skein_final(ctx, digest, req); + error = skein_final(ctx, digest); return (error); } @@ -462,9 +392,8 @@ skein_digest(crypto_ctx_t *ctx, crypto_data_t *data, crypto_data_t *digest, * can push more data). This is used both for digest and MAC operation. * Supported input data formats are raw, uio and mblk. */ -/*ARGSUSED*/ static int -skein_update(crypto_ctx_t *ctx, crypto_data_t *data, crypto_req_handle_t req) +skein_update(crypto_ctx_t *ctx, crypto_data_t *data) { int error = CRYPTO_SUCCESS; @@ -491,9 +420,8 @@ skein_update(crypto_ctx_t *ctx, crypto_data_t *data, crypto_req_handle_t req) * for digest and MAC operation. * Supported output digest formats are raw, uio and mblk. 
*/ -/*ARGSUSED*/ static int -skein_final(crypto_ctx_t *ctx, crypto_data_t *digest, crypto_req_handle_t req) +skein_final_nofree(crypto_ctx_t *ctx, crypto_data_t *digest) { int error = CRYPTO_SUCCESS; @@ -512,7 +440,7 @@ skein_final(crypto_ctx_t *ctx, crypto_data_t *digest, crypto_req_handle_t req) (uint8_t *)digest->cd_raw.iov_base + digest->cd_offset); break; case CRYPTO_DATA_UIO: - error = skein_digest_final_uio(SKEIN_CTX(ctx), digest, req); + error = skein_digest_final_uio(SKEIN_CTX(ctx), digest); break; default: error = CRYPTO_ARGUMENTS_BAD; @@ -524,7 +452,18 @@ skein_final(crypto_ctx_t *ctx, crypto_data_t *digest, crypto_req_handle_t req) else digest->cd_length = 0; - bzero(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx))); + return (error); +} + +static int +skein_final(crypto_ctx_t *ctx, crypto_data_t *digest) +{ + int error = skein_final_nofree(ctx, digest); + + if (error == CRYPTO_BUFFER_TOO_SMALL) + return (error); + + memset(SKEIN_CTX(ctx), 0, sizeof (*SKEIN_CTX(ctx))); kmem_free(SKEIN_CTX(ctx), sizeof (*(SKEIN_CTX(ctx)))); SKEIN_CTX_LVALUE(ctx) = NULL; @@ -537,15 +476,13 @@ skein_final(crypto_ctx_t *ctx, crypto_data_t *digest, crypto_req_handle_t req) * `data' and writing the output to `digest'. * Supported input/output formats are raw, uio and mblk. 
*/ -/*ARGSUSED*/ static int -skein_digest_atomic(crypto_provider_handle_t provider, - crypto_session_id_t session_id, crypto_mechanism_t *mechanism, - crypto_data_t *data, crypto_data_t *digest, crypto_req_handle_t req) +skein_digest_atomic(crypto_mechanism_t *mechanism, crypto_data_t *data, + crypto_data_t *digest) { - int error; - skein_ctx_t skein_ctx; - crypto_ctx_t ctx; + int error; + skein_ctx_t skein_ctx; + crypto_ctx_t ctx; SKEIN_CTX_LVALUE(&ctx) = &skein_ctx; /* Init */ @@ -557,9 +494,9 @@ skein_digest_atomic(crypto_provider_handle_t provider, goto out; SKEIN_OP(&skein_ctx, Init, skein_ctx.sc_digest_bitlen); - if ((error = skein_update(&ctx, data, digest)) != CRYPTO_SUCCESS) + if ((error = skein_update(&ctx, data)) != CRYPTO_SUCCESS) goto out; - if ((error = skein_final(&ctx, data, digest)) != CRYPTO_SUCCESS) + if ((error = skein_final_nofree(&ctx, data)) != CRYPTO_SUCCESS) goto out; out: @@ -568,7 +505,7 @@ out: CRYPTO_BITS2BYTES(skein_ctx.sc_digest_bitlen); else digest->cd_length = 0; - bzero(&skein_ctx, sizeof (skein_ctx)); + memset(&skein_ctx, 0, sizeof (skein_ctx)); return (error); } @@ -585,8 +522,6 @@ skein_mac_ctx_build(skein_ctx_t *ctx, crypto_mechanism_t *mechanism, if (!VALID_SKEIN_MAC_MECH(mechanism->cm_type)) return (CRYPTO_MECHANISM_INVALID); - if (key->ck_format != CRYPTO_KEY_RAW) - return (CRYPTO_ARGUMENTS_BAD); ctx->sc_mech_type = mechanism->cm_type; error = skein_get_digest_bitlen(mechanism, &ctx->sc_digest_bitlen); if (error != CRYPTO_SUCCESS) @@ -610,18 +545,16 @@ skein_mac_ctx_build(skein_ctx_t *ctx, crypto_mechanism_t *mechanism, */ static int skein_mac_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism, - crypto_key_t *key, crypto_spi_ctx_template_t ctx_template, - crypto_req_handle_t req) + crypto_key_t *key, crypto_spi_ctx_template_t ctx_template) { int error; - SKEIN_CTX_LVALUE(ctx) = kmem_alloc(sizeof (*SKEIN_CTX(ctx)), - crypto_kmflag(req)); + SKEIN_CTX_LVALUE(ctx) = kmem_alloc(sizeof (*SKEIN_CTX(ctx)), KM_SLEEP); if 
(SKEIN_CTX(ctx) == NULL) return (CRYPTO_HOST_MEMORY); if (ctx_template != NULL) { - bcopy(ctx_template, SKEIN_CTX(ctx), + memcpy(SKEIN_CTX(ctx), ctx_template, sizeof (*SKEIN_CTX(ctx))); } else { error = skein_mac_ctx_build(SKEIN_CTX(ctx), mechanism, key); @@ -631,7 +564,7 @@ skein_mac_init(crypto_ctx_t *ctx, crypto_mechanism_t *mechanism, return (CRYPTO_SUCCESS); errout: - bzero(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx))); + memset(SKEIN_CTX(ctx), 0, sizeof (*SKEIN_CTX(ctx))); kmem_free(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx))); return (error); } @@ -640,40 +573,38 @@ errout: * The MAC update and final calls are reused from the regular digest code. */ -/*ARGSUSED*/ /* * Same as skein_digest_atomic, performs an atomic Skein MAC operation in * one step. All the same properties apply to the arguments of this * function as to those of the partial operations above. */ static int -skein_mac_atomic(crypto_provider_handle_t provider, - crypto_session_id_t session_id, crypto_mechanism_t *mechanism, +skein_mac_atomic(crypto_mechanism_t *mechanism, crypto_key_t *key, crypto_data_t *data, crypto_data_t *mac, - crypto_spi_ctx_template_t ctx_template, crypto_req_handle_t req) + crypto_spi_ctx_template_t ctx_template) { /* faux crypto context just for skein_digest_{update,final} */ - int error; - crypto_ctx_t ctx; - skein_ctx_t skein_ctx; + int error; + crypto_ctx_t ctx; + skein_ctx_t skein_ctx; SKEIN_CTX_LVALUE(&ctx) = &skein_ctx; if (ctx_template != NULL) { - bcopy(ctx_template, &skein_ctx, sizeof (skein_ctx)); + memcpy(&skein_ctx, ctx_template, sizeof (skein_ctx)); } else { error = skein_mac_ctx_build(&skein_ctx, mechanism, key); if (error != CRYPTO_SUCCESS) goto errout; } - if ((error = skein_update(&ctx, data, req)) != CRYPTO_SUCCESS) + if ((error = skein_update(&ctx, data)) != CRYPTO_SUCCESS) goto errout; - if ((error = skein_final(&ctx, mac, req)) != CRYPTO_SUCCESS) + if ((error = skein_final_nofree(&ctx, mac)) != CRYPTO_SUCCESS) goto errout; return (CRYPTO_SUCCESS); errout: 
- bzero(&skein_ctx, sizeof (skein_ctx)); + memset(&skein_ctx, 0, sizeof (skein_ctx)); return (error); } @@ -686,17 +617,14 @@ errout: * properties apply to the arguments of this function as to those of * skein_mac_init. */ -/*ARGSUSED*/ static int -skein_create_ctx_template(crypto_provider_handle_t provider, - crypto_mechanism_t *mechanism, crypto_key_t *key, - crypto_spi_ctx_template_t *ctx_template, size_t *ctx_template_size, - crypto_req_handle_t req) +skein_create_ctx_template(crypto_mechanism_t *mechanism, crypto_key_t *key, + crypto_spi_ctx_template_t *ctx_template, size_t *ctx_template_size) { - int error; - skein_ctx_t *ctx_tmpl; + int error; + skein_ctx_t *ctx_tmpl; - ctx_tmpl = kmem_alloc(sizeof (*ctx_tmpl), crypto_kmflag(req)); + ctx_tmpl = kmem_alloc(sizeof (*ctx_tmpl), KM_SLEEP); if (ctx_tmpl == NULL) return (CRYPTO_HOST_MEMORY); error = skein_mac_ctx_build(ctx_tmpl, mechanism, key); @@ -707,7 +635,7 @@ skein_create_ctx_template(crypto_provider_handle_t provider, return (CRYPTO_SUCCESS); errout: - bzero(ctx_tmpl, sizeof (*ctx_tmpl)); + memset(ctx_tmpl, 0, sizeof (*ctx_tmpl)); kmem_free(ctx_tmpl, sizeof (*ctx_tmpl)); return (error); } @@ -719,7 +647,7 @@ static int skein_free_context(crypto_ctx_t *ctx) { if (SKEIN_CTX(ctx) != NULL) { - bzero(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx))); + memset(SKEIN_CTX(ctx), 0, sizeof (*SKEIN_CTX(ctx))); kmem_free(SKEIN_CTX(ctx), sizeof (*SKEIN_CTX(ctx))); SKEIN_CTX_LVALUE(ctx) = NULL; } diff --git a/sys/contrib/openzfs/module/icp/os/modconf.c b/sys/contrib/openzfs/module/icp/os/modconf.c deleted file mode 100644 index 3743416ed951..000000000000 --- a/sys/contrib/openzfs/module/icp/os/modconf.c +++ /dev/null @@ -1,173 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. 
- * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. - * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -#include <sys/zfs_context.h> -#include <sys/modctl.h> - -/* - * Null operations; used for uninitialized and "misc" modules. - */ -static int mod_null(struct modlmisc *, struct modlinkage *); -static int mod_infonull(void *, struct modlinkage *, int *); - -/* - * Cryptographic Modules - */ -struct mod_ops mod_cryptoops = { - .modm_install = mod_null, - .modm_remove = mod_null, - .modm_info = mod_infonull -}; - -/* - * Null operation; return 0. - */ -static int -mod_null(struct modlmisc *modl, struct modlinkage *modlp) -{ - return (0); -} - -/* - * Status for User modules. - */ -static int -mod_infonull(void *modl, struct modlinkage *modlp, int *p0) -{ - *p0 = -1; /* for modinfo display */ - return (0); -} - -/* - * Install a module. 
- * (This routine is in the Solaris SPARC DDI/DKI) - */ -int -mod_install(struct modlinkage *modlp) -{ - int retval = -1; /* No linkage structures */ - struct modlmisc **linkpp; - struct modlmisc **linkpp1; - - if (modlp->ml_rev != MODREV_1) { - cmn_err(CE_WARN, "mod_install: " - "modlinkage structure is not MODREV_1\n"); - return (EINVAL); - } - linkpp = (struct modlmisc **)&modlp->ml_linkage[0]; - - while (*linkpp != NULL) { - if ((retval = MODL_INSTALL(*linkpp, modlp)) != 0) { - linkpp1 = (struct modlmisc **)&modlp->ml_linkage[0]; - - while (linkpp1 != linkpp) { - MODL_REMOVE(*linkpp1, modlp); /* clean up */ - linkpp1++; - } - break; - } - linkpp++; - } - return (retval); -} - -static char *reins_err = - "Could not reinstall %s\nReboot to correct the problem"; - -/* - * Remove a module. This is called by the module wrapper routine. - * (This routine is in the Solaris SPARC DDI/DKI) - */ -int -mod_remove(struct modlinkage *modlp) -{ - int retval = 0; - struct modlmisc **linkpp, *last_linkp; - - linkpp = (struct modlmisc **)&modlp->ml_linkage[0]; - - while (*linkpp != NULL) { - if ((retval = MODL_REMOVE(*linkpp, modlp)) != 0) { - last_linkp = *linkpp; - linkpp = (struct modlmisc **)&modlp->ml_linkage[0]; - while (*linkpp != last_linkp) { - if (MODL_INSTALL(*linkpp, modlp) != 0) { - cmn_err(CE_WARN, reins_err, - (*linkpp)->misc_linkinfo); - break; - } - linkpp++; - } - break; - } - linkpp++; - } - return (retval); -} - -/* - * Get module status. 
- * (This routine is in the Solaris SPARC DDI/DKI) - */ -int -mod_info(struct modlinkage *modlp, struct modinfo *modinfop) -{ - int i; - int retval = 0; - struct modspecific_info *msip; - struct modlmisc **linkpp; - - modinfop->mi_rev = modlp->ml_rev; - - linkpp = (struct modlmisc **)modlp->ml_linkage; - msip = &modinfop->mi_msinfo[0]; - - for (i = 0; i < MODMAXLINK; i++) { - if (*linkpp == NULL) { - msip->msi_linkinfo[0] = '\0'; - } else { - (void) strlcpy(msip->msi_linkinfo, - (*linkpp)->misc_linkinfo, MODMAXLINKINFOLEN); - retval = MODL_INFO(*linkpp, modlp, &msip->msi_p0); - if (retval != 0) - break; - linkpp++; - } - msip++; - } - - if (modinfop->mi_info == MI_INFO_LINKAGE) { - /* - * Slight kludge used to extract the address of the - * modlinkage structure from the module (just after - * loading a module for the very first time) - */ - modinfop->mi_base = (void *)modlp; - } - - if (retval == 0) - return (1); - return (0); -} diff --git a/sys/contrib/openzfs/module/icp/os/modhash.c b/sys/contrib/openzfs/module/icp/os/modhash.c deleted file mode 100644 index a897871001ce..000000000000 --- a/sys/contrib/openzfs/module/icp/os/modhash.c +++ /dev/null @@ -1,927 +0,0 @@ -/* - * CDDL HEADER START - * - * The contents of this file are subject to the terms of the - * Common Development and Distribution License (the "License"). - * You may not use this file except in compliance with the License. - * - * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. - * See the License for the specific language governing permissions - * and limitations under the License. - * - * When distributing Covered Code, include this CDDL HEADER in each - * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 
- * If applicable, add the following below this CDDL HEADER, with the - * fields enclosed by brackets "[]" replaced with your own identifying - * information: Portions Copyright [yyyy] [name of copyright owner] - * - * CDDL HEADER END - */ -/* - * Copyright 2008 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. - */ - -/* - * mod_hash: flexible hash table implementation. - * - * This is a reasonably fast, reasonably flexible hash table implementation - * which features pluggable hash algorithms to support storing arbitrary keys - * and values. It is designed to handle small (< 100,000 items) amounts of - * data. The hash uses chaining to resolve collisions, and does not feature a - * mechanism to grow the hash. Care must be taken to pick nchains to be large - * enough for the application at hand, or lots of time will be wasted searching - * hash chains. - * - * The client of the hash is required to supply a number of items to support - * the various hash functions: - * - * - Destructor functions for the key and value being hashed. - * A destructor is responsible for freeing an object when the hash - * table is no longer storing it. Since keys and values can be of - * arbitrary type, separate destructors for keys & values are used. - * These may be mod_hash_null_keydtor and mod_hash_null_valdtor if no - * destructor is needed for either a key or value. - * - * - A hashing algorithm which returns a uint_t representing a hash index - * The number returned need _not_ be between 0 and nchains. The mod_hash - * code will take care of doing that. The second argument (after the - * key) to the hashing function is a void * that represents - * hash_alg_data-- this is provided so that the hashing algorithm can - * maintain some state across calls, or keep algorithm-specific - * constants associated with the hash table. - * - * A pointer-hashing and a string-hashing algorithm are supplied in - * this file. - * - * - A key comparator (a la qsort). 
- * This is used when searching the hash chain. The key comparator - * determines if two keys match. It should follow the return value - * semantics of strcmp. - * - * string and pointer comparators are supplied in this file. - * - * mod_hash_create_strhash() and mod_hash_create_ptrhash() provide good - * examples of how to create a customized hash table. - * - * Basic hash operations: - * - * mod_hash_create_strhash(name, nchains, dtor), - * create a hash using strings as keys. - * NOTE: This create a hash which automatically cleans up the string - * values it is given for keys. - * - * mod_hash_create_ptrhash(name, nchains, dtor, key_elem_size): - * create a hash using pointers as keys. - * - * mod_hash_create_extended(name, nchains, kdtor, vdtor, - * hash_alg, hash_alg_data, - * keycmp, sleep) - * create a customized hash table. - * - * mod_hash_destroy_hash(hash): - * destroy the given hash table, calling the key and value destructors - * on each key-value pair stored in the hash. - * - * mod_hash_insert(hash, key, val): - * place a key, value pair into the given hash. - * duplicate keys are rejected. - * - * mod_hash_insert_reserve(hash, key, val, handle): - * place a key, value pair into the given hash, using handle to indicate - * the reserved storage for the pair. (no memory allocation is needed - * during a mod_hash_insert_reserve.) duplicate keys are rejected. - * - * mod_hash_reserve(hash, *handle): - * reserve storage for a key-value pair using the memory allocation - * policy of 'hash', returning the storage handle in 'handle'. - * - * mod_hash_reserve_nosleep(hash, *handle): reserve storage for a key-value - * pair ignoring the memory allocation policy of 'hash' and always without - * sleep, returning the storage handle in 'handle'. - * - * mod_hash_remove(hash, key, *val): - * remove a key-value pair with key 'key' from 'hash', destroying the - * stored key, and returning the value in val. 
- * - * mod_hash_replace(hash, key, val) - * atomically remove an existing key-value pair from a hash, and replace - * the key and value with the ones supplied. The removed key and value - * (if any) are destroyed. - * - * mod_hash_destroy(hash, key): - * remove a key-value pair with key 'key' from 'hash', destroying both - * stored key and stored value. - * - * mod_hash_find(hash, key, val): - * find a value in the hash table corresponding to the given key. - * - * mod_hash_find_cb(hash, key, val, found_callback) - * find a value in the hash table corresponding to the given key. - * If a value is found, call specified callback passing key and val to it. - * The callback is called with the hash lock held. - * It is intended to be used in situations where the act of locating the - * data must also modify it - such as in reference counting schemes. - * - * mod_hash_walk(hash, callback(key, elem, arg), arg) - * walks all the elements in the hashtable and invokes the callback - * function with the key/value pair for each element. the hashtable - * is locked for readers so the callback function should not attempt - * to do any updates to the hashable. the callback function should - * return MH_WALK_CONTINUE to continue walking the hashtable or - * MH_WALK_TERMINATE to abort the walk of the hashtable. - * - * mod_hash_clear(hash): - * clears the given hash table of entries, calling the key and value - * destructors for every element in the hash. - */ - -#include <sys/zfs_context.h> -#include <sys/bitmap.h> -#include <sys/modhash_impl.h> -#include <sys/sysmacros.h> - -/* - * MH_KEY_DESTROY() - * Invoke the key destructor. - */ -#define MH_KEY_DESTROY(hash, key) ((hash->mh_kdtor)(key)) - -/* - * MH_VAL_DESTROY() - * Invoke the value destructor. - */ -#define MH_VAL_DESTROY(hash, val) ((hash->mh_vdtor)(val)) - -/* - * MH_KEYCMP() - * Call the key comparator for the given hash keys. 
- */ -#define MH_KEYCMP(hash, key1, key2) ((hash->mh_keycmp)(key1, key2)) - -/* - * Cache for struct mod_hash_entry - */ -kmem_cache_t *mh_e_cache = NULL; -mod_hash_t *mh_head = NULL; -kmutex_t mh_head_lock; - -/* - * mod_hash_null_keydtor() - * mod_hash_null_valdtor() - * no-op key and value destructors. - */ -/*ARGSUSED*/ -void -mod_hash_null_keydtor(mod_hash_key_t key) -{ -} - -/*ARGSUSED*/ -void -mod_hash_null_valdtor(mod_hash_val_t val) -{ -} - -/* - * mod_hash_bystr() - * mod_hash_strkey_cmp() - * mod_hash_strkey_dtor() - * mod_hash_strval_dtor() - * Hash and key comparison routines for hashes with string keys. - * - * mod_hash_create_strhash() - * Create a hash using strings as keys - * - * The string hashing algorithm is from the "Dragon Book" -- - * "Compilers: Principles, Tools & Techniques", by Aho, Sethi, Ullman - */ - -/*ARGSUSED*/ -uint_t -mod_hash_bystr(void *hash_data, mod_hash_key_t key) -{ - uint_t hash = 0; - uint_t g; - char *p, *k = (char *)key; - - ASSERT(k); - for (p = k; *p != '\0'; p++) { - hash = (hash << 4) + *p; - if ((g = (hash & 0xf0000000)) != 0) { - hash ^= (g >> 24); - hash ^= g; - } - } - return (hash); -} - -int -mod_hash_strkey_cmp(mod_hash_key_t key1, mod_hash_key_t key2) -{ - return (strcmp((char *)key1, (char *)key2)); -} - -void -mod_hash_strkey_dtor(mod_hash_key_t key) -{ - char *c = (char *)key; - kmem_free(c, strlen(c) + 1); -} - -void -mod_hash_strval_dtor(mod_hash_val_t val) -{ - char *c = (char *)val; - kmem_free(c, strlen(c) + 1); -} - -mod_hash_t * -mod_hash_create_strhash_nodtr(char *name, size_t nchains, - void (*val_dtor)(mod_hash_val_t)) -{ - return mod_hash_create_extended(name, nchains, mod_hash_null_keydtor, - val_dtor, mod_hash_bystr, NULL, mod_hash_strkey_cmp, KM_SLEEP); -} - -mod_hash_t * -mod_hash_create_strhash(char *name, size_t nchains, - void (*val_dtor)(mod_hash_val_t)) -{ - return mod_hash_create_extended(name, nchains, mod_hash_strkey_dtor, - val_dtor, mod_hash_bystr, NULL, mod_hash_strkey_cmp, 
KM_SLEEP); -} - -void -mod_hash_destroy_strhash(mod_hash_t *strhash) -{ - ASSERT(strhash); - mod_hash_destroy_hash(strhash); -} - - -/* - * mod_hash_byptr() - * mod_hash_ptrkey_cmp() - * Hash and key comparison routines for hashes with pointer keys. - * - * mod_hash_create_ptrhash() - * mod_hash_destroy_ptrhash() - * Create a hash that uses pointers as keys. This hash algorithm - * picks an appropriate set of middle bits in the address to hash on - * based on the size of the hash table and a hint about the size of - * the items pointed at. - */ -uint_t -mod_hash_byptr(void *hash_data, mod_hash_key_t key) -{ - uintptr_t k = (uintptr_t)key; - k >>= (int)(uintptr_t)hash_data; - - return ((uint_t)k); -} - -int -mod_hash_ptrkey_cmp(mod_hash_key_t key1, mod_hash_key_t key2) -{ - uintptr_t k1 = (uintptr_t)key1; - uintptr_t k2 = (uintptr_t)key2; - if (k1 > k2) - return (-1); - else if (k1 < k2) - return (1); - else - return (0); -} - -mod_hash_t * -mod_hash_create_ptrhash(char *name, size_t nchains, - void (*val_dtor)(mod_hash_val_t), size_t key_elem_size) -{ - size_t rshift; - - /* - * We want to hash on the bits in the middle of the address word - * Bits far to the right in the word have little significance, and - * are likely to all look the same (for example, an array of - * 256-byte structures will have the bottom 8 bits of address - * words the same). So we want to right-shift each address to - * ignore the bottom bits. - * - * The high bits, which are also unused, will get taken out when - * mod_hash takes hashkey % nchains. - */ - rshift = highbit64(key_elem_size); - - return mod_hash_create_extended(name, nchains, mod_hash_null_keydtor, - val_dtor, mod_hash_byptr, (void *)rshift, mod_hash_ptrkey_cmp, - KM_SLEEP); -} - -void -mod_hash_destroy_ptrhash(mod_hash_t *hash) -{ - ASSERT(hash); - mod_hash_destroy_hash(hash); -} - -/* - * mod_hash_byid() - * mod_hash_idkey_cmp() - * Hash and key comparison routines for hashes with 32-bit unsigned keys. 
- * - * mod_hash_create_idhash() - * mod_hash_destroy_idhash() - * mod_hash_iddata_gen() - * Create a hash that uses numeric keys. - * - * The hash algorithm is documented in "Introduction to Algorithms" - * (Cormen, Leiserson, Rivest); when the hash table is created, it - * attempts to find the next largest prime above the number of hash - * slots. The hash index is then this number times the key modulo - * the hash size, or (key * prime) % nchains. - */ -uint_t -mod_hash_byid(void *hash_data, mod_hash_key_t key) -{ - uint_t kval = (uint_t)(uintptr_t)hash_data; - return ((uint_t)(uintptr_t)key * (uint_t)kval); -} - -int -mod_hash_idkey_cmp(mod_hash_key_t key1, mod_hash_key_t key2) -{ - return ((uint_t)(uintptr_t)key1 - (uint_t)(uintptr_t)key2); -} - -/* - * Generate the next largest prime number greater than nchains; this value - * is intended to be later passed in to mod_hash_create_extended() as the - * hash_data. - */ -uint_t -mod_hash_iddata_gen(size_t nchains) -{ - uint_t kval, i, prime; - - /* - * Pick the first (odd) prime greater than nchains. Make sure kval is - * odd (so start with nchains +1 or +2 as appropriate). - */ - kval = (nchains % 2 == 0) ? 
nchains + 1 : nchains + 2; - - for (;;) { - prime = 1; - for (i = 3; i * i <= kval; i += 2) { - if (kval % i == 0) - prime = 0; - } - if (prime == 1) - break; - kval += 2; - } - return (kval); -} - -mod_hash_t * -mod_hash_create_idhash(char *name, size_t nchains, - void (*val_dtor)(mod_hash_val_t)) -{ - uint_t kval = mod_hash_iddata_gen(nchains); - - return (mod_hash_create_extended(name, nchains, mod_hash_null_keydtor, - val_dtor, mod_hash_byid, (void *)(uintptr_t)kval, - mod_hash_idkey_cmp, KM_SLEEP)); -} - -void -mod_hash_destroy_idhash(mod_hash_t *hash) -{ - ASSERT(hash); - mod_hash_destroy_hash(hash); -} - -void -mod_hash_fini(void) -{ - mutex_destroy(&mh_head_lock); - - if (mh_e_cache) { - kmem_cache_destroy(mh_e_cache); - mh_e_cache = NULL; - } -} - -/* - * mod_hash_init() - * sets up globals, etc for mod_hash_* - */ -void -mod_hash_init(void) -{ - ASSERT(mh_e_cache == NULL); - mh_e_cache = kmem_cache_create("mod_hash_entries", - sizeof (struct mod_hash_entry), 0, NULL, NULL, NULL, NULL, - NULL, 0); - - mutex_init(&mh_head_lock, NULL, MUTEX_DEFAULT, NULL); -} - -/* - * mod_hash_create_extended() - * The full-blown hash creation function. - * - * notes: - * nchains - how many hash slots to create. More hash slots will - * result in shorter hash chains, but will consume - * slightly more memory up front. - * sleep - should be KM_SLEEP or KM_NOSLEEP, to indicate whether - * to sleep for memory, or fail in low-memory conditions. - * - * Fails only if KM_NOSLEEP was specified, and no memory was available. 
- */ -mod_hash_t * -mod_hash_create_extended( - char *hname, /* descriptive name for hash */ - size_t nchains, /* number of hash slots */ - void (*kdtor)(mod_hash_key_t), /* key destructor */ - void (*vdtor)(mod_hash_val_t), /* value destructor */ - uint_t (*hash_alg)(void *, mod_hash_key_t), /* hash algorithm */ - void *hash_alg_data, /* pass-thru arg for hash_alg */ - int (*keycmp)(mod_hash_key_t, mod_hash_key_t), /* key comparator */ - int sleep) /* whether to sleep for mem */ -{ - mod_hash_t *mod_hash; - size_t size; - ASSERT(hname && keycmp && hash_alg && vdtor && kdtor); - - if ((mod_hash = kmem_zalloc(MH_SIZE(nchains), sleep)) == NULL) - return (NULL); - - size = strlen(hname) + 1; - mod_hash->mh_name = kmem_alloc(size, sleep); - if (mod_hash->mh_name == NULL) { - kmem_free(mod_hash, MH_SIZE(nchains)); - return (NULL); - } - (void) strlcpy(mod_hash->mh_name, hname, size); - - rw_init(&mod_hash->mh_contents, NULL, RW_DEFAULT, NULL); - mod_hash->mh_sleep = sleep; - mod_hash->mh_nchains = nchains; - mod_hash->mh_kdtor = kdtor; - mod_hash->mh_vdtor = vdtor; - mod_hash->mh_hashalg = hash_alg; - mod_hash->mh_hashalg_data = hash_alg_data; - mod_hash->mh_keycmp = keycmp; - - /* - * Link the hash up on the list of hashes - */ - mutex_enter(&mh_head_lock); - mod_hash->mh_next = mh_head; - mh_head = mod_hash; - mutex_exit(&mh_head_lock); - - return (mod_hash); -} - -/* - * mod_hash_destroy_hash() - * destroy a hash table, destroying all of its stored keys and values - * as well. - */ -void -mod_hash_destroy_hash(mod_hash_t *hash) -{ - mod_hash_t *mhp, *mhpp; - - mutex_enter(&mh_head_lock); - /* - * Remove the hash from the hash list - */ - if (hash == mh_head) { /* removing 1st list elem */ - mh_head = mh_head->mh_next; - } else { - /* - * mhpp can start out NULL since we know the 1st elem isn't the - * droid we're looking for. 
- */ - mhpp = NULL; - for (mhp = mh_head; mhp != NULL; mhp = mhp->mh_next) { - if (mhp == hash) { - mhpp->mh_next = mhp->mh_next; - break; - } - mhpp = mhp; - } - } - mutex_exit(&mh_head_lock); - - /* - * Clean out keys and values. - */ - mod_hash_clear(hash); - - rw_destroy(&hash->mh_contents); - kmem_free(hash->mh_name, strlen(hash->mh_name) + 1); - kmem_free(hash, MH_SIZE(hash->mh_nchains)); -} - -/* - * i_mod_hash() - * Call the hashing algorithm for this hash table, with the given key. - */ -uint_t -i_mod_hash(mod_hash_t *hash, mod_hash_key_t key) -{ - uint_t h; - /* - * Prevent div by 0 problems; - * Also a nice shortcut when using a hash as a list - */ - if (hash->mh_nchains == 1) - return (0); - - h = (hash->mh_hashalg)(hash->mh_hashalg_data, key); - return (h % (hash->mh_nchains - 1)); -} - -/* - * i_mod_hash_insert_nosync() - * mod_hash_insert() - * mod_hash_insert_reserve() - * insert 'val' into the hash table, using 'key' as its key. If 'key' is - * already a key in the hash, an error will be returned, and the key-val - * pair will not be inserted. i_mod_hash_insert_nosync() supports a simple - * handle abstraction, allowing hash entry allocation to be separated from - * the hash insertion. this abstraction allows simple use of the mod_hash - * structure in situations where mod_hash_insert() with a KM_SLEEP - * allocation policy would otherwise be unsafe. - */ -int -i_mod_hash_insert_nosync(mod_hash_t *hash, mod_hash_key_t key, - mod_hash_val_t val, mod_hash_hndl_t handle) -{ - uint_t hashidx; - struct mod_hash_entry *entry; - - ASSERT(hash); - - /* - * If we've not been given reserved storage, allocate storage directly, - * using the hash's allocation policy. 
- */ - if (handle == (mod_hash_hndl_t)0) { - entry = kmem_cache_alloc(mh_e_cache, hash->mh_sleep); - if (entry == NULL) { - hash->mh_stat.mhs_nomem++; - return (MH_ERR_NOMEM); - } - } else { - entry = (struct mod_hash_entry *)handle; - } - - hashidx = i_mod_hash(hash, key); - entry->mhe_key = key; - entry->mhe_val = val; - entry->mhe_next = hash->mh_entries[hashidx]; - - hash->mh_entries[hashidx] = entry; - hash->mh_stat.mhs_nelems++; - - return (0); -} - -int -mod_hash_insert(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t val) -{ - int res; - mod_hash_val_t v; - - rw_enter(&hash->mh_contents, RW_WRITER); - - /* - * Disallow duplicate keys in the hash - */ - if (i_mod_hash_find_nosync(hash, key, &v) == 0) { - rw_exit(&hash->mh_contents); - hash->mh_stat.mhs_coll++; - return (MH_ERR_DUPLICATE); - } - - res = i_mod_hash_insert_nosync(hash, key, val, (mod_hash_hndl_t)0); - rw_exit(&hash->mh_contents); - - return (res); -} - -int -mod_hash_insert_reserve(mod_hash_t *hash, mod_hash_key_t key, - mod_hash_val_t val, mod_hash_hndl_t handle) -{ - int res; - mod_hash_val_t v; - - rw_enter(&hash->mh_contents, RW_WRITER); - - /* - * Disallow duplicate keys in the hash - */ - if (i_mod_hash_find_nosync(hash, key, &v) == 0) { - rw_exit(&hash->mh_contents); - hash->mh_stat.mhs_coll++; - return (MH_ERR_DUPLICATE); - } - res = i_mod_hash_insert_nosync(hash, key, val, handle); - rw_exit(&hash->mh_contents); - - return (res); -} - -/* - * mod_hash_reserve() - * mod_hash_reserve_nosleep() - * mod_hash_cancel() - * Make or cancel a mod_hash_entry_t reservation. Reservations are used in - * mod_hash_insert_reserve() above. 
- */ -int -mod_hash_reserve(mod_hash_t *hash, mod_hash_hndl_t *handlep) -{ - *handlep = kmem_cache_alloc(mh_e_cache, hash->mh_sleep); - if (*handlep == NULL) { - hash->mh_stat.mhs_nomem++; - return (MH_ERR_NOMEM); - } - - return (0); -} - -int -mod_hash_reserve_nosleep(mod_hash_t *hash, mod_hash_hndl_t *handlep) -{ - *handlep = kmem_cache_alloc(mh_e_cache, KM_NOSLEEP); - if (*handlep == NULL) { - hash->mh_stat.mhs_nomem++; - return (MH_ERR_NOMEM); - } - - return (0); - -} - -/*ARGSUSED*/ -void -mod_hash_cancel(mod_hash_t *hash, mod_hash_hndl_t *handlep) -{ - kmem_cache_free(mh_e_cache, *handlep); - *handlep = (mod_hash_hndl_t)0; -} - -/* - * i_mod_hash_remove_nosync() - * mod_hash_remove() - * Remove an element from the hash table. - */ -int -i_mod_hash_remove_nosync(mod_hash_t *hash, mod_hash_key_t key, - mod_hash_val_t *val) -{ - int hashidx; - struct mod_hash_entry *e, *ep; - - hashidx = i_mod_hash(hash, key); - ep = NULL; /* e's parent */ - - for (e = hash->mh_entries[hashidx]; e != NULL; e = e->mhe_next) { - if (MH_KEYCMP(hash, e->mhe_key, key) == 0) - break; - ep = e; - } - - if (e == NULL) { /* not found */ - return (MH_ERR_NOTFOUND); - } - - if (ep == NULL) /* special case 1st element in bucket */ - hash->mh_entries[hashidx] = e->mhe_next; - else - ep->mhe_next = e->mhe_next; - - /* - * Clean up resources used by the node's key. - */ - MH_KEY_DESTROY(hash, e->mhe_key); - - *val = e->mhe_val; - kmem_cache_free(mh_e_cache, e); - hash->mh_stat.mhs_nelems--; - - return (0); -} - -int -mod_hash_remove(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t *val) -{ - int res; - - rw_enter(&hash->mh_contents, RW_WRITER); - res = i_mod_hash_remove_nosync(hash, key, val); - rw_exit(&hash->mh_contents); - - return (res); -} - -/* - * mod_hash_replace() - * atomically remove an existing key-value pair from a hash, and replace - * the key and value with the ones supplied. The removed key and value - * (if any) are destroyed. 
- */ -int -mod_hash_replace(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t val) -{ - int res; - mod_hash_val_t v; - - rw_enter(&hash->mh_contents, RW_WRITER); - - if (i_mod_hash_remove_nosync(hash, key, &v) == 0) { - /* - * mod_hash_remove() takes care of freeing up the key resources. - */ - MH_VAL_DESTROY(hash, v); - } - res = i_mod_hash_insert_nosync(hash, key, val, (mod_hash_hndl_t)0); - - rw_exit(&hash->mh_contents); - - return (res); -} - -/* - * mod_hash_destroy() - * Remove an element from the hash table matching 'key', and destroy it. - */ -int -mod_hash_destroy(mod_hash_t *hash, mod_hash_key_t key) -{ - mod_hash_val_t val; - int rv; - - rw_enter(&hash->mh_contents, RW_WRITER); - - if ((rv = i_mod_hash_remove_nosync(hash, key, &val)) == 0) { - /* - * mod_hash_remove() takes care of freeing up the key resources. - */ - MH_VAL_DESTROY(hash, val); - } - - rw_exit(&hash->mh_contents); - return (rv); -} - -/* - * i_mod_hash_find_nosync() - * mod_hash_find() - * Find a value in the hash table corresponding to the given key. 
- */ -int -i_mod_hash_find_nosync(mod_hash_t *hash, mod_hash_key_t key, - mod_hash_val_t *val) -{ - uint_t hashidx; - struct mod_hash_entry *e; - - hashidx = i_mod_hash(hash, key); - - for (e = hash->mh_entries[hashidx]; e != NULL; e = e->mhe_next) { - if (MH_KEYCMP(hash, e->mhe_key, key) == 0) { - *val = e->mhe_val; - hash->mh_stat.mhs_hit++; - return (0); - } - } - hash->mh_stat.mhs_miss++; - return (MH_ERR_NOTFOUND); -} - -int -mod_hash_find(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t *val) -{ - int res; - - rw_enter(&hash->mh_contents, RW_READER); - res = i_mod_hash_find_nosync(hash, key, val); - rw_exit(&hash->mh_contents); - - return (res); -} - -int -mod_hash_find_cb(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t *val, - void (*find_cb)(mod_hash_key_t, mod_hash_val_t)) -{ - int res; - - rw_enter(&hash->mh_contents, RW_READER); - res = i_mod_hash_find_nosync(hash, key, val); - if (res == 0) { - find_cb(key, *val); - } - rw_exit(&hash->mh_contents); - - return (res); -} - -int -mod_hash_find_cb_rval(mod_hash_t *hash, mod_hash_key_t key, mod_hash_val_t *val, - int (*find_cb)(mod_hash_key_t, mod_hash_val_t), int *cb_rval) -{ - int res; - - rw_enter(&hash->mh_contents, RW_READER); - res = i_mod_hash_find_nosync(hash, key, val); - if (res == 0) { - *cb_rval = find_cb(key, *val); - } - rw_exit(&hash->mh_contents); - - return (res); -} - -void -i_mod_hash_walk_nosync(mod_hash_t *hash, - uint_t (*callback)(mod_hash_key_t, mod_hash_val_t *, void *), void *arg) -{ - struct mod_hash_entry *e; - uint_t hashidx; - int res = MH_WALK_CONTINUE; - - for (hashidx = 0; - (hashidx < (hash->mh_nchains - 1)) && (res == MH_WALK_CONTINUE); - hashidx++) { - e = hash->mh_entries[hashidx]; - while ((e != NULL) && (res == MH_WALK_CONTINUE)) { - res = callback(e->mhe_key, e->mhe_val, arg); - e = e->mhe_next; - } - } -} - -/* - * mod_hash_walk() - * Walks all the elements in the hashtable and invokes the callback - * function with the key/value pair for each element. 
The hashtable - * is locked for readers so the callback function should not attempt - * to do any updates to the hashable. The callback function should - * return MH_WALK_CONTINUE to continue walking the hashtable or - * MH_WALK_TERMINATE to abort the walk of the hashtable. - */ -void -mod_hash_walk(mod_hash_t *hash, - uint_t (*callback)(mod_hash_key_t, mod_hash_val_t *, void *), void *arg) -{ - rw_enter(&hash->mh_contents, RW_READER); - i_mod_hash_walk_nosync(hash, callback, arg); - rw_exit(&hash->mh_contents); -} - - -/* - * i_mod_hash_clear_nosync() - * mod_hash_clear() - * Clears the given hash table by calling the destructor of every hash - * element and freeing up all mod_hash_entry's. - */ -void -i_mod_hash_clear_nosync(mod_hash_t *hash) -{ - int i; - struct mod_hash_entry *e, *old_e; - - for (i = 0; i < hash->mh_nchains; i++) { - e = hash->mh_entries[i]; - while (e != NULL) { - MH_KEY_DESTROY(hash, e->mhe_key); - MH_VAL_DESTROY(hash, e->mhe_val); - old_e = e; - e = e->mhe_next; - kmem_cache_free(mh_e_cache, old_e); - } - hash->mh_entries[i] = NULL; - } - hash->mh_stat.mhs_nelems = 0; -} - -void -mod_hash_clear(mod_hash_t *hash) -{ - ASSERT(hash); - rw_enter(&hash->mh_contents, RW_WRITER); - i_mod_hash_clear_nosync(hash); - rw_exit(&hash->mh_contents); -} diff --git a/sys/contrib/openzfs/module/icp/spi/kcf_spi.c b/sys/contrib/openzfs/module/icp/spi/kcf_spi.c index 34b36b81c0ab..b0af101990ed 100644 --- a/sys/contrib/openzfs/module/icp/spi/kcf_spi.c +++ b/sys/contrib/openzfs/module/icp/spi/kcf_spi.c @@ -6,7 +6,7 @@ * You may not use this file except in compliance with the License. * * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE - * or http://www.opensolaris.org/os/licensing. + * or https://opensource.org/licenses/CDDL-1.0. * See the License for the specific language governing permissions * and limitations under the License. 
* @@ -36,146 +36,35 @@ #include <sys/crypto/sched_impl.h> #include <sys/crypto/spi.h> -/* - * minalloc and maxalloc values to be used for taskq_create(). - */ -int crypto_taskq_threads = CRYPTO_TASKQ_THREADS; -int crypto_taskq_minalloc = CRYPTO_TASKQ_MIN; -int crypto_taskq_maxalloc = CRYPTO_TASKQ_MAX; - -static void remove_provider(kcf_provider_desc_t *); -static void process_logical_providers(crypto_provider_info_t *, +static int init_prov_mechs(const crypto_provider_info_t *, kcf_provider_desc_t *); -static int init_prov_mechs(crypto_provider_info_t *, kcf_provider_desc_t *); -static int kcf_prov_kstat_update(kstat_t *, int); -static void delete_kstat(kcf_provider_desc_t *); - -static kcf_prov_stats_t kcf_stats_ks_data_template = { - { "kcf_ops_total", KSTAT_DATA_UINT64 }, - { "kcf_ops_passed", KSTAT_DATA_UINT64 }, - { "kcf_ops_failed", KSTAT_DATA_UINT64 }, - { "kcf_ops_returned_busy", KSTAT_DATA_UINT64 } -}; - -#define KCF_SPI_COPY_OPS(src, dst, ops) if ((src)->ops != NULL) \ - *((dst)->ops) = *((src)->ops); - -/* - * Copy an ops vector from src to dst. Used during provider registration - * to copy the ops vector from the provider info structure to the - * provider descriptor maintained by KCF. - * Copying the ops vector specified by the provider is needed since the - * framework does not require the provider info structure to be - * persistent. 
- */ -static void -copy_ops_vector_v1(crypto_ops_t *src_ops, crypto_ops_t *dst_ops) -{ - KCF_SPI_COPY_OPS(src_ops, dst_ops, co_control_ops); - KCF_SPI_COPY_OPS(src_ops, dst_ops, co_digest_ops); - KCF_SPI_COPY_OPS(src_ops, dst_ops, co_cipher_ops); - KCF_SPI_COPY_OPS(src_ops, dst_ops, co_mac_ops); - KCF_SPI_COPY_OPS(src_ops, dst_ops, co_sign_ops); - KCF_SPI_COPY_OPS(src_ops, dst_ops, co_verify_ops); - KCF_SPI_COPY_OPS(src_ops, dst_ops, co_dual_ops); - KCF_SPI_COPY_OPS(src_ops, dst_ops, co_dual_cipher_mac_ops); - KCF_SPI_COPY_OPS(src_ops, dst_ops, co_random_ops); - KCF_SPI_COPY_OPS(src_ops, dst_ops, co_session_ops); - KCF_SPI_COPY_OPS(src_ops, dst_ops, co_object_ops); - KCF_SPI_COPY_OPS(src_ops, dst_ops, co_key_ops); - KCF_SPI_COPY_OPS(src_ops, dst_ops, co_provider_ops); - KCF_SPI_COPY_OPS(src_ops, dst_ops, co_ctx_ops); -} - -static void -copy_ops_vector_v2(crypto_ops_t *src_ops, crypto_ops_t *dst_ops) -{ - KCF_SPI_COPY_OPS(src_ops, dst_ops, co_mech_ops); -} - -static void -copy_ops_vector_v3(crypto_ops_t *src_ops, crypto_ops_t *dst_ops) -{ - KCF_SPI_COPY_OPS(src_ops, dst_ops, co_nostore_key_ops); -} /* * This routine is used to add cryptographic providers to the KEF framework. * Providers pass a crypto_provider_info structure to crypto_register_provider() * and get back a handle. The crypto_provider_info structure contains a * list of mechanisms supported by the provider and an ops vector containing - * provider entry points. Hardware providers call this routine in their attach - * routines. Software providers call this routine in their _init() routine. + * provider entry points. Providers call this routine in their _init() routine. 
*/ int -crypto_register_provider(crypto_provider_info_t *info, +crypto_register_provider(const crypto_provider_info_t *info, crypto_kcf_provider_handle_t *handle) { - char *ks_name; - kcf_provider_desc_t *prov_desc = NULL; int ret = CRYPTO_ARGUMENTS_BAD; - if (info->pi_interface_version > CRYPTO_SPI_VERSION_3) - return (CRYPTO_VERSION_MISMATCH); - - /* - * Check provider type, must be software, hardware, or logical. - */ - if (info->pi_provider_type != CRYPTO_HW_PROVIDER && - info->pi_provider_type != CRYPTO_SW_PROVIDER && - info->pi_provider_type != CRYPTO_LOGICAL_PROVIDER) - return (CRYPTO_ARGUMENTS_BAD); - /* * Allocate and initialize a new provider descriptor. We also * hold it and release it when done. */ - prov_desc = kcf_alloc_provider_desc(info); + prov_desc = kcf_alloc_provider_desc(); KCF_PROV_REFHOLD(prov_desc); - prov_desc->pd_prov_type = info->pi_provider_type; - - /* provider-private handle, opaque to KCF */ - prov_desc->pd_prov_handle = info->pi_provider_handle; - /* copy provider description string */ - if (info->pi_provider_description != NULL) { - /* - * pi_provider_descriptor is a string that can contain - * up to CRYPTO_PROVIDER_DESCR_MAX_LEN + 1 characters - * INCLUDING the terminating null character. A bcopy() - * is necessary here as pd_description should not have - * a null character. See comments in kcf_alloc_provider_desc() - * for details on pd_description field. 
- */ - bcopy(info->pi_provider_description, prov_desc->pd_description, - MIN(strlen(info->pi_provider_description), - (size_t)CRYPTO_PROVIDER_DESCR_MAX_LEN)); - } + prov_desc->pd_description = info->pi_provider_description; - if (info->pi_provider_type != CRYPTO_LOGICAL_PROVIDER) { - if (info->pi_ops_vector == NULL) { - goto bail; - } - copy_ops_vector_v1(info->pi_ops_vector, - prov_desc->pd_ops_vector); - if (info->pi_interface_version >= CRYPTO_SPI_VERSION_2) { - copy_ops_vector_v2(info->pi_ops_vector, - prov_desc->pd_ops_vector); - prov_desc->pd_flags = info->pi_flags; - } - if (info->pi_interface_version == CRYPTO_SPI_VERSION_3) { - copy_ops_vector_v3(info->pi_ops_vector, - prov_desc->pd_ops_vector); - } - } - - /* object_ops and nostore_key_ops are mutually exclusive */ - if (prov_desc->pd_ops_vector->co_object_ops && - prov_desc->pd_ops_vector->co_nostore_key_ops) { - goto bail; - } + /* Change from Illumos: the ops vector is persistent. */ + prov_desc->pd_ops_vector = info->pi_ops_vector; /* process the mechanisms supported by the provider */ if ((ret = init_prov_mechs(info, prov_desc)) != CRYPTO_SUCCESS) @@ -191,86 +80,15 @@ crypto_register_provider(crypto_provider_info_t *info, } /* - * We create a taskq only for a hardware provider. The global - * software queue is used for software providers. We handle ordering + * The global queue is used for providers. We handle ordering * of multi-part requests in the taskq routine. So, it is safe to * have multiple threads for the taskq. We pass TASKQ_PREPOPULATE flag * to keep some entries cached to improve performance. 
*/ - if (prov_desc->pd_prov_type == CRYPTO_HW_PROVIDER) - prov_desc->pd_sched_info.ks_taskq = taskq_create("kcf_taskq", - crypto_taskq_threads, minclsyspri, - crypto_taskq_minalloc, crypto_taskq_maxalloc, - TASKQ_PREPOPULATE); - else - prov_desc->pd_sched_info.ks_taskq = NULL; - - /* no kernel session to logical providers */ - if (prov_desc->pd_prov_type != CRYPTO_LOGICAL_PROVIDER) { - /* - * Open a session for session-oriented providers. This session - * is used for all kernel consumers. This is fine as a provider - * is required to support multiple thread access to a session. - * We can do this only after the taskq has been created as we - * do a kcf_submit_request() to open the session. - */ - if (KCF_PROV_SESSION_OPS(prov_desc) != NULL) { - kcf_req_params_t params; - - KCF_WRAP_SESSION_OPS_PARAMS(¶ms, - KCF_OP_SESSION_OPEN, &prov_desc->pd_sid, 0, - CRYPTO_USER, NULL, 0, prov_desc); - ret = kcf_submit_request(prov_desc, NULL, NULL, ¶ms, - B_FALSE); - - if (ret != CRYPTO_SUCCESS) { - undo_register_provider(prov_desc, B_TRUE); - ret = CRYPTO_FAILED; - goto bail; - } - } - } - - if (prov_desc->pd_prov_type != CRYPTO_LOGICAL_PROVIDER) { - /* - * Create the kstat for this provider. There is a kstat - * installed for each successfully registered provider. - * This kstat is deleted, when the provider unregisters. 
- */ - if (prov_desc->pd_prov_type == CRYPTO_SW_PROVIDER) { - ks_name = kmem_asprintf("%s_%s", - "NONAME", "provider_stats"); - } else { - ks_name = kmem_asprintf("%s_%d_%u_%s", - "NONAME", 0, prov_desc->pd_prov_id, - "provider_stats"); - } - - prov_desc->pd_kstat = kstat_create("kcf", 0, ks_name, "crypto", - KSTAT_TYPE_NAMED, sizeof (kcf_prov_stats_t) / - sizeof (kstat_named_t), KSTAT_FLAG_VIRTUAL); - - if (prov_desc->pd_kstat != NULL) { - bcopy(&kcf_stats_ks_data_template, - &prov_desc->pd_ks_data, - sizeof (kcf_stats_ks_data_template)); - prov_desc->pd_kstat->ks_data = &prov_desc->pd_ks_data; - KCF_PROV_REFHOLD(prov_desc); - KCF_PROV_IREFHOLD(prov_desc); - prov_desc->pd_kstat->ks_private = prov_desc; - prov_desc->pd_kstat->ks_update = kcf_prov_kstat_update; - kstat_install(prov_desc->pd_kstat); - } - kmem_strfree(ks_name); - } - - if (prov_desc->pd_prov_type == CRYPTO_HW_PROVIDER) - process_logical_providers(info, prov_desc); mutex_enter(&prov_desc->pd_lock); prov_desc->pd_state = KCF_PROV_READY; mutex_exit(&prov_desc->pd_lock); - kcf_do_notify(prov_desc, B_TRUE); *handle = prov_desc->pd_kcf_prov_handle; ret = CRYPTO_SUCCESS; @@ -282,8 +100,7 @@ bail: /* * This routine is used to notify the framework when a provider is being - * removed. Hardware providers call this routine in their detach routines. - * Software providers call this routine in their _fini() routine. + * removed. Providers call this routine in their _fini() routine. */ int crypto_unregister_provider(crypto_kcf_provider_handle_t handle) @@ -311,46 +128,30 @@ crypto_unregister_provider(crypto_kcf_provider_handle_t handle) saved_state = desc->pd_state; desc->pd_state = KCF_PROV_REMOVED; - if (saved_state == KCF_PROV_BUSY) { - /* - * The per-provider taskq threads may be waiting. We - * signal them so that they can start failing requests. - */ - cv_broadcast(&desc->pd_resume_cv); - } - - if (desc->pd_prov_type == CRYPTO_SW_PROVIDER) { + /* + * Check if this provider is currently being used. 
+ * pd_irefcnt is the number of holds from the internal + * structures. We add one to account for the above lookup. + */ + if (desc->pd_refcnt > desc->pd_irefcnt + 1) { + desc->pd_state = saved_state; + mutex_exit(&desc->pd_lock); + /* Release reference held by kcf_prov_tab_lookup(). */ + KCF_PROV_REFRELE(desc); /* - * Check if this provider is currently being used. - * pd_irefcnt is the number of holds from the internal - * structures. We add one to account for the above lookup. + * The administrator will presumably stop the clients, + * thus removing the holds, when they get the busy + * return value. Any retry will succeed then. */ - if (desc->pd_refcnt > desc->pd_irefcnt + 1) { - desc->pd_state = saved_state; - mutex_exit(&desc->pd_lock); - /* Release reference held by kcf_prov_tab_lookup(). */ - KCF_PROV_REFRELE(desc); - /* - * The administrator presumably will stop the clients - * thus removing the holds, when they get the busy - * return value. Any retry will succeed then. - */ - return (CRYPTO_BUSY); - } + return (CRYPTO_BUSY); } mutex_exit(&desc->pd_lock); - if (desc->pd_prov_type != CRYPTO_SW_PROVIDER) { - remove_provider(desc); - } - - if (desc->pd_prov_type != CRYPTO_LOGICAL_PROVIDER) { - /* remove the provider from the mechanisms tables */ - for (mech_idx = 0; mech_idx < desc->pd_mech_list_count; - mech_idx++) { - kcf_remove_mech_provider( - desc->pd_mechanisms[mech_idx].cm_mech_name, desc); - } + /* remove the provider from the mechanisms tables */ + for (mech_idx = 0; mech_idx < desc->pd_mech_list_count; + mech_idx++) { + kcf_remove_mech_provider( + desc->pd_mechanisms[mech_idx].cm_mech_name, desc); } /* remove provider from providers table */ @@ -361,228 +162,46 @@ crypto_unregister_provider(crypto_kcf_provider_handle_t handle) return (CRYPTO_UNKNOWN_PROVIDER); } - delete_kstat(desc); + /* Release reference held by kcf_prov_tab_lookup(). 
*/ + KCF_PROV_REFRELE(desc); - if (desc->pd_prov_type == CRYPTO_SW_PROVIDER) { - /* Release reference held by kcf_prov_tab_lookup(). */ - KCF_PROV_REFRELE(desc); - - /* - * Wait till the existing requests complete. - */ - mutex_enter(&desc->pd_lock); - while (desc->pd_state != KCF_PROV_FREED) - cv_wait(&desc->pd_remove_cv, &desc->pd_lock); - mutex_exit(&desc->pd_lock); - } else { - /* - * Wait until requests that have been sent to the provider - * complete. - */ - mutex_enter(&desc->pd_lock); - while (desc->pd_irefcnt > 0) - cv_wait(&desc->pd_remove_cv, &desc->pd_lock); - mutex_exit(&desc->pd_lock); - } - - kcf_do_notify(desc, B_FALSE); + /* + * Wait till the existing requests complete. + */ + mutex_enter(&desc->pd_lock); + while (desc->pd_state != KCF_PROV_FREED) + cv_wait(&desc->pd_remove_cv, &desc->pd_lock); + mutex_exit(&desc->pd_lock); - if (desc->pd_prov_type == CRYPTO_SW_PROVIDER) { - /* - * This is the only place where kcf_free_provider_desc() - * is called directly. KCF_PROV_REFRELE() should free the - * structure in all other places. - */ - ASSERT(desc->pd_state == KCF_PROV_FREED && - desc->pd_refcnt == 0); - kcf_free_provider_desc(desc); - } else { - KCF_PROV_REFRELE(desc); - } + /* + * This is the only place where kcf_free_provider_desc() + * is called directly. KCF_PROV_REFRELE() should free the + * structure in all other places. + */ + ASSERT(desc->pd_state == KCF_PROV_FREED && + desc->pd_refcnt == 0); + kcf_free_provider_desc(desc); return (CRYPTO_SUCCESS); } /* - * This routine is used to notify the framework that the state of - * a cryptographic provider has changed. Valid state codes are: - * - * CRYPTO_PROVIDER_READY - * The provider indicates that it can process more requests. A provider - * will notify with this event if it previously has notified us with a - * CRYPTO_PROVIDER_BUSY. - * - * CRYPTO_PROVIDER_BUSY - * The provider can not take more requests. - * - * CRYPTO_PROVIDER_FAILED - * The provider encountered an internal error. 
The framework will not - * be sending any more requests to the provider. The provider may notify - * with a CRYPTO_PROVIDER_READY, if it is able to recover from the error. - * - * This routine can be called from user or interrupt context. - */ -void -crypto_provider_notification(crypto_kcf_provider_handle_t handle, uint_t state) -{ - kcf_provider_desc_t *pd; - - /* lookup the provider from the given handle */ - if ((pd = kcf_prov_tab_lookup((crypto_provider_id_t)handle)) == NULL) - return; - - mutex_enter(&pd->pd_lock); - - if (pd->pd_state <= KCF_PROV_VERIFICATION_FAILED) - goto out; - - if (pd->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) { - cmn_err(CE_WARN, "crypto_provider_notification: " - "logical provider (%x) ignored\n", handle); - goto out; - } - switch (state) { - case CRYPTO_PROVIDER_READY: - switch (pd->pd_state) { - case KCF_PROV_BUSY: - pd->pd_state = KCF_PROV_READY; - /* - * Signal the per-provider taskq threads that they - * can start submitting requests. - */ - cv_broadcast(&pd->pd_resume_cv); - break; - - case KCF_PROV_FAILED: - /* - * The provider recovered from the error. Let us - * use it now. - */ - pd->pd_state = KCF_PROV_READY; - break; - default: - break; - } - break; - - case CRYPTO_PROVIDER_BUSY: - switch (pd->pd_state) { - case KCF_PROV_READY: - pd->pd_state = KCF_PROV_BUSY; - break; - default: - break; - } - break; - - case CRYPTO_PROVIDER_FAILED: - /* - * We note the failure and return. The per-provider taskq - * threads check this flag and start failing the - * requests, if it is set. See process_req_hwp() for details. - */ - switch (pd->pd_state) { - case KCF_PROV_READY: - pd->pd_state = KCF_PROV_FAILED; - break; - - case KCF_PROV_BUSY: - pd->pd_state = KCF_PROV_FAILED; - /* - * The per-provider taskq threads may be waiting. We - * signal them so that they can start failing requests. 
- */ - cv_broadcast(&pd->pd_resume_cv); - break; - default: - break; - } - break; - default: - break; - } -out: - mutex_exit(&pd->pd_lock); - KCF_PROV_REFRELE(pd); -} - -/* - * This routine is used to notify the framework the result of - * an asynchronous request handled by a provider. Valid error - * codes are the same as the CRYPTO_* errors defined in common.h. - * - * This routine can be called from user or interrupt context. - */ -void -crypto_op_notification(crypto_req_handle_t handle, int error) -{ - kcf_call_type_t ctype; - - if (handle == NULL) - return; - - if ((ctype = GET_REQ_TYPE(handle)) == CRYPTO_SYNCH) { - kcf_sreq_node_t *sreq = (kcf_sreq_node_t *)handle; - - if (error != CRYPTO_SUCCESS) - sreq->sn_provider->pd_sched_info.ks_nfails++; - KCF_PROV_IREFRELE(sreq->sn_provider); - kcf_sop_done(sreq, error); - } else { - kcf_areq_node_t *areq = (kcf_areq_node_t *)handle; - - ASSERT(ctype == CRYPTO_ASYNCH); - if (error != CRYPTO_SUCCESS) - areq->an_provider->pd_sched_info.ks_nfails++; - KCF_PROV_IREFRELE(areq->an_provider); - kcf_aop_done(areq, error); - } -} - -/* - * This routine is used by software providers to determine - * whether to use KM_SLEEP or KM_NOSLEEP during memory allocation. - * Note that hardware providers can always use KM_SLEEP. So, - * they do not need to call this routine. - * - * This routine can be called from user or interrupt context. - */ -int -crypto_kmflag(crypto_req_handle_t handle) -{ - return (REQHNDL2_KMFLAG(handle)); -} - -/* * Process the mechanism info structures specified by the provider * during registration. A NULL crypto_provider_info_t indicates * an already initialized provider descriptor. * - * Mechanisms are not added to the kernel's mechanism table if the - * provider is a logical provider. - * * Returns CRYPTO_SUCCESS on success, CRYPTO_ARGUMENTS if one * of the specified mechanisms was malformed, or CRYPTO_HOST_MEMORY * if the table of mechanisms is full. 
*/ static int -init_prov_mechs(crypto_provider_info_t *info, kcf_provider_desc_t *desc) +init_prov_mechs(const crypto_provider_info_t *info, kcf_provider_desc_t *desc) { uint_t mech_idx; uint_t cleanup_idx; int err = CRYPTO_SUCCESS; kcf_prov_mech_desc_t *pmd; int desc_use_count = 0; - int mcount = desc->pd_mech_list_count; - - if (desc->pd_prov_type == CRYPTO_LOGICAL_PROVIDER) { - if (info != NULL) { - ASSERT(info->pi_mechanisms != NULL); - bcopy(info->pi_mechanisms, desc->pd_mechanisms, - sizeof (crypto_mech_info_t) * mcount); - } - return (CRYPTO_SUCCESS); - } /* * Copy the mechanism list from the provider info to the provider @@ -591,29 +210,9 @@ init_prov_mechs(crypto_provider_info_t *info, kcf_provider_desc_t *desc) * mechanism, SUN_RANDOM, in this case. */ if (info != NULL) { - if (info->pi_ops_vector->co_random_ops != NULL) { - crypto_mech_info_t *rand_mi; - - /* - * Need the following check as it is possible to have - * a provider that implements just random_ops and has - * pi_mechanisms == NULL. - */ - if (info->pi_mechanisms != NULL) { - bcopy(info->pi_mechanisms, desc->pd_mechanisms, - sizeof (crypto_mech_info_t) * (mcount - 1)); - } - rand_mi = &desc->pd_mechanisms[mcount - 1]; - - bzero(rand_mi, sizeof (crypto_mech_info_t)); - (void) strncpy(rand_mi->cm_mech_name, SUN_RANDOM, - CRYPTO_MAX_MECH_NAME); - rand_mi->cm_func_group_mask = CRYPTO_FG_RANDOM; - } else { - ASSERT(info->pi_mechanisms != NULL); - bcopy(info->pi_mechanisms, desc->pd_mechanisms, - sizeof (crypto_mech_info_t) * mcount); - } + ASSERT(info->pi_mechanisms != NULL); + desc->pd_mech_list_count = info->pi_mech_list_count; + desc->pd_mechanisms = info->pi_mechanisms; } /* @@ -621,32 +220,6 @@ init_prov_mechs(crypto_provider_info_t *info, kcf_provider_desc_t *desc) * to the corresponding KCF mechanism mech_entry chain. 
*/ for (mech_idx = 0; mech_idx < desc->pd_mech_list_count; mech_idx++) { - crypto_mech_info_t *mi = &desc->pd_mechanisms[mech_idx]; - - if ((mi->cm_mech_flags & CRYPTO_KEYSIZE_UNIT_IN_BITS) && - (mi->cm_mech_flags & CRYPTO_KEYSIZE_UNIT_IN_BYTES)) { - err = CRYPTO_ARGUMENTS_BAD; - break; - } - - if (desc->pd_flags & CRYPTO_HASH_NO_UPDATE && - mi->cm_func_group_mask & CRYPTO_FG_DIGEST) { - /* - * We ask the provider to specify the limit - * per hash mechanism. But, in practice, a - * hardware limitation means all hash mechanisms - * will have the same maximum size allowed for - * input data. So, we make it a per provider - * limit to keep it simple. - */ - if (mi->cm_max_input_length == 0) { - err = CRYPTO_ARGUMENTS_BAD; - break; - } else { - desc->pd_hash_limit = mi->cm_max_input_length; - } - } - if ((err = kcf_add_mech_provider(mech_idx, desc, &pmd)) != KCF_SUCCESS) break; @@ -659,12 +232,12 @@ init_prov_mechs(crypto_provider_info_t *info, kcf_provider_desc_t *desc) } /* - * Don't allow multiple software providers with disabled mechanisms + * Don't allow multiple providers with disabled mechanisms * to register. Subsequent enabling of mechanisms will result in - * an unsupported configuration, i.e. multiple software providers + * an unsupported configuration, i.e. multiple providers * per mechanism. */ - if (desc_use_count == 0 && desc->pd_prov_type == CRYPTO_SW_PROVIDER) + if (desc_use_count == 0) return (CRYPTO_ARGUMENTS_BAD); if (err == KCF_SUCCESS) @@ -686,35 +259,6 @@ init_prov_mechs(crypto_provider_info_t *info, kcf_provider_desc_t *desc) } /* - * Update routine for kstat. Only privileged users are allowed to - * access this information, since this information is sensitive. - * There are some cryptographic attacks (e.g. traffic analysis) - * which can use this information. 
- */ -static int -kcf_prov_kstat_update(kstat_t *ksp, int rw) -{ - kcf_prov_stats_t *ks_data; - kcf_provider_desc_t *pd = (kcf_provider_desc_t *)ksp->ks_private; - - if (rw == KSTAT_WRITE) - return (EACCES); - - ks_data = ksp->ks_data; - - ks_data->ps_ops_total.value.ui64 = pd->pd_sched_info.ks_ndispatches; - ks_data->ps_ops_failed.value.ui64 = pd->pd_sched_info.ks_nfails; - ks_data->ps_ops_busy_rval.value.ui64 = pd->pd_sched_info.ks_nbusy_rval; - ks_data->ps_ops_passed.value.ui64 = - pd->pd_sched_info.ks_ndispatches - - pd->pd_sched_info.ks_nfails - - pd->pd_sched_info.ks_nbusy_rval; - - return (0); -} - - -/* * Utility routine called from failure paths in crypto_register_provider() * and from crypto_load_soft_disabled(). */ @@ -734,192 +278,3 @@ undo_register_provider(kcf_provider_desc_t *desc, boolean_t remove_prov) if (remove_prov) (void) kcf_prov_tab_rem_provider(desc->pd_prov_id); } - -/* - * Utility routine called from crypto_load_soft_disabled(). Callers - * should have done a prior undo_register_provider(). - */ -void -redo_register_provider(kcf_provider_desc_t *pd) -{ - /* process the mechanisms supported by the provider */ - (void) init_prov_mechs(NULL, pd); - - /* - * Hold provider in providers table. We should not call - * kcf_prov_tab_add_provider() here as the provider descriptor - * is still valid which means it has an entry in the provider - * table. - */ - KCF_PROV_REFHOLD(pd); - KCF_PROV_IREFHOLD(pd); -} - -/* - * Add provider (p1) to another provider's array of providers (p2). - * Hardware and logical providers use this array to cross-reference - * each other. 
- */ -static void -add_provider_to_array(kcf_provider_desc_t *p1, kcf_provider_desc_t *p2) -{ - kcf_provider_list_t *new; - - new = kmem_alloc(sizeof (kcf_provider_list_t), KM_SLEEP); - mutex_enter(&p2->pd_lock); - new->pl_next = p2->pd_provider_list; - p2->pd_provider_list = new; - KCF_PROV_IREFHOLD(p1); - new->pl_provider = p1; - mutex_exit(&p2->pd_lock); -} - -/* - * Remove provider (p1) from another provider's array of providers (p2). - * Hardware and logical providers use this array to cross-reference - * each other. - */ -static void -remove_provider_from_array(kcf_provider_desc_t *p1, kcf_provider_desc_t *p2) -{ - - kcf_provider_list_t *pl = NULL, **prev; - - mutex_enter(&p2->pd_lock); - for (pl = p2->pd_provider_list, prev = &p2->pd_provider_list; - pl != NULL; prev = &pl->pl_next, pl = pl->pl_next) { - if (pl->pl_provider == p1) { - break; - } - } - - if (p1 == NULL) { - mutex_exit(&p2->pd_lock); - return; - } - - /* detach and free kcf_provider_list structure */ - KCF_PROV_IREFRELE(p1); - *prev = pl->pl_next; - kmem_free(pl, sizeof (*pl)); - mutex_exit(&p2->pd_lock); -} - -/* - * Convert an array of logical provider handles (crypto_provider_id) - * stored in a crypto_provider_info structure into an array of provider - * descriptors (kcf_provider_desc_t) attached to a logical provider. 
- */ -static void -process_logical_providers(crypto_provider_info_t *info, kcf_provider_desc_t *hp) -{ - kcf_provider_desc_t *lp; - crypto_provider_id_t handle; - int count = info->pi_logical_provider_count; - int i; - - /* add hardware provider to each logical provider */ - for (i = 0; i < count; i++) { - handle = info->pi_logical_providers[i]; - lp = kcf_prov_tab_lookup((crypto_provider_id_t)handle); - if (lp == NULL) { - continue; - } - add_provider_to_array(hp, lp); - hp->pd_flags |= KCF_LPROV_MEMBER; - - /* - * A hardware provider has to have the provider descriptor of - * every logical provider it belongs to, so it can be removed - * from the logical provider if the hardware provider - * unregisters from the framework. - */ - add_provider_to_array(lp, hp); - KCF_PROV_REFRELE(lp); - } -} - -/* - * This routine removes a provider from all of the logical or - * hardware providers it belongs to, and frees the provider's - * array of pointers to providers. - */ -static void -remove_provider(kcf_provider_desc_t *pp) -{ - kcf_provider_desc_t *p; - kcf_provider_list_t *e, *next; - - mutex_enter(&pp->pd_lock); - for (e = pp->pd_provider_list; e != NULL; e = next) { - p = e->pl_provider; - remove_provider_from_array(pp, p); - if (p->pd_prov_type == CRYPTO_HW_PROVIDER && - p->pd_provider_list == NULL) - p->pd_flags &= ~KCF_LPROV_MEMBER; - KCF_PROV_IREFRELE(p); - next = e->pl_next; - kmem_free(e, sizeof (*e)); - } - pp->pd_provider_list = NULL; - mutex_exit(&pp->pd_lock); -} - -/* - * Dispatch events as needed for a provider. is_added flag tells - * whether the provider is registering or unregistering. - */ -void -kcf_do_notify(kcf_provider_desc_t *prov_desc, boolean_t is_added) -{ - int i; - crypto_notify_event_change_t ec; - - ASSERT(prov_desc->pd_state > KCF_PROV_VERIFICATION_FAILED); - - /* - * Inform interested clients of the mechanisms becoming - * available/unavailable. We skip this for logical providers - * as they do not affect mechanisms. 
- */ - if (prov_desc->pd_prov_type != CRYPTO_LOGICAL_PROVIDER) { - ec.ec_provider_type = prov_desc->pd_prov_type; - ec.ec_change = is_added ? CRYPTO_MECH_ADDED : - CRYPTO_MECH_REMOVED; - for (i = 0; i < prov_desc->pd_mech_list_count; i++) { - (void) strlcpy(ec.ec_mech_name, - prov_desc->pd_mechanisms[i].cm_mech_name, - CRYPTO_MAX_MECH_NAME); - kcf_walk_ntfylist(CRYPTO_EVENT_MECHS_CHANGED, &ec); - } - - } - - /* - * Inform interested clients about the new or departing provider. - * In case of a logical provider, we need to notify the event only - * for the logical provider and not for the underlying - * providers which are known by the KCF_LPROV_MEMBER bit. - */ - if (prov_desc->pd_prov_type == CRYPTO_LOGICAL_PROVIDER || - (prov_desc->pd_flags & KCF_LPROV_MEMBER) == 0) { - kcf_walk_ntfylist(is_added ? CRYPTO_EVENT_PROVIDER_REGISTERED : - CRYPTO_EVENT_PROVIDER_UNREGISTERED, prov_desc); - } -} - -static void -delete_kstat(kcf_provider_desc_t *desc) -{ - /* destroy the kstat created for this provider */ - if (desc->pd_kstat != NULL) { - kcf_provider_desc_t *kspd = desc->pd_kstat->ks_private; - - /* release reference held by desc->pd_kstat->ks_private */ - ASSERT(desc == kspd); - kstat_delete(kspd->pd_kstat); - desc->pd_kstat = NULL; - KCF_PROV_REFRELE(kspd); - KCF_PROV_IREFRELE(kspd); - } -} |