Diffstat (limited to 'sys/contrib/openzfs/module/icp/algs/modes/gcm.c')
-rw-r--r--  sys/contrib/openzfs/module/icp/algs/modes/gcm.c  1543
1 file changed, 1543 insertions, 0 deletions
diff --git a/sys/contrib/openzfs/module/icp/algs/modes/gcm.c b/sys/contrib/openzfs/module/icp/algs/modes/gcm.c
new file mode 100644
index 000000000000..5553c55e11cd
--- /dev/null
+++ b/sys/contrib/openzfs/module/icp/algs/modes/gcm.c
@@ -0,0 +1,1543 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
+ */
+
+#include <sys/zfs_context.h>
+#include <modes/modes.h>
+#include <sys/crypto/common.h>
+#include <sys/crypto/icp.h>
+#include <sys/crypto/impl.h>
+#include <sys/byteorder.h>
+#include <sys/simd.h>
+#include <modes/gcm_impl.h>
+#ifdef CAN_USE_GCM_ASM
+#include <aes/aes_impl.h>
+#endif
+
+#define GHASH(c, d, t, o) \
+ xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \
+ (o)->mul((uint64_t *)(void *)(c)->gcm_ghash, (c)->gcm_H, \
+ (uint64_t *)(void *)(t));
+
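+/*
+ * Illustrative sketch (deliberately compiled out, and the helper name is
+ * made up, not part of the upstream file): one GHASH() invocation folds a
+ * 16-byte block into the running hash, computing
+ * ghash = (ghash ^ block) * H in GF(2^128), with the multiplication
+ * dispatched through the selected implementation's mul() hook.
+ */
+#if 0
+static inline void
+gcm_ghash_step(gcm_ctx_t *c, uint8_t *block, uint64_t *to,
+ const gcm_impl_ops_t *ops, void (*xor_block)(uint8_t *, uint8_t *))
+{
+ /* Fold the 16-byte block into the running hash ... */
+ xor_block(block, (uint8_t *)c->gcm_ghash);
+ /* ... then multiply by the hash subkey H, storing the result in "to". */
+ ops->mul((uint64_t *)(void *)c->gcm_ghash, c->gcm_H, to);
+}
+#endif
+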
+/* Select GCM implementation */
+#define IMPL_FASTEST (UINT32_MAX)
+#define IMPL_CYCLE (UINT32_MAX-1)
+#ifdef CAN_USE_GCM_ASM
+#define IMPL_AVX (UINT32_MAX-2)
+#endif
+#define GCM_IMPL_READ(i) (*(volatile uint32_t *) &(i))
+static uint32_t icp_gcm_impl = IMPL_FASTEST;
+static uint32_t user_sel_impl = IMPL_FASTEST;
+
+#ifdef CAN_USE_GCM_ASM
+/* Does the architecture we run on support the MOVBE instruction? */
+boolean_t gcm_avx_can_use_movbe = B_FALSE;
+/*
+ * Whether to use the optimized openssl gcm and ghash implementations.
+ * Set to true if module parameter icp_gcm_impl == "avx".
+ */
+static boolean_t gcm_use_avx = B_FALSE;
+#define GCM_IMPL_USE_AVX (*(volatile boolean_t *)&gcm_use_avx)
+
+static inline boolean_t gcm_avx_will_work(void);
+static inline void gcm_set_avx(boolean_t);
+static inline boolean_t gcm_toggle_avx(void);
+extern boolean_t atomic_toggle_boolean_nv(volatile boolean_t *);
+
+static int gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *, char *, size_t,
+ crypto_data_t *, size_t);
+
+static int gcm_encrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
+static int gcm_decrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t);
+static int gcm_init_avx(gcm_ctx_t *, unsigned char *, size_t, unsigned char *,
+ size_t, size_t);
+#endif /* ifdef CAN_USE_GCM_ASM */
+
+/*
+ * Encrypt multiple blocks of data in GCM mode. Decryption for GCM mode
+ * is done in a separate function.
+ */
+int
+gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
+ crypto_data_t *out, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+#ifdef CAN_USE_GCM_ASM
+ if (ctx->gcm_use_avx == B_TRUE)
+ return (gcm_mode_encrypt_contiguous_blocks_avx(
+ ctx, data, length, out, block_size));
+#endif
+
+ const gcm_impl_ops_t *gops;
+ size_t remainder = length;
+ size_t need = 0;
+ uint8_t *datap = (uint8_t *)data;
+ uint8_t *blockp;
+ uint8_t *lastp;
+ void *iov_or_mp;
+ offset_t offset;
+ uint8_t *out_data_1;
+ uint8_t *out_data_2;
+ size_t out_data_1_len;
+ uint64_t counter;
+ uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
+
+ if (length + ctx->gcm_remainder_len < block_size) {
+ /* accumulate bytes here and return */
+ bcopy(datap,
+ (uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len,
+ length);
+ ctx->gcm_remainder_len += length;
+ if (ctx->gcm_copy_to == NULL) {
+ ctx->gcm_copy_to = datap;
+ }
+ return (CRYPTO_SUCCESS);
+ }
+
+ lastp = (uint8_t *)ctx->gcm_cb;
+ crypto_init_ptrs(out, &iov_or_mp, &offset);
+
+ gops = gcm_impl_get_ops();
+ do {
+ /* Unprocessed data from last call. */
+ if (ctx->gcm_remainder_len > 0) {
+ need = block_size - ctx->gcm_remainder_len;
+
+ if (need > remainder)
+ return (CRYPTO_DATA_LEN_RANGE);
+
+ bcopy(datap, &((uint8_t *)ctx->gcm_remainder)
+ [ctx->gcm_remainder_len], need);
+
+ blockp = (uint8_t *)ctx->gcm_remainder;
+ } else {
+ blockp = datap;
+ }
+
+ /*
+ * Increment counter. Counter bits are confined
+ * to the bottom 32 bits of the counter block.
+ */
+ counter = ntohll(ctx->gcm_cb[1] & counter_mask);
+ counter = htonll(counter + 1);
+ counter &= counter_mask;
+ ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
+
+ encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
+ (uint8_t *)ctx->gcm_tmp);
+ xor_block(blockp, (uint8_t *)ctx->gcm_tmp);
+
+ lastp = (uint8_t *)ctx->gcm_tmp;
+
+ ctx->gcm_processed_data_len += block_size;
+
+ crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1,
+ &out_data_1_len, &out_data_2, block_size);
+
+ /* copy block to where it belongs */
+ if (out_data_1_len == block_size) {
+ copy_block(lastp, out_data_1);
+ } else {
+ bcopy(lastp, out_data_1, out_data_1_len);
+ if (out_data_2 != NULL) {
+ bcopy(lastp + out_data_1_len,
+ out_data_2,
+ block_size - out_data_1_len);
+ }
+ }
+ /* update offset */
+ out->cd_offset += block_size;
+
+ /* add ciphertext to the hash */
+ GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gops);
+
+ /* Update pointer to next block of data to be processed. */
+ if (ctx->gcm_remainder_len != 0) {
+ datap += need;
+ ctx->gcm_remainder_len = 0;
+ } else {
+ datap += block_size;
+ }
+
+ remainder = (size_t)&data[length] - (size_t)datap;
+
+ /* Incomplete last block. */
+ if (remainder > 0 && remainder < block_size) {
+ bcopy(datap, ctx->gcm_remainder, remainder);
+ ctx->gcm_remainder_len = remainder;
+ ctx->gcm_copy_to = datap;
+ goto out;
+ }
+ ctx->gcm_copy_to = NULL;
+
+ } while (remainder > 0);
+out:
+ return (CRYPTO_SUCCESS);
+}
+
+/* ARGSUSED */
+int
+gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+#ifdef CAN_USE_GCM_ASM
+ if (ctx->gcm_use_avx == B_TRUE)
+ return (gcm_encrypt_final_avx(ctx, out, block_size));
+#endif
+
+ const gcm_impl_ops_t *gops;
+ uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
+ uint8_t *ghash, *macp = NULL;
+ int i, rv;
+
+ if (out->cd_length <
+ (ctx->gcm_remainder_len + ctx->gcm_tag_len)) {
+ return (CRYPTO_DATA_LEN_RANGE);
+ }
+
+ gops = gcm_impl_get_ops();
+ ghash = (uint8_t *)ctx->gcm_ghash;
+
+ if (ctx->gcm_remainder_len > 0) {
+ uint64_t counter;
+ uint8_t *tmpp = (uint8_t *)ctx->gcm_tmp;
+
+ /*
+ * Here is where we deal with data that is not a
+ * multiple of the block size.
+ */
+
+ /*
+ * Increment counter.
+ */
+ counter = ntohll(ctx->gcm_cb[1] & counter_mask);
+ counter = htonll(counter + 1);
+ counter &= counter_mask;
+ ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
+
+ encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb,
+ (uint8_t *)ctx->gcm_tmp);
+
+ macp = (uint8_t *)ctx->gcm_remainder;
+ bzero(macp + ctx->gcm_remainder_len,
+ block_size - ctx->gcm_remainder_len);
+
+ /* XOR with counter block */
+ for (i = 0; i < ctx->gcm_remainder_len; i++) {
+ macp[i] ^= tmpp[i];
+ }
+
+ /* add ciphertext to the hash */
+ GHASH(ctx, macp, ghash, gops);
+
+ ctx->gcm_processed_data_len += ctx->gcm_remainder_len;
+ }
+
+ ctx->gcm_len_a_len_c[1] =
+ htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
+ GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops);
+ encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
+ (uint8_t *)ctx->gcm_J0);
+ xor_block((uint8_t *)ctx->gcm_J0, ghash);
+
+ if (ctx->gcm_remainder_len > 0) {
+ rv = crypto_put_output_data(macp, out, ctx->gcm_remainder_len);
+ if (rv != CRYPTO_SUCCESS)
+ return (rv);
+ }
+ out->cd_offset += ctx->gcm_remainder_len;
+ ctx->gcm_remainder_len = 0;
+ rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
+ if (rv != CRYPTO_SUCCESS)
+ return (rv);
+ out->cd_offset += ctx->gcm_tag_len;
+
+ return (CRYPTO_SUCCESS);
+}
+
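+/*
+ * Illustrative call sequence (kept out of the build; the wrapper name and
+ * passing the AES block helpers as parameters are assumptions, not part of
+ * this file): plaintext is streamed through the context with
+ * gcm_mode_encrypt_contiguous_blocks(), and gcm_encrypt_final() then flushes
+ * any buffered partial block and appends the authentication tag.
+ */
+#if 0
+static int
+example_gcm_encrypt_stream(gcm_ctx_t *ctx, char *pt, size_t pt_len,
+ crypto_data_t *out,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ int rv;
+
+ /* May be called repeatedly as more plaintext becomes available. */
+ rv = gcm_mode_encrypt_contiguous_blocks(ctx, pt, pt_len, out, 16,
+ encrypt_block, copy_block, xor_block);
+ if (rv != CRYPTO_SUCCESS)
+ return (rv);
+
+ /* Emit the remainder (if any) followed by the GCM tag. */
+ return (gcm_encrypt_final(ctx, out, 16, encrypt_block, copy_block,
+ xor_block));
+}
+#endif
+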
+/*
+ * This only deals with decrypting the last block of the input, which
+ * might not be a multiple of the block length.
+ */
+static void
+gcm_decrypt_incomplete_block(gcm_ctx_t *ctx, size_t block_size, size_t index,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ uint8_t *datap, *outp, *counterp;
+ uint64_t counter;
+ uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
+ int i;
+
+ /*
+ * Increment counter.
+ * Counter bits are confined to the bottom 32 bits
+ */
+ counter = ntohll(ctx->gcm_cb[1] & counter_mask);
+ counter = htonll(counter + 1);
+ counter &= counter_mask;
+ ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
+
+ datap = (uint8_t *)ctx->gcm_remainder;
+ outp = &((ctx->gcm_pt_buf)[index]);
+ counterp = (uint8_t *)ctx->gcm_tmp;
+
+ /* zero pad the final partial ciphertext block for the hash */
+ bzero((uint8_t *)ctx->gcm_tmp, block_size);
+ bcopy(datap, (uint8_t *)ctx->gcm_tmp, ctx->gcm_remainder_len);
+
+ /* add ciphertext to the hash */
+ GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gcm_impl_get_ops());
+
+ /* decrypt remaining ciphertext */
+ encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, counterp);
+
+ /* XOR with counter block */
+ for (i = 0; i < ctx->gcm_remainder_len; i++) {
+ outp[i] = datap[i] ^ counterp[i];
+ }
+}
+
+/* ARGSUSED */
+int
+gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length,
+ crypto_data_t *out, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ size_t new_len;
+ uint8_t *new;
+
+ /*
+ * Copy contiguous ciphertext input blocks to the plaintext buffer.
+ * The ciphertext will be decrypted in the final call.
+ */
+ if (length > 0) {
+ new_len = ctx->gcm_pt_buf_len + length;
+ new = vmem_alloc(new_len, ctx->gcm_kmflag);
+ if (new == NULL) {
+ vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
+ ctx->gcm_pt_buf = NULL;
+ return (CRYPTO_HOST_MEMORY);
+ }
+ bcopy(ctx->gcm_pt_buf, new, ctx->gcm_pt_buf_len);
+ vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len);
+ ctx->gcm_pt_buf = new;
+ ctx->gcm_pt_buf_len = new_len;
+ bcopy(data, &ctx->gcm_pt_buf[ctx->gcm_processed_data_len],
+ length);
+ ctx->gcm_processed_data_len += length;
+ }
+
+ ctx->gcm_remainder_len = 0;
+ return (CRYPTO_SUCCESS);
+}
+
+int
+gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+#ifdef CAN_USE_GCM_ASM
+ if (ctx->gcm_use_avx == B_TRUE)
+ return (gcm_decrypt_final_avx(ctx, out, block_size));
+#endif
+
+ const gcm_impl_ops_t *gops;
+ size_t pt_len;
+ size_t remainder;
+ uint8_t *ghash;
+ uint8_t *blockp;
+ uint8_t *cbp;
+ uint64_t counter;
+ uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
+ int processed = 0, rv;
+
+ ASSERT(ctx->gcm_processed_data_len == ctx->gcm_pt_buf_len);
+
+ gops = gcm_impl_get_ops();
+ pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
+ ghash = (uint8_t *)ctx->gcm_ghash;
+ blockp = ctx->gcm_pt_buf;
+ remainder = pt_len;
+ while (remainder > 0) {
+ /* Incomplete last block */
+ if (remainder < block_size) {
+ bcopy(blockp, ctx->gcm_remainder, remainder);
+ ctx->gcm_remainder_len = remainder;
+ /*
+ * not expecting any more ciphertext, just
+ * compute plaintext for the remaining input
+ */
+ gcm_decrypt_incomplete_block(ctx, block_size,
+ processed, encrypt_block, xor_block);
+ ctx->gcm_remainder_len = 0;
+ goto out;
+ }
+ /* add ciphertext to the hash */
+ GHASH(ctx, blockp, ghash, gops);
+
+ /*
+ * Increment counter.
+ * Counter bits are confined to the bottom 32 bits
+ */
+ counter = ntohll(ctx->gcm_cb[1] & counter_mask);
+ counter = htonll(counter + 1);
+ counter &= counter_mask;
+ ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
+
+ cbp = (uint8_t *)ctx->gcm_tmp;
+ encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, cbp);
+
+ /* XOR with ciphertext */
+ xor_block(cbp, blockp);
+
+ processed += block_size;
+ blockp += block_size;
+ remainder -= block_size;
+ }
+out:
+ ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
+ GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops);
+ encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0,
+ (uint8_t *)ctx->gcm_J0);
+ xor_block((uint8_t *)ctx->gcm_J0, ghash);
+
+ /* compare the input authentication tag with what we calculated */
+ if (bcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
+ /* They don't match */
+ return (CRYPTO_INVALID_MAC);
+ } else {
+ rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
+ if (rv != CRYPTO_SUCCESS)
+ return (rv);
+ out->cd_offset += pt_len;
+ }
+ return (CRYPTO_SUCCESS);
+}
+
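+/*
+ * Illustrative decrypt sequence (kept out of the build; the wrapper name is
+ * an assumption): gcm_mode_decrypt_contiguous_blocks() only buffers the
+ * ciphertext, including the trailing gcm_tag_len tag bytes, and
+ * gcm_decrypt_final() decrypts, verifies the tag and writes the plaintext.
+ */
+#if 0
+static int
+example_gcm_decrypt_stream(gcm_ctx_t *ctx, char *ct, size_t ct_len,
+ crypto_data_t *out,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ int rv;
+
+ /* "ct" must include the authentication tag at its end. */
+ rv = gcm_mode_decrypt_contiguous_blocks(ctx, ct, ct_len, out, 16,
+ encrypt_block, copy_block, xor_block);
+ if (rv != CRYPTO_SUCCESS)
+ return (rv);
+
+ /* Returns CRYPTO_INVALID_MAC if the computed tag does not match. */
+ return (gcm_decrypt_final(ctx, out, 16, encrypt_block, xor_block));
+}
+#endif
+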
+static int
+gcm_validate_args(CK_AES_GCM_PARAMS *gcm_param)
+{
+ size_t tag_len;
+
+ /*
+ * Check the length of the authentication tag (in bits).
+ */
+ tag_len = gcm_param->ulTagBits;
+ switch (tag_len) {
+ case 32:
+ case 64:
+ case 96:
+ case 104:
+ case 112:
+ case 120:
+ case 128:
+ break;
+ default:
+ return (CRYPTO_MECHANISM_PARAM_INVALID);
+ }
+
+ if (gcm_param->ulIvLen == 0)
+ return (CRYPTO_MECHANISM_PARAM_INVALID);
+
+ return (CRYPTO_SUCCESS);
+}
+
+static void
+gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len,
+ gcm_ctx_t *ctx, size_t block_size,
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ const gcm_impl_ops_t *gops;
+ uint8_t *cb;
+ ulong_t remainder = iv_len;
+ ulong_t processed = 0;
+ uint8_t *datap, *ghash;
+ uint64_t len_a_len_c[2];
+
+ gops = gcm_impl_get_ops();
+ ghash = (uint8_t *)ctx->gcm_ghash;
+ cb = (uint8_t *)ctx->gcm_cb;
+ if (iv_len == 12) {
+ bcopy(iv, cb, 12);
+ cb[12] = 0;
+ cb[13] = 0;
+ cb[14] = 0;
+ cb[15] = 1;
+ /* J0 will be used again in the final */
+ copy_block(cb, (uint8_t *)ctx->gcm_J0);
+ } else {
+ /* GHASH the IV */
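+ /*
+ * Per NIST SP 800-38D, for IVs that are not 96 bits long:
+ * J0 = GHASH(IV || 0^s || [0]_64 || [len(IV) in bits]_64),
+ * i.e. hash the zero-padded IV followed by a length block; the
+ * loop and the len_a_len_c step below correspond to that
+ * construction.
+ */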
+ do {
+ if (remainder < block_size) {
+ bzero(cb, block_size);
+ bcopy(&(iv[processed]), cb, remainder);
+ datap = (uint8_t *)cb;
+ remainder = 0;
+ } else {
+ datap = (uint8_t *)(&(iv[processed]));
+ processed += block_size;
+ remainder -= block_size;
+ }
+ GHASH(ctx, datap, ghash, gops);
+ } while (remainder > 0);
+
+ len_a_len_c[0] = 0;
+ len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(iv_len));
+ GHASH(ctx, len_a_len_c, ctx->gcm_J0, gops);
+
+ /* J0 will be used again in the final */
+ copy_block((uint8_t *)ctx->gcm_J0, (uint8_t *)cb);
+ }
+}
+
+static int
+gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
+ unsigned char *auth_data, size_t auth_data_len, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ const gcm_impl_ops_t *gops;
+ uint8_t *ghash, *datap, *authp;
+ size_t remainder, processed;
+
+ /* encrypt zero block to get subkey H */
+ bzero(ctx->gcm_H, sizeof (ctx->gcm_H));
+ encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_H,
+ (uint8_t *)ctx->gcm_H);
+
+ gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
+ copy_block, xor_block);
+
+ gops = gcm_impl_get_ops();
+ authp = (uint8_t *)ctx->gcm_tmp;
+ ghash = (uint8_t *)ctx->gcm_ghash;
+ bzero(authp, block_size);
+ bzero(ghash, block_size);
+
+ processed = 0;
+ remainder = auth_data_len;
+ do {
+ if (remainder < block_size) {
+ /*
+ * There's not a full block of data, pad the rest of
+ * the buffer with zeros
+ */
+ bzero(authp, block_size);
+ bcopy(&(auth_data[processed]), authp, remainder);
+ datap = (uint8_t *)authp;
+ remainder = 0;
+ } else {
+ datap = (uint8_t *)(&(auth_data[processed]));
+ processed += block_size;
+ remainder -= block_size;
+ }
+
+ /* add auth data to the hash */
+ GHASH(ctx, datap, ghash, gops);
+
+ } while (remainder > 0);
+
+ return (CRYPTO_SUCCESS);
+}
+
+/*
+ * The following function is called at encrypt or decrypt init time
+ * for AES GCM mode.
+ *
+ * Init the GCM context struct. Handle the cycle and avx implementations here.
+ */
+int
+gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ int rv;
+ CK_AES_GCM_PARAMS *gcm_param;
+
+ if (param != NULL) {
+ gcm_param = (CK_AES_GCM_PARAMS *)(void *)param;
+
+ if ((rv = gcm_validate_args(gcm_param)) != 0) {
+ return (rv);
+ }
+
+ gcm_ctx->gcm_tag_len = gcm_param->ulTagBits;
+ gcm_ctx->gcm_tag_len >>= 3;
+ gcm_ctx->gcm_processed_data_len = 0;
+
+ /* these values are in bits */
+ gcm_ctx->gcm_len_a_len_c[0]
+ = htonll(CRYPTO_BYTES2BITS(gcm_param->ulAADLen));
+
+ rv = CRYPTO_SUCCESS;
+ gcm_ctx->gcm_flags |= GCM_MODE;
+ } else {
+ return (CRYPTO_MECHANISM_PARAM_INVALID);
+ }
+
+#ifdef CAN_USE_GCM_ASM
+ if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) {
+ gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX;
+ } else {
+ /*
+ * Handle the "cycle" implementation by creating avx and
+ * non-avx contexts alternately.
+ */
+ gcm_ctx->gcm_use_avx = gcm_toggle_avx();
+ /*
+ * We don't handle byte swapped key schedules in the avx
+ * code path.
+ */
+ aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched;
+ if (ks->ops->needs_byteswap == B_TRUE) {
+ gcm_ctx->gcm_use_avx = B_FALSE;
+ }
+ /* Use the MOVBE and the BSWAP variants alternately. */
+ if (gcm_ctx->gcm_use_avx == B_TRUE &&
+ zfs_movbe_available() == B_TRUE) {
+ (void) atomic_toggle_boolean_nv(
+ (volatile boolean_t *)&gcm_avx_can_use_movbe);
+ }
+ }
+ /* Avx and non avx context initialization differs from here on. */
+ if (gcm_ctx->gcm_use_avx == B_FALSE) {
+#endif /* ifdef CAN_USE_GCM_ASM */
+ if (gcm_init(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen,
+ gcm_param->pAAD, gcm_param->ulAADLen, block_size,
+ encrypt_block, copy_block, xor_block) != 0) {
+ rv = CRYPTO_MECHANISM_PARAM_INVALID;
+ }
+#ifdef CAN_USE_GCM_ASM
+ } else {
+ if (gcm_init_avx(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen,
+ gcm_param->pAAD, gcm_param->ulAADLen, block_size) != 0) {
+ rv = CRYPTO_MECHANISM_PARAM_INVALID;
+ }
+ }
+#endif /* ifdef CAN_USE_GCM_ASM */
+
+ return (rv);
+}
+
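+/*
+ * Illustrative context setup (kept out of the build; the wrapper name and the
+ * designated-initializer layout are assumptions): a typical caller fills in
+ * CK_AES_GCM_PARAMS with a 12-byte IV and a 128-bit tag and hands it to
+ * gcm_init_ctx() as the opaque param pointer. Only the fields referenced in
+ * this file (pIv, ulIvLen, pAAD, ulAADLen, ulTagBits) are shown, and
+ * gcm_ctx->gcm_keysched must already hold an expanded AES key schedule.
+ */
+#if 0
+static int
+example_gcm_setup(gcm_ctx_t *gcm_ctx, uchar_t *iv12, uchar_t *aad,
+ ulong_t aad_len,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ CK_AES_GCM_PARAMS params = {
+ .pIv = iv12, /* 96-bit IV, the common fast path */
+ .ulIvLen = 12,
+ .pAAD = aad,
+ .ulAADLen = aad_len,
+ .ulTagBits = 128, /* must pass gcm_validate_args() */
+ };
+
+ return (gcm_init_ctx(gcm_ctx, (char *)&params, 16,
+ encrypt_block, copy_block, xor_block));
+}
+#endif
+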
+int
+gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size,
+ int (*encrypt_block)(const void *, const uint8_t *, uint8_t *),
+ void (*copy_block)(uint8_t *, uint8_t *),
+ void (*xor_block)(uint8_t *, uint8_t *))
+{
+ int rv;
+ CK_AES_GMAC_PARAMS *gmac_param;
+
+ if (param != NULL) {
+ gmac_param = (CK_AES_GMAC_PARAMS *)(void *)param;
+
+ gcm_ctx->gcm_tag_len = CRYPTO_BITS2BYTES(AES_GMAC_TAG_BITS);
+ gcm_ctx->gcm_processed_data_len = 0;
+
+ /* these values are in bits */
+ gcm_ctx->gcm_len_a_len_c[0]
+ = htonll(CRYPTO_BYTES2BITS(gmac_param->ulAADLen));
+
+ rv = CRYPTO_SUCCESS;
+ gcm_ctx->gcm_flags |= GMAC_MODE;
+ } else {
+ return (CRYPTO_MECHANISM_PARAM_INVALID);
+ }
+
+#ifdef CAN_USE_GCM_ASM
+ /*
+ * Handle the "cycle" implementation by creating avx and non avx
+ * contexts alternately.
+ */
+ if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) {
+ gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX;
+ } else {
+ gcm_ctx->gcm_use_avx = gcm_toggle_avx();
+ }
+ /* We don't handle byte swapped key schedules in the avx code path. */
+ aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched;
+ if (ks->ops->needs_byteswap == B_TRUE) {
+ gcm_ctx->gcm_use_avx = B_FALSE;
+ }
+ /* Avx and non avx context initialization differs from here on. */
+ if (gcm_ctx->gcm_use_avx == B_FALSE) {
+#endif /* ifdef CAN_USE_GCM_ASM */
+ if (gcm_init(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN,
+ gmac_param->pAAD, gmac_param->ulAADLen, block_size,
+ encrypt_block, copy_block, xor_block) != 0) {
+ rv = CRYPTO_MECHANISM_PARAM_INVALID;
+ }
+#ifdef CAN_USE_GCM_ASM
+ } else {
+ if (gcm_init_avx(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN,
+ gmac_param->pAAD, gmac_param->ulAADLen, block_size) != 0) {
+ rv = CRYPTO_MECHANISM_PARAM_INVALID;
+ }
+ }
+#endif /* ifdef CAN_USE_GCM_ASM */
+
+ return (rv);
+}
+
+void *
+gcm_alloc_ctx(int kmflag)
+{
+ gcm_ctx_t *gcm_ctx;
+
+ if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL)
+ return (NULL);
+
+ gcm_ctx->gcm_flags = GCM_MODE;
+ return (gcm_ctx);
+}
+
+void *
+gmac_alloc_ctx(int kmflag)
+{
+ gcm_ctx_t *gcm_ctx;
+
+ if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL)
+ return (NULL);
+
+ gcm_ctx->gcm_flags = GMAC_MODE;
+ return (gcm_ctx);
+}
+
+void
+gcm_set_kmflag(gcm_ctx_t *ctx, int kmflag)
+{
+ ctx->gcm_kmflag = kmflag;
+}
+
+/* GCM implementation that contains the fastest methods */
+static gcm_impl_ops_t gcm_fastest_impl = {
+ .name = "fastest"
+};
+
+/* All compiled in implementations */
+const gcm_impl_ops_t *gcm_all_impl[] = {
+ &gcm_generic_impl,
+#if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
+ &gcm_pclmulqdq_impl,
+#endif
+};
+
+/* Indicates that the benchmark has been completed */
+static boolean_t gcm_impl_initialized = B_FALSE;
+
+/* Hold all supported implementations */
+static size_t gcm_supp_impl_cnt = 0;
+static gcm_impl_ops_t *gcm_supp_impl[ARRAY_SIZE(gcm_all_impl)];
+
+/*
+ * Returns the GCM operations for encrypt/decrypt/key setup. When a
+ * SIMD implementation is not allowed in the current context, fall
+ * back to the generic implementation.
+ */
+const gcm_impl_ops_t *
+gcm_impl_get_ops()
+{
+ if (!kfpu_allowed())
+ return (&gcm_generic_impl);
+
+ const gcm_impl_ops_t *ops = NULL;
+ const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);
+
+ switch (impl) {
+ case IMPL_FASTEST:
+ ASSERT(gcm_impl_initialized);
+ ops = &gcm_fastest_impl;
+ break;
+ case IMPL_CYCLE:
+ /* Cycle through supported implementations */
+ ASSERT(gcm_impl_initialized);
+ ASSERT3U(gcm_supp_impl_cnt, >, 0);
+ static size_t cycle_impl_idx = 0;
+ size_t idx = (++cycle_impl_idx) % gcm_supp_impl_cnt;
+ ops = gcm_supp_impl[idx];
+ break;
+#ifdef CAN_USE_GCM_ASM
+ case IMPL_AVX:
+ /*
+ * Make sure that we return a valid implementation while
+ * switching to the avx implementation since there still
+ * may be unfinished non-avx contexts around.
+ */
+ ops = &gcm_generic_impl;
+ break;
+#endif
+ default:
+ ASSERT3U(impl, <, gcm_supp_impl_cnt);
+ ASSERT3U(gcm_supp_impl_cnt, >, 0);
+ if (impl < ARRAY_SIZE(gcm_all_impl))
+ ops = gcm_supp_impl[impl];
+ break;
+ }
+
+ ASSERT3P(ops, !=, NULL);
+
+ return (ops);
+}
+
+/*
+ * Initialize all supported implementations.
+ */
+void
+gcm_impl_init(void)
+{
+ gcm_impl_ops_t *curr_impl;
+ int i, c;
+
+ /* Move supported implementations into gcm_supp_impl */
+ for (i = 0, c = 0; i < ARRAY_SIZE(gcm_all_impl); i++) {
+ curr_impl = (gcm_impl_ops_t *)gcm_all_impl[i];
+
+ if (curr_impl->is_supported())
+ gcm_supp_impl[c++] = (gcm_impl_ops_t *)curr_impl;
+ }
+ gcm_supp_impl_cnt = c;
+
+ /*
+ * Set the fastest implementation given the assumption that the
+ * hardware accelerated version is the fastest.
+ */
+#if defined(__x86_64) && defined(HAVE_PCLMULQDQ)
+ if (gcm_pclmulqdq_impl.is_supported()) {
+ memcpy(&gcm_fastest_impl, &gcm_pclmulqdq_impl,
+ sizeof (gcm_fastest_impl));
+ } else
+#endif
+ {
+ memcpy(&gcm_fastest_impl, &gcm_generic_impl,
+ sizeof (gcm_fastest_impl));
+ }
+
+ strlcpy(gcm_fastest_impl.name, "fastest", GCM_IMPL_NAME_MAX);
+
+#ifdef CAN_USE_GCM_ASM
+ /*
+ * Use the avx implementation if it's available and the implementation
+ * hasn't changed from its default value of fastest on module load.
+ */
+ if (gcm_avx_will_work()) {
+#ifdef HAVE_MOVBE
+ if (zfs_movbe_available() == B_TRUE) {
+ atomic_swap_32(&gcm_avx_can_use_movbe, B_TRUE);
+ }
+#endif
+ if (GCM_IMPL_READ(user_sel_impl) == IMPL_FASTEST) {
+ gcm_set_avx(B_TRUE);
+ }
+ }
+#endif
+ /* Finish initialization */
+ atomic_swap_32(&icp_gcm_impl, user_sel_impl);
+ gcm_impl_initialized = B_TRUE;
+}
+
+static const struct {
+ char *name;
+ uint32_t sel;
+} gcm_impl_opts[] = {
+ { "cycle", IMPL_CYCLE },
+ { "fastest", IMPL_FASTEST },
+#ifdef CAN_USE_GCM_ASM
+ { "avx", IMPL_AVX },
+#endif
+};
+
+/*
+ * Set the desired gcm implementation.
+ *
+ * If we are called before init(), the user preference will be saved in
+ * user_sel_impl and applied in a later init() call. This happens when the
+ * module parameter is specified on module load. Otherwise, icp_gcm_impl is
+ * updated directly.
+ *
+ * @val Name of gcm implementation to use
+ */
+int
+gcm_impl_set(const char *val)
+{
+ int err = -EINVAL;
+ char req_name[GCM_IMPL_NAME_MAX];
+ uint32_t impl = GCM_IMPL_READ(user_sel_impl);
+ size_t i;
+
+ /* sanitize input */
+ i = strnlen(val, GCM_IMPL_NAME_MAX);
+ if (i == 0 || i >= GCM_IMPL_NAME_MAX)
+ return (err);
+
+ strlcpy(req_name, val, GCM_IMPL_NAME_MAX);
+ while (i > 0 && isspace(req_name[i-1]))
+ i--;
+ req_name[i] = '\0';
+
+ /* Check mandatory options */
+ for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
+#ifdef CAN_USE_GCM_ASM
+ /* Ignore avx implementation if it won't work. */
+ if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) {
+ continue;
+ }
+#endif
+ if (strcmp(req_name, gcm_impl_opts[i].name) == 0) {
+ impl = gcm_impl_opts[i].sel;
+ err = 0;
+ break;
+ }
+ }
+
+ /* check all supported impl if init() was already called */
+ if (err != 0 && gcm_impl_initialized) {
+ /* check all supported implementations */
+ for (i = 0; i < gcm_supp_impl_cnt; i++) {
+ if (strcmp(req_name, gcm_supp_impl[i]->name) == 0) {
+ impl = i;
+ err = 0;
+ break;
+ }
+ }
+ }
+#ifdef CAN_USE_GCM_ASM
+ /*
+ * Use the avx implementation if available and the requested one is
+ * avx or fastest.
+ */
+ if (gcm_avx_will_work() == B_TRUE &&
+ (impl == IMPL_AVX || impl == IMPL_FASTEST)) {
+ gcm_set_avx(B_TRUE);
+ } else {
+ gcm_set_avx(B_FALSE);
+ }
+#endif
+
+ if (err == 0) {
+ if (gcm_impl_initialized)
+ atomic_swap_32(&icp_gcm_impl, impl);
+ else
+ atomic_swap_32(&user_sel_impl, impl);
+ }
+
+ return (err);
+}
+
+#if defined(_KERNEL) && defined(__linux__)
+
+static int
+icp_gcm_impl_set(const char *val, zfs_kernel_param_t *kp)
+{
+ return (gcm_impl_set(val));
+}
+
+static int
+icp_gcm_impl_get(char *buffer, zfs_kernel_param_t *kp)
+{
+ int i, cnt = 0;
+ char *fmt;
+ const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl);
+
+ ASSERT(gcm_impl_initialized);
+
+ /* list mandatory options */
+ for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) {
+#ifdef CAN_USE_GCM_ASM
+ /* Ignore avx implementation if it won't work. */
+ if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) {
+ continue;
+ }
+#endif
+ fmt = (impl == gcm_impl_opts[i].sel) ? "[%s] " : "%s ";
+ cnt += sprintf(buffer + cnt, fmt, gcm_impl_opts[i].name);
+ }
+
+ /* list all supported implementations */
+ for (i = 0; i < gcm_supp_impl_cnt; i++) {
+ fmt = (i == impl) ? "[%s] " : "%s ";
+ cnt += sprintf(buffer + cnt, fmt, gcm_supp_impl[i]->name);
+ }
+
+ return (cnt);
+}
+
+module_param_call(icp_gcm_impl, icp_gcm_impl_set, icp_gcm_impl_get,
+ NULL, 0644);
+MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation.");
+#endif /* defined(_KERNEL) && defined(__linux__) */
+
+#ifdef CAN_USE_GCM_ASM
+#define GCM_BLOCK_LEN 16
+/*
+ * The openssl asm routines are 6x aggregated and need that many bytes
+ * at minimum.
+ */
+#define GCM_AVX_MIN_DECRYPT_BYTES (GCM_BLOCK_LEN * 6)
+#define GCM_AVX_MIN_ENCRYPT_BYTES (GCM_BLOCK_LEN * 6 * 3)
+/*
+ * Ensure the chunk size stays reasonable since we are allocating a buffer
+ * of up to GCM_AVX_MAX_CHUNK_SIZE bytes and disabling preemption and
+ * interrupts while processing it.
+ */
+#define GCM_AVX_MAX_CHUNK_SIZE \
+ (((128*1024)/GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES)
+
+/* Get the chunk size module parameter. */
+#define GCM_CHUNK_SIZE_READ *(volatile uint32_t *) &gcm_avx_chunk_size
+
+/* Clear the FPU registers since they hold sensitive internal state. */
+#define clear_fpu_regs() clear_fpu_regs_avx()
+#define GHASH_AVX(ctx, in, len) \
+ gcm_ghash_avx((ctx)->gcm_ghash, (const uint64_t (*)[2])(ctx)->gcm_Htable, \
+ in, len)
+
+#define gcm_incr_counter_block(ctx) gcm_incr_counter_block_by(ctx, 1)
+
+/*
+ * Module parameter: number of bytes to process at once while owning the FPU.
+ * Rounded down to the next GCM_AVX_MIN_DECRYPT_BYTES byte boundary and
+ * ensured to be greater than or equal to GCM_AVX_MIN_DECRYPT_BYTES.
+ */
+static uint32_t gcm_avx_chunk_size =
+ ((32 * 1024) / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES;
+
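+/*
+ * For reference, with GCM_BLOCK_LEN == 16 the values above work out to:
+ *   GCM_AVX_MIN_DECRYPT_BYTES    = 16 * 6             = 96
+ *   GCM_AVX_MIN_ENCRYPT_BYTES    = 16 * 6 * 3         = 288
+ *   GCM_AVX_MAX_CHUNK_SIZE       = (131072 / 96) * 96 = 131040
+ *   gcm_avx_chunk_size (default) = (32768 / 96) * 96  = 32736
+ */
+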
+extern void clear_fpu_regs_avx(void);
+extern void gcm_xor_avx(const uint8_t *src, uint8_t *dst);
+extern void aes_encrypt_intel(const uint32_t rk[], int nr,
+ const uint32_t pt[4], uint32_t ct[4]);
+
+extern void gcm_init_htab_avx(uint64_t Htable[16][2], const uint64_t H[2]);
+extern void gcm_ghash_avx(uint64_t ghash[2], const uint64_t Htable[16][2],
+ const uint8_t *in, size_t len);
+
+extern size_t aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t,
+ const void *, uint64_t *, uint64_t *);
+
+extern size_t aesni_gcm_decrypt(const uint8_t *, uint8_t *, size_t,
+ const void *, uint64_t *, uint64_t *);
+
+static inline boolean_t
+gcm_avx_will_work(void)
+{
+ /* Avx should imply aes-ni and pclmulqdq, but make sure anyhow. */
+ return (kfpu_allowed() &&
+ zfs_avx_available() && zfs_aes_available() &&
+ zfs_pclmulqdq_available());
+}
+
+static inline void
+gcm_set_avx(boolean_t val)
+{
+ if (gcm_avx_will_work() == B_TRUE) {
+ atomic_swap_32(&gcm_use_avx, val);
+ }
+}
+
+static inline boolean_t
+gcm_toggle_avx(void)
+{
+ if (gcm_avx_will_work() == B_TRUE) {
+ return (atomic_toggle_boolean_nv(&GCM_IMPL_USE_AVX));
+ } else {
+ return (B_FALSE);
+ }
+}
+
+/*
+ * Clear sensitive data in the context.
+ *
+ * ctx->gcm_remainder may contain a plaintext remainder. ctx->gcm_H and
+ * ctx->gcm_Htable contain the hash sub key which protects authentication.
+ *
+ * Although extremely unlikely, ctx->gcm_J0 and ctx->gcm_tmp could be used for
+ * a known plaintext attack; they consist of the IV and the first and last
+ * counter block respectively. Whether they should be cleared is debatable.
+ */
+static inline void
+gcm_clear_ctx(gcm_ctx_t *ctx)
+{
+ bzero(ctx->gcm_remainder, sizeof (ctx->gcm_remainder));
+ bzero(ctx->gcm_H, sizeof (ctx->gcm_H));
+ bzero(ctx->gcm_Htable, sizeof (ctx->gcm_Htable));
+ bzero(ctx->gcm_J0, sizeof (ctx->gcm_J0));
+ bzero(ctx->gcm_tmp, sizeof (ctx->gcm_tmp));
+}
+
+/* Increment the GCM counter block by n. */
+static inline void
+gcm_incr_counter_block_by(gcm_ctx_t *ctx, int n)
+{
+ uint64_t counter_mask = ntohll(0x00000000ffffffffULL);
+ uint64_t counter = ntohll(ctx->gcm_cb[1] & counter_mask);
+
+ counter = htonll(counter + n);
+ counter &= counter_mask;
+ ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter;
+}
+
+/*
+ * Encrypt multiple blocks of data in GCM mode.
+ * This is done in gcm_avx_chunk_size chunks, utilizing AVX assembler routines
+ * if possible. While processing a chunk the FPU is "locked".
+ */
+static int
+gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data,
+ size_t length, crypto_data_t *out, size_t block_size)
+{
+ size_t bleft = length;
+ size_t need = 0;
+ size_t done = 0;
+ uint8_t *datap = (uint8_t *)data;
+ size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
+ const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched);
+ uint64_t *ghash = ctx->gcm_ghash;
+ uint64_t *cb = ctx->gcm_cb;
+ uint8_t *ct_buf = NULL;
+ uint8_t *tmp = (uint8_t *)ctx->gcm_tmp;
+ int rv = CRYPTO_SUCCESS;
+
+ ASSERT(block_size == GCM_BLOCK_LEN);
+ /*
+ * If the last call left an incomplete block, try to fill
+ * it first.
+ */
+ if (ctx->gcm_remainder_len > 0) {
+ need = block_size - ctx->gcm_remainder_len;
+ if (length < need) {
+ /* Accumulate bytes here and return. */
+ bcopy(datap, (uint8_t *)ctx->gcm_remainder +
+ ctx->gcm_remainder_len, length);
+
+ ctx->gcm_remainder_len += length;
+ if (ctx->gcm_copy_to == NULL) {
+ ctx->gcm_copy_to = datap;
+ }
+ return (CRYPTO_SUCCESS);
+ } else {
+ /* Complete incomplete block. */
+ bcopy(datap, (uint8_t *)ctx->gcm_remainder +
+ ctx->gcm_remainder_len, need);
+
+ ctx->gcm_copy_to = NULL;
+ }
+ }
+
+ /* Allocate a buffer to encrypt to if there is enough input. */
+ if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) {
+ ct_buf = vmem_alloc(chunk_size, ctx->gcm_kmflag);
+ if (ct_buf == NULL) {
+ return (CRYPTO_HOST_MEMORY);
+ }
+ }
+
+ /* If we completed an incomplete block, encrypt and write it out. */
+ if (ctx->gcm_remainder_len > 0) {
+ kfpu_begin();
+ aes_encrypt_intel(key->encr_ks.ks32, key->nr,
+ (const uint32_t *)cb, (uint32_t *)tmp);
+
+ gcm_xor_avx((const uint8_t *) ctx->gcm_remainder, tmp);
+ GHASH_AVX(ctx, tmp, block_size);
+ clear_fpu_regs();
+ kfpu_end();
+ rv = crypto_put_output_data(tmp, out, block_size);
+ out->cd_offset += block_size;
+ gcm_incr_counter_block(ctx);
+ ctx->gcm_processed_data_len += block_size;
+ bleft -= need;
+ datap += need;
+ ctx->gcm_remainder_len = 0;
+ }
+
+ /* Do the bulk encryption in chunk_size blocks. */
+ for (; bleft >= chunk_size; bleft -= chunk_size) {
+ kfpu_begin();
+ done = aesni_gcm_encrypt(
+ datap, ct_buf, chunk_size, key, cb, ghash);
+
+ clear_fpu_regs();
+ kfpu_end();
+ if (done != chunk_size) {
+ rv = CRYPTO_FAILED;
+ goto out_nofpu;
+ }
+ rv = crypto_put_output_data(ct_buf, out, chunk_size);
+ if (rv != CRYPTO_SUCCESS) {
+ goto out_nofpu;
+ }
+ out->cd_offset += chunk_size;
+ datap += chunk_size;
+ ctx->gcm_processed_data_len += chunk_size;
+ }
+ /* Check if we are already done. */
+ if (bleft == 0) {
+ goto out_nofpu;
+ }
+ /* Bulk encrypt the remaining data. */
+ kfpu_begin();
+ if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) {
+ done = aesni_gcm_encrypt(datap, ct_buf, bleft, key, cb, ghash);
+ if (done == 0) {
+ rv = CRYPTO_FAILED;
+ goto out;
+ }
+ rv = crypto_put_output_data(ct_buf, out, done);
+ if (rv != CRYPTO_SUCCESS) {
+ goto out;
+ }
+ out->cd_offset += done;
+ ctx->gcm_processed_data_len += done;
+ datap += done;
+ bleft -= done;
+
+ }
+ /* Less than GCM_AVX_MIN_ENCRYPT_BYTES remain, operate on blocks. */
+ while (bleft > 0) {
+ if (bleft < block_size) {
+ bcopy(datap, ctx->gcm_remainder, bleft);
+ ctx->gcm_remainder_len = bleft;
+ ctx->gcm_copy_to = datap;
+ goto out;
+ }
+ /* Encrypt, hash and write out. */
+ aes_encrypt_intel(key->encr_ks.ks32, key->nr,
+ (const uint32_t *)cb, (uint32_t *)tmp);
+
+ gcm_xor_avx(datap, tmp);
+ GHASH_AVX(ctx, tmp, block_size);
+ rv = crypto_put_output_data(tmp, out, block_size);
+ if (rv != CRYPTO_SUCCESS) {
+ goto out;
+ }
+ out->cd_offset += block_size;
+ gcm_incr_counter_block(ctx);
+ ctx->gcm_processed_data_len += block_size;
+ datap += block_size;
+ bleft -= block_size;
+ }
+out:
+ clear_fpu_regs();
+ kfpu_end();
+out_nofpu:
+ if (ct_buf != NULL) {
+ vmem_free(ct_buf, chunk_size);
+ }
+ return (rv);
+}
+
+/*
+ * Finalize the encryption: zero fill, encrypt, hash and write out any
+ * incomplete last block. Encrypt the ICB. Calculate the tag and write it out.
+ */
+static int
+gcm_encrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
+{
+ uint8_t *ghash = (uint8_t *)ctx->gcm_ghash;
+ uint32_t *J0 = (uint32_t *)ctx->gcm_J0;
+ uint8_t *remainder = (uint8_t *)ctx->gcm_remainder;
+ size_t rem_len = ctx->gcm_remainder_len;
+ const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32;
+ int aes_rounds = ((aes_key_t *)keysched)->nr;
+ int rv;
+
+ ASSERT(block_size == GCM_BLOCK_LEN);
+
+ if (out->cd_length < (rem_len + ctx->gcm_tag_len)) {
+ return (CRYPTO_DATA_LEN_RANGE);
+ }
+
+ kfpu_begin();
+ /* Pad last incomplete block with zeros, encrypt and hash. */
+ if (rem_len > 0) {
+ uint8_t *tmp = (uint8_t *)ctx->gcm_tmp;
+ const uint32_t *cb = (uint32_t *)ctx->gcm_cb;
+
+ aes_encrypt_intel(keysched, aes_rounds, cb, (uint32_t *)tmp);
+ bzero(remainder + rem_len, block_size - rem_len);
+ for (int i = 0; i < rem_len; i++) {
+ remainder[i] ^= tmp[i];
+ }
+ GHASH_AVX(ctx, remainder, block_size);
+ ctx->gcm_processed_data_len += rem_len;
+ /* No need to increment counter_block, it's the last block. */
+ }
+ /* Finish tag. */
+ ctx->gcm_len_a_len_c[1] =
+ htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len));
+ GHASH_AVX(ctx, (const uint8_t *)ctx->gcm_len_a_len_c, block_size);
+ aes_encrypt_intel(keysched, aes_rounds, J0, J0);
+
+ gcm_xor_avx((uint8_t *)J0, ghash);
+ clear_fpu_regs();
+ kfpu_end();
+
+ /* Output remainder. */
+ if (rem_len > 0) {
+ rv = crypto_put_output_data(remainder, out, rem_len);
+ if (rv != CRYPTO_SUCCESS)
+ return (rv);
+ }
+ out->cd_offset += rem_len;
+ ctx->gcm_remainder_len = 0;
+ rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len);
+ if (rv != CRYPTO_SUCCESS)
+ return (rv);
+
+ out->cd_offset += ctx->gcm_tag_len;
+ /* Clear sensitive data in the context before returning. */
+ gcm_clear_ctx(ctx);
+ return (CRYPTO_SUCCESS);
+}
+
+/*
+ * Finalize decryption: so far we have only accumulated ciphertext, so now we
+ * decrypt it here in place.
+ */
+static int
+gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size)
+{
+ ASSERT3U(ctx->gcm_processed_data_len, ==, ctx->gcm_pt_buf_len);
+ ASSERT3U(block_size, ==, 16);
+
+ size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
+ size_t pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len;
+ uint8_t *datap = ctx->gcm_pt_buf;
+ const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched);
+ uint32_t *cb = (uint32_t *)ctx->gcm_cb;
+ uint64_t *ghash = ctx->gcm_ghash;
+ uint32_t *tmp = (uint32_t *)ctx->gcm_tmp;
+ int rv = CRYPTO_SUCCESS;
+ size_t bleft, done;
+
+ /*
+ * Decrypt in chunks of gcm_avx_chunk_size, which is asserted to be
+ * greater than or equal to GCM_AVX_MIN_ENCRYPT_BYTES, and a multiple of
+ * GCM_AVX_MIN_DECRYPT_BYTES.
+ */
+ for (bleft = pt_len; bleft >= chunk_size; bleft -= chunk_size) {
+ kfpu_begin();
+ done = aesni_gcm_decrypt(datap, datap, chunk_size,
+ (const void *)key, ctx->gcm_cb, ghash);
+ clear_fpu_regs();
+ kfpu_end();
+ if (done != chunk_size) {
+ return (CRYPTO_FAILED);
+ }
+ datap += done;
+ }
+ /* Decrypt the remainder, which is less than the chunk size, in one go. */
+ kfpu_begin();
+ if (bleft >= GCM_AVX_MIN_DECRYPT_BYTES) {
+ done = aesni_gcm_decrypt(datap, datap, bleft,
+ (const void *)key, ctx->gcm_cb, ghash);
+ if (done == 0) {
+ clear_fpu_regs();
+ kfpu_end();
+ return (CRYPTO_FAILED);
+ }
+ datap += done;
+ bleft -= done;
+ }
+ ASSERT(bleft < GCM_AVX_MIN_DECRYPT_BYTES);
+
+ /*
+ * Now less than GCM_AVX_MIN_DECRYPT_BYTES bytes remain,
+ * decrypt them block by block.
+ */
+ while (bleft > 0) {
+ /* Incomplete last block. */
+ if (bleft < block_size) {
+ uint8_t *lastb = (uint8_t *)ctx->gcm_remainder;
+
+ bzero(lastb, block_size);
+ bcopy(datap, lastb, bleft);
+ /* The GCM processing. */
+ GHASH_AVX(ctx, lastb, block_size);
+ aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp);
+ for (size_t i = 0; i < bleft; i++) {
+ datap[i] = lastb[i] ^ ((uint8_t *)tmp)[i];
+ }
+ break;
+ }
+ /* The GCM processing. */
+ GHASH_AVX(ctx, datap, block_size);
+ aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp);
+ gcm_xor_avx((uint8_t *)tmp, datap);
+ gcm_incr_counter_block(ctx);
+
+ datap += block_size;
+ bleft -= block_size;
+ }
+ if (rv != CRYPTO_SUCCESS) {
+ clear_fpu_regs();
+ kfpu_end();
+ return (rv);
+ }
+ /* Decryption done, finish the tag. */
+ ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len));
+ GHASH_AVX(ctx, (uint8_t *)ctx->gcm_len_a_len_c, block_size);
+ aes_encrypt_intel(key->encr_ks.ks32, key->nr, (uint32_t *)ctx->gcm_J0,
+ (uint32_t *)ctx->gcm_J0);
+
+ gcm_xor_avx((uint8_t *)ctx->gcm_J0, (uint8_t *)ghash);
+
+ /* We are done with the FPU, restore its state. */
+ clear_fpu_regs();
+ kfpu_end();
+
+ /* Compare the input authentication tag with what we calculated. */
+ if (bcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) {
+ /* They don't match. */
+ return (CRYPTO_INVALID_MAC);
+ }
+ rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len);
+ if (rv != CRYPTO_SUCCESS) {
+ return (rv);
+ }
+ out->cd_offset += pt_len;
+ gcm_clear_ctx(ctx);
+ return (CRYPTO_SUCCESS);
+}
+
+/*
+ * Initialize the GCM params H, Htable and the counter block. Save the
+ * initial counter block.
+ */
+static int
+gcm_init_avx(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len,
+ unsigned char *auth_data, size_t auth_data_len, size_t block_size)
+{
+ uint8_t *cb = (uint8_t *)ctx->gcm_cb;
+ uint64_t *H = ctx->gcm_H;
+ const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32;
+ int aes_rounds = ((aes_key_t *)ctx->gcm_keysched)->nr;
+ uint8_t *datap = auth_data;
+ size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ;
+ size_t bleft;
+
+ ASSERT(block_size == GCM_BLOCK_LEN);
+
+ /* Init H (encrypt zero block) and create the initial counter block. */
+ bzero(ctx->gcm_ghash, sizeof (ctx->gcm_ghash));
+ bzero(H, sizeof (ctx->gcm_H));
+ kfpu_begin();
+ aes_encrypt_intel(keysched, aes_rounds,
+ (const uint32_t *)H, (uint32_t *)H);
+
+ gcm_init_htab_avx(ctx->gcm_Htable, H);
+
+ if (iv_len == 12) {
+ bcopy(iv, cb, 12);
+ cb[12] = 0;
+ cb[13] = 0;
+ cb[14] = 0;
+ cb[15] = 1;
+ /* We need the ICB later. */
+ bcopy(cb, ctx->gcm_J0, sizeof (ctx->gcm_J0));
+ } else {
+ /*
+ * Most consumers use 12 byte IVs, so it's OK to use the
+ * original routines for other IV sizes, just avoid nesting
+ * kfpu_begin calls.
+ */
+ clear_fpu_regs();
+ kfpu_end();
+ gcm_format_initial_blocks(iv, iv_len, ctx, block_size,
+ aes_copy_block, aes_xor_block);
+ kfpu_begin();
+ }
+
+ /* OpenSSL post-increments the counter; adjust for that. */
+ gcm_incr_counter_block(ctx);
+
+ /* Ghash AAD in chunk_size blocks. */
+ for (bleft = auth_data_len; bleft >= chunk_size; bleft -= chunk_size) {
+ GHASH_AVX(ctx, datap, chunk_size);
+ datap += chunk_size;
+ clear_fpu_regs();
+ kfpu_end();
+ kfpu_begin();
+ }
+ /* Ghash the remainder and handle a possible incomplete GCM block. */
+ if (bleft > 0) {
+ size_t incomp = bleft % block_size;
+
+ bleft -= incomp;
+ if (bleft > 0) {
+ GHASH_AVX(ctx, datap, bleft);
+ datap += bleft;
+ }
+ if (incomp > 0) {
+ /* Zero pad and hash incomplete last block. */
+ uint8_t *authp = (uint8_t *)ctx->gcm_tmp;
+
+ bzero(authp, block_size);
+ bcopy(datap, authp, incomp);
+ GHASH_AVX(ctx, authp, block_size);
+ }
+ }
+ clear_fpu_regs();
+ kfpu_end();
+ return (CRYPTO_SUCCESS);
+}
+
+#if defined(_KERNEL)
+static int
+icp_gcm_avx_set_chunk_size(const char *buf, zfs_kernel_param_t *kp)
+{
+ unsigned long val;
+ char val_rounded[16];
+ int error = 0;
+
+ error = kstrtoul(buf, 0, &val);
+ if (error)
+ return (error);
+
+ val = (val / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES;
+
+ if (val < GCM_AVX_MIN_ENCRYPT_BYTES || val > GCM_AVX_MAX_CHUNK_SIZE)
+ return (-EINVAL);
+
+ snprintf(val_rounded, 16, "%u", (uint32_t)val);
+ error = param_set_uint(val_rounded, kp);
+ return (error);
+}
+
+module_param_call(icp_gcm_avx_chunk_size, icp_gcm_avx_set_chunk_size,
+ param_get_uint, &gcm_avx_chunk_size, 0644);
+
+MODULE_PARM_DESC(icp_gcm_avx_chunk_size,
+ "How many bytes to process while owning the FPU");
+
+#endif /* defined(_KERNEL) */
+#endif /* ifdef CAN_USE_GCM_ASM */