diff options
Diffstat (limited to 'sys/contrib/openzfs/module/icp/algs/modes/gcm.c')
-rw-r--r-- | sys/contrib/openzfs/module/icp/algs/modes/gcm.c | 1543 |
1 files changed, 1543 insertions, 0 deletions
diff --git a/sys/contrib/openzfs/module/icp/algs/modes/gcm.c b/sys/contrib/openzfs/module/icp/algs/modes/gcm.c new file mode 100644 index 000000000000..5553c55e11cd --- /dev/null +++ b/sys/contrib/openzfs/module/icp/algs/modes/gcm.c @@ -0,0 +1,1543 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ +/* + * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. + */ + +#include <sys/zfs_context.h> +#include <modes/modes.h> +#include <sys/crypto/common.h> +#include <sys/crypto/icp.h> +#include <sys/crypto/impl.h> +#include <sys/byteorder.h> +#include <sys/simd.h> +#include <modes/gcm_impl.h> +#ifdef CAN_USE_GCM_ASM +#include <aes/aes_impl.h> +#endif + +#define GHASH(c, d, t, o) \ + xor_block((uint8_t *)(d), (uint8_t *)(c)->gcm_ghash); \ + (o)->mul((uint64_t *)(void *)(c)->gcm_ghash, (c)->gcm_H, \ + (uint64_t *)(void *)(t)); + +/* Select GCM implementation */ +#define IMPL_FASTEST (UINT32_MAX) +#define IMPL_CYCLE (UINT32_MAX-1) +#ifdef CAN_USE_GCM_ASM +#define IMPL_AVX (UINT32_MAX-2) +#endif +#define GCM_IMPL_READ(i) (*(volatile uint32_t *) &(i)) +static uint32_t icp_gcm_impl = IMPL_FASTEST; +static uint32_t user_sel_impl = IMPL_FASTEST; + +#ifdef CAN_USE_GCM_ASM +/* Does the architecture we run on support the MOVBE instruction? */ +boolean_t gcm_avx_can_use_movbe = B_FALSE; +/* + * Whether to use the optimized openssl gcm and ghash implementations. + * Set to true if module parameter icp_gcm_impl == "avx". + */ +static boolean_t gcm_use_avx = B_FALSE; +#define GCM_IMPL_USE_AVX (*(volatile boolean_t *)&gcm_use_avx) + +static inline boolean_t gcm_avx_will_work(void); +static inline void gcm_set_avx(boolean_t); +static inline boolean_t gcm_toggle_avx(void); +extern boolean_t atomic_toggle_boolean_nv(volatile boolean_t *); + +static int gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *, char *, size_t, + crypto_data_t *, size_t); + +static int gcm_encrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t); +static int gcm_decrypt_final_avx(gcm_ctx_t *, crypto_data_t *, size_t); +static int gcm_init_avx(gcm_ctx_t *, unsigned char *, size_t, unsigned char *, + size_t, size_t); +#endif /* ifdef CAN_USE_GCM_ASM */ + +/* + * Encrypt multiple blocks of data in GCM mode. Decrypt for GCM mode + * is done in another function. + */ +int +gcm_mode_encrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length, + crypto_data_t *out, size_t block_size, + int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), + void (*copy_block)(uint8_t *, uint8_t *), + void (*xor_block)(uint8_t *, uint8_t *)) +{ +#ifdef CAN_USE_GCM_ASM + if (ctx->gcm_use_avx == B_TRUE) + return (gcm_mode_encrypt_contiguous_blocks_avx( + ctx, data, length, out, block_size)); +#endif + + const gcm_impl_ops_t *gops; + size_t remainder = length; + size_t need = 0; + uint8_t *datap = (uint8_t *)data; + uint8_t *blockp; + uint8_t *lastp; + void *iov_or_mp; + offset_t offset; + uint8_t *out_data_1; + uint8_t *out_data_2; + size_t out_data_1_len; + uint64_t counter; + uint64_t counter_mask = ntohll(0x00000000ffffffffULL); + + if (length + ctx->gcm_remainder_len < block_size) { + /* accumulate bytes here and return */ + bcopy(datap, + (uint8_t *)ctx->gcm_remainder + ctx->gcm_remainder_len, + length); + ctx->gcm_remainder_len += length; + if (ctx->gcm_copy_to == NULL) { + ctx->gcm_copy_to = datap; + } + return (CRYPTO_SUCCESS); + } + + lastp = (uint8_t *)ctx->gcm_cb; + crypto_init_ptrs(out, &iov_or_mp, &offset); + + gops = gcm_impl_get_ops(); + do { + /* Unprocessed data from last call. */ + if (ctx->gcm_remainder_len > 0) { + need = block_size - ctx->gcm_remainder_len; + + if (need > remainder) + return (CRYPTO_DATA_LEN_RANGE); + + bcopy(datap, &((uint8_t *)ctx->gcm_remainder) + [ctx->gcm_remainder_len], need); + + blockp = (uint8_t *)ctx->gcm_remainder; + } else { + blockp = datap; + } + + /* + * Increment counter. Counter bits are confined + * to the bottom 32 bits of the counter block. + */ + counter = ntohll(ctx->gcm_cb[1] & counter_mask); + counter = htonll(counter + 1); + counter &= counter_mask; + ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter; + + encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, + (uint8_t *)ctx->gcm_tmp); + xor_block(blockp, (uint8_t *)ctx->gcm_tmp); + + lastp = (uint8_t *)ctx->gcm_tmp; + + ctx->gcm_processed_data_len += block_size; + + crypto_get_ptrs(out, &iov_or_mp, &offset, &out_data_1, + &out_data_1_len, &out_data_2, block_size); + + /* copy block to where it belongs */ + if (out_data_1_len == block_size) { + copy_block(lastp, out_data_1); + } else { + bcopy(lastp, out_data_1, out_data_1_len); + if (out_data_2 != NULL) { + bcopy(lastp + out_data_1_len, + out_data_2, + block_size - out_data_1_len); + } + } + /* update offset */ + out->cd_offset += block_size; + + /* add ciphertext to the hash */ + GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gops); + + /* Update pointer to next block of data to be processed. */ + if (ctx->gcm_remainder_len != 0) { + datap += need; + ctx->gcm_remainder_len = 0; + } else { + datap += block_size; + } + + remainder = (size_t)&data[length] - (size_t)datap; + + /* Incomplete last block. */ + if (remainder > 0 && remainder < block_size) { + bcopy(datap, ctx->gcm_remainder, remainder); + ctx->gcm_remainder_len = remainder; + ctx->gcm_copy_to = datap; + goto out; + } + ctx->gcm_copy_to = NULL; + + } while (remainder > 0); +out: + return (CRYPTO_SUCCESS); +} + +/* ARGSUSED */ +int +gcm_encrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size, + int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), + void (*copy_block)(uint8_t *, uint8_t *), + void (*xor_block)(uint8_t *, uint8_t *)) +{ +#ifdef CAN_USE_GCM_ASM + if (ctx->gcm_use_avx == B_TRUE) + return (gcm_encrypt_final_avx(ctx, out, block_size)); +#endif + + const gcm_impl_ops_t *gops; + uint64_t counter_mask = ntohll(0x00000000ffffffffULL); + uint8_t *ghash, *macp = NULL; + int i, rv; + + if (out->cd_length < + (ctx->gcm_remainder_len + ctx->gcm_tag_len)) { + return (CRYPTO_DATA_LEN_RANGE); + } + + gops = gcm_impl_get_ops(); + ghash = (uint8_t *)ctx->gcm_ghash; + + if (ctx->gcm_remainder_len > 0) { + uint64_t counter; + uint8_t *tmpp = (uint8_t *)ctx->gcm_tmp; + + /* + * Here is where we deal with data that is not a + * multiple of the block size. + */ + + /* + * Increment counter. + */ + counter = ntohll(ctx->gcm_cb[1] & counter_mask); + counter = htonll(counter + 1); + counter &= counter_mask; + ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter; + + encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, + (uint8_t *)ctx->gcm_tmp); + + macp = (uint8_t *)ctx->gcm_remainder; + bzero(macp + ctx->gcm_remainder_len, + block_size - ctx->gcm_remainder_len); + + /* XOR with counter block */ + for (i = 0; i < ctx->gcm_remainder_len; i++) { + macp[i] ^= tmpp[i]; + } + + /* add ciphertext to the hash */ + GHASH(ctx, macp, ghash, gops); + + ctx->gcm_processed_data_len += ctx->gcm_remainder_len; + } + + ctx->gcm_len_a_len_c[1] = + htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len)); + GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops); + encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0, + (uint8_t *)ctx->gcm_J0); + xor_block((uint8_t *)ctx->gcm_J0, ghash); + + if (ctx->gcm_remainder_len > 0) { + rv = crypto_put_output_data(macp, out, ctx->gcm_remainder_len); + if (rv != CRYPTO_SUCCESS) + return (rv); + } + out->cd_offset += ctx->gcm_remainder_len; + ctx->gcm_remainder_len = 0; + rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len); + if (rv != CRYPTO_SUCCESS) + return (rv); + out->cd_offset += ctx->gcm_tag_len; + + return (CRYPTO_SUCCESS); +} + +/* + * This will only deal with decrypting the last block of the input that + * might not be a multiple of block length. + */ +static void +gcm_decrypt_incomplete_block(gcm_ctx_t *ctx, size_t block_size, size_t index, + int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), + void (*xor_block)(uint8_t *, uint8_t *)) +{ + uint8_t *datap, *outp, *counterp; + uint64_t counter; + uint64_t counter_mask = ntohll(0x00000000ffffffffULL); + int i; + + /* + * Increment counter. + * Counter bits are confined to the bottom 32 bits + */ + counter = ntohll(ctx->gcm_cb[1] & counter_mask); + counter = htonll(counter + 1); + counter &= counter_mask; + ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter; + + datap = (uint8_t *)ctx->gcm_remainder; + outp = &((ctx->gcm_pt_buf)[index]); + counterp = (uint8_t *)ctx->gcm_tmp; + + /* authentication tag */ + bzero((uint8_t *)ctx->gcm_tmp, block_size); + bcopy(datap, (uint8_t *)ctx->gcm_tmp, ctx->gcm_remainder_len); + + /* add ciphertext to the hash */ + GHASH(ctx, ctx->gcm_tmp, ctx->gcm_ghash, gcm_impl_get_ops()); + + /* decrypt remaining ciphertext */ + encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, counterp); + + /* XOR with counter block */ + for (i = 0; i < ctx->gcm_remainder_len; i++) { + outp[i] = datap[i] ^ counterp[i]; + } +} + +/* ARGSUSED */ +int +gcm_mode_decrypt_contiguous_blocks(gcm_ctx_t *ctx, char *data, size_t length, + crypto_data_t *out, size_t block_size, + int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), + void (*copy_block)(uint8_t *, uint8_t *), + void (*xor_block)(uint8_t *, uint8_t *)) +{ + size_t new_len; + uint8_t *new; + + /* + * Copy contiguous ciphertext input blocks to plaintext buffer. + * Ciphertext will be decrypted in the final. + */ + if (length > 0) { + new_len = ctx->gcm_pt_buf_len + length; + new = vmem_alloc(new_len, ctx->gcm_kmflag); + if (new == NULL) { + vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len); + ctx->gcm_pt_buf = NULL; + return (CRYPTO_HOST_MEMORY); + } + bcopy(ctx->gcm_pt_buf, new, ctx->gcm_pt_buf_len); + vmem_free(ctx->gcm_pt_buf, ctx->gcm_pt_buf_len); + ctx->gcm_pt_buf = new; + ctx->gcm_pt_buf_len = new_len; + bcopy(data, &ctx->gcm_pt_buf[ctx->gcm_processed_data_len], + length); + ctx->gcm_processed_data_len += length; + } + + ctx->gcm_remainder_len = 0; + return (CRYPTO_SUCCESS); +} + +int +gcm_decrypt_final(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size, + int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), + void (*xor_block)(uint8_t *, uint8_t *)) +{ +#ifdef CAN_USE_GCM_ASM + if (ctx->gcm_use_avx == B_TRUE) + return (gcm_decrypt_final_avx(ctx, out, block_size)); +#endif + + const gcm_impl_ops_t *gops; + size_t pt_len; + size_t remainder; + uint8_t *ghash; + uint8_t *blockp; + uint8_t *cbp; + uint64_t counter; + uint64_t counter_mask = ntohll(0x00000000ffffffffULL); + int processed = 0, rv; + + ASSERT(ctx->gcm_processed_data_len == ctx->gcm_pt_buf_len); + + gops = gcm_impl_get_ops(); + pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len; + ghash = (uint8_t *)ctx->gcm_ghash; + blockp = ctx->gcm_pt_buf; + remainder = pt_len; + while (remainder > 0) { + /* Incomplete last block */ + if (remainder < block_size) { + bcopy(blockp, ctx->gcm_remainder, remainder); + ctx->gcm_remainder_len = remainder; + /* + * not expecting anymore ciphertext, just + * compute plaintext for the remaining input + */ + gcm_decrypt_incomplete_block(ctx, block_size, + processed, encrypt_block, xor_block); + ctx->gcm_remainder_len = 0; + goto out; + } + /* add ciphertext to the hash */ + GHASH(ctx, blockp, ghash, gops); + + /* + * Increment counter. + * Counter bits are confined to the bottom 32 bits + */ + counter = ntohll(ctx->gcm_cb[1] & counter_mask); + counter = htonll(counter + 1); + counter &= counter_mask; + ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter; + + cbp = (uint8_t *)ctx->gcm_tmp; + encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_cb, cbp); + + /* XOR with ciphertext */ + xor_block(cbp, blockp); + + processed += block_size; + blockp += block_size; + remainder -= block_size; + } +out: + ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len)); + GHASH(ctx, ctx->gcm_len_a_len_c, ghash, gops); + encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_J0, + (uint8_t *)ctx->gcm_J0); + xor_block((uint8_t *)ctx->gcm_J0, ghash); + + /* compare the input authentication tag with what we calculated */ + if (bcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) { + /* They don't match */ + return (CRYPTO_INVALID_MAC); + } else { + rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len); + if (rv != CRYPTO_SUCCESS) + return (rv); + out->cd_offset += pt_len; + } + return (CRYPTO_SUCCESS); +} + +static int +gcm_validate_args(CK_AES_GCM_PARAMS *gcm_param) +{ + size_t tag_len; + + /* + * Check the length of the authentication tag (in bits). + */ + tag_len = gcm_param->ulTagBits; + switch (tag_len) { + case 32: + case 64: + case 96: + case 104: + case 112: + case 120: + case 128: + break; + default: + return (CRYPTO_MECHANISM_PARAM_INVALID); + } + + if (gcm_param->ulIvLen == 0) + return (CRYPTO_MECHANISM_PARAM_INVALID); + + return (CRYPTO_SUCCESS); +} + +static void +gcm_format_initial_blocks(uchar_t *iv, ulong_t iv_len, + gcm_ctx_t *ctx, size_t block_size, + void (*copy_block)(uint8_t *, uint8_t *), + void (*xor_block)(uint8_t *, uint8_t *)) +{ + const gcm_impl_ops_t *gops; + uint8_t *cb; + ulong_t remainder = iv_len; + ulong_t processed = 0; + uint8_t *datap, *ghash; + uint64_t len_a_len_c[2]; + + gops = gcm_impl_get_ops(); + ghash = (uint8_t *)ctx->gcm_ghash; + cb = (uint8_t *)ctx->gcm_cb; + if (iv_len == 12) { + bcopy(iv, cb, 12); + cb[12] = 0; + cb[13] = 0; + cb[14] = 0; + cb[15] = 1; + /* J0 will be used again in the final */ + copy_block(cb, (uint8_t *)ctx->gcm_J0); + } else { + /* GHASH the IV */ + do { + if (remainder < block_size) { + bzero(cb, block_size); + bcopy(&(iv[processed]), cb, remainder); + datap = (uint8_t *)cb; + remainder = 0; + } else { + datap = (uint8_t *)(&(iv[processed])); + processed += block_size; + remainder -= block_size; + } + GHASH(ctx, datap, ghash, gops); + } while (remainder > 0); + + len_a_len_c[0] = 0; + len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(iv_len)); + GHASH(ctx, len_a_len_c, ctx->gcm_J0, gops); + + /* J0 will be used again in the final */ + copy_block((uint8_t *)ctx->gcm_J0, (uint8_t *)cb); + } +} + +static int +gcm_init(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len, + unsigned char *auth_data, size_t auth_data_len, size_t block_size, + int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), + void (*copy_block)(uint8_t *, uint8_t *), + void (*xor_block)(uint8_t *, uint8_t *)) +{ + const gcm_impl_ops_t *gops; + uint8_t *ghash, *datap, *authp; + size_t remainder, processed; + + /* encrypt zero block to get subkey H */ + bzero(ctx->gcm_H, sizeof (ctx->gcm_H)); + encrypt_block(ctx->gcm_keysched, (uint8_t *)ctx->gcm_H, + (uint8_t *)ctx->gcm_H); + + gcm_format_initial_blocks(iv, iv_len, ctx, block_size, + copy_block, xor_block); + + gops = gcm_impl_get_ops(); + authp = (uint8_t *)ctx->gcm_tmp; + ghash = (uint8_t *)ctx->gcm_ghash; + bzero(authp, block_size); + bzero(ghash, block_size); + + processed = 0; + remainder = auth_data_len; + do { + if (remainder < block_size) { + /* + * There's not a block full of data, pad rest of + * buffer with zero + */ + bzero(authp, block_size); + bcopy(&(auth_data[processed]), authp, remainder); + datap = (uint8_t *)authp; + remainder = 0; + } else { + datap = (uint8_t *)(&(auth_data[processed])); + processed += block_size; + remainder -= block_size; + } + + /* add auth data to the hash */ + GHASH(ctx, datap, ghash, gops); + + } while (remainder > 0); + + return (CRYPTO_SUCCESS); +} + +/* + * The following function is called at encrypt or decrypt init time + * for AES GCM mode. + * + * Init the GCM context struct. Handle the cycle and avx implementations here. + */ +int +gcm_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size, + int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), + void (*copy_block)(uint8_t *, uint8_t *), + void (*xor_block)(uint8_t *, uint8_t *)) +{ + int rv; + CK_AES_GCM_PARAMS *gcm_param; + + if (param != NULL) { + gcm_param = (CK_AES_GCM_PARAMS *)(void *)param; + + if ((rv = gcm_validate_args(gcm_param)) != 0) { + return (rv); + } + + gcm_ctx->gcm_tag_len = gcm_param->ulTagBits; + gcm_ctx->gcm_tag_len >>= 3; + gcm_ctx->gcm_processed_data_len = 0; + + /* these values are in bits */ + gcm_ctx->gcm_len_a_len_c[0] + = htonll(CRYPTO_BYTES2BITS(gcm_param->ulAADLen)); + + rv = CRYPTO_SUCCESS; + gcm_ctx->gcm_flags |= GCM_MODE; + } else { + return (CRYPTO_MECHANISM_PARAM_INVALID); + } + +#ifdef CAN_USE_GCM_ASM + if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) { + gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX; + } else { + /* + * Handle the "cycle" implementation by creating avx and + * non-avx contexts alternately. + */ + gcm_ctx->gcm_use_avx = gcm_toggle_avx(); + /* + * We don't handle byte swapped key schedules in the avx + * code path. + */ + aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched; + if (ks->ops->needs_byteswap == B_TRUE) { + gcm_ctx->gcm_use_avx = B_FALSE; + } + /* Use the MOVBE and the BSWAP variants alternately. */ + if (gcm_ctx->gcm_use_avx == B_TRUE && + zfs_movbe_available() == B_TRUE) { + (void) atomic_toggle_boolean_nv( + (volatile boolean_t *)&gcm_avx_can_use_movbe); + } + } + /* Avx and non avx context initialization differs from here on. */ + if (gcm_ctx->gcm_use_avx == B_FALSE) { +#endif /* ifdef CAN_USE_GCM_ASM */ + if (gcm_init(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen, + gcm_param->pAAD, gcm_param->ulAADLen, block_size, + encrypt_block, copy_block, xor_block) != 0) { + rv = CRYPTO_MECHANISM_PARAM_INVALID; + } +#ifdef CAN_USE_GCM_ASM + } else { + if (gcm_init_avx(gcm_ctx, gcm_param->pIv, gcm_param->ulIvLen, + gcm_param->pAAD, gcm_param->ulAADLen, block_size) != 0) { + rv = CRYPTO_MECHANISM_PARAM_INVALID; + } + } +#endif /* ifdef CAN_USE_GCM_ASM */ + + return (rv); +} + +int +gmac_init_ctx(gcm_ctx_t *gcm_ctx, char *param, size_t block_size, + int (*encrypt_block)(const void *, const uint8_t *, uint8_t *), + void (*copy_block)(uint8_t *, uint8_t *), + void (*xor_block)(uint8_t *, uint8_t *)) +{ + int rv; + CK_AES_GMAC_PARAMS *gmac_param; + + if (param != NULL) { + gmac_param = (CK_AES_GMAC_PARAMS *)(void *)param; + + gcm_ctx->gcm_tag_len = CRYPTO_BITS2BYTES(AES_GMAC_TAG_BITS); + gcm_ctx->gcm_processed_data_len = 0; + + /* these values are in bits */ + gcm_ctx->gcm_len_a_len_c[0] + = htonll(CRYPTO_BYTES2BITS(gmac_param->ulAADLen)); + + rv = CRYPTO_SUCCESS; + gcm_ctx->gcm_flags |= GMAC_MODE; + } else { + return (CRYPTO_MECHANISM_PARAM_INVALID); + } + +#ifdef CAN_USE_GCM_ASM + /* + * Handle the "cycle" implementation by creating avx and non avx + * contexts alternately. + */ + if (GCM_IMPL_READ(icp_gcm_impl) != IMPL_CYCLE) { + gcm_ctx->gcm_use_avx = GCM_IMPL_USE_AVX; + } else { + gcm_ctx->gcm_use_avx = gcm_toggle_avx(); + } + /* We don't handle byte swapped key schedules in the avx code path. */ + aes_key_t *ks = (aes_key_t *)gcm_ctx->gcm_keysched; + if (ks->ops->needs_byteswap == B_TRUE) { + gcm_ctx->gcm_use_avx = B_FALSE; + } + /* Avx and non avx context initialization differs from here on. */ + if (gcm_ctx->gcm_use_avx == B_FALSE) { +#endif /* ifdef CAN_USE_GCM_ASM */ + if (gcm_init(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN, + gmac_param->pAAD, gmac_param->ulAADLen, block_size, + encrypt_block, copy_block, xor_block) != 0) { + rv = CRYPTO_MECHANISM_PARAM_INVALID; + } +#ifdef CAN_USE_GCM_ASM + } else { + if (gcm_init_avx(gcm_ctx, gmac_param->pIv, AES_GMAC_IV_LEN, + gmac_param->pAAD, gmac_param->ulAADLen, block_size) != 0) { + rv = CRYPTO_MECHANISM_PARAM_INVALID; + } + } +#endif /* ifdef CAN_USE_GCM_ASM */ + + return (rv); +} + +void * +gcm_alloc_ctx(int kmflag) +{ + gcm_ctx_t *gcm_ctx; + + if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL) + return (NULL); + + gcm_ctx->gcm_flags = GCM_MODE; + return (gcm_ctx); +} + +void * +gmac_alloc_ctx(int kmflag) +{ + gcm_ctx_t *gcm_ctx; + + if ((gcm_ctx = kmem_zalloc(sizeof (gcm_ctx_t), kmflag)) == NULL) + return (NULL); + + gcm_ctx->gcm_flags = GMAC_MODE; + return (gcm_ctx); +} + +void +gcm_set_kmflag(gcm_ctx_t *ctx, int kmflag) +{ + ctx->gcm_kmflag = kmflag; +} + +/* GCM implementation that contains the fastest methods */ +static gcm_impl_ops_t gcm_fastest_impl = { + .name = "fastest" +}; + +/* All compiled in implementations */ +const gcm_impl_ops_t *gcm_all_impl[] = { + &gcm_generic_impl, +#if defined(__x86_64) && defined(HAVE_PCLMULQDQ) + &gcm_pclmulqdq_impl, +#endif +}; + +/* Indicate that benchmark has been completed */ +static boolean_t gcm_impl_initialized = B_FALSE; + +/* Hold all supported implementations */ +static size_t gcm_supp_impl_cnt = 0; +static gcm_impl_ops_t *gcm_supp_impl[ARRAY_SIZE(gcm_all_impl)]; + +/* + * Returns the GCM operations for encrypt/decrypt/key setup. When a + * SIMD implementation is not allowed in the current context, then + * fallback to the fastest generic implementation. + */ +const gcm_impl_ops_t * +gcm_impl_get_ops() +{ + if (!kfpu_allowed()) + return (&gcm_generic_impl); + + const gcm_impl_ops_t *ops = NULL; + const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl); + + switch (impl) { + case IMPL_FASTEST: + ASSERT(gcm_impl_initialized); + ops = &gcm_fastest_impl; + break; + case IMPL_CYCLE: + /* Cycle through supported implementations */ + ASSERT(gcm_impl_initialized); + ASSERT3U(gcm_supp_impl_cnt, >, 0); + static size_t cycle_impl_idx = 0; + size_t idx = (++cycle_impl_idx) % gcm_supp_impl_cnt; + ops = gcm_supp_impl[idx]; + break; +#ifdef CAN_USE_GCM_ASM + case IMPL_AVX: + /* + * Make sure that we return a valid implementation while + * switching to the avx implementation since there still + * may be unfinished non-avx contexts around. + */ + ops = &gcm_generic_impl; + break; +#endif + default: + ASSERT3U(impl, <, gcm_supp_impl_cnt); + ASSERT3U(gcm_supp_impl_cnt, >, 0); + if (impl < ARRAY_SIZE(gcm_all_impl)) + ops = gcm_supp_impl[impl]; + break; + } + + ASSERT3P(ops, !=, NULL); + + return (ops); +} + +/* + * Initialize all supported implementations. + */ +void +gcm_impl_init(void) +{ + gcm_impl_ops_t *curr_impl; + int i, c; + + /* Move supported implementations into gcm_supp_impls */ + for (i = 0, c = 0; i < ARRAY_SIZE(gcm_all_impl); i++) { + curr_impl = (gcm_impl_ops_t *)gcm_all_impl[i]; + + if (curr_impl->is_supported()) + gcm_supp_impl[c++] = (gcm_impl_ops_t *)curr_impl; + } + gcm_supp_impl_cnt = c; + + /* + * Set the fastest implementation given the assumption that the + * hardware accelerated version is the fastest. + */ +#if defined(__x86_64) && defined(HAVE_PCLMULQDQ) + if (gcm_pclmulqdq_impl.is_supported()) { + memcpy(&gcm_fastest_impl, &gcm_pclmulqdq_impl, + sizeof (gcm_fastest_impl)); + } else +#endif + { + memcpy(&gcm_fastest_impl, &gcm_generic_impl, + sizeof (gcm_fastest_impl)); + } + + strlcpy(gcm_fastest_impl.name, "fastest", GCM_IMPL_NAME_MAX); + +#ifdef CAN_USE_GCM_ASM + /* + * Use the avx implementation if it's available and the implementation + * hasn't changed from its default value of fastest on module load. + */ + if (gcm_avx_will_work()) { +#ifdef HAVE_MOVBE + if (zfs_movbe_available() == B_TRUE) { + atomic_swap_32(&gcm_avx_can_use_movbe, B_TRUE); + } +#endif + if (GCM_IMPL_READ(user_sel_impl) == IMPL_FASTEST) { + gcm_set_avx(B_TRUE); + } + } +#endif + /* Finish initialization */ + atomic_swap_32(&icp_gcm_impl, user_sel_impl); + gcm_impl_initialized = B_TRUE; +} + +static const struct { + char *name; + uint32_t sel; +} gcm_impl_opts[] = { + { "cycle", IMPL_CYCLE }, + { "fastest", IMPL_FASTEST }, +#ifdef CAN_USE_GCM_ASM + { "avx", IMPL_AVX }, +#endif +}; + +/* + * Function sets desired gcm implementation. + * + * If we are called before init(), user preference will be saved in + * user_sel_impl, and applied in later init() call. This occurs when module + * parameter is specified on module load. Otherwise, directly update + * icp_gcm_impl. + * + * @val Name of gcm implementation to use + * @param Unused. + */ +int +gcm_impl_set(const char *val) +{ + int err = -EINVAL; + char req_name[GCM_IMPL_NAME_MAX]; + uint32_t impl = GCM_IMPL_READ(user_sel_impl); + size_t i; + + /* sanitize input */ + i = strnlen(val, GCM_IMPL_NAME_MAX); + if (i == 0 || i >= GCM_IMPL_NAME_MAX) + return (err); + + strlcpy(req_name, val, GCM_IMPL_NAME_MAX); + while (i > 0 && isspace(req_name[i-1])) + i--; + req_name[i] = '\0'; + + /* Check mandatory options */ + for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) { +#ifdef CAN_USE_GCM_ASM + /* Ignore avx implementation if it won't work. */ + if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) { + continue; + } +#endif + if (strcmp(req_name, gcm_impl_opts[i].name) == 0) { + impl = gcm_impl_opts[i].sel; + err = 0; + break; + } + } + + /* check all supported impl if init() was already called */ + if (err != 0 && gcm_impl_initialized) { + /* check all supported implementations */ + for (i = 0; i < gcm_supp_impl_cnt; i++) { + if (strcmp(req_name, gcm_supp_impl[i]->name) == 0) { + impl = i; + err = 0; + break; + } + } + } +#ifdef CAN_USE_GCM_ASM + /* + * Use the avx implementation if available and the requested one is + * avx or fastest. + */ + if (gcm_avx_will_work() == B_TRUE && + (impl == IMPL_AVX || impl == IMPL_FASTEST)) { + gcm_set_avx(B_TRUE); + } else { + gcm_set_avx(B_FALSE); + } +#endif + + if (err == 0) { + if (gcm_impl_initialized) + atomic_swap_32(&icp_gcm_impl, impl); + else + atomic_swap_32(&user_sel_impl, impl); + } + + return (err); +} + +#if defined(_KERNEL) && defined(__linux__) + +static int +icp_gcm_impl_set(const char *val, zfs_kernel_param_t *kp) +{ + return (gcm_impl_set(val)); +} + +static int +icp_gcm_impl_get(char *buffer, zfs_kernel_param_t *kp) +{ + int i, cnt = 0; + char *fmt; + const uint32_t impl = GCM_IMPL_READ(icp_gcm_impl); + + ASSERT(gcm_impl_initialized); + + /* list mandatory options */ + for (i = 0; i < ARRAY_SIZE(gcm_impl_opts); i++) { +#ifdef CAN_USE_GCM_ASM + /* Ignore avx implementation if it won't work. */ + if (gcm_impl_opts[i].sel == IMPL_AVX && !gcm_avx_will_work()) { + continue; + } +#endif + fmt = (impl == gcm_impl_opts[i].sel) ? "[%s] " : "%s "; + cnt += sprintf(buffer + cnt, fmt, gcm_impl_opts[i].name); + } + + /* list all supported implementations */ + for (i = 0; i < gcm_supp_impl_cnt; i++) { + fmt = (i == impl) ? "[%s] " : "%s "; + cnt += sprintf(buffer + cnt, fmt, gcm_supp_impl[i]->name); + } + + return (cnt); +} + +module_param_call(icp_gcm_impl, icp_gcm_impl_set, icp_gcm_impl_get, + NULL, 0644); +MODULE_PARM_DESC(icp_gcm_impl, "Select gcm implementation."); +#endif /* defined(__KERNEL) */ + +#ifdef CAN_USE_GCM_ASM +#define GCM_BLOCK_LEN 16 +/* + * The openssl asm routines are 6x aggregated and need that many bytes + * at minimum. + */ +#define GCM_AVX_MIN_DECRYPT_BYTES (GCM_BLOCK_LEN * 6) +#define GCM_AVX_MIN_ENCRYPT_BYTES (GCM_BLOCK_LEN * 6 * 3) +/* + * Ensure the chunk size is reasonable since we are allocating a + * GCM_AVX_MAX_CHUNK_SIZEd buffer and disabling preemption and interrupts. + */ +#define GCM_AVX_MAX_CHUNK_SIZE \ + (((128*1024)/GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES) + +/* Get the chunk size module parameter. */ +#define GCM_CHUNK_SIZE_READ *(volatile uint32_t *) &gcm_avx_chunk_size + +/* Clear the FPU registers since they hold sensitive internal state. */ +#define clear_fpu_regs() clear_fpu_regs_avx() +#define GHASH_AVX(ctx, in, len) \ + gcm_ghash_avx((ctx)->gcm_ghash, (const uint64_t (*)[2])(ctx)->gcm_Htable, \ + in, len) + +#define gcm_incr_counter_block(ctx) gcm_incr_counter_block_by(ctx, 1) + +/* + * Module parameter: number of bytes to process at once while owning the FPU. + * Rounded down to the next GCM_AVX_MIN_DECRYPT_BYTES byte boundary and is + * ensured to be greater or equal than GCM_AVX_MIN_DECRYPT_BYTES. + */ +static uint32_t gcm_avx_chunk_size = + ((32 * 1024) / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES; + +extern void clear_fpu_regs_avx(void); +extern void gcm_xor_avx(const uint8_t *src, uint8_t *dst); +extern void aes_encrypt_intel(const uint32_t rk[], int nr, + const uint32_t pt[4], uint32_t ct[4]); + +extern void gcm_init_htab_avx(uint64_t Htable[16][2], const uint64_t H[2]); +extern void gcm_ghash_avx(uint64_t ghash[2], const uint64_t Htable[16][2], + const uint8_t *in, size_t len); + +extern size_t aesni_gcm_encrypt(const uint8_t *, uint8_t *, size_t, + const void *, uint64_t *, uint64_t *); + +extern size_t aesni_gcm_decrypt(const uint8_t *, uint8_t *, size_t, + const void *, uint64_t *, uint64_t *); + +static inline boolean_t +gcm_avx_will_work(void) +{ + /* Avx should imply aes-ni and pclmulqdq, but make sure anyhow. */ + return (kfpu_allowed() && + zfs_avx_available() && zfs_aes_available() && + zfs_pclmulqdq_available()); +} + +static inline void +gcm_set_avx(boolean_t val) +{ + if (gcm_avx_will_work() == B_TRUE) { + atomic_swap_32(&gcm_use_avx, val); + } +} + +static inline boolean_t +gcm_toggle_avx(void) +{ + if (gcm_avx_will_work() == B_TRUE) { + return (atomic_toggle_boolean_nv(&GCM_IMPL_USE_AVX)); + } else { + return (B_FALSE); + } +} + +/* + * Clear sensitive data in the context. + * + * ctx->gcm_remainder may contain a plaintext remainder. ctx->gcm_H and + * ctx->gcm_Htable contain the hash sub key which protects authentication. + * + * Although extremely unlikely, ctx->gcm_J0 and ctx->gcm_tmp could be used for + * a known plaintext attack, they consists of the IV and the first and last + * counter respectively. If they should be cleared is debatable. + */ +static inline void +gcm_clear_ctx(gcm_ctx_t *ctx) +{ + bzero(ctx->gcm_remainder, sizeof (ctx->gcm_remainder)); + bzero(ctx->gcm_H, sizeof (ctx->gcm_H)); + bzero(ctx->gcm_Htable, sizeof (ctx->gcm_Htable)); + bzero(ctx->gcm_J0, sizeof (ctx->gcm_J0)); + bzero(ctx->gcm_tmp, sizeof (ctx->gcm_tmp)); +} + +/* Increment the GCM counter block by n. */ +static inline void +gcm_incr_counter_block_by(gcm_ctx_t *ctx, int n) +{ + uint64_t counter_mask = ntohll(0x00000000ffffffffULL); + uint64_t counter = ntohll(ctx->gcm_cb[1] & counter_mask); + + counter = htonll(counter + n); + counter &= counter_mask; + ctx->gcm_cb[1] = (ctx->gcm_cb[1] & ~counter_mask) | counter; +} + +/* + * Encrypt multiple blocks of data in GCM mode. + * This is done in gcm_avx_chunk_size chunks, utilizing AVX assembler routines + * if possible. While processing a chunk the FPU is "locked". + */ +static int +gcm_mode_encrypt_contiguous_blocks_avx(gcm_ctx_t *ctx, char *data, + size_t length, crypto_data_t *out, size_t block_size) +{ + size_t bleft = length; + size_t need = 0; + size_t done = 0; + uint8_t *datap = (uint8_t *)data; + size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ; + const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched); + uint64_t *ghash = ctx->gcm_ghash; + uint64_t *cb = ctx->gcm_cb; + uint8_t *ct_buf = NULL; + uint8_t *tmp = (uint8_t *)ctx->gcm_tmp; + int rv = CRYPTO_SUCCESS; + + ASSERT(block_size == GCM_BLOCK_LEN); + /* + * If the last call left an incomplete block, try to fill + * it first. + */ + if (ctx->gcm_remainder_len > 0) { + need = block_size - ctx->gcm_remainder_len; + if (length < need) { + /* Accumulate bytes here and return. */ + bcopy(datap, (uint8_t *)ctx->gcm_remainder + + ctx->gcm_remainder_len, length); + + ctx->gcm_remainder_len += length; + if (ctx->gcm_copy_to == NULL) { + ctx->gcm_copy_to = datap; + } + return (CRYPTO_SUCCESS); + } else { + /* Complete incomplete block. */ + bcopy(datap, (uint8_t *)ctx->gcm_remainder + + ctx->gcm_remainder_len, need); + + ctx->gcm_copy_to = NULL; + } + } + + /* Allocate a buffer to encrypt to if there is enough input. */ + if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) { + ct_buf = vmem_alloc(chunk_size, ctx->gcm_kmflag); + if (ct_buf == NULL) { + return (CRYPTO_HOST_MEMORY); + } + } + + /* If we completed an incomplete block, encrypt and write it out. */ + if (ctx->gcm_remainder_len > 0) { + kfpu_begin(); + aes_encrypt_intel(key->encr_ks.ks32, key->nr, + (const uint32_t *)cb, (uint32_t *)tmp); + + gcm_xor_avx((const uint8_t *) ctx->gcm_remainder, tmp); + GHASH_AVX(ctx, tmp, block_size); + clear_fpu_regs(); + kfpu_end(); + rv = crypto_put_output_data(tmp, out, block_size); + out->cd_offset += block_size; + gcm_incr_counter_block(ctx); + ctx->gcm_processed_data_len += block_size; + bleft -= need; + datap += need; + ctx->gcm_remainder_len = 0; + } + + /* Do the bulk encryption in chunk_size blocks. */ + for (; bleft >= chunk_size; bleft -= chunk_size) { + kfpu_begin(); + done = aesni_gcm_encrypt( + datap, ct_buf, chunk_size, key, cb, ghash); + + clear_fpu_regs(); + kfpu_end(); + if (done != chunk_size) { + rv = CRYPTO_FAILED; + goto out_nofpu; + } + rv = crypto_put_output_data(ct_buf, out, chunk_size); + if (rv != CRYPTO_SUCCESS) { + goto out_nofpu; + } + out->cd_offset += chunk_size; + datap += chunk_size; + ctx->gcm_processed_data_len += chunk_size; + } + /* Check if we are already done. */ + if (bleft == 0) { + goto out_nofpu; + } + /* Bulk encrypt the remaining data. */ + kfpu_begin(); + if (bleft >= GCM_AVX_MIN_ENCRYPT_BYTES) { + done = aesni_gcm_encrypt(datap, ct_buf, bleft, key, cb, ghash); + if (done == 0) { + rv = CRYPTO_FAILED; + goto out; + } + rv = crypto_put_output_data(ct_buf, out, done); + if (rv != CRYPTO_SUCCESS) { + goto out; + } + out->cd_offset += done; + ctx->gcm_processed_data_len += done; + datap += done; + bleft -= done; + + } + /* Less than GCM_AVX_MIN_ENCRYPT_BYTES remain, operate on blocks. */ + while (bleft > 0) { + if (bleft < block_size) { + bcopy(datap, ctx->gcm_remainder, bleft); + ctx->gcm_remainder_len = bleft; + ctx->gcm_copy_to = datap; + goto out; + } + /* Encrypt, hash and write out. */ + aes_encrypt_intel(key->encr_ks.ks32, key->nr, + (const uint32_t *)cb, (uint32_t *)tmp); + + gcm_xor_avx(datap, tmp); + GHASH_AVX(ctx, tmp, block_size); + rv = crypto_put_output_data(tmp, out, block_size); + if (rv != CRYPTO_SUCCESS) { + goto out; + } + out->cd_offset += block_size; + gcm_incr_counter_block(ctx); + ctx->gcm_processed_data_len += block_size; + datap += block_size; + bleft -= block_size; + } +out: + clear_fpu_regs(); + kfpu_end(); +out_nofpu: + if (ct_buf != NULL) { + vmem_free(ct_buf, chunk_size); + } + return (rv); +} + +/* + * Finalize the encryption: Zero fill, encrypt, hash and write out an eventual + * incomplete last block. Encrypt the ICB. Calculate the tag and write it out. + */ +static int +gcm_encrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size) +{ + uint8_t *ghash = (uint8_t *)ctx->gcm_ghash; + uint32_t *J0 = (uint32_t *)ctx->gcm_J0; + uint8_t *remainder = (uint8_t *)ctx->gcm_remainder; + size_t rem_len = ctx->gcm_remainder_len; + const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32; + int aes_rounds = ((aes_key_t *)keysched)->nr; + int rv; + + ASSERT(block_size == GCM_BLOCK_LEN); + + if (out->cd_length < (rem_len + ctx->gcm_tag_len)) { + return (CRYPTO_DATA_LEN_RANGE); + } + + kfpu_begin(); + /* Pad last incomplete block with zeros, encrypt and hash. */ + if (rem_len > 0) { + uint8_t *tmp = (uint8_t *)ctx->gcm_tmp; + const uint32_t *cb = (uint32_t *)ctx->gcm_cb; + + aes_encrypt_intel(keysched, aes_rounds, cb, (uint32_t *)tmp); + bzero(remainder + rem_len, block_size - rem_len); + for (int i = 0; i < rem_len; i++) { + remainder[i] ^= tmp[i]; + } + GHASH_AVX(ctx, remainder, block_size); + ctx->gcm_processed_data_len += rem_len; + /* No need to increment counter_block, it's the last block. */ + } + /* Finish tag. */ + ctx->gcm_len_a_len_c[1] = + htonll(CRYPTO_BYTES2BITS(ctx->gcm_processed_data_len)); + GHASH_AVX(ctx, (const uint8_t *)ctx->gcm_len_a_len_c, block_size); + aes_encrypt_intel(keysched, aes_rounds, J0, J0); + + gcm_xor_avx((uint8_t *)J0, ghash); + clear_fpu_regs(); + kfpu_end(); + + /* Output remainder. */ + if (rem_len > 0) { + rv = crypto_put_output_data(remainder, out, rem_len); + if (rv != CRYPTO_SUCCESS) + return (rv); + } + out->cd_offset += rem_len; + ctx->gcm_remainder_len = 0; + rv = crypto_put_output_data(ghash, out, ctx->gcm_tag_len); + if (rv != CRYPTO_SUCCESS) + return (rv); + + out->cd_offset += ctx->gcm_tag_len; + /* Clear sensitive data in the context before returning. */ + gcm_clear_ctx(ctx); + return (CRYPTO_SUCCESS); +} + +/* + * Finalize decryption: We just have accumulated crypto text, so now we + * decrypt it here inplace. + */ +static int +gcm_decrypt_final_avx(gcm_ctx_t *ctx, crypto_data_t *out, size_t block_size) +{ + ASSERT3U(ctx->gcm_processed_data_len, ==, ctx->gcm_pt_buf_len); + ASSERT3U(block_size, ==, 16); + + size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ; + size_t pt_len = ctx->gcm_processed_data_len - ctx->gcm_tag_len; + uint8_t *datap = ctx->gcm_pt_buf; + const aes_key_t *key = ((aes_key_t *)ctx->gcm_keysched); + uint32_t *cb = (uint32_t *)ctx->gcm_cb; + uint64_t *ghash = ctx->gcm_ghash; + uint32_t *tmp = (uint32_t *)ctx->gcm_tmp; + int rv = CRYPTO_SUCCESS; + size_t bleft, done; + + /* + * Decrypt in chunks of gcm_avx_chunk_size, which is asserted to be + * greater or equal than GCM_AVX_MIN_ENCRYPT_BYTES, and a multiple of + * GCM_AVX_MIN_DECRYPT_BYTES. + */ + for (bleft = pt_len; bleft >= chunk_size; bleft -= chunk_size) { + kfpu_begin(); + done = aesni_gcm_decrypt(datap, datap, chunk_size, + (const void *)key, ctx->gcm_cb, ghash); + clear_fpu_regs(); + kfpu_end(); + if (done != chunk_size) { + return (CRYPTO_FAILED); + } + datap += done; + } + /* Decrypt remainder, which is less then chunk size, in one go. */ + kfpu_begin(); + if (bleft >= GCM_AVX_MIN_DECRYPT_BYTES) { + done = aesni_gcm_decrypt(datap, datap, bleft, + (const void *)key, ctx->gcm_cb, ghash); + if (done == 0) { + clear_fpu_regs(); + kfpu_end(); + return (CRYPTO_FAILED); + } + datap += done; + bleft -= done; + } + ASSERT(bleft < GCM_AVX_MIN_DECRYPT_BYTES); + + /* + * Now less then GCM_AVX_MIN_DECRYPT_BYTES bytes remain, + * decrypt them block by block. + */ + while (bleft > 0) { + /* Incomplete last block. */ + if (bleft < block_size) { + uint8_t *lastb = (uint8_t *)ctx->gcm_remainder; + + bzero(lastb, block_size); + bcopy(datap, lastb, bleft); + /* The GCM processing. */ + GHASH_AVX(ctx, lastb, block_size); + aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp); + for (size_t i = 0; i < bleft; i++) { + datap[i] = lastb[i] ^ ((uint8_t *)tmp)[i]; + } + break; + } + /* The GCM processing. */ + GHASH_AVX(ctx, datap, block_size); + aes_encrypt_intel(key->encr_ks.ks32, key->nr, cb, tmp); + gcm_xor_avx((uint8_t *)tmp, datap); + gcm_incr_counter_block(ctx); + + datap += block_size; + bleft -= block_size; + } + if (rv != CRYPTO_SUCCESS) { + clear_fpu_regs(); + kfpu_end(); + return (rv); + } + /* Decryption done, finish the tag. */ + ctx->gcm_len_a_len_c[1] = htonll(CRYPTO_BYTES2BITS(pt_len)); + GHASH_AVX(ctx, (uint8_t *)ctx->gcm_len_a_len_c, block_size); + aes_encrypt_intel(key->encr_ks.ks32, key->nr, (uint32_t *)ctx->gcm_J0, + (uint32_t *)ctx->gcm_J0); + + gcm_xor_avx((uint8_t *)ctx->gcm_J0, (uint8_t *)ghash); + + /* We are done with the FPU, restore its state. */ + clear_fpu_regs(); + kfpu_end(); + + /* Compare the input authentication tag with what we calculated. */ + if (bcmp(&ctx->gcm_pt_buf[pt_len], ghash, ctx->gcm_tag_len)) { + /* They don't match. */ + return (CRYPTO_INVALID_MAC); + } + rv = crypto_put_output_data(ctx->gcm_pt_buf, out, pt_len); + if (rv != CRYPTO_SUCCESS) { + return (rv); + } + out->cd_offset += pt_len; + gcm_clear_ctx(ctx); + return (CRYPTO_SUCCESS); +} + +/* + * Initialize the GCM params H, Htabtle and the counter block. Save the + * initial counter block. + */ +static int +gcm_init_avx(gcm_ctx_t *ctx, unsigned char *iv, size_t iv_len, + unsigned char *auth_data, size_t auth_data_len, size_t block_size) +{ + uint8_t *cb = (uint8_t *)ctx->gcm_cb; + uint64_t *H = ctx->gcm_H; + const void *keysched = ((aes_key_t *)ctx->gcm_keysched)->encr_ks.ks32; + int aes_rounds = ((aes_key_t *)ctx->gcm_keysched)->nr; + uint8_t *datap = auth_data; + size_t chunk_size = (size_t)GCM_CHUNK_SIZE_READ; + size_t bleft; + + ASSERT(block_size == GCM_BLOCK_LEN); + + /* Init H (encrypt zero block) and create the initial counter block. */ + bzero(ctx->gcm_ghash, sizeof (ctx->gcm_ghash)); + bzero(H, sizeof (ctx->gcm_H)); + kfpu_begin(); + aes_encrypt_intel(keysched, aes_rounds, + (const uint32_t *)H, (uint32_t *)H); + + gcm_init_htab_avx(ctx->gcm_Htable, H); + + if (iv_len == 12) { + bcopy(iv, cb, 12); + cb[12] = 0; + cb[13] = 0; + cb[14] = 0; + cb[15] = 1; + /* We need the ICB later. */ + bcopy(cb, ctx->gcm_J0, sizeof (ctx->gcm_J0)); + } else { + /* + * Most consumers use 12 byte IVs, so it's OK to use the + * original routines for other IV sizes, just avoid nesting + * kfpu_begin calls. + */ + clear_fpu_regs(); + kfpu_end(); + gcm_format_initial_blocks(iv, iv_len, ctx, block_size, + aes_copy_block, aes_xor_block); + kfpu_begin(); + } + + /* Openssl post increments the counter, adjust for that. */ + gcm_incr_counter_block(ctx); + + /* Ghash AAD in chunk_size blocks. */ + for (bleft = auth_data_len; bleft >= chunk_size; bleft -= chunk_size) { + GHASH_AVX(ctx, datap, chunk_size); + datap += chunk_size; + clear_fpu_regs(); + kfpu_end(); + kfpu_begin(); + } + /* Ghash the remainder and handle possible incomplete GCM block. */ + if (bleft > 0) { + size_t incomp = bleft % block_size; + + bleft -= incomp; + if (bleft > 0) { + GHASH_AVX(ctx, datap, bleft); + datap += bleft; + } + if (incomp > 0) { + /* Zero pad and hash incomplete last block. */ + uint8_t *authp = (uint8_t *)ctx->gcm_tmp; + + bzero(authp, block_size); + bcopy(datap, authp, incomp); + GHASH_AVX(ctx, authp, block_size); + } + } + clear_fpu_regs(); + kfpu_end(); + return (CRYPTO_SUCCESS); +} + +#if defined(_KERNEL) +static int +icp_gcm_avx_set_chunk_size(const char *buf, zfs_kernel_param_t *kp) +{ + unsigned long val; + char val_rounded[16]; + int error = 0; + + error = kstrtoul(buf, 0, &val); + if (error) + return (error); + + val = (val / GCM_AVX_MIN_DECRYPT_BYTES) * GCM_AVX_MIN_DECRYPT_BYTES; + + if (val < GCM_AVX_MIN_ENCRYPT_BYTES || val > GCM_AVX_MAX_CHUNK_SIZE) + return (-EINVAL); + + snprintf(val_rounded, 16, "%u", (uint32_t)val); + error = param_set_uint(val_rounded, kp); + return (error); +} + +module_param_call(icp_gcm_avx_chunk_size, icp_gcm_avx_set_chunk_size, + param_get_uint, &gcm_avx_chunk_size, 0644); + +MODULE_PARM_DESC(icp_gcm_avx_chunk_size, + "How many bytes to process while owning the FPU"); + +#endif /* defined(__KERNEL) */ +#endif /* ifdef CAN_USE_GCM_ASM */ |