aboutsummaryrefslogtreecommitdiff
path: root/sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.c')
-rw-r--r--sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.c483
1 files changed, 303 insertions, 180 deletions
diff --git a/sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.c b/sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.c
index c3809a2827be..5684b4ff1a97 100644
--- a/sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.c
+++ b/sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.c
@@ -6,7 +6,7 @@
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
- * or http://www.opensolaris.org/os/licensing.
+ * or https://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
@@ -23,185 +23,270 @@
* Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
*/
+#include <sys/simd.h>
#include <sys/zfs_context.h>
-#include <sys/zio_checksum.h>
+#include <sys/zfs_impl.h>
+#include <sys/blake3.h>
#include "blake3_impl.h"
-static const blake3_impl_ops_t *const blake3_impls[] = {
- &blake3_generic_impl,
-#if defined(__aarch64__) || \
+#if !defined(OMIT_SIMD) && (defined(__aarch64__) || \
(defined(__x86_64) && defined(HAVE_SSE2)) || \
- (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
- &blake3_sse2_impl,
-#endif
-#if defined(__aarch64__) || \
- (defined(__x86_64) && defined(HAVE_SSE4_1)) || \
- (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
- &blake3_sse41_impl,
-#endif
-#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
- &blake3_avx2_impl,
-#endif
-#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
- &blake3_avx512_impl,
+ (defined(__PPC64__) && defined(__LITTLE_ENDIAN__)))
+#define USE_SIMD
#endif
-};
-/* this pointer holds current ops for implementation */
-static const blake3_impl_ops_t *blake3_selected_impl = &blake3_generic_impl;
+#ifdef USE_SIMD
+extern void ASMABI zfs_blake3_compress_in_place_sse2(uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags);
+
+extern void ASMABI zfs_blake3_compress_xof_sse2(const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags, uint8_t out[64]);
+
+extern void ASMABI zfs_blake3_hash_many_sse2(const uint8_t * const *inputs,
+ size_t num_inputs, size_t blocks, const uint32_t key[8],
+ uint64_t counter, boolean_t increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out);
+
+/*
+ * "sse2" compress_in_place hook: forwards every argument unchanged to the
+ * external zfs_blake3_compress_in_place_sse2() routine, with the call
+ * placed between kfpu_begin() and kfpu_end().
+ */
+static void
+blake3_compress_in_place_sse2(uint32_t cv[8],
+    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+    uint64_t counter, uint8_t flags)
+{
+	kfpu_begin();
+	zfs_blake3_compress_in_place_sse2(cv, block, block_len,
+	    counter, flags);
+	kfpu_end();
+}
-/* special implementation selections */
-#define IMPL_FASTEST (UINT32_MAX)
-#define IMPL_CYCLE (UINT32_MAX-1)
-#define IMPL_USER (UINT32_MAX-2)
-#define IMPL_PARAM (UINT32_MAX-3)
+/*
+ * "sse2" compress_xof hook: forwards every argument (including the 64-byte
+ * output buffer) to the external zfs_blake3_compress_xof_sse2() routine,
+ * with the call placed between kfpu_begin() and kfpu_end().
+ */
+static void
+blake3_compress_xof_sse2(const uint32_t cv[8],
+    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+    uint64_t counter, uint8_t flags, uint8_t out[64])
+{
+	kfpu_begin();
+	zfs_blake3_compress_xof_sse2(cv, block, block_len,
+	    counter, flags, out);
+	kfpu_end();
+}
-#define IMPL_READ(i) (*(volatile uint32_t *) &(i))
-static uint32_t icp_blake3_impl = IMPL_FASTEST;
+/*
+ * "sse2" hash_many hook: forwards every argument unchanged to the external
+ * zfs_blake3_hash_many_sse2() routine, with the call placed between
+ * kfpu_begin() and kfpu_end().
+ */
+static void
+blake3_hash_many_sse2(const uint8_t * const *inputs, size_t num_inputs,
+    size_t blocks, const uint32_t key[8], uint64_t counter,
+    boolean_t increment_counter, uint8_t flags, uint8_t flags_start,
+    uint8_t flags_end, uint8_t *out)
+{
+	kfpu_begin();
+	zfs_blake3_hash_many_sse2(inputs, num_inputs, blocks, key,
+	    counter, increment_counter, flags, flags_start, flags_end,
+	    out);
+	kfpu_end();
+}
-#define BLAKE3_IMPL_NAME_MAX 16
+/*
+ * Runtime check for the "sse2" implementation.  kfpu_allowed() must hold
+ * on every architecture; x86_64 additionally requires
+ * zfs_sse2_available() and PPC64 requires zfs_vsx_available().  On any
+ * other architecture the result is kfpu_allowed() alone.
+ */
+static boolean_t
+blake3_is_sse2_supported(void)
+{
+	const boolean_t fpu_ok = kfpu_allowed();
+
+#if defined(__x86_64)
+	return (fpu_ok && zfs_sse2_available());
+#elif defined(__PPC64__)
+	return (fpu_ok && zfs_vsx_available());
+#else
+	return (fpu_ok);
+#endif
+}
-/* id of fastest implementation */
-static uint32_t blake3_fastest_id = 0;
+const blake3_ops_t blake3_sse2_impl = { /* ops table for the "sse2" variant; referenced from blake3_impls[] */
+ .compress_in_place = blake3_compress_in_place_sse2, /* kfpu_begin()/kfpu_end() wrapper */
+ .compress_xof = blake3_compress_xof_sse2, /* kfpu_begin()/kfpu_end() wrapper */
+ .hash_many = blake3_hash_many_sse2, /* kfpu_begin()/kfpu_end() wrapper */
+ .is_supported = blake3_is_sse2_supported, /* runtime availability check */
+ .degree = 4, /* presumably the number of inputs hashed in parallel -- TODO confirm in blake3_impl.h */
+ .name = "sse2" /* name printed by the parameter handlers in this file */
+};
+#endif
-/* currently used id */
-static uint32_t blake3_current_id = 0;
+#ifdef USE_SIMD
-/* id of module parameter (-1 == unused) */
-static int blake3_param_id = -1;
+extern void ASMABI zfs_blake3_compress_in_place_sse41(uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags);
-/* return number of supported implementations */
-int
-blake3_get_impl_count(void)
-{
- static int impls = 0;
- int i;
+extern void ASMABI zfs_blake3_compress_xof_sse41(const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags, uint8_t out[64]);
- if (impls)
- return (impls);
+extern void ASMABI zfs_blake3_hash_many_sse41(const uint8_t * const *inputs,
+ size_t num_inputs, size_t blocks, const uint32_t key[8],
+ uint64_t counter, boolean_t increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out);
- for (i = 0; i < ARRAY_SIZE(blake3_impls); i++) {
- if (!blake3_impls[i]->is_supported()) continue;
- impls++;
- }
+/*
+ * "sse41" compress_in_place hook: forwards every argument unchanged to the
+ * external zfs_blake3_compress_in_place_sse41() routine, with the call
+ * placed between kfpu_begin() and kfpu_end().
+ */
+static void
+blake3_compress_in_place_sse41(uint32_t cv[8],
+    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+    uint64_t counter, uint8_t flags)
+{
+	kfpu_begin();
+	zfs_blake3_compress_in_place_sse41(cv, block, block_len,
+	    counter, flags);
+	kfpu_end();
+}
- return (impls);
+/*
+ * "sse41" compress_xof hook: forwards every argument (including the
+ * 64-byte output buffer) to the external zfs_blake3_compress_xof_sse41()
+ * routine, with the call placed between kfpu_begin() and kfpu_end().
+ */
+static void
+blake3_compress_xof_sse41(const uint32_t cv[8],
+    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+    uint64_t counter, uint8_t flags, uint8_t out[64])
+{
+	kfpu_begin();
+	zfs_blake3_compress_xof_sse41(cv, block, block_len,
+	    counter, flags, out);
+	kfpu_end();
}
-/* return id of selected implementation */
-int
-blake3_get_impl_id(void)
-{
- return (blake3_current_id);
+/*
+ * "sse41" hash_many hook: forwards every argument unchanged to the
+ * external zfs_blake3_hash_many_sse41() routine, with the call placed
+ * between kfpu_begin() and kfpu_end().
+ */
+static void
+blake3_hash_many_sse41(const uint8_t * const *inputs, size_t num_inputs,
+    size_t blocks, const uint32_t key[8], uint64_t counter,
+    boolean_t increment_counter, uint8_t flags, uint8_t flags_start,
+    uint8_t flags_end, uint8_t *out)
+{
+	kfpu_begin();
+	zfs_blake3_hash_many_sse41(inputs, num_inputs, blocks, key,
+	    counter, increment_counter, flags, flags_start, flags_end,
+	    out);
+	kfpu_end();
}
-/* return name of selected implementation */
-const char *
-blake3_get_impl_name(void)
+/*
+ * Runtime check for the "sse41" implementation.  kfpu_allowed() must hold
+ * on every architecture; x86_64 additionally requires
+ * zfs_sse4_1_available() and PPC64 requires zfs_vsx_available().  On any
+ * other architecture the result is kfpu_allowed() alone.
+ */
+static boolean_t
+blake3_is_sse41_supported(void)
{
- return (blake3_selected_impl->name);
+	const boolean_t fpu_ok = kfpu_allowed();
+
+#if defined(__x86_64)
+	return (fpu_ok && zfs_sse4_1_available());
+#elif defined(__PPC64__)
+	return (fpu_ok && zfs_vsx_available());
+#else
+	return (fpu_ok);
+#endif
}
-/* setup id as fastest implementation */
-void
-blake3_set_impl_fastest(uint32_t id)
-{
- blake3_fastest_id = id;
+const blake3_ops_t blake3_sse41_impl = { /* ops table for the "sse41" variant; referenced from blake3_impls[] */
+ .compress_in_place = blake3_compress_in_place_sse41, /* kfpu_begin()/kfpu_end() wrapper */
+ .compress_xof = blake3_compress_xof_sse41, /* kfpu_begin()/kfpu_end() wrapper */
+ .hash_many = blake3_hash_many_sse41, /* kfpu_begin()/kfpu_end() wrapper */
+ .is_supported = blake3_is_sse41_supported, /* runtime availability check */
+ .degree = 4, /* presumably the number of inputs hashed in parallel -- TODO confirm in blake3_impl.h */
+ .name = "sse41" /* name printed by the parameter handlers in this file */
+};
+#endif
+
+#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
+extern void ASMABI zfs_blake3_hash_many_avx2(const uint8_t * const *inputs,
+ size_t num_inputs, size_t blocks, const uint32_t key[8],
+ uint64_t counter, boolean_t increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out);
+
+/*
+ * "avx2" hash_many hook: forwards every argument unchanged to the external
+ * zfs_blake3_hash_many_avx2() routine, with the call placed between
+ * kfpu_begin() and kfpu_end().
+ */
+static void
+blake3_hash_many_avx2(const uint8_t * const *inputs, size_t num_inputs,
+    size_t blocks, const uint32_t key[8], uint64_t counter,
+    boolean_t increment_counter, uint8_t flags, uint8_t flags_start,
+    uint8_t flags_end, uint8_t *out)
+{
+	kfpu_begin();
+	zfs_blake3_hash_many_avx2(inputs, num_inputs, blocks, key,
+	    counter, increment_counter, flags, flags_start, flags_end,
+	    out);
+	kfpu_end();
}
-/* set implementation by id */
-void
-blake3_set_impl_id(uint32_t id)
+/*
+ * Runtime check for the "avx2" implementation: requires kfpu_allowed(),
+ * zfs_sse4_1_available() and zfs_avx2_available(), evaluated in that
+ * order with short-circuiting.
+ */
+static boolean_t
+blake3_is_avx2_supported(void)
{
- int i, cid;
-
- /* select fastest */
- if (id == IMPL_FASTEST)
- id = blake3_fastest_id;
-
- /* select next or first */
- if (id == IMPL_CYCLE)
- id = (++blake3_current_id) % blake3_get_impl_count();
-
- /* 0..N for the real impl */
- for (i = 0, cid = 0; i < ARRAY_SIZE(blake3_impls); i++) {
- if (!blake3_impls[i]->is_supported()) continue;
- if (cid == id) {
- blake3_current_id = cid;
- blake3_selected_impl = blake3_impls[i];
- return;
- }
- cid++;
- }
+	const boolean_t fpu_ok = kfpu_allowed();
+
+	return (fpu_ok && zfs_sse4_1_available() && zfs_avx2_available());
}
-/* set implementation by name */
-int
-blake3_set_impl_name(const char *name)
-{
- int i, cid;
-
- if (strcmp(name, "fastest") == 0) {
- atomic_swap_32(&icp_blake3_impl, IMPL_FASTEST);
- blake3_set_impl_id(IMPL_FASTEST);
- return (0);
- } else if (strcmp(name, "cycle") == 0) {
- atomic_swap_32(&icp_blake3_impl, IMPL_CYCLE);
- blake3_set_impl_id(IMPL_CYCLE);
- return (0);
- }
+const blake3_ops_t /* ops table for the "avx2" variant; referenced from blake3_impls[] */
+blake3_avx2_impl = { /*
+ * NOTE(review): the two sse41 wrappers used below are defined only
+ * inside "#ifdef USE_SIMD", while this section is guarded only by
+ * __x86_64/HAVE_SSE4_1/HAVE_AVX2 -- confirm that OMIT_SIMD builds never
+ * define those HAVE_* macros.
+ */
+ .compress_in_place = blake3_compress_in_place_sse41, /* sse41 wrapper reused; no avx2 variant is declared in this file */
+ .compress_xof = blake3_compress_xof_sse41, /* sse41 wrapper reused; no avx2 variant is declared in this file */
+ .hash_many = blake3_hash_many_avx2, /* kfpu_begin()/kfpu_end() wrapper */
+ .is_supported = blake3_is_avx2_supported, /* runtime availability check */
+ .degree = 8, /* presumably the number of inputs hashed in parallel -- TODO confirm in blake3_impl.h */
+ .name = "avx2" /* name printed by the parameter handlers in this file */
+};
+#endif
- for (i = 0, cid = 0; i < ARRAY_SIZE(blake3_impls); i++) {
- if (!blake3_impls[i]->is_supported()) continue;
- if (strcmp(name, blake3_impls[i]->name) == 0) {
- if (icp_blake3_impl == IMPL_PARAM) {
- blake3_param_id = cid;
- return (0);
- }
- blake3_selected_impl = blake3_impls[i];
- blake3_current_id = cid;
- return (0);
- }
- cid++;
- }
+#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
+extern void ASMABI zfs_blake3_compress_in_place_avx512(uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags);
+
+extern void ASMABI zfs_blake3_compress_xof_avx512(const uint32_t cv[8],
+ const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+ uint64_t counter, uint8_t flags, uint8_t out[64]);
+
+extern void ASMABI zfs_blake3_hash_many_avx512(const uint8_t * const *inputs,
+ size_t num_inputs, size_t blocks, const uint32_t key[8],
+ uint64_t counter, boolean_t increment_counter, uint8_t flags,
+ uint8_t flags_start, uint8_t flags_end, uint8_t *out);
+
+/*
+ * "avx512" compress_in_place hook: forwards every argument unchanged to
+ * the external zfs_blake3_compress_in_place_avx512() routine, with the
+ * call placed between kfpu_begin() and kfpu_end().
+ */
+static void
+blake3_compress_in_place_avx512(uint32_t cv[8],
+    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+    uint64_t counter, uint8_t flags)
+{
+	kfpu_begin();
+	zfs_blake3_compress_in_place_avx512(cv, block, block_len,
+	    counter, flags);
+	kfpu_end();
+}
- return (-EINVAL);
+/*
+ * "avx512" compress_xof hook: forwards every argument (including the
+ * 64-byte output buffer) to the external zfs_blake3_compress_xof_avx512()
+ * routine, with the call placed between kfpu_begin() and kfpu_end().
+ */
+static void
+blake3_compress_xof_avx512(const uint32_t cv[8],
+    const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
+    uint64_t counter, uint8_t flags, uint8_t out[64])
+{
+	kfpu_begin();
+	zfs_blake3_compress_xof_avx512(cv, block, block_len,
+	    counter, flags, out);
+	kfpu_end();
}
-/* setup implementation */
-void
-blake3_setup_impl(void)
-{
- switch (IMPL_READ(icp_blake3_impl)) {
- case IMPL_PARAM:
- blake3_set_impl_id(blake3_param_id);
- atomic_swap_32(&icp_blake3_impl, IMPL_USER);
- break;
- case IMPL_FASTEST:
- blake3_set_impl_id(IMPL_FASTEST);
- break;
- case IMPL_CYCLE:
- blake3_set_impl_id(IMPL_CYCLE);
- break;
- default:
- blake3_set_impl_id(blake3_current_id);
- break;
- }
+/*
+ * "avx512" hash_many hook: forwards every argument unchanged to the
+ * external zfs_blake3_hash_many_avx512() routine, with the call placed
+ * between kfpu_begin() and kfpu_end().
+ */
+static void
+blake3_hash_many_avx512(const uint8_t * const *inputs, size_t num_inputs,
+    size_t blocks, const uint32_t key[8], uint64_t counter,
+    boolean_t increment_counter, uint8_t flags, uint8_t flags_start,
+    uint8_t flags_end, uint8_t *out)
+{
+	kfpu_begin();
+	zfs_blake3_hash_many_avx512(inputs, num_inputs, blocks, key,
+	    counter, increment_counter, flags, flags_start, flags_end,
+	    out);
+	kfpu_end();
}
-/* return selected implementation */
-const blake3_impl_ops_t *
-blake3_impl_get_ops(void)
+/*
+ * Runtime check for the "avx512" implementation: requires kfpu_allowed(),
+ * zfs_avx512f_available() and zfs_avx512vl_available(), evaluated in that
+ * order with short-circuiting.
+ */
+static boolean_t
+blake3_is_avx512_supported(void)
{
- /* each call to ops will cycle */
- if (icp_blake3_impl == IMPL_CYCLE)
- blake3_set_impl_id(IMPL_CYCLE);
-
- return (blake3_selected_impl);
+	const boolean_t fpu_ok = kfpu_allowed();
+
+	return (fpu_ok && zfs_avx512f_available() &&
+	    zfs_avx512vl_available());
}
-#if defined(_KERNEL)
+const blake3_ops_t blake3_avx512_impl = { /* ops table for the "avx512" variant; referenced from blake3_impls[] */
+ .compress_in_place = blake3_compress_in_place_avx512, /* kfpu_begin()/kfpu_end() wrapper */
+ .compress_xof = blake3_compress_xof_avx512, /* kfpu_begin()/kfpu_end() wrapper */
+ .hash_many = blake3_hash_many_avx512, /* kfpu_begin()/kfpu_end() wrapper */
+ .is_supported = blake3_is_avx512_supported, /* runtime availability check */
+ .degree = 16, /* presumably the number of inputs hashed in parallel -- TODO confirm in blake3_impl.h */
+ .name = "avx512" /* name printed by the parameter handlers in this file */
+};
+#endif
+
+extern const blake3_ops_t blake3_generic_impl;
+
+static const blake3_ops_t *const blake3_impls[] = { /* every compiled-in implementation; handed to generic_impl.c via IMPL_ARRAY */
+ &blake3_generic_impl, /* always present, independent of USE_SIMD */
+#ifdef USE_SIMD /* all SIMD entries vanish when USE_SIMD is not defined */
+#if defined(__aarch64__) || \
+ (defined(__x86_64) && defined(HAVE_SSE2)) || \
+ (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
+ &blake3_sse2_impl,
+#endif
+#if defined(__aarch64__) || \
+ (defined(__x86_64) && defined(HAVE_SSE4_1)) || \
+ (defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
+ &blake3_sse41_impl,
+#endif
+#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
+ &blake3_avx2_impl,
+#endif
+#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
+ &blake3_avx512_impl,
+#endif
+#endif /* USE_SIMD */
+};
+
+/*
+ * Use the generic implementation functions: the macros below parameterise
+ * <generic_impl.c>, which is textually included right after them.  The
+ * generic_impl_* and IMPL_* names used further down in this file are not
+ * defined here and presumably come from that include -- confirm against
+ * generic_impl.c.
+ */
+#define IMPL_NAME "blake3"
+#define IMPL_OPS_T blake3_ops_t
+#define IMPL_ARRAY blake3_impls
+#define IMPL_GET_OPS blake3_get_ops
+#define ZFS_IMPL_OPS zfs_blake3_ops
+#include <generic_impl.c>
+
+#ifdef _KERNEL
void **blake3_per_cpu_ctx;
void
@@ -227,58 +312,96 @@ blake3_per_cpu_ctx_fini(void)
memset(blake3_per_cpu_ctx, 0, max_ncpus * sizeof (void *));
kmem_free(blake3_per_cpu_ctx, max_ncpus * sizeof (void *));
}
-#endif
-#if defined(_KERNEL) && defined(__linux__)
+#define IMPL_FMT(impl, i) (((impl) == (i)) ? "[%s] " : "%s ")
+
+#if defined(__linux__)
+
+/*
+ * Module-parameter "get" hook (Linux): writes "cycle", "fastest" and the
+ * name of every supported implementation into buffer, separated by spaces,
+ * with the currently chosen selection wrapped in brackets.  Output is
+ * bounded by PAGE_SIZE (assumes buffer is at least one page, as supplied
+ * by the module-parameter layer -- TODO confirm).
+ *
+ * Returns the number of characters written.
+ */
static int
-icp_blake3_impl_set(const char *name, zfs_kernel_param_t *kp)
+blake3_param_get(char *buffer, zfs_kernel_param_t *unused)
{
- char req_name[BLAKE3_IMPL_NAME_MAX];
- size_t i;
+	const uint32_t impl = IMPL_READ(generic_impl_chosen);
+	const char *fmt;
+	int cnt = 0;
- /* sanitize input */
- i = strnlen(name, BLAKE3_IMPL_NAME_MAX);
- if (i == 0 || i >= BLAKE3_IMPL_NAME_MAX)
- return (-EINVAL);
+
+	(void) unused;
+
+	/* cycling */
+	fmt = IMPL_FMT(impl, IMPL_CYCLE);
+	cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt, "cycle");
+
+	/* list fastest */
+	fmt = IMPL_FMT(impl, IMPL_FASTEST);
+	cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt, "fastest");
+
+	/*
+	 * List all supported implementations.  The index i (and the chosen
+	 * id it is compared against) counts entries of the supported list,
+	 * so the name must be taken from generic_supp_impls[], exactly as
+	 * the FreeBSD handler does; blake3_impls[] holds every compiled-in
+	 * entry and is not guaranteed to share that numbering.
+	 */
+	generic_impl_init();
+	for (uint32_t i = 0; i < generic_supp_impls_cnt; ++i) {
+		fmt = IMPL_FMT(impl, i);
+		cnt += kmem_scnprintf(buffer + cnt, PAGE_SIZE - cnt, fmt,
+		    generic_supp_impls[i]->name);
+	}
- strlcpy(req_name, name, BLAKE3_IMPL_NAME_MAX);
- while (i > 0 && isspace(req_name[i-1]))
- i--;
- req_name[i] = '\0';
+	return (cnt);
+}
- atomic_swap_32(&icp_blake3_impl, IMPL_PARAM);
- return (blake3_set_impl_name(req_name));
+/*
+ * Module-parameter "set" hook (Linux): passes the requested name straight
+ * to generic_impl_setname() and returns its result.  The kernel-param
+ * handle is not needed.
+ */
+static int
+blake3_param_set(const char *val, zfs_kernel_param_t *unused)
+{
+	int rc;
+
+	(void) unused;
+	rc = generic_impl_setname(val);
+
+	return (rc);
}
+#elif defined(__FreeBSD__)
+
+#include <sys/sbuf.h>
+
+/*
+ * sysctl handler (FreeBSD) for the BLAKE3 implementation selector.
+ *
+ * Read (req->newptr == NULL): emits "cycle", "fastest" and the name of
+ * every supported implementation, separated by spaces, with the current
+ * selection wrapped in brackets; returns the sbuf_finish() result.
+ *
+ * Write: copies the new value into a local buffer through
+ * sysctl_handle_string() and hands it to generic_impl_setname(), whose
+ * result is negated before being returned.
+ */
static int
-icp_blake3_impl_get(char *buffer, zfs_kernel_param_t *kp)
+blake3_param(ZFS_MODULE_PARAM_ARGS)
{
- int i, cid, cnt = 0;
- char *fmt;
+	int err;
- /* cycling */
- fmt = (icp_blake3_impl == IMPL_CYCLE) ? "[cycle] " : "cycle ";
- cnt += sprintf(buffer + cnt, fmt);
-
- /* fastest one */
- fmt = (icp_blake3_impl == IMPL_FASTEST) ? "[fastest] " : "fastest ";
- cnt += sprintf(buffer + cnt, fmt);
-
- /* user selected */
- for (i = 0, cid = 0; i < ARRAY_SIZE(blake3_impls); i++) {
- if (!blake3_impls[i]->is_supported()) continue;
- fmt = (icp_blake3_impl == IMPL_USER &&
- cid == blake3_current_id) ? "[%s] " : "%s ";
- cnt += sprintf(buffer + cnt, fmt, blake3_impls[i]->name);
- cid++;
+	generic_impl_init();
+	if (req->newptr == NULL) {
+		const uint32_t impl = IMPL_READ(generic_impl_chosen);
+		const int init_buflen = 64;
+		const char *fmt;
+		struct sbuf *s;
+
+		s = sbuf_new_for_sysctl(NULL, NULL, init_buflen, req);
+
+		/* cycling */
+		fmt = IMPL_FMT(impl, IMPL_CYCLE);
+		(void) sbuf_printf(s, fmt, "cycle");
+
+		/* list fastest */
+		fmt = IMPL_FMT(impl, IMPL_FASTEST);
+		(void) sbuf_printf(s, fmt, "fastest");
+
+		/* list all supported implementations */
+		for (uint32_t i = 0; i < generic_supp_impls_cnt; ++i) {
+			fmt = IMPL_FMT(impl, i);
+			(void) sbuf_printf(s, fmt, generic_supp_impls[i]->name);
+		}
+
+		err = sbuf_finish(s);
+		sbuf_delete(s);
+
+		return (err);
}
- buffer[cnt] = 0;
+	/*
+	 * Start from an empty string: sysctl_handle_string() reads the
+	 * buffer's current contents (to report the old value) before it
+	 * stores the new one, so the buffer must not be uninitialised stack.
+	 */
+	char buf[16] = "";
- return (cnt);
+	err = sysctl_handle_string(oidp, buf, sizeof (buf), req);
+	if (err) {
+		return (err);
+	}
+
+	/* negate the setname result before returning it from the handler */
+	return (-generic_impl_setname(buf));
}
+#endif
+
+#undef IMPL_FMT
-module_param_call(icp_blake3_impl, icp_blake3_impl_set, icp_blake3_impl_get,
- NULL, 0644);
-MODULE_PARM_DESC(icp_blake3_impl, "Select BLAKE3 implementation.");
+/* Register the "blake3_impl" tunable (read/write) with its set/get hooks. */
+ZFS_MODULE_VIRTUAL_PARAM_CALL(zfs, zfs_, blake3_impl, blake3_param_set,
+    blake3_param_get, ZMOD_RW, "Select BLAKE3 implementation.");
#endif