author    Andrew Turner <andrew@FreeBSD.org>  2022-09-07 11:12:30 +0000
committer Andrew Turner <andrew@FreeBSD.org>  2022-09-08 13:29:37 +0000
commit    51a1bf7ba7eb79c760161a2054c113978dce38cb (patch)
tree      327fcb614e3f5b3a5bd1a147c7bc1007d327112d
parent    8c6e5d8cf1e67c6744be2f5bfe42eeda0479744c (diff)
Import an optimized arm64 memcmp into the kernel
Bring in a version of the Arm Optimized Routines memcmp from before the
VFP registers were used.

Imported with modification from:
https://github.com/ARM-software/optimized-routines/blob/e823e3abf5f89ecb/string/aarch64/memcmp.S

Sponsored by:	The FreeBSD Foundation
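For context, memcmp(3) must return a negative, zero, or positive value
according to the first pair of bytes that differ, compared as unsigned
char. A minimal byte-at-a-time C sketch of those semantics follows (a
reference model, not the kernel's code; memcmp_ref is a hypothetical
name used only for illustration):

/*
 * Reference model of memcmp(3) semantics: compare as unsigned char and
 * return the difference at the first mismatching byte.  The imported
 * assembly is an optimized equivalent of this behaviour.
 */
#include <stddef.h>

int
memcmp_ref(const void *s1, const void *s2, size_t n)
{
	const unsigned char *p1 = s1, *p2 = s2;

	for (; n > 0; n--, p1++, p2++) {
		if (*p1 != *p2)
			return (*p1 - *p2);
	}
	return (0);
}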
-rw-r--r--  sys/arm64/arm64/memcmp.S  136
-rw-r--r--  sys/conf/files.arm64        3
2 files changed, 137 insertions, 2 deletions
diff --git a/sys/arm64/arm64/memcmp.S b/sys/arm64/arm64/memcmp.S
new file mode 100644
index 000000000000..8517a181f3f3
--- /dev/null
+++ b/sys/arm64/arm64/memcmp.S
@@ -0,0 +1,136 @@
+/* memcmp - compare memory
+ *
+ * Copyright (c) 2013-2020, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64, unaligned accesses.
+ */
+
+#include <machine/asm.h>
+
+#define L(l) .L ## l
+
+/* Parameters and result. */
+#define src1 x0
+#define src2 x1
+#define limit x2
+#define result w0
+
+/* Internal variables. */
+#define data1 x3
+#define data1w w3
+#define data1h x4
+#define data2 x5
+#define data2w w5
+#define data2h x6
+#define tmp1 x7
+#define tmp2 x8
+
+ENTRY (memcmp)
+ subs limit, limit, 8
+ b.lo L(less8)
+
+ ldr data1, [src1], 8
+ ldr data2, [src2], 8
+ cmp data1, data2
+ b.ne L(return)
+
+ subs limit, limit, 8
+ b.gt L(more16)
+
+ ldr data1, [src1, limit]
+ ldr data2, [src2, limit]
+ b L(return)
+
+L(more16):
+ ldr data1, [src1], 8
+ ldr data2, [src2], 8
+ cmp data1, data2
+ bne L(return)
+
+ /* Jump directly to comparing the last 16 bytes for 32 byte (or less)
+ strings. */
+ subs limit, limit, 16
+ b.ls L(last_bytes)
+
+ /* We overlap loads between 0-32 bytes at either side of SRC1 when we
+ try to align, so limit it only to strings larger than 128 bytes. */
+ cmp limit, 96
+ b.ls L(loop16)
+
+ /* Align src1 and adjust src2 with bytes not yet done. */
+ and tmp1, src1, 15
+ add limit, limit, tmp1
+ sub src1, src1, tmp1
+ sub src2, src2, tmp1
+
+ /* Loop performing 16 bytes per iteration using aligned src1.
+ Limit is pre-decremented by 16 and must be larger than zero.
+ Exit if <= 16 bytes left to do or if the data is not equal. */
+ .p2align 4
+L(loop16):
+ ldp data1, data1h, [src1], 16
+ ldp data2, data2h, [src2], 16
+ subs limit, limit, 16
+ ccmp data1, data2, 0, hi
+ ccmp data1h, data2h, 0, eq
+ b.eq L(loop16)
+
+ cmp data1, data2
+ bne L(return)
+ mov data1, data1h
+ mov data2, data2h
+ cmp data1, data2
+ bne L(return)
+
+ /* Compare last 1-16 bytes using unaligned access. */
+L(last_bytes):
+ add src1, src1, limit
+ add src2, src2, limit
+ ldp data1, data1h, [src1]
+ ldp data2, data2h, [src2]
+ cmp data1, data2
+ bne L(return)
+ mov data1, data1h
+ mov data2, data2h
+ cmp data1, data2
+
+ /* Compare data bytes and set return value to 0, -1 or 1. */
+L(return):
+#ifndef __AARCH64EB__
+ rev data1, data1
+ rev data2, data2
+#endif
+ cmp data1, data2
+L(ret_eq):
+ cset result, ne
+ cneg result, result, lo
+ ret
+
+ .p2align 4
+ /* Compare up to 8 bytes. Limit is [-8..-1]. */
+L(less8):
+ adds limit, limit, 4
+ b.lo L(less4)
+ ldr data1w, [src1], 4
+ ldr data2w, [src2], 4
+ cmp data1w, data2w
+ b.ne L(return)
+ sub limit, limit, 4
+L(less4):
+ adds limit, limit, 4
+ beq L(ret_eq)
+L(byte_loop):
+ ldrb data1w, [src1], 1
+ ldrb data2w, [src2], 1
+ subs limit, limit, 1
+ ccmp data1w, data2w, 0, ne /* NZCV = 0b0000. */
+ b.eq L(byte_loop)
+ sub result, data1w, data2w
+ ret
+
+END (memcmp)
+
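Two details of the assembly above are worth spelling out. The main loop
compares 16 bytes per iteration and folds its exit tests into one flag
chain: subs sets the HI condition while more than 16 bytes remain, and
the two ccmp instructions preserve EQ only if every earlier condition
held, so a single b.eq covers both "more data left" and "still equal".
On the return path, rev byte-reverses both 64-bit words on
little-endian targets so that a plain unsigned compare ranks them in
memory order, which is exactly what the final cmp/cset/cneg sequence
relies on. A rough C model of that return path (bswap64_model and
chunk_cmp_model are hypothetical names for illustration):

/*
 * Model of the little-endian return path: byte-reverse both words so
 * the first byte in memory becomes the most significant, then a single
 * unsigned comparison orders the chunks the way memcmp requires.  On
 * big-endian (__AARCH64EB__) the rev step is skipped, as in the
 * assembly.
 */
#include <stdint.h>

static inline uint64_t
bswap64_model(uint64_t x)
{
	return (__builtin_bswap64(x));	/* models "rev xN, xN" */
}

static inline int
chunk_cmp_model(uint64_t data1, uint64_t data2)
{
	uint64_t a = bswap64_model(data1);
	uint64_t b = bswap64_model(data2);

	if (a == b)
		return (0);
	return (a < b ? -1 : 1);	/* models cmp; cset; cneg */
}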
diff --git a/sys/conf/files.arm64 b/sys/conf/files.arm64
index a647d4e32230..d01b3f674e9a 100644
--- a/sys/conf/files.arm64
+++ b/sys/conf/files.arm64
@@ -9,8 +9,6 @@ kern/pic_if.m optional intrng
kern/subr_devmap.c standard
kern/subr_intr.c optional intrng
kern/subr_physmem.c standard
-libkern/memcmp.c standard \
- compile-with "${NORMAL_C:N-fsanitize*}"
libkern/memset.c standard \
compile-with "${NORMAL_C:N-fsanitize*}"
libkern/strlen.c standard
@@ -60,6 +58,7 @@ arm64/arm64/locore.S standard no-obj
arm64/arm64/machdep.c standard
arm64/arm64/machdep_boot.c standard
arm64/arm64/mem.c standard
+arm64/arm64/memcmp.S standard
arm64/arm64/memcpy.S standard
arm64/arm64/minidump_machdep.c standard
arm64/arm64/mp_machdep.c optional smp
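
With the files.arm64 change, the arm64 kernel stops compiling the
generic libkern/memcmp.c and always assembles the imported routine
instead. If you wanted to sanity-check such an import from userland, a
harness along these lines would compare result signs against the first
differing byte over assorted lengths and alignments (a hypothetical
sketch, not part of this commit; TEST_MEMCMP defaults to libc memcmp so
the example is self-contained):

/*
 * Hypothetical userland sanity check for a memcmp implementation.
 * To test the imported assembly you would link it under TEST_MEMCMP
 * instead of letting the macro fall back to libc memcmp.
 */
#include <assert.h>
#include <stdlib.h>
#include <string.h>

#define TEST_MEMCMP memcmp	/* swap in the routine under test */

static int
sign(int v)
{
	return (v < 0 ? -1 : (v > 0 ? 1 : 0));
}

int
main(void)
{
	unsigned char buf1[256], buf2[256];

	for (size_t len = 0; len <= 200; len++) {
		for (size_t off = 0; off < 16; off++) {
			arc4random_buf(buf1, sizeof(buf1));
			memcpy(buf2, buf1, sizeof(buf2));
			if (len > 0)	/* perturb a single byte */
				buf2[off + len / 2] ^= 0x5a;

			/* Expected sign from the first differing byte. */
			int want = 0;
			for (size_t i = 0; i < len; i++) {
				if (buf1[off + i] != buf2[off + i]) {
					want = buf1[off + i] < buf2[off + i] ?
					    -1 : 1;
					break;
				}
			}

			int got = TEST_MEMCMP(buf1 + off, buf2 + off, len);
			assert(sign(got) == want);
		}
	}
	return (0);
}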