aboutsummaryrefslogtreecommitdiff
path: root/contrib/arm-optimized-routines/string/aarch64/strncmp-sve.S
diff options
context:
space:
mode:
authorAlex Richardson <arichardson@FreeBSD.org>2021-07-06 10:02:44 +0000
committerAlex Richardson <arichardson@FreeBSD.org>2021-07-06 10:05:34 +0000
commit31914882fca502069810b9e9ddea4bcd8136a4f4 (patch)
tree4e683e2197c52fe78bdc77db188c471418418a43 /contrib/arm-optimized-routines/string/aarch64/strncmp-sve.S
parente34c713b0e660a49b57fafc02c9eccb26b938220 (diff)
parentf9f37c002ab5a580accfe26b731eef45e798b435 (diff)
downloadsrc-31914882fca502069810b9e9ddea4bcd8136a4f4.tar.gz
src-31914882fca502069810b9e9ddea4bcd8136a4f4.zip
Import Arm Optimized Routines v21.02
This is the new replacement for the existing cortex-strings code which will be replaced in a follow-up commit. We should also be able to use some of the math functions to allow the tests to pass on AArch64 (and other architectures) instead of just x86. We might also be able to reuse some of the tests for the kyua testsuite. Imported using ``` curl -L https://github.com/ARM-software/optimized-routines/tarball/e823e3abf5f89ecba58a10fc0fd82c13d9984b6b | tar --strip-components=1 -xvzf - git add . ``` Differential Revision: https://reviews.freebsd.org/D29035 git-subtree-dir: contrib/arm-optimized-routines git-subtree-mainline: e34c713b0e660a49b57fafc02c9eccb26b938220 git-subtree-split: f9f37c002ab5a580accfe26b731eef45e798b435
Diffstat (limited to 'contrib/arm-optimized-routines/string/aarch64/strncmp-sve.S')
-rw-r--r--contrib/arm-optimized-routines/string/aarch64/strncmp-sve.S69
1 files changed, 69 insertions, 0 deletions
diff --git a/contrib/arm-optimized-routines/string/aarch64/strncmp-sve.S b/contrib/arm-optimized-routines/string/aarch64/strncmp-sve.S
new file mode 100644
index 000000000000..234190e245b0
--- /dev/null
+++ b/contrib/arm-optimized-routines/string/aarch64/strncmp-sve.S
@@ -0,0 +1,69 @@
+/*
+ * strncmp - compare two strings with limit
+ *
+ * Copyright (c) 2018-2021, Arm Limited.
+ * SPDX-License-Identifier: MIT
+ */
+
+#include "../asmdefs.h"
+
+#if __ARM_FEATURE_SVE
+/* Assumptions:
+ *
+ * ARMv8-a, AArch64
+ * SVE Available.
+ */
+
+ENTRY (__strncmp_aarch64_sve) /* in: x0 = s1, x1 = s2, x2 = max byte count; out: x0 = result */
+ PTR_ARG (0) /* macro from asmdefs.h (not shown here); presumably fixes up the pointer in x0 - confirm */
+ PTR_ARG (1) /* likewise for the pointer in x1 */
+ SIZE_ARG (2) /* likewise for the size in x2 */
+ setffr /* initialize FFR so the first-faulting loads start from an all-true state */
+ mov x3, 0 /* initialize off: x3 is the byte offset applied to both strings */
+
+0: whilelo p0.b, x3, x2 /* while off < max: p0 = lanes whose offset is still below max */
+ b.none 9f /* no lane left: max bytes compared without finding a difference */
+
+ ldff1b z0.b, p0/z, [x0, x3] /* z0 = s1 bytes at off; lanes past a fault are dropped from FFR */
+ ldff1b z1.b, p0/z, [x1, x3] /* z1 = s2 bytes at off; FFR now covers lanes valid in both loads */
+ rdffrs p1.b, p0/z /* p1 = FFR masked by p0, and set the flags from it */
+ b.nlast 2f /* last lane of p0 was not loaded: handle the partial vector */
+
+ /* First fault did not fail: every lane selected by p0 (up to max) is valid.
+ Govern the compares by p0 so nothing past the branch depends on FFR.
+ Increment for a whole vector, even if p0 selected only a partial one.
+ This is significantly cheaper than INCP, and since OFF is then only
+ re-tested by WHILELO (never dereferenced) it may safely exceed MAX. */
+ incb x3 /* off += vector length in bytes */
+ cmpeq p1.b, p0/z, z0.b, z1.b /* compare strings: p1 = lanes where the bytes are equal */
+ cmpne p2.b, p0/z, z0.b, 0 /* search for ~zero: p2 = lanes where the s1 byte is not NUL */
+ nands p2.b, p0/z, p1.b, p2.b /* ~(eq & ~zero) -> ne | zero, and set the flags for the branch */
+ b.none 0b /* neither a difference nor a NUL in this vector: keep going */
+
+ /* Found end-of-string or inequality; p2 marks every lane where that holds. */
+1: brkb p2.b, p0/z, p2.b /* find first such: p2 = lanes strictly before it */
+ lasta w0, p2, z0.b /* extract each char: the lane after the last one set in p2 */
+ lasta w1, p2, z1.b /* (lane 0 when p2 is empty, i.e. the hit is in the first lane) */
+ sub x0, x0, x1 /* return comparison: s1 byte minus s2 byte */
+ ret
+
+ /* First fault failed: only the lanes in p1 hold valid data.
+ Perform the comparison only on those bytes, governed by p1 instead of p0. */
+2: cmpeq p2.b, p1/z, z0.b, z1.b /* compare strings, as above */
+ cmpne p3.b, p1/z, z0.b, 0 /* p3 = valid lanes where the s1 byte is not NUL */
+ nands p2.b, p1/z, p2.b, p3.b /* p2 = valid lanes holding a difference or a NUL; sets flags */
+ b.any 1b /* at least one such lane: compute the result */
+
+ /* No inequality or zero found in the valid lanes. Re-init FFR, incr and loop. */
+ setffr /* FFR must be all-true again before the next first-faulting loads */
+ incp x3, p1.b /* off += number of lanes that were actually read */
+ b 0b /* retry from the first unread byte */
+
+ /* Found end-of-count: off reached max with no difference and no NUL. */
+9: mov x0, 0 /* return equal */
+ ret
+
+END (__strncmp_aarch64_sve)
+
+#endif
+