aboutsummaryrefslogtreecommitdiff
path: root/string/aarch64/experimental
diff options
context:
space:
mode:
Diffstat (limited to 'string/aarch64/experimental')
-rw-r--r--string/aarch64/experimental/memchr-sve.S60
-rw-r--r--string/aarch64/experimental/memcmp-sve.S46
-rw-r--r--string/aarch64/experimental/stpcpy-sve.S10
-rw-r--r--string/aarch64/experimental/strchr-sve.S67
-rw-r--r--string/aarch64/experimental/strchrnul-sve.S9
-rw-r--r--string/aarch64/experimental/strcmp-sve.S55
-rw-r--r--string/aarch64/experimental/strcpy-sve.S67
-rw-r--r--string/aarch64/experimental/strlen-sve.S52
-rw-r--r--string/aarch64/experimental/strncmp-sve.S64
-rw-r--r--string/aarch64/experimental/strnlen-sve.S70
-rw-r--r--string/aarch64/experimental/strrchr-sve.S81
11 files changed, 581 insertions, 0 deletions
diff --git a/string/aarch64/experimental/memchr-sve.S b/string/aarch64/experimental/memchr-sve.S
new file mode 100644
index 000000000000..b314551f3e0f
--- /dev/null
+++ b/string/aarch64/experimental/memchr-sve.S
@@ -0,0 +1,60 @@
+/*
+ * memchr - find a character in a memory zone
+ *
+ * Copyright (c) 2018-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "asmdefs.h"
+
+.arch armv8-a+sve
+
+/* Assumptions: ARMv8-a, AArch64, SVE available.
+ *
+ * In: x0 = base of the buffer, w1 = byte to find, x2 = byte count.
+ * Out: x0 = address of the first matching byte, or 0 if none in range.
+ */
+
+ENTRY (__memchr_aarch64_sve)
+ dup z1.b, w1 /* duplicate c to a vector */
+ setffr /* initialize FFR */
+ mov x3, 0 /* initialize off */
+
+ .p2align 4
+0: whilelo p1.b, x3, x2 /* p1 = lanes with off + lane < max */
+ b.none 9f /* no lanes left: count exhausted */
+
+ /* Read a vector's worth of bytes, bounded by max,
+ stopping on first fault. */
+ ldff1b z0.b, p1/z, [x0, x3]
+ rdffrs p0.b, p1/z /* p0 = lanes of p1 loaded without a fault */
+ b.nlast 2f /* last requested lane missing: partial read */
+
+ /* First fault did not fail: the vector bounded by max is valid.
+ Avoid depending on the contents of FFR beyond the branch. */
+ incb x3 /* speculate increment */
+ cmpeq p2.b, p1/z, z0.b, z1.b /* search for c */
+ b.none 0b
+ decb x3 /* undo speculate */
+
+ /* Found C. Entered by fall-through or from 2:, with p2 = matches. */
+1: brkb p2.b, p1/z, p2.b /* p2 = lanes before the first c */
+ add x0, x0, x3 /* form partial pointer */
+ incp x0, p2.b /* form final pointer to c */
+ ret
+
+ /* First fault failed: only some of the vector is valid.
+ Perform the comparison only on the valid bytes. */
+2: cmpeq p2.b, p0/z, z0.b, z1.b
+ b.any 1b
+
+ /* No C found. Re-init FFR, increment, and loop. */
+ setffr
+ incp x3, p0.b /* advance off by the bytes actually read */
+ b 0b
+
+ /* Found end of count. */
+9: mov x0, 0 /* return null */
+ ret
+
+END (__memchr_aarch64_sve)
diff --git a/string/aarch64/experimental/memcmp-sve.S b/string/aarch64/experimental/memcmp-sve.S
new file mode 100644
index 000000000000..ad3534836d04
--- /dev/null
+++ b/string/aarch64/experimental/memcmp-sve.S
@@ -0,0 +1,46 @@
+/*
+ * memcmp - compare memory
+ *
+ * Copyright (c) 2018-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "asmdefs.h"
+
+.arch armv8-a+sve
+
+/* Assumptions: ARMv8-a, AArch64, SVE available.
+ *
+ * In: x0, x1 = the two buffers, x2 = number of bytes to compare.
+ * Out: w0 = difference of the first pair of unequal bytes, or 0.
+ */
+
+ENTRY (__memcmp_aarch64_sve)
+ mov x3, xzr /* off = 0 */
+
+.Lloop: whilelo p0.b, x3, x2 /* p0 = lanes with off + lane < max */
+ b.none .Lequal /* nothing left to compare */
+
+ ld1b z0.b, p0/z, [x0, x3] /* load both sides, bounded by max */
+ ld1b z1.b, p0/z, [x1, x3]
+
+ /* Step OFF by a full vector length even after a partial read.
+ INCB is much cheaper than INCP, and OFF is dead once the loop
+ exits, so letting it run past MAX is harmless. */
+ incb x3
+
+ cmpne p1.b, p0/z, z0.b, z1.b /* p1 = lanes that differ */
+ b.none .Lloop /* all equal: next vector */
+
+ /* At least one lane differs. */
+ brkb p1.b, p0/z, p1.b /* p1 = lanes before the first difference */
+ lasta w0, p1, z0.b /* first differing byte of each side */
+ lasta w1, p1, z1.b
+ sub x0, x0, x1 /* sign of the result orders the buffers */
+ ret
+
+ /* Every byte up to MAX matched. */
+.Lequal: mov x0, xzr /* return 0 */
+ ret
+
+END (__memcmp_aarch64_sve)
diff --git a/string/aarch64/experimental/stpcpy-sve.S b/string/aarch64/experimental/stpcpy-sve.S
new file mode 100644
index 000000000000..5d3f14b86026
--- /dev/null
+++ b/string/aarch64/experimental/stpcpy-sve.S
@@ -0,0 +1,10 @@
+/*
+ * stpcpy - copy a string returning pointer to end.
+ *
+ * Copyright (c) 2020, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#define BUILD_STPCPY 1 /* select the stpcpy variant of the shared source */
+
+#include "strcpy-sve.S" /* tests BUILD_STPCPY for the symbol name and return value */
diff --git a/string/aarch64/experimental/strchr-sve.S b/string/aarch64/experimental/strchr-sve.S
new file mode 100644
index 000000000000..7d74ae9ff232
--- /dev/null
+++ b/string/aarch64/experimental/strchr-sve.S
@@ -0,0 +1,67 @@
+/*
+ * strchr/strchrnul - find a character in a string
+ *
+ * Copyright (c) 2018-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "asmdefs.h"
+
+.arch armv8-a+sve
+
+/* Assumptions: ARMv8-a, AArch64, SVE available.
+ *
+ * In: x0 = NUL-terminated string, w1 = byte to find.
+ * Out: x0 = first match; if none, 0 (strchr) or the NUL (strchrnul).
+ */
+
+/* To build as strchrnul, define BUILD_STRCHRNUL before compiling this file. */
+#ifdef BUILD_STRCHRNUL
+#define FUNC __strchrnul_aarch64_sve
+#else
+#define FUNC __strchr_aarch64_sve
+#endif
+
+ENTRY (FUNC)
+ dup z1.b, w1 /* replicate byte across vector */
+ setffr /* initialize FFR */
+ ptrue p1.b /* all ones; loop invariant */
+
+ .p2align 4
+ /* Read a vector's worth of bytes, stopping on first fault. */
+0: ldff1b z0.b, p1/z, [x0, xzr]
+ rdffrs p0.b, p1/z /* p0 = lanes loaded without a fault */
+ b.nlast 2f /* last lane missing: partial read */
+
+ /* First fault did not fail: the whole vector is valid.
+ Avoid depending on the contents of FFR beyond the branch. */
+ incb x0 /* speculate increment */
+ cmpeq p2.b, p1/z, z0.b, z1.b /* search for c */
+ cmpeq p3.b, p1/z, z0.b, 0 /* search for 0 */
+ orrs p4.b, p1/z, p2.b, p3.b /* c | 0 */
+ b.none 0b
+ decb x0 /* undo speculate */
+
+ /* Found C or 0. Here x0 is the base of the vector holding the hit. */
+1: brka p4.b, p1/z, p4.b /* find first such */
+ sub x0, x0, 1 /* adjust pointer for that byte */
+ incp x0, p4.b /* x0 += lanes up to and including the hit */
+#ifndef BUILD_STRCHRNUL
+ ptest p4, p2.b /* was first in c? */
+ csel x0, xzr, x0, none /* if there was no c, return null */
+#endif
+ ret
+
+ /* First fault failed: only some of the vector is valid.
+ Perform the comparison only on the valid bytes. */
+2: cmpeq p2.b, p0/z, z0.b, z1.b /* search for c */
+ cmpeq p3.b, p0/z, z0.b, 0 /* search for 0 */
+ orrs p4.b, p0/z, p2.b, p3.b /* c | 0 */
+ b.any 1b
+
+ /* No C or 0 found. Re-init FFR, increment, and loop. */
+ setffr
+ incp x0, p0.b /* advance by the bytes actually read */
+ b 0b
+
+END (FUNC)
diff --git a/string/aarch64/experimental/strchrnul-sve.S b/string/aarch64/experimental/strchrnul-sve.S
new file mode 100644
index 000000000000..0005f9177514
--- /dev/null
+++ b/string/aarch64/experimental/strchrnul-sve.S
@@ -0,0 +1,9 @@
+/*
+ * strchrnul - find a character or nul in a string
+ *
+ * Copyright (c) 2018-2019, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#define BUILD_STRCHRNUL /* select the strchrnul variant of the shared source */
+#include "strchr-sve.S" /* tests BUILD_STRCHRNUL for the symbol name and miss result */
diff --git a/string/aarch64/experimental/strcmp-sve.S b/string/aarch64/experimental/strcmp-sve.S
new file mode 100644
index 000000000000..b6c249588534
--- /dev/null
+++ b/string/aarch64/experimental/strcmp-sve.S
@@ -0,0 +1,55 @@
+/*
+ * __strcmp_aarch64_sve - compare two strings
+ *
+ * Copyright (c) 2018-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "asmdefs.h"
+
+.arch armv8-a+sve
+
+/* Assumptions: ARMv8-a, AArch64, SVE available.
+ *
+ * In: x0, x1 = the two NUL-terminated strings.
+ * Out: w0 = difference of the bytes at the first mismatch or NUL.
+ */
+
+ENTRY (__strcmp_aarch64_sve)
+ setffr /* initialize FFR */
+ ptrue p1.b, all /* all ones; loop invariant */
+ mov x2, 0 /* initialize offset */
+
+ /* Read a vector's worth of bytes, stopping on first fault. */
+ .p2align 4
+0: ldff1b z0.b, p1/z, [x0, x2]
+ ldff1b z1.b, p1/z, [x1, x2]
+ rdffrs p0.b, p1/z /* p0 = lanes valid after both loads */
+ b.nlast 2f /* last lane missing: partial read */
+
+ /* First fault did not fail: the whole vector is valid.
+ Avoid depending on the contents of FFR beyond the branch. */
+ incb x2, all /* skip bytes for next round */
+ cmpeq p2.b, p1/z, z0.b, z1.b /* compare strings */
+ cmpne p3.b, p1/z, z0.b, 0 /* search for ~zero */
+ nands p2.b, p1/z, p2.b, p3.b /* ~(eq & ~zero) -> ne | zero */
+ b.none 0b
+
+ /* Found end-of-string or inequality. */
+1: brkb p2.b, p1/z, p2.b /* find first such */
+ lasta w0, p2, z0.b /* extract each char */
+ lasta w1, p2, z1.b
+ sub x0, x0, x1 /* return comparison */
+ ret
+
+ /* First fault failed: only some of the vector is valid.
+ Perform the comparison only on the valid bytes. */
+2: incp x2, p0.b /* skip bytes for next round */
+ setffr /* re-init FFR for next round */
+ cmpeq p2.b, p0/z, z0.b, z1.b /* compare strings, as above */
+ cmpne p3.b, p0/z, z0.b, 0
+ nands p2.b, p0/z, p2.b, p3.b
+ b.none 0b /* valid bytes equal and non-zero: keep going */
+ b 1b /* otherwise finish with p2 = stop lanes */
+
+END (__strcmp_aarch64_sve)
diff --git a/string/aarch64/experimental/strcpy-sve.S b/string/aarch64/experimental/strcpy-sve.S
new file mode 100644
index 000000000000..57b77c8a00e7
--- /dev/null
+++ b/string/aarch64/experimental/strcpy-sve.S
@@ -0,0 +1,67 @@
+/*
+ * strcpy/stpcpy - copy a string returning pointer to start/end.
+ *
+ * Copyright (c) 2018-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "asmdefs.h"
+
+.arch armv8-a+sve
+
+/* Assumptions: ARMv8-a, AArch64, SVE available.
+ *
+ * In: x0 = destination, x1 = NUL-terminated source.
+ * Out: x0 = destination (strcpy) or address of the copied NUL (stpcpy).
+ */
+
+/* To build as stpcpy, define BUILD_STPCPY before compiling this file. */
+#ifdef BUILD_STPCPY
+#define FUNC __stpcpy_aarch64_sve
+#else
+#define FUNC __strcpy_aarch64_sve
+#endif
+
+ENTRY (FUNC)
+ setffr /* initialize FFR */
+ ptrue p2.b, all /* all ones; loop invariant */
+ mov x2, 0 /* initialize offset */
+
+ .p2align 4
+ /* Read a vector's worth of bytes, stopping on first fault. */
+0: ldff1b z0.b, p2/z, [x1, x2]
+ rdffrs p0.b, p2/z /* p0 = lanes loaded without a fault */
+ b.nlast 1f /* last lane missing: partial read */
+
+ /* First fault did not fail: the whole vector is valid.
+ Avoid depending on the contents of FFR beyond the branch. */
+ cmpeq p1.b, p2/z, z0.b, 0 /* search for zeros */
+ b.any 2f
+
+ /* No zero found. Store the whole vector and loop. */
+ st1b z0.b, p2, [x0, x2]
+ incb x2, all /* offset += vector length in bytes */
+ b 0b
+
+ /* First fault failed: only some of the vector is valid.
+ Perform the comparison only on the valid bytes. */
+1: cmpeq p1.b, p0/z, z0.b, 0 /* search for zeros */
+ b.any 2f
+
+ /* No zero found. Store the valid portion of the vector and loop. */
+ setffr /* re-init FFR */
+ st1b z0.b, p0, [x0, x2]
+ incp x2, p0.b /* offset += bytes actually copied */
+ b 0b
+
+ /* Zero found. Crop the vector to the found zero and finish. */
+2: brka p0.b, p2/z, p1.b /* p0 = lanes up to and including the NUL */
+ st1b z0.b, p0, [x0, x2]
+#ifdef BUILD_STPCPY
+ add x0, x0, x2 /* dest + offset of this vector */
+ sub x0, x0, 1 /* minus one, so the count below lands on the NUL */
+ incp x0, p0.b /* plus lanes stored, NUL included */
+#endif
+ ret
+
+END (FUNC)
diff --git a/string/aarch64/experimental/strlen-sve.S b/string/aarch64/experimental/strlen-sve.S
new file mode 100644
index 000000000000..c83155052c07
--- /dev/null
+++ b/string/aarch64/experimental/strlen-sve.S
@@ -0,0 +1,52 @@
+/*
+ * __strlen_aarch64_sve - compute the length of a string
+ *
+ * Copyright (c) 2018-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "asmdefs.h"
+
+.arch armv8-a+sve
+
+/* Assumptions: ARMv8-a, AArch64, SVE available.
+ *
+ * In: x0 = NUL-terminated string.
+ * Out: x0 = number of bytes before the terminating NUL.
+ */
+
+ENTRY (__strlen_aarch64_sve)
+ setffr /* initialize FFR */
+ ptrue p2.b /* all ones; loop invariant */
+ mov x1, 0 /* initialize length */
+
+ /* Read a vector's worth of bytes, stopping on first fault. */
+ .p2align 4
+0: ldff1b z0.b, p2/z, [x0, x1]
+ rdffrs p0.b, p2/z /* p0 = lanes loaded without a fault */
+ b.nlast 2f /* last lane missing: partial read */
+
+ /* First fault did not fail: the whole vector is valid.
+ Avoid depending on the contents of FFR beyond the branch. */
+ incb x1, all /* speculate increment */
+ cmpeq p1.b, p2/z, z0.b, 0 /* loop if no zeros */
+ b.none 0b
+ decb x1, all /* undo speculate */
+
+ /* Zero found. Select the bytes before the first and count them. */
+1: brkb p0.b, p2/z, p1.b /* p0 = lanes before the first zero */
+ incp x1, p0.b /* length += those lanes */
+ mov x0, x1 /* return the length */
+ ret
+
+ /* First fault failed: only some of the vector is valid.
+ Perform the comparison only on the valid bytes. */
+2: cmpeq p1.b, p0/z, z0.b, 0
+ b.any 1b
+
+ /* No zero found. Re-init FFR, increment, and loop. */
+ setffr
+ incp x1, p0.b /* length += bytes actually read */
+ b 0b
+
+END (__strlen_aarch64_sve)
diff --git a/string/aarch64/experimental/strncmp-sve.S b/string/aarch64/experimental/strncmp-sve.S
new file mode 100644
index 000000000000..a281e642d8aa
--- /dev/null
+++ b/string/aarch64/experimental/strncmp-sve.S
@@ -0,0 +1,64 @@
+/*
+ * strncmp - compare two strings with limit
+ *
+ * Copyright (c) 2018-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "asmdefs.h"
+
+.arch armv8-a+sve
+
+/* Assumptions: ARMv8-a, AArch64, SVE available.
+ *
+ * In: x0, x1 = the two strings, x2 = maximum number of bytes to compare.
+ * Out: w0 = difference of the bytes at the first mismatch or NUL, or 0.
+ */
+
+ENTRY (__strncmp_aarch64_sve)
+ setffr /* initialize FFR */
+ mov x3, 0 /* initialize off */
+
+0: whilelo p0.b, x3, x2 /* while off < max */
+ b.none 9f
+
+ ldff1b z0.b, p0/z, [x0, x3]
+ ldff1b z1.b, p0/z, [x1, x3]
+ rdffrs p1.b, p0/z /* p1 = lanes of p0 valid after both loads */
+ b.nlast 2f /* last requested lane missing: partial read */
+
+ /* First fault did not fail: the vector up to max is valid.
+ Avoid depending on the contents of FFR beyond the branch.
+ Increment for a whole vector, even if we've only read a partial.
+ This is significantly cheaper than INCP, and since OFF is not
+ used after the loop it is ok to increment OFF past MAX. */
+ incb x3
+ cmpeq p1.b, p0/z, z0.b, z1.b /* compare strings */
+ cmpne p2.b, p0/z, z0.b, 0 /* search for ~zero */
+ nands p2.b, p0/z, p1.b, p2.b /* ~(eq & ~zero) -> ne | zero */
+ b.none 0b
+
+ /* Found end-of-string or inequality. */
+1: brkb p2.b, p0/z, p2.b /* find first such */
+ lasta w0, p2, z0.b /* extract each char */
+ lasta w1, p2, z1.b
+ sub x0, x0, x1 /* return comparison */
+ ret
+
+ /* First fault failed: only some of the vector is valid.
+ Perform the comparison only on the valid bytes. */
+2: cmpeq p2.b, p1/z, z0.b, z1.b /* compare strings, as above */
+ cmpne p3.b, p1/z, z0.b, 0
+ nands p2.b, p1/z, p2.b, p3.b
+ b.any 1b
+
+ /* No inequality or zero found. Re-init FFR, incr and loop. */
+ setffr
+ incp x3, p1.b /* off += bytes actually compared */
+ b 0b
+
+ /* Found end-of-count. */
+9: mov x0, 0 /* return equal */
+ ret
+
+END (__strncmp_aarch64_sve)
diff --git a/string/aarch64/experimental/strnlen-sve.S b/string/aarch64/experimental/strnlen-sve.S
new file mode 100644
index 000000000000..11d835a1b13c
--- /dev/null
+++ b/string/aarch64/experimental/strnlen-sve.S
@@ -0,0 +1,70 @@
+/*
+ * strnlen - calculate the length of a string with limit.
+ *
+ * Copyright (c) 2019-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "asmdefs.h"
+
+.arch armv8-a+sve
+
+/* Assumptions: ARMv8-a, AArch64, SVE available.
+ *
+ * In: x0 = string, x1 = maximum number of bytes to examine.
+ * Out: x0 = bytes before the first NUL, or max if none is found in range.
+ */
+
+ENTRY (__strnlen_aarch64_sve)
+ setffr /* initialize FFR */
+ mov x2, 0 /* initialize len */
+ b 1f /* build the first predicate before loading */
+
+ .p2align 4
+ /* We have off + vl <= max, and so may read the whole vector. */
+0: ldff1b z0.b, p0/z, [x0, x2]
+ rdffrs p1.b, p0/z /* p1 = lanes loaded without a fault */
+ b.nlast 2f /* last lane missing: partial read */
+
+ /* First fault did not fail: the whole vector is valid.
+ Avoid depending on the contents of FFR beyond the branch. */
+ cmpeq p2.b, p0/z, z0.b, 0
+ b.any 8f
+ incb x2
+
+1: whilelo p0.b, x2, x1 /* p0 = lanes with off + lane < max */
+ b.last 0b /* last lane in range: full vector */
+
+ /* We have off + vl > max. Test for off == max before proceeding. */
+ b.none 9f
+
+ ldff1b z0.b, p0/z, [x0, x2]
+ rdffrs p1.b, p0/z /* p1 = lanes of p0 loaded without a fault */
+ b.nlast 2f /* last requested lane missing: partial read */
+
+ /* First fault did not fail: the vector up to max is valid.
+ Avoid depending on the contents of FFR beyond the branch.
+ Compare for end-of-string, but there are no more bytes. */
+ cmpeq p2.b, p0/z, z0.b, 0
+
+ /* Found end-of-string or end-of-count. */
+8: brkb p2.b, p0/z, p2.b /* p2 = lanes of p0 before the first zero */
+ mov x0, x2
+ incp x0, p2.b /* result = off + those lanes */
+ ret
+
+ /* First fault failed: only some of the vector is valid.
+ Perform the comparison only on the valid bytes. */
+2: cmpeq p2.b, p1/z, z0.b, 0
+ b.any 8b
+
+ /* No zero found. Re-init FFR, incr and loop. */
+ setffr
+ incp x2, p1.b /* off += bytes actually read */
+ b 1b
+
+ /* End of count. Return max. */
+9: mov x0, x1
+ ret
+
+END (__strnlen_aarch64_sve)
diff --git a/string/aarch64/experimental/strrchr-sve.S b/string/aarch64/experimental/strrchr-sve.S
new file mode 100644
index 000000000000..731edaddf156
--- /dev/null
+++ b/string/aarch64/experimental/strrchr-sve.S
@@ -0,0 +1,81 @@
+/*
+ * strrchr - find the last of a character in a string
+ *
+ * Copyright (c) 2019-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#include "asmdefs.h"
+
+.arch armv8-a+sve
+
+/* Assumptions: ARMv8-a, AArch64, SVE available.
+ *
+ * In: x0 = NUL-terminated string, w1 = byte to find.
+ * Out: x0 = address of the last matching byte, or 0 if there is none.
+ */
+
+ENTRY (__strrchr_aarch64_sve)
+ dup z1.b, w1 /* replicate byte across vector */
+ setffr /* initialize FFR */
+ ptrue p1.b /* all ones; loop invariant */
+ mov x2, 0 /* no match found so far */
+ pfalse p2.b /* p2 pairs with x2: saved match lanes */
+
+ .p2align 4
+ /* Read a vector's worth of bytes, stopping on first fault. */
+0: ldff1b z0.b, p1/z, [x0, xzr]
+ rdffrs p0.b, p1/z /* p0 = lanes loaded without a fault */
+ b.nlast 1f /* last lane missing: partial read */
+
+ /* First fault did not fail: the whole vector is valid.
+ Avoid depending on the contents of FFR beyond the branch. */
+ incb x0, all /* skip bytes this round */
+ cmpeq p3.b, p1/z, z0.b, 0 /* search for 0 */
+ b.any 3f
+
+ cmpeq p3.b, p1/z, z0.b, z1.b /* search for c; no eos */
+ b.none 0b
+
+ mov x2, x0 /* save advanced base */
+ mov p2.b, p3.b /* save current search */
+ b 0b
+
+ /* First fault failed: only some of the vector is valid.
+ Perform the comparisons only on the valid bytes. */
+1: cmpeq p3.b, p0/z, z0.b, 0 /* search for 0 */
+ b.any 2f
+
+ cmpeq p3.b, p0/z, z0.b, z1.b /* search for c; no eos */
+ mov x3, x0 /* keep this vector's base for the addvl below */
+ incp x0, p0.b /* skip bytes this round */
+ setffr /* re-init FFR */
+ b.none 0b /* tests the flags set by the cmpeq above */
+
+ addvl x2, x3, 1 /* save advanced base */
+ mov p2.b, p3.b /* save current search */
+ b 0b
+
+ /* Found end-of-string. */
+2: incb x0, all /* advance base */
+3: brka p3.b, p1/z, p3.b /* mask after first 0 */
+ cmpeq p3.b, p3/z, z0.b, z1.b /* search for c not after eos */
+ b.any 4f
+
+ /* No C within last vector. Did we have one before? */
+ cbz x2, 5f
+ mov x0, x2 /* restore advanced base */
+ mov p3.b, p2.b /* restore saved search */
+
+ /* Find the *last* match in the predicate. This is slightly
+ more complicated than finding the first match. */
+4: rev p3.b, p3.b /* reverse the bits */
+ brka p3.b, p1/z, p3.b /* find position of last match */
+ decp x0, p3.b /* retard pointer to last match */
+ ret
+
+ /* No C whatsoever. Return NULL. */
+5: mov x0, 0
+ ret
+
+END (__strrchr_aarch64_sve)