diff options
Diffstat (limited to 'string/aarch64/experimental')
| -rw-r--r-- | string/aarch64/experimental/memchr-sve.S | 60 | ||||
| -rw-r--r-- | string/aarch64/experimental/memcmp-sve.S | 46 | ||||
| -rw-r--r-- | string/aarch64/experimental/stpcpy-sve.S | 10 | ||||
| -rw-r--r-- | string/aarch64/experimental/strchr-sve.S | 67 | ||||
| -rw-r--r-- | string/aarch64/experimental/strchrnul-sve.S | 9 | ||||
| -rw-r--r-- | string/aarch64/experimental/strcmp-sve.S | 55 | ||||
| -rw-r--r-- | string/aarch64/experimental/strcpy-sve.S | 67 | ||||
| -rw-r--r-- | string/aarch64/experimental/strlen-sve.S | 52 | ||||
| -rw-r--r-- | string/aarch64/experimental/strncmp-sve.S | 64 | ||||
| -rw-r--r-- | string/aarch64/experimental/strnlen-sve.S | 70 | ||||
| -rw-r--r-- | string/aarch64/experimental/strrchr-sve.S | 81 |
11 files changed, 581 insertions, 0 deletions
diff --git a/string/aarch64/experimental/memchr-sve.S b/string/aarch64/experimental/memchr-sve.S new file mode 100644 index 000000000000..b314551f3e0f --- /dev/null +++ b/string/aarch64/experimental/memchr-sve.S @@ -0,0 +1,60 @@ +/* + * memchr - find a character in a memory zone + * + * Copyright (c) 2018-2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "asmdefs.h" + +.arch armv8-a+sve + +/* Assumptions: + * + * ARMv8-a, AArch64 + * SVE Available. + */ + +ENTRY (__memchr_aarch64_sve) + dup z1.b, w1 /* duplicate c to a vector */ + setffr /* initialize FFR */ + mov x3, 0 /* initialize off */ + + .p2align 4 +0: whilelo p1.b, x3, x2 /* make sure off < max */ + b.none 9f + + /* Read a vector's worth of bytes, bounded by max, + stopping on first fault. */ + ldff1b z0.b, p1/z, [x0, x3] + rdffrs p0.b, p1/z + b.nlast 2f + + /* First fault did not fail: the vector bounded by max is valid. + Avoid depending on the contents of FFR beyond the branch. */ + incb x3 /* speculatively advance off by one vector */ + cmpeq p2.b, p1/z, z0.b, z1.b /* search for c */ + b.none 0b + decb x3 /* undo the speculative increment */ + + /* Found C. */ +1: brkb p2.b, p1/z, p2.b /* select the bytes before the first c */ + add x0, x0, x3 /* form partial pointer */ + incp x0, p2.b /* form final pointer to c */ + ret + + /* First fault failed: only some of the vector is valid. + Perform the comparison only on the valid bytes. */ +2: cmpeq p2.b, p0/z, z0.b, z1.b + b.any 1b + + /* No C found. Re-init FFR, increment, and loop. */ + setffr + incp x3, p0.b + b 0b + + /* Found end of count. */ +9: mov x0, 0 /* return null */ + ret + +END (__memchr_aarch64_sve) diff --git a/string/aarch64/experimental/memcmp-sve.S b/string/aarch64/experimental/memcmp-sve.S new file mode 100644 index 000000000000..ad3534836d04 --- /dev/null +++ b/string/aarch64/experimental/memcmp-sve.S @@ -0,0 +1,46 @@ +/* + * memcmp - compare memory + * + * Copyright (c) 2018-2022, Arm Limited. 
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "asmdefs.h" + +.arch armv8-a+sve + +/* Assumptions: + * + * ARMv8-a, AArch64 + * SVE Available. + */ + +ENTRY (__memcmp_aarch64_sve) + mov x3, 0 /* initialize off */ + +0: whilelo p0.b, x3, x2 /* while off < max */ + b.none 9f + + ld1b z0.b, p0/z, [x0, x3] /* read vectors bounded by max. */ + ld1b z1.b, p0/z, [x1, x3] + + /* Increment for a whole vector, even if we've only read a partial. + This is significantly cheaper than INCP, and since OFF is not + used after the loop it is ok to increment OFF past MAX. */ + incb x3 + + cmpne p1.b, p0/z, z0.b, z1.b /* while no inequalities */ + b.none 0b + + /* Found inequality. */ +1: brkb p1.b, p0/z, p1.b /* select the bytes before the first inequality */ + lasta w0, p1, z0.b /* extract the first differing byte of each input */ + lasta w1, p1, z1.b + sub x0, x0, x1 /* return their difference */ + ret + + /* Found end-of-count. */ +9: mov x0, 0 /* return equality (zero) */ + ret + +END (__memcmp_aarch64_sve) diff --git a/string/aarch64/experimental/stpcpy-sve.S b/string/aarch64/experimental/stpcpy-sve.S new file mode 100644 index 000000000000..5d3f14b86026 --- /dev/null +++ b/string/aarch64/experimental/stpcpy-sve.S @@ -0,0 +1,10 @@ +/* + * stpcpy - copy a string returning pointer to end. + * + * Copyright (c) 2020, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#define BUILD_STPCPY 1 + +#include "strcpy-sve.S" diff --git a/string/aarch64/experimental/strchr-sve.S b/string/aarch64/experimental/strchr-sve.S new file mode 100644 index 000000000000..7d74ae9ff232 --- /dev/null +++ b/string/aarch64/experimental/strchr-sve.S @@ -0,0 +1,67 @@ +/* + * strchr/strchrnul - find a character in a string + * + * Copyright (c) 2018-2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "asmdefs.h" + +.arch armv8-a+sve + +/* Assumptions: + * + * ARMv8-a, AArch64 + * SVE Available. 
+ */ + +/* To build as strchrnul, define BUILD_STRCHRNUL before compiling this file. */ +#ifdef BUILD_STRCHRNUL +#define FUNC __strchrnul_aarch64_sve +#else +#define FUNC __strchr_aarch64_sve +#endif + +ENTRY (FUNC) + dup z1.b, w1 /* replicate byte across vector */ + setffr /* initialize FFR */ + ptrue p1.b /* all ones; loop invariant */ + + .p2align 4 + /* Read a vector's worth of bytes, stopping on first fault. */ +0: ldff1b z0.b, p1/z, [x0, xzr] + rdffrs p0.b, p1/z + b.nlast 2f + + /* First fault did not fail: the whole vector is valid. + Avoid depending on the contents of FFR beyond the branch. */ + incb x0 /* speculatively advance the pointer by one vector */ + cmpeq p2.b, p1/z, z0.b, z1.b /* search for c */ + cmpeq p3.b, p1/z, z0.b, 0 /* search for 0 */ + orrs p4.b, p1/z, p2.b, p3.b /* c | 0 */ + b.none 0b + decb x0 /* undo the speculative increment */ + + /* Found C or 0. */ +1: brka p4.b, p1/z, p4.b /* select bytes up to and including the first hit */ + sub x0, x0, 1 /* adjust pointer for that byte */ + incp x0, p4.b /* x0 = address of the first hit */ +#ifndef BUILD_STRCHRNUL + ptest p4, p2.b /* was the first hit a match for c? */ + csel x0, xzr, x0, none /* if there was no c, return null */ +#endif + ret + + /* First fault failed: only some of the vector is valid. + Perform the comparison only on the valid bytes. */ +2: cmpeq p2.b, p0/z, z0.b, z1.b /* search for c */ + cmpeq p3.b, p0/z, z0.b, 0 /* search for 0 */ + orrs p4.b, p0/z, p2.b, p3.b /* c | 0 */ + b.any 1b + + /* No C or 0 found. Re-init FFR, increment, and loop. */ + setffr + incp x0, p0.b + b 0b + +END (FUNC) diff --git a/string/aarch64/experimental/strchrnul-sve.S b/string/aarch64/experimental/strchrnul-sve.S new file mode 100644 index 000000000000..0005f9177514 --- /dev/null +++ b/string/aarch64/experimental/strchrnul-sve.S @@ -0,0 +1,9 @@ +/* + * strchrnul - find a character or nul in a string + * + * Copyright (c) 2018-2019, Arm Limited. 
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#define BUILD_STRCHRNUL +#include "strchr-sve.S" diff --git a/string/aarch64/experimental/strcmp-sve.S b/string/aarch64/experimental/strcmp-sve.S new file mode 100644 index 000000000000..b6c249588534 --- /dev/null +++ b/string/aarch64/experimental/strcmp-sve.S @@ -0,0 +1,55 @@ +/* + * __strcmp_aarch64_sve - compare two strings + * + * Copyright (c) 2018-2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "asmdefs.h" + +.arch armv8-a+sve + +/* Assumptions: + * + * ARMv8-a, AArch64 + * SVE Available. + */ + +ENTRY (__strcmp_aarch64_sve) + setffr /* initialize FFR */ + ptrue p1.b, all /* all ones; loop invariant */ + mov x2, 0 /* initialize offset */ + + /* Read a vector's worth of bytes, stopping on first fault. */ + .p2align 4 +0: ldff1b z0.b, p1/z, [x0, x2] + ldff1b z1.b, p1/z, [x1, x2] + rdffrs p0.b, p1/z + b.nlast 2f + + /* First fault did not fail: the whole vector is valid. + Avoid depending on the contents of FFR beyond the branch. */ + incb x2, all /* skip bytes for next round */ + cmpeq p2.b, p1/z, z0.b, z1.b /* compare strings */ + cmpne p3.b, p1/z, z0.b, 0 /* search for non-zero bytes */ + nands p2.b, p1/z, p2.b, p3.b /* ~(eq & ~zero) -> ne | zero */ + b.none 0b + + /* Found end-of-string or inequality. */ +1: brkb p2.b, p1/z, p2.b /* select the bytes before the first such */ + lasta w0, p2, z0.b /* extract the char at that position from each string */ + lasta w1, p2, z1.b + sub x0, x0, x1 /* return their difference */ + ret + + /* First fault failed: only some of the vector is valid. + Perform the comparison only on the valid bytes. 
*/ +2: incp x2, p0.b /* skip bytes for next round */ + setffr /* re-init FFR for next round */ + cmpeq p2.b, p0/z, z0.b, z1.b /* compare strings, as above */ + cmpne p3.b, p0/z, z0.b, 0 /* search for non-zero bytes */ + nands p2.b, p0/z, p2.b, p3.b /* ~(eq & ~zero) -> ne | zero */ + b.none 0b + b 1b + +END (__strcmp_aarch64_sve) diff --git a/string/aarch64/experimental/strcpy-sve.S b/string/aarch64/experimental/strcpy-sve.S new file mode 100644 index 000000000000..57b77c8a00e7 --- /dev/null +++ b/string/aarch64/experimental/strcpy-sve.S @@ -0,0 +1,67 @@ +/* + * strcpy/stpcpy - copy a string returning pointer to start/end. + * + * Copyright (c) 2018-2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "asmdefs.h" + +.arch armv8-a+sve + +/* Assumptions: + * + * ARMv8-a, AArch64 + * SVE Available. + */ + +/* To build as stpcpy, define BUILD_STPCPY before compiling this file. */ +#ifdef BUILD_STPCPY +#define FUNC __stpcpy_aarch64_sve +#else +#define FUNC __strcpy_aarch64_sve +#endif + +ENTRY (FUNC) + setffr /* initialize FFR */ + ptrue p2.b, all /* all ones; loop invariant */ + mov x2, 0 /* initialize offset */ + + .p2align 4 + /* Read a vector's worth of bytes, stopping on first fault. */ +0: ldff1b z0.b, p2/z, [x1, x2] + rdffrs p0.b, p2/z + b.nlast 1f + + /* First fault did not fail: the whole vector is valid. + Avoid depending on the contents of FFR beyond the branch. */ + cmpeq p1.b, p2/z, z0.b, 0 /* search for zeros */ + b.any 2f + + /* No zero found. Store the whole vector and loop. */ + st1b z0.b, p2, [x0, x2] + incb x2, all /* advance offset by one vector */ + b 0b + + /* First fault failed: only some of the vector is valid. + Perform the comparison only on the valid bytes. */ +1: cmpeq p1.b, p0/z, z0.b, 0 /* search for zeros */ + b.any 2f + + /* No zero found. Store the valid portion of the vector and loop. */ + setffr /* re-init FFR */ + st1b z0.b, p0, [x0, x2] + incp x2, p0.b /* advance offset by the number of valid bytes */ + b 0b + + /* Zero found. Crop the vector to the found zero and finish. 
*/ +2: brka p0.b, p2/z, p1.b /* select bytes up to and including the first zero */ + st1b z0.b, p0, [x0, x2] /* store them, terminator included */ +#ifdef BUILD_STPCPY + add x0, x0, x2 /* dst + offset of this vector */ + sub x0, x0, 1 /* the count below includes the zero itself */ + incp x0, p0.b /* x0 = address of the stored zero */ +#endif + ret + +END (FUNC) diff --git a/string/aarch64/experimental/strlen-sve.S b/string/aarch64/experimental/strlen-sve.S new file mode 100644 index 000000000000..c83155052c07 --- /dev/null +++ b/string/aarch64/experimental/strlen-sve.S @@ -0,0 +1,52 @@ +/* + * __strlen_aarch64_sve - compute the length of a string + * + * Copyright (c) 2018-2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "asmdefs.h" + +.arch armv8-a+sve + +/* Assumptions: + * + * ARMv8-a, AArch64 + * SVE Available. + */ + +ENTRY (__strlen_aarch64_sve) + setffr /* initialize FFR */ + ptrue p2.b /* all ones; loop invariant */ + mov x1, 0 /* initialize length */ + + /* Read a vector's worth of bytes, stopping on first fault. */ + .p2align 4 +0: ldff1b z0.b, p2/z, [x0, x1] + rdffrs p0.b, p2/z + b.nlast 2f + + /* First fault did not fail: the whole vector is valid. + Avoid depending on the contents of FFR beyond the branch. */ + incb x1, all /* speculatively advance length by one vector */ + cmpeq p1.b, p2/z, z0.b, 0 /* loop if no zeros */ + b.none 0b + decb x1, all /* undo the speculative increment */ + + /* Zero found. Select the bytes before the first and count them. */ +1: brkb p0.b, p2/z, p1.b + incp x1, p0.b + mov x0, x1 /* return length */ + ret + + /* First fault failed: only some of the vector is valid. + Perform the comparison only on the valid bytes. */ +2: cmpeq p1.b, p0/z, z0.b, 0 + b.any 1b + + /* No zero found. Re-init FFR, increment, and loop. */ + setffr + incp x1, p0.b + b 0b + +END (__strlen_aarch64_sve) diff --git a/string/aarch64/experimental/strncmp-sve.S b/string/aarch64/experimental/strncmp-sve.S new file mode 100644 index 000000000000..a281e642d8aa --- /dev/null +++ b/string/aarch64/experimental/strncmp-sve.S @@ -0,0 +1,64 @@ +/* + * strncmp - compare two strings with limit + * + * Copyright (c) 2018-2022, Arm Limited. 
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "asmdefs.h" + +.arch armv8-a+sve + +/* Assumptions: + * + * ARMv8-a, AArch64 + * SVE Available. + */ + +ENTRY (__strncmp_aarch64_sve) + setffr /* initialize FFR */ + mov x3, 0 /* initialize off */ + +0: whilelo p0.b, x3, x2 /* while off < max */ + b.none 9f + + ldff1b z0.b, p0/z, [x0, x3] + ldff1b z1.b, p0/z, [x1, x3] + rdffrs p1.b, p0/z + b.nlast 2f + + /* First fault did not fail: the vector up to max is valid. + Avoid depending on the contents of FFR beyond the branch. + Increment for a whole vector, even if we've only read a partial. + This is significantly cheaper than INCP, and since OFF is not + used after the loop it is ok to increment OFF past MAX. */ + incb x3 + cmpeq p1.b, p0/z, z0.b, z1.b /* compare strings */ + cmpne p2.b, p0/z, z0.b, 0 /* search for non-zero bytes */ + nands p2.b, p0/z, p1.b, p2.b /* ~(eq & ~zero) -> ne | zero */ + b.none 0b + + /* Found end-of-string or inequality. */ +1: brkb p2.b, p0/z, p2.b /* select the bytes before the first such */ + lasta w0, p2, z0.b /* extract the char at that position from each string */ + lasta w1, p2, z1.b + sub x0, x0, x1 /* return their difference */ + ret + + /* First fault failed: only some of the vector is valid. + Perform the comparison only on the valid bytes. */ +2: cmpeq p2.b, p1/z, z0.b, z1.b /* compare strings, as above */ + cmpne p3.b, p1/z, z0.b, 0 + nands p2.b, p1/z, p2.b, p3.b + b.any 1b + + /* No inequality or zero found. Re-init FFR, incr and loop. */ + setffr + incp x3, p1.b + b 0b + + /* Found end-of-count. */ +9: mov x0, 0 /* return equal */ + ret + +END (__strncmp_aarch64_sve) diff --git a/string/aarch64/experimental/strnlen-sve.S b/string/aarch64/experimental/strnlen-sve.S new file mode 100644 index 000000000000..11d835a1b13c --- /dev/null +++ b/string/aarch64/experimental/strnlen-sve.S @@ -0,0 +1,70 @@ +/* + * strnlen - calculate the length of a string with limit. + * + * Copyright (c) 2019-2022, Arm Limited. 
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "asmdefs.h" + +.arch armv8-a+sve + +/* Assumptions: + * + * ARMv8-a, AArch64 + * SVE Available. + */ + +ENTRY (__strnlen_aarch64_sve) + setffr /* initialize FFR */ + mov x2, 0 /* initialize len */ + b 1f + + .p2align 4 + /* We have off + vl <= max, and so may read the whole vector. */ +0: ldff1b z0.b, p0/z, [x0, x2] + rdffrs p1.b, p0/z + b.nlast 2f + + /* First fault did not fail: the whole vector is valid. + Avoid depending on the contents of FFR beyond the branch. */ + cmpeq p2.b, p0/z, z0.b, 0 /* search for zeros */ + b.any 8f + incb x2 /* advance len by one vector */ + +1: whilelo p0.b, x2, x1 /* select the bytes with off < max */ + b.last 0b + + /* We have off + vl > max. Test for off == max before proceeding. */ + b.none 9f + + ldff1b z0.b, p0/z, [x0, x2] + rdffrs p1.b, p0/z + b.nlast 2f + + /* First fault did not fail: the vector up to max is valid. + Avoid depending on the contents of FFR beyond the branch. + Compare for end-of-string, but there are no more bytes. */ + cmpeq p2.b, p0/z, z0.b, 0 + + /* Found end-of-string or end-of-count. */ +8: brkb p2.b, p0/z, p2.b + mov x0, x2 + incp x0, p2.b + ret + + /* First fault failed: only some of the vector is valid. + Perform the comparison only on the valid bytes. */ +2: cmpeq p2.b, p1/z, z0.b, 0 + b.any 8b + + /* No zero found. Re-init FFR, incr and loop. */ + setffr + incp x2, p1.b + b 1b + + /* End of count. Return max. */ +9: mov x0, x1 + ret + +END (__strnlen_aarch64_sve) diff --git a/string/aarch64/experimental/strrchr-sve.S b/string/aarch64/experimental/strrchr-sve.S new file mode 100644 index 000000000000..731edaddf156 --- /dev/null +++ b/string/aarch64/experimental/strrchr-sve.S @@ -0,0 +1,81 @@ +/* + * strrchr - find the last of a character in a string + * + * Copyright (c) 2019-2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "asmdefs.h" + +.arch armv8-a+sve + +/* Assumptions: + * + * ARMv8-a, AArch64 + * SVE Available. 
+ */ + +ENTRY (__strrchr_aarch64_sve) + dup z1.b, w1 /* replicate byte across vector */ + setffr /* initialize FFR */ + ptrue p1.b /* all ones; loop invariant */ + mov x2, 0 /* no match found so far */ + pfalse p2.b /* no saved search result yet */ + + .p2align 4 + /* Read a vector's worth of bytes, stopping on first fault. */ +0: ldff1b z0.b, p1/z, [x0, xzr] + rdffrs p0.b, p1/z + b.nlast 1f + + /* First fault did not fail: the whole vector is valid. + Avoid depending on the contents of FFR beyond the branch. */ + incb x0, all /* skip bytes this round */ + cmpeq p3.b, p1/z, z0.b, 0 /* search for 0 */ + b.any 3f + + cmpeq p3.b, p1/z, z0.b, z1.b /* search for c; no eos */ + b.none 0b + + mov x2, x0 /* save advanced base */ + mov p2.b, p3.b /* save current search */ + b 0b + + /* First fault failed: only some of the vector is valid. + Perform the comparisons only on the valid bytes. */ +1: cmpeq p3.b, p0/z, z0.b, 0 /* search for 0 */ + b.any 2f + + cmpeq p3.b, p0/z, z0.b, z1.b /* search for c; no eos */ + mov x3, x0 /* remember the base of this vector */ + incp x0, p0.b /* skip bytes this round */ + setffr /* re-init FFR */ + b.none 0b + + addvl x2, x3, 1 /* save advanced base */ + mov p2.b, p3.b /* save current search */ + b 0b + + /* Found end-of-string. */ +2: incb x0, all /* advance base */ +3: brka p3.b, p1/z, p3.b /* mask off the bytes after the first 0 */ + cmpeq p3.b, p3/z, z0.b, z1.b /* search for c not after eos */ + b.any 4f + + /* No C within last vector. Did we have one before? */ + cbz x2, 5f + mov x0, x2 /* restore advanced base */ + mov p3.b, p2.b /* restore saved search */ + + /* Find the *last* match in the predicate. This is slightly + more complicated than finding the first match. */ +4: rev p3.b, p3.b /* reverse the bits */ + brka p3.b, p1/z, p3.b /* find position of last match */ + decp x0, p3.b /* move pointer back to the last match */ + ret + + /* No C whatsoever. Return NULL. */ +5: mov x0, 0 + ret + +END (__strrchr_aarch64_sve) |
