diff options
| author | Andrew Turner <andrew@FreeBSD.org> | 2025-01-10 10:34:52 +0000 |
|---|---|---|
| committer | Andrew Turner <andrew@FreeBSD.org> | 2025-01-10 10:39:34 +0000 |
| commit | 9d1de25930735261c16ed874a933b4c1f1d9041e (patch) | |
| tree | b0cac1c933cc1ecb885c7e757b89ffbf13f1f012 /string/aarch64/experimental/strcpy-sve.S | |
| parent | edc5c0de794f521eb620d2b6cbaee2434442a8f3 (diff) | |
Update the Arm Optimized Routines vendor/arm-optimized-routines/v25.01 vendor/arm-optimized-routines
Import the v25.01 release of the Arm Optimized Routines [1].
[1] https://github.com/ARM-software/optimized-routines/tree/v25.01
Sponsored by: Arm Ltd
Diffstat (limited to 'string/aarch64/experimental/strcpy-sve.S')
| -rw-r--r-- | string/aarch64/experimental/strcpy-sve.S | 67 |
1 files changed, 67 insertions, 0 deletions
diff --git a/string/aarch64/experimental/strcpy-sve.S b/string/aarch64/experimental/strcpy-sve.S new file mode 100644 index 000000000000..57b77c8a00e7 --- /dev/null +++ b/string/aarch64/experimental/strcpy-sve.S @@ -0,0 +1,67 @@ +/* + * strcpy/stpcpy - copy a string returning pointer to start/end. + * + * Copyright (c) 2018-2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#include "asmdefs.h" + +.arch armv8-a+sve + +/* Assumptions: + * + * ARMv8-a, AArch64 + * SVE Available. + */ + +/* To build as stpcpy, define BUILD_STPCPY before compiling this file. */ +#ifdef BUILD_STPCPY +#define FUNC __stpcpy_aarch64_sve +#else +#define FUNC __strcpy_aarch64_sve +#endif + +ENTRY (FUNC) /* reads from x1, writes to x0; x2 is the byte offset shared by both */ + setffr /* initialize FFR */ + ptrue p2.b, all /* all ones; loop invariant */ + mov x2, 0 /* initialize offset */ + + .p2align 4 + /* Read a vector's worth of bytes, stopping on first fault. */ +0: ldff1b z0.b, p2/z, [x1, x2] /* z0 = bytes at x1 + offset */ + rdffrs p0.b, p2/z /* p0 = lanes that were loaded; sets the flags */ + b.nlast 1f /* last lane not loaded: partial vector */ + + /* First fault did not fail: the whole vector is valid. + Avoid depending on the contents of FFR beyond the branch. */ + cmpeq p1.b, p2/z, z0.b, 0 /* search for zeros */ + b.any 2f + + /* No zero found. Store the whole vector and loop. */ + st1b z0.b, p2, [x0, x2] + incb x2, all /* offset += vector length in bytes */ + b 0b + + /* First fault failed: only some of the vector is valid. + Perform the comparison only on the valid bytes. */ +1: cmpeq p1.b, p0/z, z0.b, 0 /* search for zeros */ + b.any 2f + + /* No zero found. Store the valid portion of the vector and loop. */ + setffr /* re-init FFR */ + st1b z0.b, p0, [x0, x2] + incp x2, p0.b /* offset += number of valid bytes */ + b 0b + + /* Zero found. Crop the vector to the found zero and finish. */ +2: brka p0.b, p2/z, p1.b /* p0 = lanes up to and including the first zero */ + st1b z0.b, p0, [x0, x2] /* store the final bytes, terminator included */ +#ifdef BUILD_STPCPY + add x0, x0, x2 /* x0 = destination + offset */ + sub x0, x0, 1 /* back up one so the count below lands on the terminator */ + incp x0, p0.b /* x0 += bytes just stored: address of the copied zero byte */ +#endif + ret /* strcpy build: x0 is never modified, so it still holds the destination start */ + +END (FUNC) |
