Diffstat (limited to 'contrib/arm-optimized-routines/string')
72 files changed, 954 insertions, 428 deletions
diff --git a/contrib/arm-optimized-routines/string/Dir.mk b/contrib/arm-optimized-routines/string/Dir.mk
index cf3453f7580d..40ff5acc093e 100644
--- a/contrib/arm-optimized-routines/string/Dir.mk
+++ b/contrib/arm-optimized-routines/string/Dir.mk
@@ -1,7 +1,7 @@
 # Makefile fragment - requires GNU make
 #
 # Copyright (c) 2019-2021, Arm Limited.
-# SPDX-License-Identifier: MIT
+# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 
 S := $(srcdir)/string
 B := build/string
diff --git a/contrib/arm-optimized-routines/string/README.contributors b/contrib/arm-optimized-routines/string/README.contributors
new file mode 100644
index 000000000000..0b4a51b56366
--- /dev/null
+++ b/contrib/arm-optimized-routines/string/README.contributors
@@ -0,0 +1,30 @@
+STYLE REQUIREMENTS
+==================
+
+1. Most code in this sub-directory is expected to be upstreamed into glibc so
+   the GNU Coding Standard and glibc specific conventions should be followed
+   to ease upstreaming.
+
+2. ABI and symbols: the code should be written so it is suitable for inclusion
+   into a libc with minimal changes. This e.g. means that internal symbols
+   should be hidden and in the implementation reserved namespace according to
+   ISO C and POSIX rules. If possible the built shared libraries and static
+   library archives should be usable to override libc symbols at link time (or
+   at runtime via LD_PRELOAD). This requires the symbols to follow the glibc ABI
+   (other than symbol versioning), this cannot be done reliably for static
+   linking so this is a best effort requirement.
+
+3. API: include headers should be suitable for benchmarking and testing code
+   and should not conflict with libc headers.
+
+
+CONTRIBUTION GUIDELINES FOR string SUB-DIRECTORY
+================================================
+1. Code:
+   - The assumptions of the code must be clearly documented.
+
+   - Assembly style should be consistent across different implementations.
+
+
+2. Performance:
+   - Benchmarking is needed on several microarchitectures.
diff --git a/contrib/arm-optimized-routines/string/aarch64/__mtag_tag_region.S b/contrib/arm-optimized-routines/string/aarch64/__mtag_tag_region.S
index 84339f73cf23..207e22950c6d 100644
--- a/contrib/arm-optimized-routines/string/aarch64/__mtag_tag_region.S
+++ b/contrib/arm-optimized-routines/string/aarch64/__mtag_tag_region.S
@@ -1,8 +1,8 @@
 /*
  * __mtag_tag_region - tag memory
  *
- * Copyright (c) 2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2021-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 /* Assumptions:
@@ -15,7 +15,7 @@
  * The memory region may remain untagged if tagging is not enabled.
  */
 
-#include "../asmdefs.h"
+#include "asmdefs.h"
 
 #if __ARM_FEATURE_MEMORY_TAGGING
diff --git a/contrib/arm-optimized-routines/string/aarch64/__mtag_tag_zero_region.S b/contrib/arm-optimized-routines/string/aarch64/__mtag_tag_zero_region.S
index f58364ca6fcb..44b8e0114f42 100644
--- a/contrib/arm-optimized-routines/string/aarch64/__mtag_tag_zero_region.S
+++ b/contrib/arm-optimized-routines/string/aarch64/__mtag_tag_zero_region.S
@@ -1,8 +1,8 @@
 /*
  * __mtag_tag_zero_region - tag memory and fill it with zero bytes
  *
- * Copyright (c) 2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2021-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 /* Assumptions:
@@ -15,7 +15,7 @@
  * The memory region may remain untagged if tagging is not enabled.
  */
 
-#include "../asmdefs.h"
+#include "asmdefs.h"
 
 #if __ARM_FEATURE_MEMORY_TAGGING
diff --git a/contrib/arm-optimized-routines/string/asmdefs.h b/contrib/arm-optimized-routines/string/aarch64/asmdefs.h
index 340b427a505b..069b146f4a69 100644
--- a/contrib/arm-optimized-routines/string/asmdefs.h
+++ b/contrib/arm-optimized-routines/string/aarch64/asmdefs.h
@@ -1,15 +1,13 @@
 /*
- * Macros for asm code.
+ * Macros for asm code. AArch64 version.
  *
- * Copyright (c) 2019-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #ifndef _ASMDEFS_H
 #define _ASMDEFS_H
 
-#if defined(__aarch64__)
-
 /* Branch Target Identitication support.  */
 #define BTI_C		hint	34
 #define BTI_J		hint	36
@@ -55,19 +53,6 @@ GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_PAC)
 	.cfi_startproc;		\
 	BTI_C;
 
-#else
-
-#define END_FILE
-
-#define ENTRY_ALIGN(name, alignment)	\
-  .global name;		\
-  .type name,%function;	\
-  .align alignment;	\
-  name:			\
-  .cfi_startproc;
-
-#endif
-
 #define ENTRY(name) ENTRY_ALIGN(name, 6)
 
 #define ENTRY_ALIAS(name)	\
@@ -95,4 +80,13 @@ GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_PAC)
 #define SIZE_ARG(n)
 #endif
 
+/* Compiler supports SVE instructions */
+#ifndef HAVE_SVE
+# if __aarch64__ && (__GNUC__ >= 8 || __clang_major__ >= 5)
+#  define HAVE_SVE 1
+# else
+#  define HAVE_SVE 0
+# endif
+#endif
+
 #endif
diff --git a/contrib/arm-optimized-routines/string/aarch64/check-arch.S b/contrib/arm-optimized-routines/string/aarch64/check-arch.S
index 5a54242d7de6..131b7fa36ec2 100644
--- a/contrib/arm-optimized-routines/string/aarch64/check-arch.S
+++ b/contrib/arm-optimized-routines/string/aarch64/check-arch.S
@@ -1,8 +1,8 @@
 /*
  * check ARCH setting.
  *
- * Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2020-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #if !__aarch64__
@@ -10,4 +10,4 @@
 #endif
 
 /* Include for GNU property notes.  */
-#include "../asmdefs.h"
+#include "asmdefs.h"
diff --git a/contrib/arm-optimized-routines/string/aarch64/memchr-mte.S b/contrib/arm-optimized-routines/string/aarch64/memchr-mte.S
index c2e967d1004e..948c3cbc7dd4 100644
--- a/contrib/arm-optimized-routines/string/aarch64/memchr-mte.S
+++ b/contrib/arm-optimized-routines/string/aarch64/memchr-mte.S
@@ -1,8 +1,8 @@
 /*
  * memchr - find a character in a memory zone
  *
- * Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2020-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 /* Assumptions:
@@ -11,7 +11,7 @@
  * MTE compatible.
  */
 
-#include "../asmdefs.h"
+#include "asmdefs.h"
 
 #define srcin	x0
 #define chrin	w1
@@ -23,25 +23,21 @@
 #define synd	x5
 #define shift	x6
 #define tmp	x7
-#define wtmp	w7
 
 #define vrepchr		v0
 #define qdata		q1
 #define vdata		v1
 #define vhas_chr	v2
-#define vrepmask	v3
-#define vend		v4
-#define dend		d4
+#define vend		v3
+#define dend		d3
 
 /*
    Core algorithm:
-
-   For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
-   per byte. For even bytes, bits 0-3 are set if the relevant byte matched the
-   requested character or the byte is NUL. Bits 4-7 must be zero. Bits 4-7 are
-   set likewise for odd bytes so that adjacent bytes can be merged. Since the
-   bits in the syndrome reflect the order in which things occur in the original
-   string, counting trailing zeros identifies exactly which byte matched.
-*/
+   For each 16-byte chunk we calculate a 64-bit nibble mask value with four bits
+   per byte. We take 4 bits of every comparison byte with shift right and narrow
+   by 4 instruction. Since the bits in the nibble mask reflect the order in
+   which things occur in the original string, counting leading zeros identifies
+   exactly which byte matched. */

 ENTRY (__memchr_aarch64_mte)
	PTR_ARG (0)
@@ -50,55 +46,53 @@ ENTRY (__memchr_aarch64_mte)
	cbz	cntin, L(nomatch)
	ld1	{vdata.16b}, [src]
	dup	vrepchr.16b, chrin
-	mov	wtmp, 0xf00f
-	dup	vrepmask.8h, wtmp
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	lsl	shift, srcin, 2
-	and	vhas_chr.16b, vhas_chr.16b, vrepmask.16b
-	addp	vend.16b, vhas_chr.16b, vhas_chr.16b	/* 128->64 */
+	shrn	vend.8b, vhas_chr.8h, 4	/* 128->64 */
	fmov	synd, dend
	lsr	synd, synd, shift
	cbz	synd, L(start_loop)

	rbit	synd, synd
	clz	synd, synd
-	add	result, srcin, synd, lsr 2
	cmp	cntin, synd, lsr 2
+	add	result, srcin, synd, lsr 2
	csel	result, result, xzr, hi
	ret

+	.p2align 3
 L(start_loop):
	sub	tmp, src, srcin
-	add	tmp, tmp, 16
+	add	tmp, tmp, 17
	subs	cntrem, cntin, tmp
-	b.ls	L(nomatch)
+	b.lo	L(nomatch)

	/* Make sure that it won't overread by a 16-byte chunk */
-	add	tmp, cntrem, 15
-	tbnz	tmp, 4, L(loop32_2)
-
+	tbz	cntrem, 4, L(loop32_2)
+	sub	src, src, 16
	.p2align 4
 L(loop32):
-	ldr	qdata, [src, 16]!
+	ldr	qdata, [src, 32]!
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b	/* 128->64 */
	fmov	synd, dend
	cbnz	synd, L(end)

 L(loop32_2):
-	ldr	qdata, [src, 16]!
-	subs	cntrem, cntrem, 32
+	ldr	qdata, [src, 16]
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
-	b.ls	L(end)
+	subs	cntrem, cntrem, 32
+	b.lo	L(end_2)
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b	/* 128->64 */
	fmov	synd, dend
	cbz	synd, L(loop32)
+L(end_2):
+	add	src, src, 16
 L(end):
-	and	vhas_chr.16b, vhas_chr.16b, vrepmask.16b
-	addp	vend.16b, vhas_chr.16b, vhas_chr.16b	/* 128->64 */
+	shrn	vend.8b, vhas_chr.8h, 4	/* 128->64 */
+	sub	cntrem, src, srcin
	fmov	synd, dend
-	add	tmp, srcin, cntin
-	sub	cntrem, tmp, src
+	sub	cntrem, cntin, cntrem
 #ifndef __AARCH64EB__
	rbit	synd, synd
 #endif
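
The nibble-mask idea described in the new comment is compact enough to express with NEON intrinsics. Below is a minimal little-endian sketch, not part of the routines: chunk_syndrome and first_match_index are illustrative names, vshrn_n_u16 corresponds to the shrn instruction, and count-trailing-zeros stands in for the asm's rbit+clz pair.

#include <arm_neon.h>
#include <stdint.h>
#include <stddef.h>

/* Build the 64-bit nibble mask for one 16-byte chunk: 4 bits per byte,
   in string order.  */
static inline uint64_t
chunk_syndrome (uint8x16_t data, uint8_t c)
{
  uint8x16_t eq = vceqq_u8 (data, vdupq_n_u8 (c));  /* 0xff per matching byte */
  /* shrn: shift each 16-bit lane right by 4 and narrow, keeping one nibble
     per comparison byte.  */
  uint8x8_t nibbles = vshrn_n_u16 (vreinterpretq_u16_u8 (eq), 4);
  return vget_lane_u64 (vreinterpret_u64_u8 (nibbles), 0);
}

/* Nibble k of the syndrome corresponds to byte k of the chunk, so the first
   match is the trailing-zero count divided by 4 (assumes syndrome != 0).  */
static inline size_t
first_match_index (uint64_t syndrome)
{
  return (size_t) (__builtin_ctzll (syndrome) >> 2);
}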
diff --git a/contrib/arm-optimized-routines/string/aarch64/memchr-sve.S b/contrib/arm-optimized-routines/string/aarch64/memchr-sve.S
index c22e6596f19b..b851cf31f238 100644
--- a/contrib/arm-optimized-routines/string/aarch64/memchr-sve.S
+++ b/contrib/arm-optimized-routines/string/aarch64/memchr-sve.S
@@ -1,11 +1,11 @@
 /*
  * memchr - find a character in a memory zone
  *
- * Copyright (c) 2018-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2018-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
-#include "../asmdefs.h"
+#include "asmdefs.h"
 
 #if __ARM_FEATURE_SVE
 /* Assumptions:
diff --git a/contrib/arm-optimized-routines/string/aarch64/memchr.S b/contrib/arm-optimized-routines/string/aarch64/memchr.S
index 353f0d1eac53..fe6cfe2bc0e2 100644
--- a/contrib/arm-optimized-routines/string/aarch64/memchr.S
+++ b/contrib/arm-optimized-routines/string/aarch64/memchr.S
@@ -1,8 +1,8 @@
 /*
  * memchr - find a character in a memory zone
  *
- * Copyright (c) 2014-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2014-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 /* Assumptions:
@@ -11,7 +11,7 @@
  * Neon Available.
  */
 
-#include "../asmdefs.h"
+#include "asmdefs.h"
 
 /* Arguments and results.  */
 #define srcin		x0
diff --git a/contrib/arm-optimized-routines/string/aarch64/memcmp-sve.S b/contrib/arm-optimized-routines/string/aarch64/memcmp-sve.S
index 78c5ecaa4cdc..d52ce4555344 100644
--- a/contrib/arm-optimized-routines/string/aarch64/memcmp-sve.S
+++ b/contrib/arm-optimized-routines/string/aarch64/memcmp-sve.S
@@ -1,11 +1,11 @@
 /*
  * memcmp - compare memory
  *
- * Copyright (c) 2018-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2018-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
-#include "../asmdefs.h"
+#include "asmdefs.h"
 
 #if __ARM_FEATURE_SVE
 /* Assumptions:
diff --git a/contrib/arm-optimized-routines/string/aarch64/memcmp.S b/contrib/arm-optimized-routines/string/aarch64/memcmp.S
index 7ca1135edec7..35135e72cc8e 100644
--- a/contrib/arm-optimized-routines/string/aarch64/memcmp.S
+++ b/contrib/arm-optimized-routines/string/aarch64/memcmp.S
@@ -1,7 +1,7 @@
 /* memcmp - compare memory
  *
- * Copyright (c) 2013-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2013-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 /* Assumptions:
@@ -9,7 +9,7 @@
  * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
  */
 
-#include "../asmdefs.h"
+#include "asmdefs.h"
 
 #define src1	x0
 #define src2	x1
diff --git a/contrib/arm-optimized-routines/string/aarch64/memcpy-advsimd.S b/contrib/arm-optimized-routines/string/aarch64/memcpy-advsimd.S
index f97f2c3047b9..e6527d0dac2c 100644
--- a/contrib/arm-optimized-routines/string/aarch64/memcpy-advsimd.S
+++ b/contrib/arm-optimized-routines/string/aarch64/memcpy-advsimd.S
@@ -1,8 +1,8 @@
 /*
  * memcpy - copy memory area
  *
- * Copyright (c) 2019-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 /* Assumptions:
@@ -11,7 +11,7 @@
  *
  */
 
-#include "../asmdefs.h"
+#include "asmdefs.h"
 
 #define dstin	x0
 #define src	x1
diff --git a/contrib/arm-optimized-routines/string/aarch64/memcpy-sve.S b/contrib/arm-optimized-routines/string/aarch64/memcpy-sve.S
index f85e8009f3c5..e8a946d7db37 100644
--- a/contrib/arm-optimized-routines/string/aarch64/memcpy-sve.S
+++ b/contrib/arm-optimized-routines/string/aarch64/memcpy-sve.S
@@ -1,8 +1,8 @@
 /*
  * memcpy - copy memory area
  *
- * Copyright (c) 2019-2022, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 /* Assumptions:
@@ -11,9 +11,11 @@
  *
  */
 
-#if __ARM_FEATURE_SVE
+#include "asmdefs.h"
 
-#include "../asmdefs.h"
+#ifdef HAVE_SVE
+
+.arch armv8-a+sve
 
 #define dstin	x0
 #define src	x1
@@ -55,14 +57,16 @@ ENTRY (__memcpy_aarch64_sve)
	cmp	count, 128
	b.hi	L(copy_long)
-	cmp	count, 32
+	cntb	vlen
+	cmp	count, vlen, lsl 1
	b.hi	L(copy32_128)

	whilelo p0.b, xzr, count
-	cntb	vlen
-	tbnz	vlen, 4, L(vlen128)
-	ld1b	z0.b, p0/z, [src]
-	st1b	z0.b, p0, [dstin]
+	whilelo p1.b, vlen, count
+	ld1b	z0.b, p0/z, [src, 0, mul vl]
+	ld1b	z1.b, p1/z, [src, 1, mul vl]
+	st1b	z0.b, p0, [dstin, 0, mul vl]
+	st1b	z1.b, p1, [dstin, 1, mul vl]
	ret

	/* Medium copies: 33..128 bytes.  */
@@ -131,14 +135,6 @@ L(copy64_from_end):
	stp	A_q, B_q, [dstend, -32]
	ret

-L(vlen128):
-	whilelo p1.b, vlen, count
-	ld1b	z0.b, p0/z, [src, 0, mul vl]
-	ld1b	z1.b, p1/z, [src, 1, mul vl]
-	st1b	z0.b, p0, [dstin, 0, mul vl]
-	st1b	z1.b, p1, [dstin, 1, mul vl]
-	ret
-
	/* Large backwards copy for overlapping copies.
	   Copy 16 bytes and then align srcend to 16-byte alignment.  */
 L(copy_long_backwards):
@@ -177,4 +173,5 @@ L(return):
	ret

 END (__memcpy_aarch64_sve)
+
 #endif
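
The rewritten small-copy entry above covers 0..2*VL bytes with two predicated vectors instead of branching to the old L(vlen128) case. A hedged sketch of the same idea in SVE ACLE intrinsics (requires an SVE-enabled target, e.g. -march=armv8-a+sve; the function name is illustrative):

#include <arm_sve.h>
#include <stdint.h>
#include <stddef.h>

/* Two while-predicates give exact byte masking, so there is no overread
   or overwrite, mirroring the whilelo p0/p1 pair and "mul vl" addressing. */
static void
copy_up_to_two_vectors (uint8_t *dst, const uint8_t *src, size_t n)
{
  uint64_t vl = svcntb ();                        /* cntb vlen */
  svbool_t p0 = svwhilelt_b8 ((uint64_t) 0, (uint64_t) n);
  svbool_t p1 = svwhilelt_b8 (vl, (uint64_t) n);  /* all-false when n <= VL */
  svst1_u8 (p0, dst, svld1_u8 (p0, src));
  svst1_u8 (p1, dst + vl, svld1_u8 (p1, src + vl));
}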
diff --git a/contrib/arm-optimized-routines/string/aarch64/memcpy.S b/contrib/arm-optimized-routines/string/aarch64/memcpy.S
index dd254f6f9929..7c0606e2104a 100644
--- a/contrib/arm-optimized-routines/string/aarch64/memcpy.S
+++ b/contrib/arm-optimized-routines/string/aarch64/memcpy.S
@@ -1,8 +1,8 @@
 /*
  * memcpy - copy memory area
  *
- * Copyright (c) 2012-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2012-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 /* Assumptions:
@@ -11,7 +11,7 @@
  *
  */
 
-#include "../asmdefs.h"
+#include "asmdefs.h"
 
 #define dstin	x0
 #define src	x1
diff --git a/contrib/arm-optimized-routines/string/aarch64/memrchr.S b/contrib/arm-optimized-routines/string/aarch64/memrchr.S
index 7b4be847cecb..6418bdf56f41 100644
--- a/contrib/arm-optimized-routines/string/aarch64/memrchr.S
+++ b/contrib/arm-optimized-routines/string/aarch64/memrchr.S
@@ -1,8 +1,8 @@
 /*
  * memrchr - find last character in a memory zone.
  *
- * Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2020-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 /* Assumptions:
@@ -11,7 +11,7 @@
  * MTE compatible.
  */
 
-#include "../asmdefs.h"
+#include "asmdefs.h"
 
 #define srcin	x0
 #define chrin	w1
@@ -23,7 +23,6 @@
 #define synd	x5
 #define shift	x6
 #define tmp	x7
-#define wtmp	w7
 #define end	x8
 #define endm1	x9
 
@@ -31,19 +30,16 @@
 #define qdata		q1
 #define vdata		v1
 #define vhas_chr	v2
-#define vrepmask	v3
-#define vend		v4
-#define dend		d4
+#define vend		v3
+#define dend		d3
 
 /*
    Core algorithm:
-
-   For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
-   per byte. For even bytes, bits 0-3 are set if the relevant byte matched the
-   requested character or the byte is NUL. Bits 4-7 must be zero. Bits 4-7 are
-   set likewise for odd bytes so that adjacent bytes can be merged. Since the
-   bits in the syndrome reflect the order in which things occur in the original
-   string, counting trailing zeros identifies exactly which byte matched.  */
+   For each 16-byte chunk we calculate a 64-bit nibble mask value with four bits
+   per byte. We take 4 bits of every comparison byte with shift right and narrow
+   by 4 instruction. Since the bits in the nibble mask reflect the order in
+   which things occur in the original string, counting leading zeros identifies
+   exactly which byte matched. */

 ENTRY (__memrchr_aarch64)
	PTR_ARG (0)
@@ -53,12 +49,9 @@ ENTRY (__memrchr_aarch64)
	cbz	cntin, L(nomatch)
	ld1	{vdata.16b}, [src]
	dup	vrepchr.16b, chrin
-	mov	wtmp, 0xf00f
-	dup	vrepmask.8h, wtmp
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	neg	shift, end, lsl 2
-	and	vhas_chr.16b, vhas_chr.16b, vrepmask.16b
-	addp	vend.16b, vhas_chr.16b, vhas_chr.16b	/* 128->64 */
+	shrn	vend.8b, vhas_chr.8h, 4	/* 128->64 */
	fmov	synd, dend
	lsl	synd, synd, shift
	cbz	synd, L(start_loop)
@@ -69,34 +62,36 @@ ENTRY (__memrchr_aarch64)
	csel	result, result, xzr, hi
	ret

+	nop
 L(start_loop):
-	sub	tmp, end, src
-	subs	cntrem, cntin, tmp
+	subs	cntrem, src, srcin
	b.ls	L(nomatch)

	/* Make sure that it won't overread by a 16-byte chunk */
-	add	tmp, cntrem, 15
-	tbnz	tmp, 4, L(loop32_2)
+	sub	cntrem, cntrem, 1
+	tbz	cntrem, 4, L(loop32_2)
+	add	src, src, 16

-	.p2align 4
+	.p2align 5
 L(loop32):
-	ldr	qdata, [src, -16]!
+	ldr	qdata, [src, -32]!
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b	/* 128->64 */
	fmov	synd, dend
	cbnz	synd, L(end)

 L(loop32_2):
-	ldr	qdata, [src, -16]!
+	ldr	qdata, [src, -16]
	subs	cntrem, cntrem, 32
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
-	b.ls	L(end)
+	b.lo	L(end_2)
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b	/* 128->64 */
	fmov	synd, dend
	cbz	synd, L(loop32)
+L(end_2):
+	sub	src, src, 16
 L(end):
-	and	vhas_chr.16b, vhas_chr.16b, vrepmask.16b
-	addp	vend.16b, vhas_chr.16b, vhas_chr.16b	/* 128->64 */
+	shrn	vend.8b, vhas_chr.8h, 4	/* 128->64 */
	fmov	synd, dend

	add	tmp, src, 15
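
memrchr wants the last matching byte of a chunk rather than the first, which is why the little-endian rbit disappears before clz in L(end) above. The equivalent bit arithmetic in C, as a hedged sketch (illustrative helper name, assumes a non-zero syndrome):

#include <stdint.h>

/* Highest set nibble of the syndrome = last matching byte of the chunk.  */
static inline int
last_match_index (uint64_t syndrome)
{
  return (63 - __builtin_clzll (syndrome)) >> 2;  /* byte index 0..15 */
}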
diff --git a/contrib/arm-optimized-routines/string/aarch64/memset.S b/contrib/arm-optimized-routines/string/aarch64/memset.S
index 9fcd97579913..553b0fcaefea 100644
--- a/contrib/arm-optimized-routines/string/aarch64/memset.S
+++ b/contrib/arm-optimized-routines/string/aarch64/memset.S
@@ -1,8 +1,8 @@
 /*
  * memset - fill memory with a constant byte
  *
- * Copyright (c) 2012-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2012-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 /* Assumptions:
@@ -11,7 +11,7 @@
  *
  */
 
-#include "../asmdefs.h"
+#include "asmdefs.h"
 
 #define dstin	x0
 #define val	x1
diff --git a/contrib/arm-optimized-routines/string/aarch64/stpcpy-sve.S b/contrib/arm-optimized-routines/string/aarch64/stpcpy-sve.S
index 82dd9717b0a0..5d3f14b86026 100644
--- a/contrib/arm-optimized-routines/string/aarch64/stpcpy-sve.S
+++ b/contrib/arm-optimized-routines/string/aarch64/stpcpy-sve.S
@@ -2,7 +2,7 @@
  * stpcpy - copy a string returning pointer to end.
  *
  * Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #define BUILD_STPCPY 1
diff --git a/contrib/arm-optimized-routines/string/aarch64/stpcpy.S b/contrib/arm-optimized-routines/string/aarch64/stpcpy.S
index 4f62aa462389..155c68d75a7b 100644
--- a/contrib/arm-optimized-routines/string/aarch64/stpcpy.S
+++ b/contrib/arm-optimized-routines/string/aarch64/stpcpy.S
@@ -2,7 +2,7 @@
  * stpcpy - copy a string returning pointer to end.
  *
  * Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #define BUILD_STPCPY 1
diff --git a/contrib/arm-optimized-routines/string/aarch64/strchr-mte.S b/contrib/arm-optimized-routines/string/aarch64/strchr-mte.S
index dcb0e4625870..6ec08f7acc76 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strchr-mte.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strchr-mte.S
@@ -1,8 +1,8 @@
 /*
  * strchr - find a character in a string
  *
- * Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2020-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 /* Assumptions:
@@ -11,7 +11,7 @@
  * MTE compatible.
  */
 
-#include "../asmdefs.h"
+#include "asmdefs.h"
 
 #define srcin	x0
 #define chrin	w1
@@ -19,8 +19,7 @@
 
 #define src	x2
 #define tmp1	x1
-#define wtmp2	w3
-#define tmp3	x3
+#define tmp2	x3
 
 #define vrepchr		v0
 #define vdata		v1
@@ -28,39 +27,30 @@
 #define vhas_nul	v2
 #define vhas_chr	v3
 #define vrepmask	v4
-#define vrepmask2	v5
-#define vend		v6
-#define dend		d6
+#define vend		v5
+#define dend		d5
 
 /* Core algorithm.
 
    For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
-   per byte. For even bytes, bits 0-1 are set if the relevant byte matched the
-   requested character, bits 2-3 are set if the byte is NUL (or matched), and
-   bits 4-7 are not used and must be zero if none of bits 0-3 are set). Odd
-   bytes set bits 4-7 so that adjacent bytes can be merged. Since the bits
-   in the syndrome reflect the order in which things occur in the original
-   string, counting trailing zeros identifies exactly which byte matched.  */
+   per byte. Bits 0-1 are set if the relevant byte matched the requested
+   character, bits 2-3 are set if the byte is NUL or matched. Count trailing
+   zeroes gives the position of the matching byte if it is a multiple of 4.
+   If it is not a multiple of 4, there was no match.  */

 ENTRY (__strchr_aarch64_mte)
	PTR_ARG (0)
	bic	src, srcin, 15
	dup	vrepchr.16b, chrin
	ld1	{vdata.16b}, [src]
-	mov	wtmp2, 0x3003
-	dup	vrepmask.8h, wtmp2
+	movi	vrepmask.16b, 0x33
	cmeq	vhas_nul.16b, vdata.16b, 0
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
-	mov	wtmp2, 0xf00f
-	dup	vrepmask2.8h, wtmp2
	bit	vhas_nul.16b, vhas_chr.16b, vrepmask.16b
-	and	vhas_nul.16b, vhas_nul.16b, vrepmask2.16b
-	lsl	tmp3, srcin, 2
-	addp	vend.16b, vhas_nul.16b, vhas_nul.16b	/* 128->64 */
-
+	lsl	tmp2, srcin, 2
+	shrn	vend.8b, vhas_nul.8h, 4	/* 128->64 */
	fmov	tmp1, dend
-	lsr	tmp1, tmp1, tmp3
+	lsr	tmp1, tmp1, tmp2
	cbz	tmp1, L(loop)

	rbit	tmp1, tmp1
@@ -74,28 +64,34 @@ ENTRY (__strchr_aarch64_mte)

	.p2align 4
 L(loop):
-	ldr	qdata, [src, 16]!
+	ldr	qdata, [src, 16]
+	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
+	cmhs	vhas_nul.16b, vhas_chr.16b, vdata.16b
+	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
+	fmov	tmp1, dend
+	cbnz	tmp1, L(end)
+	ldr	qdata, [src, 32]!
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	cmhs	vhas_nul.16b, vhas_chr.16b, vdata.16b
	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	tmp1, dend
	cbz	tmp1, L(loop)
-
+	sub	src, src, 16
+L(end):
 #ifdef __AARCH64EB__
	bif	vhas_nul.16b, vhas_chr.16b, vrepmask.16b
-	and	vhas_nul.16b, vhas_nul.16b, vrepmask2.16b
-	addp	vend.16b, vhas_nul.16b, vhas_nul.16b	/* 128->64 */
+	shrn	vend.8b, vhas_nul.8h, 4	/* 128->64 */
	fmov	tmp1, dend
 #else
	bit	vhas_nul.16b, vhas_chr.16b, vrepmask.16b
-	and	vhas_nul.16b, vhas_nul.16b, vrepmask2.16b
-	addp	vend.16b, vhas_nul.16b, vhas_nul.16b	/* 128->64 */
+	shrn	vend.8b, vhas_nul.8h, 4	/* 128->64 */
	fmov	tmp1, dend
	rbit	tmp1, tmp1
 #endif
+	add	src, src, 16
	clz	tmp1, tmp1
-	/* Tmp1 is an even multiple of 2 if the target character was
-	   found first. Otherwise we've found the end of string.  */
+	/* Tmp1 is a multiple of 4 if the target character was found.  */
	tst	tmp1, 2
	add	result, src, tmp1, lsr 2
	csel	result, result, xzr, eq
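
The new strchr comment describes a merged char/NUL syndrome: the 0x33 vrepmask keeps character matches in bits 0-1 of each nibble and NUL matches in bits 2-3, so a single count-trailing-zeros answers both "where" and "which came first". A little-endian intrinsics sketch of one chunk's test (check16 is an illustrative name; vbslq_u8 plays the role of the bit instruction):

#include <arm_neon.h>
#include <stdint.h>

/* Returns the byte index of c within the 16-byte chunk, -1 if a NUL comes
   first, or -2 if the chunk contains neither.  */
static inline int
check16 (uint8x16_t data, uint8_t c)
{
  uint8x16_t chr = vceqq_u8 (data, vdupq_n_u8 (c));
  uint8x16_t nul = vceqq_u8 (data, vdupq_n_u8 (0));
  /* Select char-match bits where the 0x33 mask is set, NUL bits elsewhere,
     mirroring "bit vhas_nul, vhas_chr, vrepmask".  */
  uint8x16_t merged = vbslq_u8 (vdupq_n_u8 (0x33), chr, nul);
  uint64_t synd = vget_lane_u64 (vreinterpret_u64_u8 (
      vshrn_n_u16 (vreinterpretq_u16_u8 (merged), 4)), 0);
  if (synd == 0)
    return -2;
  int bit = __builtin_ctzll (synd);
  return (bit & 2) ? -1 : (bit >> 2);  /* the "tst tmp1, 2" in the asm */
}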
diff --git a/contrib/arm-optimized-routines/string/aarch64/strchr-sve.S b/contrib/arm-optimized-routines/string/aarch64/strchr-sve.S
index 13ba9f44f9c5..ff075167bfef 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strchr-sve.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strchr-sve.S
@@ -1,11 +1,11 @@
 /*
  * strchr/strchrnul - find a character in a string
  *
- * Copyright (c) 2018-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2018-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
-#include "../asmdefs.h"
+#include "asmdefs.h"
 
 #if __ARM_FEATURE_SVE
 /* Assumptions:
diff --git a/contrib/arm-optimized-routines/string/aarch64/strchr.S b/contrib/arm-optimized-routines/string/aarch64/strchr.S
index 1063cbfd77aa..37193bd947a7 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strchr.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strchr.S
@@ -1,8 +1,8 @@
 /*
  * strchr - find a character in a string
  *
- * Copyright (c) 2014-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2014-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 /* Assumptions:
@@ -11,7 +11,7 @@
  * Neon Available.
  */
 
-#include "../asmdefs.h"
+#include "asmdefs.h"
 
 /* Arguments and results.  */
 #define srcin		x0
diff --git a/contrib/arm-optimized-routines/string/aarch64/strchrnul-mte.S b/contrib/arm-optimized-routines/string/aarch64/strchrnul-mte.S
index 1b0d0a63094c..543ee88bb285 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strchrnul-mte.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strchrnul-mte.S
@@ -1,8 +1,8 @@
 /*
  * strchrnul - find a character or nul in a string
  *
- * Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2020-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 /* Assumptions:
@@ -11,7 +11,7 @@
  * MTE compatible.
  */
 
-#include "../asmdefs.h"
+#include "asmdefs.h"
 
 #define srcin	x0
 #define chrin	w1
@@ -20,38 +20,32 @@
 #define src	x2
 #define tmp1	x1
 #define tmp2	x3
-#define tmp2w	w3
 
 #define vrepchr		v0
 #define vdata		v1
 #define qdata		q1
 #define vhas_nul	v2
 #define vhas_chr	v3
-#define vrepmask	v4
-#define vend		v5
-#define dend		d5
+#define vend		v4
+#define dend		d4
 
-/* Core algorithm:
-
-   For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
-   per byte. For even bytes, bits 0-3 are set if the relevant byte matched the
-   requested character or the byte is NUL. Bits 4-7 must be zero. Bits 4-7 are
-   set likewise for odd bytes so that adjacent bytes can be merged. Since the
-   bits in the syndrome reflect the order in which things occur in the original
-   string, counting trailing zeros identifies exactly which byte matched.  */
+/*
+   Core algorithm:
+   For each 16-byte chunk we calculate a 64-bit nibble mask value with four bits
+   per byte. We take 4 bits of every comparison byte with shift right and narrow
+   by 4 instruction. Since the bits in the nibble mask reflect the order in
+   which things occur in the original string, counting leading zeros identifies
+   exactly which byte matched. */

 ENTRY (__strchrnul_aarch64_mte)
	PTR_ARG (0)
	bic	src, srcin, 15
	dup	vrepchr.16b, chrin
	ld1	{vdata.16b}, [src]
-	mov	tmp2w, 0xf00f
-	dup	vrepmask.8h, tmp2w
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	cmhs	vhas_chr.16b, vhas_chr.16b, vdata.16b
	lsl	tmp2, srcin, 2
-	and	vhas_chr.16b, vhas_chr.16b, vrepmask.16b
-	addp	vend.16b, vhas_chr.16b, vhas_chr.16b	/* 128->64 */
+	shrn	vend.8b, vhas_chr.8h, 4	/* 128->64 */
	fmov	tmp1, dend
	lsr	tmp1, tmp1, tmp2	/* Mask padding bits.  */
	cbz	tmp1, L(loop)
@@ -63,15 +57,22 @@ ENTRY (__strchrnul_aarch64_mte)

	.p2align 4
 L(loop):
-	ldr	qdata, [src, 16]!
+	ldr	qdata, [src, 16]
+	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
+	cmhs	vhas_chr.16b, vhas_chr.16b, vdata.16b
+	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b
+	fmov	tmp1, dend
+	cbnz	tmp1, L(end)
+	ldr	qdata, [src, 32]!
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	cmhs	vhas_chr.16b, vhas_chr.16b, vdata.16b
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b
	fmov	tmp1, dend
	cbz	tmp1, L(loop)
-
-	and	vhas_chr.16b, vhas_chr.16b, vrepmask.16b
-	addp	vend.16b, vhas_chr.16b, vhas_chr.16b	/* 128->64 */
+	sub	src, src, 16
+L(end):
+	shrn	vend.8b, vhas_chr.8h, 4	/* 128->64 */
+	add	src, src, 16
	fmov	tmp1, dend
 #ifndef __AARCH64EB__
	rbit	tmp1, tmp1
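
The restructured loop above probes [src, 16] without writeback and then [src, 32] with writeback, so the loop advances 32 bytes per iteration while still recording which of the two chunks hit; the same shape recurs in the strcpy, strlen and strrchr diffs below. A minimal little-endian sketch of that control flow (illustrative names; has_zero stands in for the cmeq/umaxp/fmov sequence):

#include <arm_neon.h>
#include <stdint.h>

static inline int
has_zero (uint8x16_t v)
{
  return vmaxvq_u8 (vceqq_u8 (v, vdupq_n_u8 (0))) != 0;
}

/* src must be 16-byte aligned; returns the first chunk containing a NUL.  */
static const uint8_t *
find_zero_chunk (const uint8_t *src)
{
  for (;;)
    {
      if (has_zero (vld1q_u8 (src + 16)))   /* ldr qdata, [src, 16] */
        return src + 16;
      src += 32;                            /* ldr qdata, [src, 32]! */
      if (has_zero (vld1q_u8 (src)))
        return src;
    }
}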
diff --git a/contrib/arm-optimized-routines/string/aarch64/strchrnul-sve.S b/contrib/arm-optimized-routines/string/aarch64/strchrnul-sve.S
index 428ff1a3d008..0005f9177514 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strchrnul-sve.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strchrnul-sve.S
@@ -2,7 +2,7 @@
  * strchrnul - find a character or nul in a string
  *
  * Copyright (c) 2018-2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #define BUILD_STRCHRNUL
diff --git a/contrib/arm-optimized-routines/string/aarch64/strchrnul.S b/contrib/arm-optimized-routines/string/aarch64/strchrnul.S
index a4230d919b47..666e8d0304c1 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strchrnul.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strchrnul.S
@@ -1,8 +1,8 @@
 /*
  * strchrnul - find a character or nul in a string
  *
- * Copyright (c) 2014-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2014-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 /* Assumptions:
@@ -11,7 +11,7 @@
  * Neon Available.
  */
 
-#include "../asmdefs.h"
+#include "asmdefs.h"
 
 /* Arguments and results.  */
 #define srcin		x0
diff --git a/contrib/arm-optimized-routines/string/aarch64/strcmp-sve.S b/contrib/arm-optimized-routines/string/aarch64/strcmp-sve.S
index e6d2da5411ca..eaf909a378f1 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strcmp-sve.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strcmp-sve.S
@@ -1,11 +1,11 @@
 /*
  * __strcmp_aarch64_sve - compare two strings
  *
- * Copyright (c) 2018-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2018-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
-#include "../asmdefs.h"
+#include "asmdefs.h"
 
 #if __ARM_FEATURE_SVE
 /* Assumptions:
diff --git a/contrib/arm-optimized-routines/string/aarch64/strcmp.S b/contrib/arm-optimized-routines/string/aarch64/strcmp.S
index 6e77845ae6ff..137a9aa06681 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strcmp.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strcmp.S
@@ -2,7 +2,7 @@
  * strcmp - compare two strings
  *
  * Copyright (c) 2012-2022, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 
@@ -12,7 +12,7 @@
  * MTE compatible.
  */
 
-#include "../asmdefs.h"
+#include "asmdefs.h"
 
 #define REP8_01 0x0101010101010101
 #define REP8_7f 0x7f7f7f7f7f7f7f7f
diff --git a/contrib/arm-optimized-routines/string/aarch64/strcpy-sve.S b/contrib/arm-optimized-routines/string/aarch64/strcpy-sve.S
index f515462e09ae..00e72dce4451 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strcpy-sve.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strcpy-sve.S
@@ -1,11 +1,11 @@
 /*
  * strcpy/stpcpy - copy a string returning pointer to start/end.
  *
- * Copyright (c) 2018-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2018-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
-#include "../asmdefs.h"
+#include "asmdefs.h"
 
 #if __ARM_FEATURE_SVE
 /* Assumptions:
diff --git a/contrib/arm-optimized-routines/string/aarch64/strcpy.S b/contrib/arm-optimized-routines/string/aarch64/strcpy.S
index b99e49403be8..97ae37ea4229 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strcpy.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strcpy.S
@@ -1,8 +1,8 @@
 /*
  * strcpy/stpcpy - copy a string returning pointer to start/end.
  *
- * Copyright (c) 2020-2022, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2020-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 /* Assumptions:
@@ -11,7 +11,7 @@
  * MTE compatible.
  */
 
-#include "../asmdefs.h"
+#include "asmdefs.h"
 
 #define dstin	x0
 #define srcin	x1
@@ -22,7 +22,6 @@
 #define len	x4
 #define synd	x4
 #define tmp	x5
-#define wtmp	w5
 #define shift	x5
 #define data1	x6
 #define dataw1	w6
@@ -32,9 +31,8 @@
 #define dataq		q0
 #define vdata		v0
 #define vhas_nul	v1
-#define vrepmask	v2
-#define vend		v3
-#define dend		d3
+#define vend		v2
+#define dend		d2
 #define dataq2		q1
 
 #ifdef BUILD_STPCPY
@@ -45,34 +43,29 @@
 # define IFSTPCPY(X,...)
 #endif
 
-/* Core algorithm:
-
-   For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
-   per byte. For even bytes, bits 0-3 are set if the relevant byte matched the
-   requested character or the byte is NUL. Bits 4-7 must be zero. Bits 4-7 are
-   set likewise for odd bytes so that adjacent bytes can be merged. Since the
-   bits in the syndrome reflect the order in which things occur in the original
-   string, counting trailing zeros identifies exactly which byte matched.  */
+/*
+   Core algorithm:
+   For each 16-byte chunk we calculate a 64-bit nibble mask value with four bits
+   per byte. We take 4 bits of every comparison byte with shift right and narrow
+   by 4 instruction. Since the bits in the nibble mask reflect the order in
+   which things occur in the original string, counting leading zeros identifies
+   exactly which byte matched. */

 ENTRY (STRCPY)
	PTR_ARG (0)
	PTR_ARG (1)
	bic	src, srcin, 15
-	mov	wtmp, 0xf00f
	ld1	{vdata.16b}, [src]
-	dup	vrepmask.8h, wtmp
	cmeq	vhas_nul.16b, vdata.16b, 0
	lsl	shift, srcin, 2
-	and	vhas_nul.16b, vhas_nul.16b, vrepmask.16b
-	addp	vend.16b, vhas_nul.16b, vhas_nul.16b
+	shrn	vend.8b, vhas_nul.8h, 4
	fmov	synd, dend
	lsr	synd, synd, shift
	cbnz	synd, L(tail)

	ldr	dataq, [src, 16]!
	cmeq	vhas_nul.16b, vdata.16b, 0
-	and	vhas_nul.16b, vhas_nul.16b, vrepmask.16b
-	addp	vend.16b, vhas_nul.16b, vhas_nul.16b
+	shrn	vend.8b, vhas_nul.8h, 4
	fmov	synd, dend
	cbz	synd, L(start_loop)
@@ -91,13 +84,10 @@ ENTRY (STRCPY)
	IFSTPCPY (add result, dstin, len)
	ret

-	.p2align 4,,8
 L(tail):
	rbit	synd, synd
	clz	len, synd
	lsr	len, len, 2
-
-	.p2align 4
 L(less16):
	tbz	len, 3, L(less8)
	sub	tmp, len, 7
@@ -130,32 +120,37 @@ L(zerobyte):

	.p2align 4
 L(start_loop):
-	sub	len, src, srcin
+	sub	tmp, srcin, dstin
	ldr	dataq2, [srcin]
-	add	dst, dstin, len
+	sub	dst, src, tmp
	str	dataq2, [dstin]
-
-	.p2align 5
 L(loop):
-	str	dataq, [dst], 16
-	ldr	dataq, [src, 16]!
+	str	dataq, [dst], 32
+	ldr	dataq, [src, 16]
+	cmeq	vhas_nul.16b, vdata.16b, 0
+	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
+	fmov	synd, dend
+	cbnz	synd, L(loopend)
+	str	dataq, [dst, -16]
+	ldr	dataq, [src, 32]!
	cmeq	vhas_nul.16b, vdata.16b, 0
	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	synd, dend
	cbz	synd, L(loop)
-
-	and	vhas_nul.16b, vhas_nul.16b, vrepmask.16b
-	addp	vend.16b, vhas_nul.16b, vhas_nul.16b	/* 128->64 */
+	add	dst, dst, 16
+L(loopend):
+	shrn	vend.8b, vhas_nul.8h, 4	/* 128->64 */
	fmov	synd, dend
+	sub	dst, dst, 31
 #ifndef __AARCH64EB__
	rbit	synd, synd
 #endif
	clz	len, synd
	lsr	len, len, 2
-	sub	tmp, len, 15
-	ldr	dataq, [src, tmp]
-	str	dataq, [dst, tmp]
-	IFSTPCPY (add result, dst, len)
+	add	dst, dst, len
+	ldr	dataq, [dst, tmp]
+	str	dataq, [dst]
+	IFSTPCPY (add result, dst, 15)
	ret

 END (STRCPY)
diff --git a/contrib/arm-optimized-routines/string/aarch64/strlen-mte.S b/contrib/arm-optimized-routines/string/aarch64/strlen-mte.S
index 7cf41d5c1eac..77235797f7c5 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strlen-mte.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strlen-mte.S
@@ -1,8 +1,8 @@
 /*
  * strlen - calculate the length of a string.
  *
- * Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2020-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 /* Assumptions:
@@ -11,7 +11,7 @@
  * MTE compatible.
  */
 
-#include "../asmdefs.h"
+#include "asmdefs.h"
 
 #define srcin	x0
 #define result	x0
@@ -19,35 +19,26 @@
 #define src	x1
 #define synd	x2
 #define tmp	x3
-#define wtmp	w3
 #define shift	x4
 
 #define data		q0
 #define vdata		v0
 #define vhas_nul	v1
-#define vrepmask	v2
-#define vend		v3
-#define dend		d3
+#define vend		v2
+#define dend		d2
 
 /* Core algorithm:
-
-   For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
-   per byte. For even bytes, bits 0-3 are set if the relevant byte matched the
-   requested character or the byte is NUL. Bits 4-7 must be zero. Bits 4-7 are
-   set likewise for odd bytes so that adjacent bytes can be merged. Since the
-   bits in the syndrome reflect the order in which things occur in the original
-   string, counting trailing zeros identifies exactly which byte matched.  */
+   Process the string in 16-byte aligned chunks. Compute a 64-bit mask with
+   four bits per byte using the shrn instruction. A count trailing zeros then
+   identifies the first zero byte.  */

 ENTRY (__strlen_aarch64_mte)
	PTR_ARG (0)
	bic	src, srcin, 15
-	mov	wtmp, 0xf00f
	ld1	{vdata.16b}, [src]
-	dup	vrepmask.8h, wtmp
	cmeq	vhas_nul.16b, vdata.16b, 0
	lsl	shift, srcin, 2
-	and	vhas_nul.16b, vhas_nul.16b, vrepmask.16b
-	addp	vend.16b, vhas_nul.16b, vhas_nul.16b	/* 128->64 */
+	shrn	vend.8b, vhas_nul.8h, 4	/* 128->64 */
	fmov	synd, dend
	lsr	synd, synd, shift
	cbz	synd, L(loop)
@@ -59,19 +50,25 @@ ENTRY (__strlen_aarch64_mte)

	.p2align 5
 L(loop):
-	ldr	data, [src, 16]!
+	ldr	data, [src, 16]
+	cmeq	vhas_nul.16b, vdata.16b, 0
+	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
+	fmov	synd, dend
+	cbnz	synd, L(loop_end)
+	ldr	data, [src, 32]!
	cmeq	vhas_nul.16b, vdata.16b, 0
	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	synd, dend
	cbz	synd, L(loop)
-
-	and	vhas_nul.16b, vhas_nul.16b, vrepmask.16b
-	addp	vend.16b, vhas_nul.16b, vhas_nul.16b	/* 128->64 */
+	sub	src, src, 16
+L(loop_end):
+	shrn	vend.8b, vhas_nul.8h, 4	/* 128->64 */
	sub	result, src, srcin
	fmov	synd, dend
 #ifndef __AARCH64EB__
	rbit	synd, synd
 #endif
+	add	result, result, 16
	clz	tmp, synd
	add	result, result, tmp, lsr 2
	ret
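
Putting the pieces together, the strlen-mte entry path aligns down, builds the NUL syndrome, and shifts out the bits belonging to bytes before the string start (the "lsl shift, srcin, 2" trick) before entering the loop. A hedged little-endian sketch (strlen_sketch and nul_syndrome are illustrative names; the 2x-unrolled loop is collapsed into a simple one, and the aligned 16-byte read before the string start is the same in-granule overread the asm relies on):

#include <arm_neon.h>
#include <stdint.h>
#include <stddef.h>

static inline uint64_t
nul_syndrome (uint8x16_t v)
{
  uint16x8_t eq = vreinterpretq_u16_u8 (vceqq_u8 (v, vdupq_n_u8 (0)));
  return vget_lane_u64 (vreinterpret_u64_u8 (vshrn_n_u16 (eq, 4)), 0);
}

size_t
strlen_sketch (const char *s)
{
  const uint8_t *p = (const uint8_t *) ((uintptr_t) s & ~(uintptr_t) 15);
  /* Discard syndrome bits for bytes that precede s.  */
  uint64_t synd = nul_syndrome (vld1q_u8 (p)) >> (((uintptr_t) s & 15) << 2);
  size_t off = 0;
  while (synd == 0)
    {
      p += 16;
      off = (size_t) (p - (const uint8_t *) s);
      synd = nul_syndrome (vld1q_u8 (p));
    }
  return off + (__builtin_ctzll (synd) >> 2);
}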
diff --git a/contrib/arm-optimized-routines/string/aarch64/strlen-sve.S b/contrib/arm-optimized-routines/string/aarch64/strlen-sve.S
index 2392493f1a3c..12ebbdba5c93 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strlen-sve.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strlen-sve.S
@@ -1,11 +1,11 @@
 /*
  * __strlen_aarch64_sve - compute the length of a string
  *
- * Copyright (c) 2018-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2018-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
-#include "../asmdefs.h"
+#include "asmdefs.h"
 
 #if __ARM_FEATURE_SVE
 /* Assumptions:
diff --git a/contrib/arm-optimized-routines/string/aarch64/strlen.S b/contrib/arm-optimized-routines/string/aarch64/strlen.S
index a1b164a49238..6f6f08f636b2 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strlen.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strlen.S
@@ -1,8 +1,8 @@
 /*
  * strlen - calculate the length of a string.
  *
- * Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2020-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 /* Assumptions:
@@ -11,7 +11,7 @@
  * Not MTE compatible.
  */
 
-#include "../asmdefs.h"
+#include "asmdefs.h"
 
 #define srcin	x0
 #define len	x0
@@ -36,6 +36,7 @@
 #define tmp	x2
 #define tmpw	w2
 #define synd	x3
+#define syndw	w3
 #define shift	x4
 
 /* For the first 32 bytes, NUL detection works on the principle that
@@ -110,7 +111,6 @@ ENTRY (__strlen_aarch64)
	add	len, len, tmp1, lsr 3
	ret

-	.p2align 3
	/* Look for a NUL byte at offset 16..31 in the string.  */
 L(bytes16_31):
	ldp	data1, data2, [srcin, 16]
@@ -138,6 +138,7 @@ L(bytes16_31):
	add	len, len, tmp1, lsr 3
	ret

+	nop
 L(loop_entry):
	bic	src, srcin, 31

@@ -153,18 +154,12 @@ L(loop):
	/* Low 32 bits of synd are non-zero if a NUL was found in datav1.  */
	cmeq	maskv.16b, datav1.16b, 0
	sub	len, src, srcin
-	tst	synd, 0xffffffff
-	b.ne	1f
+	cbnz	syndw, 1f
	cmeq	maskv.16b, datav2.16b, 0
	add	len, len, 16
 1:
	/* Generate a bitmask and compute correct byte offset.  */
-#ifdef __AARCH64EB__
-	bic	maskv.8h, 0xf0
-#else
-	bic	maskv.8h, 0x0f, lsl 8
-#endif
-	umaxp	maskv.16b, maskv.16b, maskv.16b
+	shrn	maskv.8b, maskv.8h, 4
	fmov	synd, maskd
 #ifndef __AARCH64EB__
	rbit	synd, synd
@@ -173,8 +168,6 @@ L(loop):
	add	len, len, tmp, lsr 2
	ret

-	.p2align 4
-
 L(page_cross):
	bic	src, srcin, 31
	mov	tmpw, 0x0c03
diff --git a/contrib/arm-optimized-routines/string/aarch64/strncmp-sve.S b/contrib/arm-optimized-routines/string/aarch64/strncmp-sve.S
index 234190e245b0..6a9e9f7b6437 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strncmp-sve.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strncmp-sve.S
@@ -1,11 +1,11 @@
 /*
  * strncmp - compare two strings with limit
  *
- * Copyright (c) 2018-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2018-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
-#include "../asmdefs.h"
+#include "asmdefs.h"
 
 #if __ARM_FEATURE_SVE
 /* Assumptions:
diff --git a/contrib/arm-optimized-routines/string/aarch64/strncmp.S b/contrib/arm-optimized-routines/string/aarch64/strncmp.S
index 7e636b4a593d..128a10c52bb1 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strncmp.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strncmp.S
@@ -2,7 +2,7 @@
  * strncmp - compare two strings
  *
  * Copyright (c) 2013-2022, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 /* Assumptions:
@@ -11,7 +11,7 @@
  * MTE compatible.
  */
 
-#include "../asmdefs.h"
+#include "asmdefs.h"
 
 #define REP8_01 0x0101010101010101
 #define REP8_7f 0x7f7f7f7f7f7f7f7f
diff --git a/contrib/arm-optimized-routines/string/aarch64/strnlen-sve.S b/contrib/arm-optimized-routines/string/aarch64/strnlen-sve.S
index 5b9ebf7763bc..6c43dc427da7 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strnlen-sve.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strnlen-sve.S
@@ -1,11 +1,11 @@
 /*
  * strnlen - calculate the length of a string with limit.
  *
- * Copyright (c) 2019-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
-#include "../asmdefs.h"
+#include "asmdefs.h"
 
 #if __ARM_FEATURE_SVE
 /* Assumptions:
diff --git a/contrib/arm-optimized-routines/string/aarch64/strnlen.S b/contrib/arm-optimized-routines/string/aarch64/strnlen.S
index 48d2495d2082..f2090a7485a5 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strnlen.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strnlen.S
@@ -1,8 +1,8 @@
 /*
  * strnlen - calculate the length of a string with limit.
  *
- * Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2020-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 /* Assumptions:
@@ -11,7 +11,7 @@
  * MTE compatible.
  */
 
-#include "../asmdefs.h"
+#include "asmdefs.h"
 
 #define srcin	x0
 #define cntin	x1
@@ -20,39 +20,30 @@
 #define src	x2
 #define synd	x3
 #define shift	x4
-#define wtmp	w4
 #define tmp	x4
 #define cntrem	x5
 
 #define qdata		q0
 #define vdata		v0
 #define vhas_chr	v1
-#define vrepmask	v2
-#define vend		v3
-#define dend		d3
+#define vend		v2
+#define dend		d2
 
 /*
   Core algorithm:
-
-   For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
-   per byte. For each 16-byte chunk, bits 0-3 are set if the relevant byte matched the
-   requested character or the byte is NUL. Bits 4-7 must be zero. Bits 4-7 are
-   set likewise for odd bytes so that adjacent bytes can be merged. Since the
-   bits in the syndrome reflect the order in which things occur in the original
-   string, counting trailing zeros identifies exactly which byte matched.  */
+   Process the string in 16-byte aligned chunks. Compute a 64-bit mask with
+   four bits per byte using the shrn instruction. A count trailing zeros then
+   identifies the first zero byte.
 */

 ENTRY (__strnlen_aarch64)
	PTR_ARG (0)
	SIZE_ARG (1)
	bic	src, srcin, 15
-	mov	wtmp, 0xf00f
	cbz	cntin, L(nomatch)
-	ld1	{vdata.16b}, [src], 16
-	dup	vrepmask.8h, wtmp
+	ld1	{vdata.16b}, [src]
	cmeq	vhas_chr.16b, vdata.16b, 0
	lsl	shift, srcin, 2
-	and	vhas_chr.16b, vhas_chr.16b, vrepmask.16b
-	addp	vend.16b, vhas_chr.16b, vhas_chr.16b	/* 128->64 */
+	shrn	vend.8b, vhas_chr.8h, 4	/* 128->64 */
	fmov	synd, dend
	lsr	synd, synd, shift
	cbz	synd, L(start_loop)
@@ -64,37 +55,40 @@ L(finish):
	csel	result, cntin, result, ls
	ret

+L(nomatch):
+	mov	result, cntin
+	ret
+
 L(start_loop):
	sub	tmp, src, srcin
+	add	tmp, tmp, 17
	subs	cntrem, cntin, tmp
-	b.ls	L(nomatch)
+	b.lo	L(nomatch)

	/* Make sure that it won't overread by a 16-byte chunk */
-	add	tmp, cntrem, 15
-	tbnz	tmp, 4, L(loop32_2)
-
+	tbz	cntrem, 4, L(loop32_2)
+	sub	src, src, 16
	.p2align 5
 L(loop32):
-	ldr	qdata, [src], 16
+	ldr	qdata, [src, 32]!
	cmeq	vhas_chr.16b, vdata.16b, 0
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b	/* 128->64 */
	fmov	synd, dend
	cbnz	synd, L(end)
 L(loop32_2):
-	ldr	qdata, [src], 16
+	ldr	qdata, [src, 16]
	subs	cntrem, cntrem, 32
	cmeq	vhas_chr.16b, vdata.16b, 0
-	b.ls	L(end)
+	b.lo	L(end_2)
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b	/* 128->64 */
	fmov	synd, dend
	cbz	synd, L(loop32)
-
+L(end_2):
+	add	src, src, 16
 L(end):
-	and	vhas_chr.16b, vhas_chr.16b, vrepmask.16b
-	addp	vend.16b, vhas_chr.16b, vhas_chr.16b	/* 128->64 */
-	sub	src, src, 16
-	mov	synd, vend.d[0]
+	shrn	vend.8b, vhas_chr.8h, 4	/* 128->64 */
	sub	result, src, srcin
+	fmov	synd, dend
 #ifndef __AARCH64EB__
	rbit	synd, synd
 #endif
@@ -104,9 +98,5 @@ L(end):
	csel	result, cntin, result, ls
	ret

-L(nomatch):
-	mov	result, cntin
-	ret
-
 END (__strnlen_aarch64)
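
The reworked strnlen bookkeeping above folds two questions into one subtraction: has the limit already been covered by the first aligned probe, and if not, how much remains. A small sketch of that arithmetic under stated assumptions (illustrative helper names; not part of the routines):

#include <stdint.h>
#include <stddef.h>

/* The first aligned 16-byte probe covers 16 - (s & 15) bytes of the buffer.
   "add tmp, tmp, 17" followed by "b.lo" is the borrow test for the limit
   fitting inside that coverage.  */
static inline int
limit_within_first_chunk (const char *s, size_t cntin)
{
  size_t covered = 16 - ((uintptr_t) s & 15);
  return cntin <= covered;
}

/* The final "cmp cntin, result; csel result, cntin, result, ls" clamp.  */
static inline size_t
clamp_to_limit (size_t found, size_t cntin)
{
  return found < cntin ? found : cntin;
}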
*/ -#include "../asmdefs.h" +#include "asmdefs.h" #define srcin x0 #define chrin w1 @@ -19,7 +19,6 @@ #define src x2 #define tmp x3 -#define wtmp w3 #define synd x3 #define shift x4 #define src_match x4 @@ -31,7 +30,6 @@ #define vhas_nul v2 #define vhas_chr v3 #define vrepmask v4 -#define vrepmask2 v5 #define vend v5 #define dend d5 @@ -47,55 +45,67 @@ ENTRY (__strrchr_aarch64_mte) PTR_ARG (0) bic src, srcin, 15 dup vrepchr.16b, chrin - mov wtmp, 0x3003 - dup vrepmask.8h, wtmp - tst srcin, 15 - beq L(loop1) - - ld1 {vdata.16b}, [src], 16 + movi vrepmask.16b, 0x33 + ld1 {vdata.16b}, [src] cmeq vhas_nul.16b, vdata.16b, 0 cmeq vhas_chr.16b, vdata.16b, vrepchr.16b - mov wtmp, 0xf00f - dup vrepmask2.8h, wtmp bit vhas_nul.16b, vhas_chr.16b, vrepmask.16b - and vhas_nul.16b, vhas_nul.16b, vrepmask2.16b - addp vend.16b, vhas_nul.16b, vhas_nul.16b + shrn vend.8b, vhas_nul.8h, 4 lsl shift, srcin, 2 fmov synd, dend lsr synd, synd, shift lsl synd, synd, shift ands nul_match, synd, 0xcccccccccccccccc bne L(tail) - cbnz synd, L(loop2) + cbnz synd, L(loop2_start) - .p2align 5 + .p2align 4 L(loop1): - ld1 {vdata.16b}, [src], 16 + ldr q1, [src, 16] + cmeq vhas_chr.16b, vdata.16b, vrepchr.16b + cmhs vhas_nul.16b, vhas_chr.16b, vdata.16b + umaxp vend.16b, vhas_nul.16b, vhas_nul.16b + fmov synd, dend + cbnz synd, L(loop1_end) + ldr q1, [src, 32]! cmeq vhas_chr.16b, vdata.16b, vrepchr.16b cmhs vhas_nul.16b, vhas_chr.16b, vdata.16b umaxp vend.16b, vhas_nul.16b, vhas_nul.16b fmov synd, dend cbz synd, L(loop1) - + sub src, src, 16 +L(loop1_end): + add src, src, 16 cmeq vhas_nul.16b, vdata.16b, 0 +#ifdef __AARCH64EB__ + bif vhas_nul.16b, vhas_chr.16b, vrepmask.16b + shrn vend.8b, vhas_nul.8h, 4 + fmov synd, dend + rbit synd, synd +#else bit vhas_nul.16b, vhas_chr.16b, vrepmask.16b - bic vhas_nul.8h, 0x0f, lsl 8 - addp vend.16b, vhas_nul.16b, vhas_nul.16b + shrn vend.8b, vhas_nul.8h, 4 fmov synd, dend +#endif ands nul_match, synd, 0xcccccccccccccccc - beq L(loop2) - + beq L(loop2_start) L(tail): sub nul_match, nul_match, 1 and chr_match, synd, 0x3333333333333333 ands chr_match, chr_match, nul_match - sub result, src, 1 + add result, src, 15 clz tmp, chr_match sub result, result, tmp, lsr 2 csel result, result, xzr, ne ret .p2align 4 + nop + nop +L(loop2_start): + add src, src, 16 + bic vrepmask.8h, 0xf0 + L(loop2): cmp synd, 0 csel src_match, src, src_match, ne diff --git a/contrib/arm-optimized-routines/string/aarch64/strrchr-sve.S b/contrib/arm-optimized-routines/string/aarch64/strrchr-sve.S index d36d69af37fd..825a7384cfc1 100644 --- a/contrib/arm-optimized-routines/string/aarch64/strrchr-sve.S +++ b/contrib/arm-optimized-routines/string/aarch64/strrchr-sve.S @@ -1,11 +1,11 @@ /* * strrchr - find the last of a character in a string * - * Copyright (c) 2019-2021, Arm Limited. - * SPDX-License-Identifier: MIT + * Copyright (c) 2019-2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ -#include "../asmdefs.h" +#include "asmdefs.h" #if __ARM_FEATURE_SVE /* Assumptions: diff --git a/contrib/arm-optimized-routines/string/aarch64/strrchr.S b/contrib/arm-optimized-routines/string/aarch64/strrchr.S index 56185ff534e3..bf9cb297b6cb 100644 --- a/contrib/arm-optimized-routines/string/aarch64/strrchr.S +++ b/contrib/arm-optimized-routines/string/aarch64/strrchr.S @@ -1,8 +1,8 @@ /* * strrchr - find last position of a character in a string. * - * Copyright (c) 2014-2020, Arm Limited. - * SPDX-License-Identifier: MIT + * Copyright (c) 2014-2022, Arm Limited. 
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception */ /* Assumptions: @@ -11,7 +11,7 @@ * Neon Available. */ -#include "../asmdefs.h" +#include "asmdefs.h" /* Arguments and results. */ #define srcin x0 diff --git a/contrib/arm-optimized-routines/string/arm/asmdefs.h b/contrib/arm-optimized-routines/string/arm/asmdefs.h new file mode 100644 index 000000000000..e31188804716 --- /dev/null +++ b/contrib/arm-optimized-routines/string/arm/asmdefs.h @@ -0,0 +1,477 @@ +/* + * Macros for asm code. Arm version. + * + * Copyright (c) 2019-2022, Arm Limited. + * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception + */ + +#ifndef _ASMDEFS_H +#define _ASMDEFS_H + +/* Check whether leaf function PAC signing has been requested in the + -mbranch-protect compile-time option. */ +#define LEAF_PROTECT_BIT 2 + +#ifdef __ARM_FEATURE_PAC_DEFAULT +# define HAVE_PAC_LEAF \ + ((__ARM_FEATURE_PAC_DEFAULT & (1 << LEAF_PROTECT_BIT)) && 1) +#else +# define HAVE_PAC_LEAF 0 +#endif + +/* Provide default parameters for PAC-code handling in leaf-functions. */ +#if HAVE_PAC_LEAF +# ifndef PAC_LEAF_PUSH_IP +# define PAC_LEAF_PUSH_IP 1 +# endif +#else /* !HAVE_PAC_LEAF */ +# undef PAC_LEAF_PUSH_IP +# define PAC_LEAF_PUSH_IP 0 +#endif /* HAVE_PAC_LEAF */ + +#define STACK_ALIGN_ENFORCE 0 + +/****************************************************************************** +* Implementation of the prologue and epilogue assembler macros and their +* associated helper functions. +* +* These functions add support for the following: +* +* - M-profile branch target identification (BTI) landing-pads when compiled +* with `-mbranch-protection=bti'. +* - PAC-signing and verification instructions, depending on hardware support +* and whether the PAC-signing of leaf functions has been requested via the +* `-mbranch-protection=pac-ret+leaf' compiler argument. +* - 8-byte stack alignment preservation at function entry, defaulting to the +* value of STACK_ALIGN_ENFORCE. +* +* Notes: +* - Prologue stack alignment is implemented by detecting a push with an odd +* number of registers and prepending a dummy register to the list. +* - If alignment is attempted on a list containing r0, compilation will result +* in an error. +* - If alignment is attempted in a list containing r1, r0 will be prepended to +* the register list and r0 will be restored prior to function return. for +* functions with non-void return types, this will result in the corruption of +* the result register. +* - Stack alignment is enforced via the following helper macro call-chain: +* +* {prologue|epilogue} ->_align8 -> _preprocess_reglist -> +* _preprocess_reglist1 -> {_prologue|_epilogue} +* +* - Debug CFI directives are automatically added to prologues and epilogues, +* assisted by `cfisavelist' and `cfirestorelist', respectively. +* +* Arguments: +* prologue +* -------- +* - first - If `last' specified, this serves as start of general-purpose +* register (GPR) range to push onto stack, otherwise represents +* single GPR to push onto stack. If omitted, no GPRs pushed +* onto stack at prologue. +* - last - If given, specifies inclusive upper-bound of GPR range. +* - push_ip - Determines whether IP register is to be pushed to stack at +* prologue. When pac-signing is requested, this holds the +* the pac-key. Either 1 or 0 to push or not push, respectively. +* Default behavior: Set to value of PAC_LEAF_PUSH_IP macro. +* - push_lr - Determines whether to push lr to the stack on function entry. +* Either 1 or 0 to push or not push, respectively. 
+* - align8 - Whether to enforce alignment. Either 1 or 0, with 1 requesting +* alignment. +* +* epilogue +* -------- +* The epilogue should be called passing the same arguments as those passed to +* the prologue to ensure the stack is not corrupted on function return. +* +* Usage examples: +* +* prologue push_ip=1 -> push {ip} +* epilogue push_ip=1, align8=1 -> pop {r2, ip} +* prologue push_ip=1, push_lr=1 -> push {ip, lr} +* epilogue 1 -> pop {r1} +* prologue 1, align8=1 -> push {r0, r1} +* epilogue 1, push_ip=1 -> pop {r1, ip} +* prologue 1, 4 -> push {r1-r4} +* epilogue 1, 4 push_ip=1 -> pop {r1-r4, ip} +* +******************************************************************************/ + +/* Emit .cfi_restore directives for a consecutive sequence of registers. */ + .macro cfirestorelist first, last + .cfi_restore \last + .if \last-\first + cfirestorelist \first, \last-1 + .endif + .endm + +/* Emit .cfi_offset directives for a consecutive sequence of registers. */ + .macro cfisavelist first, last, index=1 + .cfi_offset \last, -4*(\index) + .if \last-\first + cfisavelist \first, \last-1, \index+1 + .endif + .endm + +.macro _prologue first=-1, last=-1, push_ip=PAC_LEAF_PUSH_IP, push_lr=0 + .if \push_ip & 1 != \push_ip + .error "push_ip may be either 0 or 1" + .endif + .if \push_lr & 1 != \push_lr + .error "push_lr may be either 0 or 1" + .endif + .if \first != -1 + .if \last == -1 + /* Upper-bound not provided: Set upper = lower. */ + _prologue \first, \first, \push_ip, \push_lr + .exitm + .endif + .endif +#if HAVE_PAC_LEAF +# if __ARM_FEATURE_BTI_DEFAULT + pacbti ip, lr, sp +# else + pac ip, lr, sp +# endif /* __ARM_FEATURE_BTI_DEFAULT */ + .cfi_register 143, 12 +#else +# if __ARM_FEATURE_BTI_DEFAULT + bti +# endif /* __ARM_FEATURE_BTI_DEFAULT */ +#endif /* HAVE_PAC_LEAF */ + .if \first != -1 + .if \last != \first + .if \last >= 13 + .error "SP cannot be in the save list" + .endif + .if \push_ip + .if \push_lr + /* Case 1: push register range, ip and lr registers. */ + push {r\first-r\last, ip, lr} + .cfi_adjust_cfa_offset ((\last-\first)+3)*4 + .cfi_offset 14, -4 + .cfi_offset 143, -8 + cfisavelist \first, \last, 3 + .else // !\push_lr + /* Case 2: push register range and ip register. */ + push {r\first-r\last, ip} + .cfi_adjust_cfa_offset ((\last-\first)+2)*4 + .cfi_offset 143, -4 + cfisavelist \first, \last, 2 + .endif + .else // !\push_ip + .if \push_lr + /* Case 3: push register range and lr register. */ + push {r\first-r\last, lr} + .cfi_adjust_cfa_offset ((\last-\first)+2)*4 + .cfi_offset 14, -4 + cfisavelist \first, \last, 2 + .else // !\push_lr + /* Case 4: push register range. */ + push {r\first-r\last} + .cfi_adjust_cfa_offset ((\last-\first)+1)*4 + cfisavelist \first, \last, 1 + .endif + .endif + .else // \last == \first + .if \push_ip + .if \push_lr + /* Case 5: push single GP register plus ip and lr registers. */ + push {r\first, ip, lr} + .cfi_adjust_cfa_offset 12 + .cfi_offset 14, -4 + .cfi_offset 143, -8 + cfisavelist \first, \first, 3 + .else // !\push_lr + /* Case 6: push single GP register plus ip register. */ + push {r\first, ip} + .cfi_adjust_cfa_offset 8 + .cfi_offset 143, -4 + cfisavelist \first, \first, 2 + .endif + .else // !\push_ip + .if \push_lr + /* Case 7: push single GP register plus lr register. */ + push {r\first, lr} + .cfi_adjust_cfa_offset 8 + .cfi_offset 14, -4 + cfisavelist \first, \first, 2 + .else // !\push_lr + /* Case 8: push single GP register. 
*/ + push {r\first} + .cfi_adjust_cfa_offset 4 + cfisavelist \first, \first, 1 + .endif + .endif + .endif + .else // \first == -1 + .if \push_ip + .if \push_lr + /* Case 9: push ip and lr registers. */ + push {ip, lr} + .cfi_adjust_cfa_offset 8 + .cfi_offset 14, -4 + .cfi_offset 143, -8 + .else // !\push_lr + /* Case 10: push ip register. */ + push {ip} + .cfi_adjust_cfa_offset 4 + .cfi_offset 143, -4 + .endif + .else // !\push_ip + .if \push_lr + /* Case 11: push lr register. */ + push {lr} + .cfi_adjust_cfa_offset 4 + .cfi_offset 14, -4 + .endif + .endif + .endif +.endm + +.macro _epilogue first=-1, last=-1, push_ip=PAC_LEAF_PUSH_IP, push_lr=0 + .if \push_ip & 1 != \push_ip + .error "push_ip may be either 0 or 1" + .endif + .if \push_lr & 1 != \push_lr + .error "push_lr may be either 0 or 1" + .endif + .if \first != -1 + .if \last == -1 + /* Upper-bound not provided: Set upper = lower. */ + _epilogue \first, \first, \push_ip, \push_lr + .exitm + .endif + .if \last != \first + .if \last >= 13 + .error "SP cannot be in the save list" + .endif + .if \push_ip + .if \push_lr + /* Case 1: pop register range, ip and lr registers. */ + pop {r\first-r\last, ip, lr} + .cfi_restore 14 + .cfi_register 143, 12 + cfirestorelist \first, \last + .else // !\push_lr + /* Case 2: pop register range and ip register. */ + pop {r\first-r\last, ip} + .cfi_register 143, 12 + cfirestorelist \first, \last + .endif + .else // !\push_ip + .if \push_lr + /* Case 3: pop register range and lr register. */ + pop {r\first-r\last, lr} + .cfi_restore 14 + cfirestorelist \first, \last + .else // !\push_lr + /* Case 4: pop register range. */ + pop {r\first-r\last} + cfirestorelist \first, \last + .endif + .endif + .else // \last == \first + .if \push_ip + .if \push_lr + /* Case 5: pop single GP register plus ip and lr registers. */ + pop {r\first, ip, lr} + .cfi_restore 14 + .cfi_register 143, 12 + cfirestorelist \first, \first + .else // !\push_lr + /* Case 6: pop single GP register plus ip register. */ + pop {r\first, ip} + .cfi_register 143, 12 + cfirestorelist \first, \first + .endif + .else // !\push_ip + .if \push_lr + /* Case 7: pop single GP register plus lr register. */ + pop {r\first, lr} + .cfi_restore 14 + cfirestorelist \first, \first + .else // !\push_lr + /* Case 8: pop single GP register. */ + pop {r\first} + cfirestorelist \first, \first + .endif + .endif + .endif + .else // \first == -1 + .if \push_ip + .if \push_lr + /* Case 9: pop ip and lr registers. */ + pop {ip, lr} + .cfi_restore 14 + .cfi_register 143, 12 + .else // !\push_lr + /* Case 10: pop ip register. */ + pop {ip} + .cfi_register 143, 12 + .endif + .else // !\push_ip + .if \push_lr + /* Case 11: pop lr register. */ + pop {lr} + .cfi_restore 14 + .endif + .endif + .endif +#if HAVE_PAC_LEAF + aut ip, lr, sp +#endif /* HAVE_PAC_LEAF */ + bx lr +.endm + +/* Clean up expressions in 'last'. 
+
+/* Clean up expressions in 'last'. */
+.macro _preprocess_reglist1 first:req, last:req, push_ip:req, push_lr:req, reglist_op:req
+ .if \last == 0
+ \reglist_op \first, 0, \push_ip, \push_lr
+ .elseif \last == 1
+ \reglist_op \first, 1, \push_ip, \push_lr
+ .elseif \last == 2
+ \reglist_op \first, 2, \push_ip, \push_lr
+ .elseif \last == 3
+ \reglist_op \first, 3, \push_ip, \push_lr
+ .elseif \last == 4
+ \reglist_op \first, 4, \push_ip, \push_lr
+ .elseif \last == 5
+ \reglist_op \first, 5, \push_ip, \push_lr
+ .elseif \last == 6
+ \reglist_op \first, 6, \push_ip, \push_lr
+ .elseif \last == 7
+ \reglist_op \first, 7, \push_ip, \push_lr
+ .elseif \last == 8
+ \reglist_op \first, 8, \push_ip, \push_lr
+ .elseif \last == 9
+ \reglist_op \first, 9, \push_ip, \push_lr
+ .elseif \last == 10
+ \reglist_op \first, 10, \push_ip, \push_lr
+ .elseif \last == 11
+ \reglist_op \first, 11, \push_ip, \push_lr
+ .else
+ .error "last (\last) out of range"
+ .endif
+.endm
+
+/* Clean up expressions in 'first'. */
+.macro _preprocess_reglist first:req, last, push_ip=0, push_lr=0, reglist_op:req
+ .ifb \last
+ _preprocess_reglist \first \first \push_ip \push_lr
+ .else
+ .if \first > \last
+ .error "last (\last) must be at least as great as first (\first)"
+ .endif
+ .if \first == 0
+ _preprocess_reglist1 0, \last, \push_ip, \push_lr, \reglist_op
+ .elseif \first == 1
+ _preprocess_reglist1 1, \last, \push_ip, \push_lr, \reglist_op
+ .elseif \first == 2
+ _preprocess_reglist1 2, \last, \push_ip, \push_lr, \reglist_op
+ .elseif \first == 3
+ _preprocess_reglist1 3, \last, \push_ip, \push_lr, \reglist_op
+ .elseif \first == 4
+ _preprocess_reglist1 4, \last, \push_ip, \push_lr, \reglist_op
+ .elseif \first == 5
+ _preprocess_reglist1 5, \last, \push_ip, \push_lr, \reglist_op
+ .elseif \first == 6
+ _preprocess_reglist1 6, \last, \push_ip, \push_lr, \reglist_op
+ .elseif \first == 7
+ _preprocess_reglist1 7, \last, \push_ip, \push_lr, \reglist_op
+ .elseif \first == 8
+ _preprocess_reglist1 8, \last, \push_ip, \push_lr, \reglist_op
+ .elseif \first == 9
+ _preprocess_reglist1 9, \last, \push_ip, \push_lr, \reglist_op
+ .elseif \first == 10
+ _preprocess_reglist1 10, \last, \push_ip, \push_lr, \reglist_op
+ .elseif \first == 11
+ _preprocess_reglist1 11, \last, \push_ip, \push_lr, \reglist_op
+ .else
+ .error "first (\first) out of range"
+ .endif
+ .endif
+.endm
+
+.macro _align8 first, last, push_ip=0, push_lr=0, reglist_op=_prologue
+ .ifb \first
+ .ifnb \last
+ .error "can't have last (\last) without specifying first"
+ .else // \last not blank
+ .if ((\push_ip + \push_lr) % 2) == 0
+ \reglist_op first=-1, last=-1, push_ip=\push_ip, push_lr=\push_lr
+ .exitm
+ .else // ((\push_ip + \push_lr) % 2) odd
+ _align8 2, 2, \push_ip, \push_lr, \reglist_op
+ .exitm
+ .endif // ((\push_ip + \push_lr) % 2) == 0
+ .endif // .ifnb \last
+ .endif // .ifb \first
+
+ .ifb \last
+ _align8 \first, \first, \push_ip, \push_lr, \reglist_op
+ .else
+ .if \push_ip & 1 <> \push_ip
+ .error "push_ip may be 0 or 1"
+ .endif
+ .if \push_lr & 1 <> \push_lr
+ .error "push_lr may be 0 or 1"
+ .endif
+ .ifeq (\last - \first + \push_ip + \push_lr) % 2
+ .if \first == 0
+ .error "Alignment required and first register is r0"
+ .exitm
+ .endif
+ _preprocess_reglist \first-1, \last, \push_ip, \push_lr, \reglist_op
+ .else
+ _preprocess_reglist \first \last, \push_ip, \push_lr, \reglist_op
+ .endif
+ .endif
+.endm
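The alignment test in `_align8` reads: `(\last - \first + \push_ip + \push_lr) % 2` is zero exactly when an odd number of 4-byte words would be pushed, leaving sp misaligned from 8 bytes. Padding is always taken from below the requested range (the `\first-1` call), never above it. A few worked invocations, assuming push_ip and push_lr default to 0 (illustrative only):

    prologue 1, align8=1              @ 1 word, pad downwards -> push {r0, r1}
    prologue 1, 4, align8=1           @ 4 words = 16 bytes -> push {r1-r4} as-is
    prologue 1, push_lr=1, align8=1   @ r1 + lr = 8 bytes -> push {r1, lr}

Since padding can only widen the range downwards, requesting alignment when the first register is already r0 trips the "Alignment required and first register is r0" diagnostic.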
+
+.macro prologue first, last, push_ip=PAC_LEAF_PUSH_IP, push_lr=0, align8=STACK_ALIGN_ENFORCE
+ .if \align8
+ _align8 \first, \last, \push_ip, \push_lr, _prologue
+ .else
+ _prologue first=\first, last=\last, push_ip=\push_ip, push_lr=\push_lr
+ .endif
+.endm
+
+.macro epilogue first, last, push_ip=PAC_LEAF_PUSH_IP, push_lr=0, align8=STACK_ALIGN_ENFORCE
+ .if \align8
+ _align8 \first, \last, \push_ip, \push_lr, reglist_op=_epilogue
+ .else
+ _epilogue first=\first, last=\last, push_ip=\push_ip, push_lr=\push_lr
+ .endif
+.endm
+
+#define ENTRY_ALIGN(name, alignment) \
+ .global name; \
+ .type name,%function; \
+ .align alignment; \
+ name: \
+ .fnstart; \
+ .cfi_startproc;
+
+#define ENTRY(name) ENTRY_ALIGN(name, 6)
+
+#define ENTRY_ALIAS(name) \
+ .global name; \
+ .type name,%function; \
+ name:
+
+#if defined (IS_LEAF)
+# define END_UNWIND .cantunwind;
+#else
+# define END_UNWIND
+#endif
+
+#define END(name) \
+ .cfi_endproc; \
+ END_UNWIND \
+ .fnend; \
+ .size name, .-name;
+
+#define L(l) .L ## l
+
+#endif
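With the header complete, every converted routine below follows one skeleton. A minimal sketch (hypothetical function name; assumes HAVE_PAC_LEAF is provided by the configuration block earlier in this header, and that IS_LEAF, when wanted, is defined before the include):

    #define IS_LEAF              /* leaf routine: END will add .cantunwind */
    #include "asmdefs.h"

    ENTRY (__example_routine)    @ .align 6, .fnstart, .cfi_startproc
     prologue 4, 5, push_ip=HAVE_PAC_LEAF
     /* ... body may clobber r4 and r5 ... */
     epilogue 4, 5, push_ip=HAVE_PAC_LEAF
    END (__example_routine)      @ .cfi_endproc, .cantunwind, .fnend, .size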
diff --git a/contrib/arm-optimized-routines/string/arm/check-arch.S b/contrib/arm-optimized-routines/string/arm/check-arch.S
index 1cff9345e343..95516710fb85 100644
--- a/contrib/arm-optimized-routines/string/arm/check-arch.S
+++ b/contrib/arm-optimized-routines/string/arm/check-arch.S
@@ -1,10 +1,13 @@
 /*
  * check ARCH setting.
  *
- * Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2020-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #if !__arm__
 # error ARCH setting does not match the compiler.
 #endif
+
+/* For attributes that may affect ABI. */
+#include "asmdefs.h"
diff --git a/contrib/arm-optimized-routines/string/arm/memchr.S b/contrib/arm-optimized-routines/string/arm/memchr.S
index 3f1ac4df136f..823d6013eb35 100644
--- a/contrib/arm-optimized-routines/string/arm/memchr.S
+++ b/contrib/arm-optimized-routines/string/arm/memchr.S
@@ -1,8 +1,8 @@
 /*
  * memchr - scan memory for a character
  *
- * Copyright (c) 2010-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2010-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 /*
@@ -23,7 +23,11 @@
 @ Removed unneeded cbz from align loop
 
 .syntax unified
+#if __ARM_ARCH >= 8 && __ARM_ARCH_PROFILE == 'M'
+ /* keep config inherited from -march= */
+#else
 .arch armv7-a
+#endif
 
 @ this lets us check a flag in a 00/ff byte easily in either endianness
 #ifdef __ARMEB__
@@ -32,6 +36,8 @@
 #define CHARTSTMASK(c) 1<<(c*8)
 #endif
 .thumb
+#include "asmdefs.h"
+
 @ ---------------------------------------------------------------------------
 .thumb_func
@@ -39,11 +45,14 @@
 .p2align 4,,15
 .global __memchr_arm
 .type __memchr_arm,%function
+ .fnstart
+ .cfi_startproc
 __memchr_arm:
 @ r0 = start of memory to scan
 @ r1 = character to look for
 @ r2 = length
 @ returns r0 = pointer to character or NULL if not found
+ prologue
 and r1,r1,#0xff @ Don't think we can trust the caller to actually pass a char
 cmp r2,#16 @ If it's short don't bother with anything clever
@@ -64,6 +73,11 @@ __memchr_arm:
 10:
 @ At this point, we are aligned, we know we have at least 8 bytes to work with
 push {r4,r5,r6,r7}
+ .cfi_adjust_cfa_offset 16
+ .cfi_rel_offset 4, 0
+ .cfi_rel_offset 5, 4
+ .cfi_rel_offset 6, 8
+ .cfi_rel_offset 7, 12
 orr r1, r1, r1, lsl #8 @ expand the match word across to all bytes
 orr r1, r1, r1, lsl #16
 bic r4, r2, #7 @ Number of double words to work with
@@ -83,6 +97,11 @@ __memchr_arm:
 bne 15b @ (Flags from the subs above) If not run out of bytes then go around again
 pop {r4,r5,r6,r7}
+ .cfi_restore 7
+ .cfi_restore 6
+ .cfi_restore 5
+ .cfi_restore 4
+ .cfi_adjust_cfa_offset -16
 and r1,r1,#0xff @ Get r1 back to a single character from the expansion above
 and r2,r2,#7 @ Leave the count remaining as the number after the double words have been done
@@ -97,16 +116,25 @@ __memchr_arm:
 bne 21b @ on r2 flags
 40:
+ .cfi_remember_state
 movs r0,#0 @ not found
- bx lr
+ epilogue
 50:
+ .cfi_restore_state
+ .cfi_remember_state
 subs r0,r0,#1 @ found
- bx lr
+ epilogue
 60: @ We're here because the fast path found a hit - now we have to track down exactly which word it was
 @ r0 points to the start of the double word after the one that was tested
 @ r5 has the 00/ff pattern for the first word, r6 has the chained value
+ .cfi_restore_state @ Standard post-prologue state
+ .cfi_adjust_cfa_offset 16
+ .cfi_rel_offset 4, 0
+ .cfi_rel_offset 5, 4
+ .cfi_rel_offset 6, 8
+ .cfi_rel_offset 7, 12
 cmp r5, #0
 itte eq
 moveq r5, r6 @ the end is in the 2nd word
@@ -126,7 +154,15 @@ __memchr_arm:
 61:
 pop {r4,r5,r6,r7}
+ .cfi_restore 7
+ .cfi_restore 6
+ .cfi_restore 5
+ .cfi_restore 4
+ .cfi_adjust_cfa_offset -16
 subs r0,r0,#1
- bx lr
+ epilogue
+ .cfi_endproc
+ .cantunwind
+ .fnend
 .size __memchr_arm, . - __memchr_arm
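The memchr conversion shows the CFI idiom these patches use for routines with several return paths: capture the unwind state once with `.cfi_remember_state`, then rewind to it at each later exit with `.cfi_restore_state` instead of re-describing the whole frame. Schematically (illustrative only):

    40: .cfi_remember_state      @ snapshot the state that is valid here
     movs r0, #0
     epilogue                    @ first return path
    50: .cfi_restore_state       @ rewind to the snapshot
     .cfi_remember_state         @ re-arm it for the next exit label
     subs r0, r0, #1
     epilogue

Exits reached with extra registers still stacked (label `60:` above) restore the snapshot and then re-apply the push's `.cfi_adjust_cfa_offset`/`.cfi_rel_offset` notes, as the hunk shows.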
diff --git a/contrib/arm-optimized-routines/string/arm/memcpy.S b/contrib/arm-optimized-routines/string/arm/memcpy.S
index 86e64938edb1..2423cfd69061 100644
--- a/contrib/arm-optimized-routines/string/arm/memcpy.S
+++ b/contrib/arm-optimized-routines/string/arm/memcpy.S
@@ -1,8 +1,8 @@
 /*
  * memcpy - copy memory area
  *
- * Copyright (c) 2013-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2013-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 /*
@@ -17,7 +17,7 @@
  */
 
-#include "../asmdefs.h"
+#include "asmdefs.h"
 
 .syntax unified
 /* This implementation requires ARM state. */
diff --git a/contrib/arm-optimized-routines/string/arm/memset.S b/contrib/arm-optimized-routines/string/arm/memset.S
index 11e927368fd1..487b9d6a8f6c 100644
--- a/contrib/arm-optimized-routines/string/arm/memset.S
+++ b/contrib/arm-optimized-routines/string/arm/memset.S
@@ -2,7 +2,7 @@
  * memset - fill memory with a constant
  *
  * Copyright (c) 2010-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 /*
diff --git a/contrib/arm-optimized-routines/string/arm/strcmp-armv6m.S b/contrib/arm-optimized-routines/string/arm/strcmp-armv6m.S
index b75d4143db57..4d55306810ad 100644
--- a/contrib/arm-optimized-routines/string/arm/strcmp-armv6m.S
+++ b/contrib/arm-optimized-routines/string/arm/strcmp-armv6m.S
@@ -1,10 +1,12 @@
 /*
  * strcmp for ARMv6-M (optimized for performance, not size)
  *
- * Copyright (c) 2014-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2014-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
+#include "asmdefs.h"
+
 #if __ARM_ARCH == 6 && __ARM_ARCH_6M__ >= 1
 
 .thumb_func
diff --git a/contrib/arm-optimized-routines/string/arm/strcmp.S b/contrib/arm-optimized-routines/string/arm/strcmp.S
index 51443e343058..74b3d235fb18 100644
--- a/contrib/arm-optimized-routines/string/arm/strcmp.S
+++ b/contrib/arm-optimized-routines/string/arm/strcmp.S
@@ -1,8 +1,8 @@
 /*
  * strcmp for ARMv7
  *
- * Copyright (c) 2012-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2012-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #if __ARM_ARCH >= 7 && __ARM_ARCH_ISA_ARM >= 1
@@ -12,7 +12,7 @@
    is sufficiently aligned. Use saturating arithmetic to optimize the compares. */
 
-#include "../asmdefs.h"
+#include "asmdefs.h"
 
 /* Build Options:
    STRCMP_NO_PRECHECK: Don't run a quick pre-check of the first
@@ -26,6 +26,11 @@
 
 #define STRCMP_NO_PRECHECK 0
 
+/* Ensure the .cantunwind directive is prepended to .fnend.
+   Leaf functions cannot throw exceptions - EHABI only supports
+   synchronous exceptions. */
+#define IS_LEAF
+
 /* This version uses Thumb-2 code. */
 .thumb
 .syntax unified
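Defining IS_LEAF before the include is what arms the `.cantunwind` marker: it makes the END_UNWIND macro from asmdefs.h non-empty, so the closing `END (__strcmp_arm)` below expands to roughly (sketch of the macro expansion defined above):

    .cfi_endproc; .cantunwind; .fnend; .size __strcmp_arm, .-__strcmp_arm;

A leaf routine makes no calls, so no exception can ever need to unwind through it, and `.cantunwind` keeps its EHABI unwind-table entry minimal.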
@@ -98,8 +103,9 @@
 ldrd r4, r5, [sp], #16
 .cfi_restore 4
 .cfi_restore 5
+ .cfi_adjust_cfa_offset -16
 sub result, result, r1, lsr #24
- bx lr
+ epilogue push_ip=HAVE_PAC_LEAF
 #else
 /* To use the big-endian trick we'd have to reverse all three words.
    that's slower than this approach. */
@@ -119,21 +125,15 @@
 ldrd r4, r5, [sp], #16
 .cfi_restore 4
 .cfi_restore 5
+ .cfi_adjust_cfa_offset -16
 sub result, result, r1
- bx lr
+ epilogue push_ip=HAVE_PAC_LEAF
 #endif
 .endm
 
- .p2align 5
-L(strcmp_start_addr):
-#if STRCMP_NO_PRECHECK == 0
-L(fastpath_exit):
- sub r0, r2, r3
- bx lr
- nop
-#endif
-ENTRY_ALIGN (__strcmp_arm, 0)
+ENTRY(__strcmp_arm)
+ prologue push_ip=HAVE_PAC_LEAF
 #if STRCMP_NO_PRECHECK == 0
 ldrb r2, [src1]
 ldrb r3, [src2]
@@ -143,13 +143,13 @@ ENTRY_ALIGN (__strcmp_arm, 0)
 bne L(fastpath_exit)
 #endif
 strd r4, r5, [sp, #-16]!
- .cfi_def_cfa_offset 16
- .cfi_offset 4, -16
- .cfi_offset 5, -12
+ .cfi_adjust_cfa_offset 16
+ .cfi_rel_offset 4, 0
+ .cfi_rel_offset 5, 4
 orr tmp1, src1, src2
 strd r6, r7, [sp, #8]
- .cfi_offset 6, -8
- .cfi_offset 7, -4
+ .cfi_rel_offset 6, 8
+ .cfi_rel_offset 7, 12
 mvn const_m1, #0
 lsl r2, tmp1, #29
 cbz r2, L(loop_aligned8)
@@ -318,10 +318,19 @@ L(misaligned_exit):
 mov result, tmp1
 ldr r4, [sp], #16
 .cfi_restore 4
- bx lr
+ .cfi_adjust_cfa_offset -16
+ epilogue push_ip=HAVE_PAC_LEAF
 
 #if STRCMP_NO_PRECHECK == 0
+L(fastpath_exit):
+ .cfi_restore_state
+ .cfi_remember_state
+ sub r0, r2, r3
+ epilogue push_ip=HAVE_PAC_LEAF
+
 L(aligned_m1):
+ .cfi_restore_state
+ .cfi_remember_state
 add src2, src2, #4
 #endif
 L(src1_aligned):
@@ -368,9 +377,9 @@ L(overlap3):
 /* R6/7 Not used in this sequence. */
 .cfi_restore 6
 .cfi_restore 7
+ .cfi_adjust_cfa_offset -16
 neg result, result
- bx lr
-
+ epilogue push_ip=HAVE_PAC_LEAF
 6:
 .cfi_restore_state
 S2LO data1, data1, #24
@@ -445,7 +454,8 @@ L(strcmp_done_equal):
 /* R6/7 not used in this sequence. */
 .cfi_restore 6
 .cfi_restore 7
- bx lr
+ .cfi_adjust_cfa_offset -16
+ epilogue push_ip=HAVE_PAC_LEAF
 
 L(strcmp_tail):
 .cfi_restore_state
@@ -467,8 +477,9 @@ L(strcmp_tail):
 /* R6/7 not used in this sequence. */
 .cfi_restore 6
 .cfi_restore 7
+ .cfi_adjust_cfa_offset -16
 sub result, result, data2, lsr #24
- bx lr
+ epilogue push_ip=HAVE_PAC_LEAF
 
 END (__strcmp_arm)
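Every `bx lr` in strcmp becomes `epilogue push_ip=HAVE_PAC_LEAF`: on a PAC-enabled build each return path re-authenticates the return address before branching, while elsewhere the macro degenerates to a plain pop and `bx lr`. With no GP range to restore and stack-alignment enforcement off, the expansion is roughly (illustrative; Case 10 of `_epilogue`):

    pop {ip}                 @ recover the signed return-address copy
    .cfi_register 143, 12
    aut ip, lr, sp           @ HAVE_PAC_LEAF builds only: faults on a tampered lr
    bx lr

The added `.cfi_adjust_cfa_offset -16` lines pair with the `.cfi_adjust_cfa_offset 16` at the push, so the CFA stays correct once `ldrd r4, r5, [sp], #16` has popped the frame.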
diff --git a/contrib/arm-optimized-routines/string/arm/strcpy.c b/contrib/arm-optimized-routines/string/arm/strcpy.c
index 02cf94ff4be0..b5728a2534f0 100644
--- a/contrib/arm-optimized-routines/string/arm/strcpy.c
+++ b/contrib/arm-optimized-routines/string/arm/strcpy.c
@@ -2,7 +2,7 @@
  * strcpy
  *
  * Copyright (c) 2008-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #if defined (__thumb2__) && !defined (__thumb__)
diff --git a/contrib/arm-optimized-routines/string/arm/strlen-armv6t2.S b/contrib/arm-optimized-routines/string/arm/strlen-armv6t2.S
index 5ad30c941586..5eb8671bdc8b 100644
--- a/contrib/arm-optimized-routines/string/arm/strlen-armv6t2.S
+++ b/contrib/arm-optimized-routines/string/arm/strlen-armv6t2.S
@@ -1,8 +1,8 @@
 /*
  * strlen - calculate the length of a string
  *
- * Copyright (c) 2010-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2010-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #if __ARM_ARCH >= 6 && __ARM_ARCH_ISA_THUMB == 2
@@ -13,7 +13,7 @@
  */
 
-#include "../asmdefs.h"
+#include "asmdefs.h"
 
 #ifdef __ARMEB__
 #define S2LO lsl
@@ -23,6 +23,11 @@
 #define S2HI lsl
 #endif
 
+/* Ensure the .cantunwind directive is prepended to .fnend.
+   Leaf functions cannot throw exceptions - EHABI only supports
+   synchronous exceptions. */
+#define IS_LEAF
+
 /* This code requires Thumb. */
 .thumb
 .syntax unified
@@ -41,8 +46,8 @@
 #define tmp2 r5
 
 ENTRY (__strlen_armv6t2)
+ prologue 4 5 push_ip=HAVE_PAC_LEAF
 pld [srcin, #0]
- strd r4, r5, [sp, #-8]!
 bic src, srcin, #7
 mvn const_m1, #0
 ands tmp1, srcin, #7 /* (8 - bytes) to alignment. */
@@ -92,6 +97,7 @@ L(start_realigned):
 beq L(loop_aligned)
 
 L(null_found):
+ .cfi_remember_state
 cmp data1a, #0
 itt eq
 addeq result, result, #4
@@ -100,11 +106,11 @@ L(null_found):
 rev data1a, data1a
 #endif
 clz data1a, data1a
- ldrd r4, r5, [sp], #8
 add result, result, data1a, lsr #3 /* Bits -> Bytes. */
- bx lr
+ epilogue 4 5 push_ip=HAVE_PAC_LEAF
 
 L(misaligned8):
+ .cfi_restore_state
 ldrd data1a, data1b, [src]
 and tmp2, tmp1, #3
 rsb result, tmp1, #0
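In strlen the hand-written save/restore pair (`strd r4, r5, [sp, #-8]!` ... `ldrd r4, r5, [sp], #8`) is replaced by the macros, which emit the same stores plus the unwind annotations and, when enabled, the PAC/BTI landing sequence. Note that gas macro arguments may be space- or comma-separated, so:

    prologue 4 5 push_ip=HAVE_PAC_LEAF   @ same as: prologue 4, 5, push_ip=...

With push_ip=0 this reduces to `push {r4, r5}` (Case 4 of `_prologue`); with push_ip=1 it becomes `push {r4-r5, ip}` and records DWARF register 143 for the signed return address.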
diff --git a/contrib/arm-optimized-routines/string/bench/memcpy.c b/contrib/arm-optimized-routines/string/bench/memcpy.c
index 6bd27633e224..1468663e51cd 100644
--- a/contrib/arm-optimized-routines/string/bench/memcpy.c
+++ b/contrib/arm-optimized-routines/string/bench/memcpy.c
@@ -1,8 +1,8 @@
 /*
  * memcpy benchmark.
  *
- * Copyright (c) 2020-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2020-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #define _GNU_SOURCE
diff --git a/contrib/arm-optimized-routines/string/bench/memset.c b/contrib/arm-optimized-routines/string/bench/memset.c
index 2d6196931307..990e23ba9a36 100644
--- a/contrib/arm-optimized-routines/string/bench/memset.c
+++ b/contrib/arm-optimized-routines/string/bench/memset.c
@@ -2,7 +2,7 @@
  * memset benchmark.
  *
  * Copyright (c) 2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #define _GNU_SOURCE
diff --git a/contrib/arm-optimized-routines/string/bench/strlen.c b/contrib/arm-optimized-routines/string/bench/strlen.c
index b7eee6e905ab..f05d0d5b89e6 100644
--- a/contrib/arm-optimized-routines/string/bench/strlen.c
+++ b/contrib/arm-optimized-routines/string/bench/strlen.c
@@ -2,7 +2,7 @@
  * strlen benchmark.
  *
  * Copyright (c) 2020-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #define _GNU_SOURCE
diff --git a/contrib/arm-optimized-routines/string/include/benchlib.h b/contrib/arm-optimized-routines/string/include/benchlib.h
index 0f2ce2eb6bce..f1bbea388cd2 100644
--- a/contrib/arm-optimized-routines/string/include/benchlib.h
+++ b/contrib/arm-optimized-routines/string/include/benchlib.h
@@ -2,7 +2,7 @@
  * Benchmark support functions.
  *
 * Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
 
 #include <stdint.h>
diff --git a/contrib/arm-optimized-routines/string/include/stringlib.h b/contrib/arm-optimized-routines/string/include/stringlib.h
index 85e630279ceb..f41a46446888 100644
--- a/contrib/arm-optimized-routines/string/include/stringlib.h
+++ b/contrib/arm-optimized-routines/string/include/stringlib.h
@@ -1,8 +1,8 @@
 /*
  * Public API.
  *
- * Copyright (c) 2019-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
  */
 
 #include <stddef.h>
diff --git a/contrib/arm-optimized-routines/string/test/__mtag_tag_region.c b/contrib/arm-optimized-routines/string/test/__mtag_tag_region.c
index d8c02d92d626..c45fa6662a77 100644
--- a/contrib/arm-optimized-routines/string/test/__mtag_tag_region.c
+++ b/contrib/arm-optimized-routines/string/test/__mtag_tag_region.c
@@ -2,7 +2,7 @@
  * __mtag_tag_region test.
  *
 * Copyright (c) 2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
 
 #if __ARM_FEATURE_MEMORY_TAGGING && WANT_MTE_TEST
diff --git a/contrib/arm-optimized-routines/string/test/__mtag_tag_zero_region.c b/contrib/arm-optimized-routines/string/test/__mtag_tag_zero_region.c
index 221c223a2f31..a4a7861620d1 100644
--- a/contrib/arm-optimized-routines/string/test/__mtag_tag_zero_region.c
+++ b/contrib/arm-optimized-routines/string/test/__mtag_tag_zero_region.c
@@ -2,7 +2,7 @@
  * __mtag_tag_zero_region test.
  *
 * Copyright (c) 2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
 
 #if __ARM_FEATURE_MEMORY_TAGGING && WANT_MTE_TEST
diff --git a/contrib/arm-optimized-routines/string/test/memchr.c b/contrib/arm-optimized-routines/string/test/memchr.c
index 0ff77f5710bf..c6a94481c0ad 100644
--- a/contrib/arm-optimized-routines/string/test/memchr.c
+++ b/contrib/arm-optimized-routines/string/test/memchr.c
@@ -2,7 +2,7 @@
  * memchr test.
  *
 * Copyright (c) 2019-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
 
 #include <stdint.h>
diff --git a/contrib/arm-optimized-routines/string/test/memcmp.c b/contrib/arm-optimized-routines/string/test/memcmp.c
index 7a7cf9cff35a..f9236b83a60d 100644
--- a/contrib/arm-optimized-routines/string/test/memcmp.c
+++ b/contrib/arm-optimized-routines/string/test/memcmp.c
@@ -2,7 +2,7 @@
  * memcmp test.
  *
 * Copyright (c) 2019-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
 
 #include <stdint.h>
diff --git a/contrib/arm-optimized-routines/string/test/memcpy.c b/contrib/arm-optimized-routines/string/test/memcpy.c
index 21b35b990b9b..fa15a95b2bda 100644
--- a/contrib/arm-optimized-routines/string/test/memcpy.c
+++ b/contrib/arm-optimized-routines/string/test/memcpy.c
@@ -1,8 +1,8 @@
 /*
  * memcpy test.
  *
- * Copyright (c) 2019-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
 
 #include <stdint.h>
diff --git a/contrib/arm-optimized-routines/string/test/memmove.c b/contrib/arm-optimized-routines/string/test/memmove.c
index 12a70574c7c5..5d509c03affa 100644
--- a/contrib/arm-optimized-routines/string/test/memmove.c
+++ b/contrib/arm-optimized-routines/string/test/memmove.c
@@ -1,8 +1,8 @@
 /*
  * memmove test.
  *
- * Copyright (c) 2019-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
 
 #include <stdint.h>
diff --git a/contrib/arm-optimized-routines/string/test/memrchr.c b/contrib/arm-optimized-routines/string/test/memrchr.c
index adf96f049cc9..4171a56daefd 100644
--- a/contrib/arm-optimized-routines/string/test/memrchr.c
+++ b/contrib/arm-optimized-routines/string/test/memrchr.c
@@ -2,7 +2,7 @@
  * memchr test.
  *
 * Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
 
 #ifndef _GNU_SOURCE
diff --git a/contrib/arm-optimized-routines/string/test/memset.c b/contrib/arm-optimized-routines/string/test/memset.c
index f1721442dbaf..5543f44bb026 100644
--- a/contrib/arm-optimized-routines/string/test/memset.c
+++ b/contrib/arm-optimized-routines/string/test/memset.c
@@ -2,7 +2,7 @@
  * memset test.
  *
 * Copyright (c) 2019-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
 
 #include <stdint.h>
diff --git a/contrib/arm-optimized-routines/string/test/mte.h b/contrib/arm-optimized-routines/string/test/mte.h
index e67cbd9d2d40..40b0ecf6c194 100644
--- a/contrib/arm-optimized-routines/string/test/mte.h
+++ b/contrib/arm-optimized-routines/string/test/mte.h
@@ -2,7 +2,7 @@
  * Memory tagging testing code.
  *
 * Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
 
 #ifndef __TEST_MTE_H
diff --git a/contrib/arm-optimized-routines/string/test/stpcpy.c b/contrib/arm-optimized-routines/string/test/stpcpy.c
index 1b61245bf8df..0300892a1f3c 100644
--- a/contrib/arm-optimized-routines/string/test/stpcpy.c
+++ b/contrib/arm-optimized-routines/string/test/stpcpy.c
@@ -1,8 +1,8 @@
 /*
  * stpcpy test.
  *
- * Copyright (c) 2019-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
 
 #ifndef _GNU_SOURCE
diff --git a/contrib/arm-optimized-routines/string/test/strchr.c b/contrib/arm-optimized-routines/string/test/strchr.c
index f3ae982ef0ad..66180acfb57c 100644
--- a/contrib/arm-optimized-routines/string/test/strchr.c
+++ b/contrib/arm-optimized-routines/string/test/strchr.c
@@ -2,7 +2,7 @@
  * strchr test.
  *
 * Copyright (c) 2019-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
 
 #include <stdint.h>
diff --git a/contrib/arm-optimized-routines/string/test/strchrnul.c b/contrib/arm-optimized-routines/string/test/strchrnul.c
index 6c30ab2123f1..aad0bf59da66 100644
--- a/contrib/arm-optimized-routines/string/test/strchrnul.c
+++ b/contrib/arm-optimized-routines/string/test/strchrnul.c
@@ -2,7 +2,7 @@
  * strchrnul test.
  *
 * Copyright (c) 2019-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
 
 #ifndef _GNU_SOURCE
diff --git a/contrib/arm-optimized-routines/string/test/strcmp.c b/contrib/arm-optimized-routines/string/test/strcmp.c
index 0262397dec88..4aa95f4f2f1d 100644
--- a/contrib/arm-optimized-routines/string/test/strcmp.c
+++ b/contrib/arm-optimized-routines/string/test/strcmp.c
@@ -1,8 +1,8 @@
 /*
  * strcmp test.
  *
- * Copyright (c) 2019-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
 
 #include <stdint.h>
diff --git a/contrib/arm-optimized-routines/string/test/strcpy.c b/contrib/arm-optimized-routines/string/test/strcpy.c
index 6de3bed590ef..af297f90396a 100644
--- a/contrib/arm-optimized-routines/string/test/strcpy.c
+++ b/contrib/arm-optimized-routines/string/test/strcpy.c
@@ -1,8 +1,8 @@
 /*
  * strcpy test.
  *
- * Copyright (c) 2019-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
 
 #include <stdint.h>
diff --git a/contrib/arm-optimized-routines/string/test/stringtest.h b/contrib/arm-optimized-routines/string/test/stringtest.h
index fe855fc21736..6bb7e1fdfeca 100644
--- a/contrib/arm-optimized-routines/string/test/stringtest.h
+++ b/contrib/arm-optimized-routines/string/test/stringtest.h
@@ -2,7 +2,7 @@
  * Common string test code.
  *
 * Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
 
 #include <ctype.h>
diff --git a/contrib/arm-optimized-routines/string/test/strlen.c b/contrib/arm-optimized-routines/string/test/strlen.c
index 6278380f26df..47ef3dcf0ef0 100644
--- a/contrib/arm-optimized-routines/string/test/strlen.c
+++ b/contrib/arm-optimized-routines/string/test/strlen.c
@@ -1,15 +1,14 @@
 /*
  * strlen test.
  *
- * Copyright (c) 2019-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
 
 #include <stdint.h>
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
-#include <sys/mman.h>
 #include <limits.h>
 #include "mte.h"
 #include "stringlib.h"
diff --git a/contrib/arm-optimized-routines/string/test/strncmp.c b/contrib/arm-optimized-routines/string/test/strncmp.c
index f8c2167f8f1e..4bbab6f93450 100644
--- a/contrib/arm-optimized-routines/string/test/strncmp.c
+++ b/contrib/arm-optimized-routines/string/test/strncmp.c
@@ -1,8 +1,8 @@
 /*
  * strncmp test.
  *
- * Copyright (c) 2019-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
 
 #include <stdint.h>
diff --git a/contrib/arm-optimized-routines/string/test/strnlen.c b/contrib/arm-optimized-routines/string/test/strnlen.c
index 0dea00eaf8e3..a800fd1993cd 100644
--- a/contrib/arm-optimized-routines/string/test/strnlen.c
+++ b/contrib/arm-optimized-routines/string/test/strnlen.c
@@ -2,7 +2,7 @@
 * strnlen test.
 *
 * Copyright (c) 2019-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
 
 #ifndef _GNU_SOURCE
diff --git a/contrib/arm-optimized-routines/string/test/strrchr.c b/contrib/arm-optimized-routines/string/test/strrchr.c
index fedbdc52fcc1..580ca497f8a4 100644
--- a/contrib/arm-optimized-routines/string/test/strrchr.c
+++ b/contrib/arm-optimized-routines/string/test/strrchr.c
@@ -2,7 +2,7 @@
 * strrchr test.
 *
 * Copyright (c) 2019-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
 
 #include <stdint.h>
diff --git a/contrib/arm-optimized-routines/string/x86_64/check-arch.S b/contrib/arm-optimized-routines/string/x86_64/check-arch.S
index 26ade0a0c7db..5afcf7b7ee54 100644
--- a/contrib/arm-optimized-routines/string/x86_64/check-arch.S
+++ b/contrib/arm-optimized-routines/string/x86_64/check-arch.S
@@ -2,7 +2,7 @@
 * check ARCH setting.
 *
 * Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
 
 #if !__x86_64__