Diffstat (limited to 'contrib/arm-optimized-routines/string')
-rw-r--r--  contrib/arm-optimized-routines/string/Dir.mk | 2
-rw-r--r--  contrib/arm-optimized-routines/string/README.contributors | 30
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/__mtag_tag_region.S | 6
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/__mtag_tag_zero_region.S | 6
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/asmdefs.h (renamed from contrib/arm-optimized-routines/string/asmdefs.h) | 30
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/check-arch.S | 6
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/memchr-mte.S | 58
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/memchr-sve.S | 6
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/memchr.S | 6
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/memcmp-sve.S | 6
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/memcmp.S | 6
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/memcpy-advsimd.S | 6
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/memcpy-sve.S | 31
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/memcpy.S | 6
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/memrchr.S | 51
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/memset.S | 6
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/stpcpy-sve.S | 2
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/stpcpy.S | 2
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/strchr-mte.S | 58
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/strchr-sve.S | 6
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/strchr.S | 6
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/strchrnul-mte.S | 47
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/strchrnul-sve.S | 2
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/strchrnul.S | 6
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/strcmp-sve.S | 6
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/strcmp.S | 4
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/strcpy-sve.S | 6
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/strcpy.S | 69
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/strlen-mte.S | 41
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/strlen-sve.S | 6
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/strlen.S | 21
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/strncmp-sve.S | 6
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/strncmp.S | 4
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/strnlen-sve.S | 6
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/strnlen.S | 60
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/strrchr-mte.S | 58
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/strrchr-sve.S | 6
-rw-r--r--  contrib/arm-optimized-routines/string/aarch64/strrchr.S | 6
-rw-r--r--  contrib/arm-optimized-routines/string/arm/asmdefs.h | 477
-rw-r--r--  contrib/arm-optimized-routines/string/arm/check-arch.S | 7
-rw-r--r--  contrib/arm-optimized-routines/string/arm/memchr.S | 46
-rw-r--r--  contrib/arm-optimized-routines/string/arm/memcpy.S | 6
-rw-r--r--  contrib/arm-optimized-routines/string/arm/memset.S | 2
-rw-r--r--  contrib/arm-optimized-routines/string/arm/strcmp-armv6m.S | 6
-rw-r--r--  contrib/arm-optimized-routines/string/arm/strcmp.S | 59
-rw-r--r--  contrib/arm-optimized-routines/string/arm/strcpy.c | 2
-rw-r--r--  contrib/arm-optimized-routines/string/arm/strlen-armv6t2.S | 18
-rw-r--r--  contrib/arm-optimized-routines/string/bench/memcpy.c | 4
-rw-r--r--  contrib/arm-optimized-routines/string/bench/memset.c | 2
-rw-r--r--  contrib/arm-optimized-routines/string/bench/strlen.c | 2
-rw-r--r--  contrib/arm-optimized-routines/string/include/benchlib.h | 2
-rw-r--r--  contrib/arm-optimized-routines/string/include/stringlib.h | 4
-rw-r--r--  contrib/arm-optimized-routines/string/test/__mtag_tag_region.c | 2
-rw-r--r--  contrib/arm-optimized-routines/string/test/__mtag_tag_zero_region.c | 2
-rw-r--r--  contrib/arm-optimized-routines/string/test/memchr.c | 2
-rw-r--r--  contrib/arm-optimized-routines/string/test/memcmp.c | 2
-rw-r--r--  contrib/arm-optimized-routines/string/test/memcpy.c | 4
-rw-r--r--  contrib/arm-optimized-routines/string/test/memmove.c | 4
-rw-r--r--  contrib/arm-optimized-routines/string/test/memrchr.c | 2
-rw-r--r--  contrib/arm-optimized-routines/string/test/memset.c | 2
-rw-r--r--  contrib/arm-optimized-routines/string/test/mte.h | 2
-rw-r--r--  contrib/arm-optimized-routines/string/test/stpcpy.c | 4
-rw-r--r--  contrib/arm-optimized-routines/string/test/strchr.c | 2
-rw-r--r--  contrib/arm-optimized-routines/string/test/strchrnul.c | 2
-rw-r--r--  contrib/arm-optimized-routines/string/test/strcmp.c | 4
-rw-r--r--  contrib/arm-optimized-routines/string/test/strcpy.c | 4
-rw-r--r--  contrib/arm-optimized-routines/string/test/stringtest.h | 2
-rw-r--r--  contrib/arm-optimized-routines/string/test/strlen.c | 5
-rw-r--r--  contrib/arm-optimized-routines/string/test/strncmp.c | 4
-rw-r--r--  contrib/arm-optimized-routines/string/test/strnlen.c | 2
-rw-r--r--  contrib/arm-optimized-routines/string/test/strrchr.c | 2
-rw-r--r--  contrib/arm-optimized-routines/string/x86_64/check-arch.S | 2
72 files changed, 954 insertions, 428 deletions
diff --git a/contrib/arm-optimized-routines/string/Dir.mk b/contrib/arm-optimized-routines/string/Dir.mk
index cf3453f7580d..40ff5acc093e 100644
--- a/contrib/arm-optimized-routines/string/Dir.mk
+++ b/contrib/arm-optimized-routines/string/Dir.mk
@@ -1,7 +1,7 @@
# Makefile fragment - requires GNU make
#
# Copyright (c) 2019-2021, Arm Limited.
-# SPDX-License-Identifier: MIT
+# SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
S := $(srcdir)/string
B := build/string
diff --git a/contrib/arm-optimized-routines/string/README.contributors b/contrib/arm-optimized-routines/string/README.contributors
new file mode 100644
index 000000000000..0b4a51b56366
--- /dev/null
+++ b/contrib/arm-optimized-routines/string/README.contributors
@@ -0,0 +1,30 @@
+STYLE REQUIREMENTS
+==================
+
+1. Most code in this sub-directory is expected to be upstreamed into glibc so
+ the GNU Coding Standard and glibc specific conventions should be followed
+ to ease upstreaming.
+
+2. ABI and symbols: the code should be written so it is suitable for inclusion
+ into a libc with minimal changes. This e.g. means that internal symbols
+ should be hidden and in the implementation reserved namespace according to
+ ISO C and POSIX rules. If possible the built shared libraries and static
+ library archives should be usable to override libc symbols at link time (or
+ at runtime via LD_PRELOAD). This requires the symbols to follow the glibc ABI
+ (other than symbol versioning), this cannot be done reliably for static
+ linking so this is a best effort requirement.
+
+3. API: include headers should be suitable for benchmarking and testing code
+ and should not conflict with libc headers.
+
+
+CONTRIBUTION GUIDELINES FOR string SUB-DIRECTORY
+================================================
+1. Code:
+ - The assumptions of the code must be clearly documented.
+
+ - Assembly style should be consistent across different implementations.
+
+
+2. Performance:
+ - Benchmarking is needed on several microarchitectures.
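
(Aside: a minimal C sketch of the symbol-hygiene rule in point 2 above. The helper name and prototype are invented for illustration; a double-underscore name sits in the ISO C implementation-reserved namespace, and hidden visibility keeps it out of the shared object's dynamic symbol table so only the intended libc-ABI symbols are exported.)

#include <stddef.h>

/* Hypothetical internal helper, not exported from the library. */
__attribute__ ((visibility ("hidden")))
size_t __example_internal_len (const char *s);
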
diff --git a/contrib/arm-optimized-routines/string/aarch64/__mtag_tag_region.S b/contrib/arm-optimized-routines/string/aarch64/__mtag_tag_region.S
index 84339f73cf23..207e22950c6d 100644
--- a/contrib/arm-optimized-routines/string/aarch64/__mtag_tag_region.S
+++ b/contrib/arm-optimized-routines/string/aarch64/__mtag_tag_region.S
@@ -1,8 +1,8 @@
/*
* __mtag_tag_region - tag memory
*
- * Copyright (c) 2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2021-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
/* Assumptions:
@@ -15,7 +15,7 @@
* The memory region may remain untagged if tagging is not enabled.
*/
-#include "../asmdefs.h"
+#include "asmdefs.h"
#if __ARM_FEATURE_MEMORY_TAGGING
diff --git a/contrib/arm-optimized-routines/string/aarch64/__mtag_tag_zero_region.S b/contrib/arm-optimized-routines/string/aarch64/__mtag_tag_zero_region.S
index f58364ca6fcb..44b8e0114f42 100644
--- a/contrib/arm-optimized-routines/string/aarch64/__mtag_tag_zero_region.S
+++ b/contrib/arm-optimized-routines/string/aarch64/__mtag_tag_zero_region.S
@@ -1,8 +1,8 @@
/*
* __mtag_tag_zero_region - tag memory and fill it with zero bytes
*
- * Copyright (c) 2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2021-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
/* Assumptions:
@@ -15,7 +15,7 @@
* The memory region may remain untagged if tagging is not enabled.
*/
-#include "../asmdefs.h"
+#include "asmdefs.h"
#if __ARM_FEATURE_MEMORY_TAGGING
diff --git a/contrib/arm-optimized-routines/string/asmdefs.h b/contrib/arm-optimized-routines/string/aarch64/asmdefs.h
index 340b427a505b..069b146f4a69 100644
--- a/contrib/arm-optimized-routines/string/asmdefs.h
+++ b/contrib/arm-optimized-routines/string/aarch64/asmdefs.h
@@ -1,15 +1,13 @@
/*
- * Macros for asm code.
+ * Macros for asm code. AArch64 version.
*
- * Copyright (c) 2019-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#ifndef _ASMDEFS_H
#define _ASMDEFS_H
-#if defined(__aarch64__)
-
/* Branch Target Identitication support. */
#define BTI_C hint 34
#define BTI_J hint 36
@@ -55,19 +53,6 @@ GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_PAC)
.cfi_startproc; \
BTI_C;
-#else
-
-#define END_FILE
-
-#define ENTRY_ALIGN(name, alignment) \
- .global name; \
- .type name,%function; \
- .align alignment; \
- name: \
- .cfi_startproc;
-
-#endif
-
#define ENTRY(name) ENTRY_ALIGN(name, 6)
#define ENTRY_ALIAS(name) \
@@ -95,4 +80,13 @@ GNU_PROPERTY (FEATURE_1_AND, FEATURE_1_BTI|FEATURE_1_PAC)
#define SIZE_ARG(n)
#endif
+/* Compiler supports SVE instructions */
+#ifndef HAVE_SVE
+# if __aarch64__ && (__GNUC__ >= 8 || __clang_major__ >= 5)
+# define HAVE_SVE 1
+# else
+# define HAVE_SVE 0
+# endif
+#endif
+
#endif
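
(Aside: a hedged sketch of how a source file can consume the HAVE_SVE macro added above; the guard body is illustrative. The memcpy-sve.S diff below applies the same gate.)

#include "asmdefs.h"

#if HAVE_SVE
/* SVE body goes here: select the architecture and define the routine,
   e.g. with .arch armv8-a+sve in an assembly source. */
#else
/* Toolchain too old to assemble SVE: the file compiles to nothing and
   a generic implementation is used instead. */
#endif
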
diff --git a/contrib/arm-optimized-routines/string/aarch64/check-arch.S b/contrib/arm-optimized-routines/string/aarch64/check-arch.S
index 5a54242d7de6..131b7fa36ec2 100644
--- a/contrib/arm-optimized-routines/string/aarch64/check-arch.S
+++ b/contrib/arm-optimized-routines/string/aarch64/check-arch.S
@@ -1,8 +1,8 @@
/*
* check ARCH setting.
*
- * Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2020-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#if !__aarch64__
@@ -10,4 +10,4 @@
#endif
/* Include for GNU property notes. */
-#include "../asmdefs.h"
+#include "asmdefs.h"
diff --git a/contrib/arm-optimized-routines/string/aarch64/memchr-mte.S b/contrib/arm-optimized-routines/string/aarch64/memchr-mte.S
index c2e967d1004e..948c3cbc7dd4 100644
--- a/contrib/arm-optimized-routines/string/aarch64/memchr-mte.S
+++ b/contrib/arm-optimized-routines/string/aarch64/memchr-mte.S
@@ -1,8 +1,8 @@
/*
* memchr - find a character in a memory zone
*
- * Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2020-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
/* Assumptions:
@@ -11,7 +11,7 @@
* MTE compatible.
*/
-#include "../asmdefs.h"
+#include "asmdefs.h"
#define srcin x0
#define chrin w1
@@ -23,25 +23,21 @@
#define synd x5
#define shift x6
#define tmp x7
-#define wtmp w7
#define vrepchr v0
#define qdata q1
#define vdata v1
#define vhas_chr v2
-#define vrepmask v3
-#define vend v4
-#define dend d4
+#define vend v3
+#define dend d3
/*
Core algorithm:
-
- For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
- per byte. For even bytes, bits 0-3 are set if the relevant byte matched the
- requested character or the byte is NUL. Bits 4-7 must be zero. Bits 4-7 are
- set likewise for odd bytes so that adjacent bytes can be merged. Since the
- bits in the syndrome reflect the order in which things occur in the original
- string, counting trailing zeros identifies exactly which byte matched. */
+ For each 16-byte chunk we calculate a 64-bit nibble mask value with four bits
+ per byte. We take 4 bits of every comparison byte with shift right and narrow
+ by 4 instruction. Since the bits in the nibble mask reflect the order in
+ which things occur in the original string, counting leading zeros identifies
+ exactly which byte matched. */
ENTRY (__memchr_aarch64_mte)
PTR_ARG (0)
@@ -50,55 +46,53 @@ ENTRY (__memchr_aarch64_mte)
cbz cntin, L(nomatch)
ld1 {vdata.16b}, [src]
dup vrepchr.16b, chrin
- mov wtmp, 0xf00f
- dup vrepmask.8h, wtmp
cmeq vhas_chr.16b, vdata.16b, vrepchr.16b
lsl shift, srcin, 2
- and vhas_chr.16b, vhas_chr.16b, vrepmask.16b
- addp vend.16b, vhas_chr.16b, vhas_chr.16b /* 128->64 */
+ shrn vend.8b, vhas_chr.8h, 4 /* 128->64 */
fmov synd, dend
lsr synd, synd, shift
cbz synd, L(start_loop)
rbit synd, synd
clz synd, synd
- add result, srcin, synd, lsr 2
cmp cntin, synd, lsr 2
+ add result, srcin, synd, lsr 2
csel result, result, xzr, hi
ret
+ .p2align 3
L(start_loop):
sub tmp, src, srcin
- add tmp, tmp, 16
+ add tmp, tmp, 17
subs cntrem, cntin, tmp
- b.ls L(nomatch)
+ b.lo L(nomatch)
/* Make sure that it won't overread by a 16-byte chunk */
- add tmp, cntrem, 15
- tbnz tmp, 4, L(loop32_2)
-
+ tbz cntrem, 4, L(loop32_2)
+ sub src, src, 16
.p2align 4
L(loop32):
- ldr qdata, [src, 16]!
+ ldr qdata, [src, 32]!
cmeq vhas_chr.16b, vdata.16b, vrepchr.16b
umaxp vend.16b, vhas_chr.16b, vhas_chr.16b /* 128->64 */
fmov synd, dend
cbnz synd, L(end)
L(loop32_2):
- ldr qdata, [src, 16]!
- subs cntrem, cntrem, 32
+ ldr qdata, [src, 16]
cmeq vhas_chr.16b, vdata.16b, vrepchr.16b
- b.ls L(end)
+ subs cntrem, cntrem, 32
+ b.lo L(end_2)
umaxp vend.16b, vhas_chr.16b, vhas_chr.16b /* 128->64 */
fmov synd, dend
cbz synd, L(loop32)
+L(end_2):
+ add src, src, 16
L(end):
- and vhas_chr.16b, vhas_chr.16b, vrepmask.16b
- addp vend.16b, vhas_chr.16b, vhas_chr.16b /* 128->64 */
+ shrn vend.8b, vhas_chr.8h, 4 /* 128->64 */
+ sub cntrem, src, srcin
fmov synd, dend
- add tmp, srcin, cntin
- sub cntrem, tmp, src
+ sub cntrem, cntin, cntrem
#ifndef __AARCH64EB__
rbit synd, synd
#endif
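
(Aside: a hedged C rendering, with NEON intrinsics, of the nibble-mask technique described in the comment above; the function name is invented. Each comparison byte, 0x00 or 0xff, contributes four bits to a 64-bit mask via a shift-right-and-narrow by 4, so counting trailing zeros and dividing by 4 yields the index of the first matching byte. Little-endian shown; that is why the assembly applies rbit before clz.)

#include <arm_neon.h>
#include <stdint.h>

/* Returns the index of the first byte equal to c in a 16-byte chunk,
   or -1 if there is no match. */
static inline int first_match_index (const uint8_t *chunk, uint8_t c)
{
  uint8x16_t data = vld1q_u8 (chunk);
  uint8x16_t cmp = vceqq_u8 (data, vdupq_n_u8 (c));  /* 0xff per match */
  /* Shift right and narrow by 4: packs 4 bits of every byte. */
  uint8x8_t nibbles = vshrn_n_u16 (vreinterpretq_u16_u8 (cmp), 4);
  uint64_t synd = vget_lane_u64 (vreinterpret_u64_u8 (nibbles), 0);
  if (synd == 0)
    return -1;
  return __builtin_ctzll (synd) >> 2;  /* 4 mask bits per input byte */
}
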
diff --git a/contrib/arm-optimized-routines/string/aarch64/memchr-sve.S b/contrib/arm-optimized-routines/string/aarch64/memchr-sve.S
index c22e6596f19b..b851cf31f238 100644
--- a/contrib/arm-optimized-routines/string/aarch64/memchr-sve.S
+++ b/contrib/arm-optimized-routines/string/aarch64/memchr-sve.S
@@ -1,11 +1,11 @@
/*
* memchr - find a character in a memory zone
*
- * Copyright (c) 2018-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2018-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
-#include "../asmdefs.h"
+#include "asmdefs.h"
#if __ARM_FEATURE_SVE
/* Assumptions:
diff --git a/contrib/arm-optimized-routines/string/aarch64/memchr.S b/contrib/arm-optimized-routines/string/aarch64/memchr.S
index 353f0d1eac53..fe6cfe2bc0e2 100644
--- a/contrib/arm-optimized-routines/string/aarch64/memchr.S
+++ b/contrib/arm-optimized-routines/string/aarch64/memchr.S
@@ -1,8 +1,8 @@
/*
* memchr - find a character in a memory zone
*
- * Copyright (c) 2014-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2014-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
/* Assumptions:
@@ -11,7 +11,7 @@
* Neon Available.
*/
-#include "../asmdefs.h"
+#include "asmdefs.h"
/* Arguments and results. */
#define srcin x0
diff --git a/contrib/arm-optimized-routines/string/aarch64/memcmp-sve.S b/contrib/arm-optimized-routines/string/aarch64/memcmp-sve.S
index 78c5ecaa4cdc..d52ce4555344 100644
--- a/contrib/arm-optimized-routines/string/aarch64/memcmp-sve.S
+++ b/contrib/arm-optimized-routines/string/aarch64/memcmp-sve.S
@@ -1,11 +1,11 @@
/*
* memcmp - compare memory
*
- * Copyright (c) 2018-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2018-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
-#include "../asmdefs.h"
+#include "asmdefs.h"
#if __ARM_FEATURE_SVE
/* Assumptions:
diff --git a/contrib/arm-optimized-routines/string/aarch64/memcmp.S b/contrib/arm-optimized-routines/string/aarch64/memcmp.S
index 7ca1135edec7..35135e72cc8e 100644
--- a/contrib/arm-optimized-routines/string/aarch64/memcmp.S
+++ b/contrib/arm-optimized-routines/string/aarch64/memcmp.S
@@ -1,7 +1,7 @@
/* memcmp - compare memory
*
- * Copyright (c) 2013-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2013-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
/* Assumptions:
@@ -9,7 +9,7 @@
* ARMv8-a, AArch64, Advanced SIMD, unaligned accesses.
*/
-#include "../asmdefs.h"
+#include "asmdefs.h"
#define src1 x0
#define src2 x1
diff --git a/contrib/arm-optimized-routines/string/aarch64/memcpy-advsimd.S b/contrib/arm-optimized-routines/string/aarch64/memcpy-advsimd.S
index f97f2c3047b9..e6527d0dac2c 100644
--- a/contrib/arm-optimized-routines/string/aarch64/memcpy-advsimd.S
+++ b/contrib/arm-optimized-routines/string/aarch64/memcpy-advsimd.S
@@ -1,8 +1,8 @@
/*
* memcpy - copy memory area
*
- * Copyright (c) 2019-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
/* Assumptions:
@@ -11,7 +11,7 @@
*
*/
-#include "../asmdefs.h"
+#include "asmdefs.h"
#define dstin x0
#define src x1
diff --git a/contrib/arm-optimized-routines/string/aarch64/memcpy-sve.S b/contrib/arm-optimized-routines/string/aarch64/memcpy-sve.S
index f85e8009f3c5..e8a946d7db37 100644
--- a/contrib/arm-optimized-routines/string/aarch64/memcpy-sve.S
+++ b/contrib/arm-optimized-routines/string/aarch64/memcpy-sve.S
@@ -1,8 +1,8 @@
/*
* memcpy - copy memory area
*
- * Copyright (c) 2019-2022, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
/* Assumptions:
@@ -11,9 +11,11 @@
*
*/
-#if __ARM_FEATURE_SVE
+#include "asmdefs.h"
-#include "../asmdefs.h"
+#ifdef HAVE_SVE
+
+.arch armv8-a+sve
#define dstin x0
#define src x1
@@ -55,14 +57,16 @@ ENTRY (__memcpy_aarch64_sve)
cmp count, 128
b.hi L(copy_long)
- cmp count, 32
+ cntb vlen
+ cmp count, vlen, lsl 1
b.hi L(copy32_128)
whilelo p0.b, xzr, count
- cntb vlen
- tbnz vlen, 4, L(vlen128)
- ld1b z0.b, p0/z, [src]
- st1b z0.b, p0, [dstin]
+ whilelo p1.b, vlen, count
+ ld1b z0.b, p0/z, [src, 0, mul vl]
+ ld1b z1.b, p1/z, [src, 1, mul vl]
+ st1b z0.b, p0, [dstin, 0, mul vl]
+ st1b z1.b, p1, [dstin, 1, mul vl]
ret
/* Medium copies: 33..128 bytes. */
@@ -131,14 +135,6 @@ L(copy64_from_end):
stp A_q, B_q, [dstend, -32]
ret
-L(vlen128):
- whilelo p1.b, vlen, count
- ld1b z0.b, p0/z, [src, 0, mul vl]
- ld1b z1.b, p1/z, [src, 1, mul vl]
- st1b z0.b, p0, [dstin, 0, mul vl]
- st1b z1.b, p1, [dstin, 1, mul vl]
- ret
-
/* Large backwards copy for overlapping copies.
Copy 16 bytes and then align srcend to 16-byte alignment. */
L(copy_long_backwards):
@@ -177,4 +173,5 @@ L(return):
ret
END (__memcpy_aarch64_sve)
+
#endif
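
(Aside: a hedged C sketch, via SVE ACLE intrinsics, of the predicated small-copy path added above: two whilelo-governed load/store pairs cover any length up to two vectors with no branch on the exact count. The function name is invented; build with an SVE-enabled -march.)

#include <arm_sve.h>
#include <stddef.h>
#include <stdint.h>

static void copy_up_to_two_vectors (uint8_t *dst, const uint8_t *src,
				    size_t count)
{
  uint64_t vl = svcntb ();                     /* bytes per SVE vector */
  svbool_t p0 = svwhilelt_b8_u64 (0, count);   /* lanes 0..count-1 */
  svbool_t p1 = svwhilelt_b8_u64 (vl, count);  /* lanes for 2nd vector */
  svuint8_t z0 = svld1_u8 (p0, src);
  svuint8_t z1 = svld1_u8 (p1, src + vl);      /* inactive lanes load 0 */
  svst1_u8 (p0, dst, z0);
  svst1_u8 (p1, dst + vl, z1);                 /* inactive lanes: no store */
}
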
diff --git a/contrib/arm-optimized-routines/string/aarch64/memcpy.S b/contrib/arm-optimized-routines/string/aarch64/memcpy.S
index dd254f6f9929..7c0606e2104a 100644
--- a/contrib/arm-optimized-routines/string/aarch64/memcpy.S
+++ b/contrib/arm-optimized-routines/string/aarch64/memcpy.S
@@ -1,8 +1,8 @@
/*
* memcpy - copy memory area
*
- * Copyright (c) 2012-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2012-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
/* Assumptions:
@@ -11,7 +11,7 @@
*
*/
-#include "../asmdefs.h"
+#include "asmdefs.h"
#define dstin x0
#define src x1
diff --git a/contrib/arm-optimized-routines/string/aarch64/memrchr.S b/contrib/arm-optimized-routines/string/aarch64/memrchr.S
index 7b4be847cecb..6418bdf56f41 100644
--- a/contrib/arm-optimized-routines/string/aarch64/memrchr.S
+++ b/contrib/arm-optimized-routines/string/aarch64/memrchr.S
@@ -1,8 +1,8 @@
/*
* memrchr - find last character in a memory zone.
*
- * Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2020-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
/* Assumptions:
@@ -11,7 +11,7 @@
* MTE compatible.
*/
-#include "../asmdefs.h"
+#include "asmdefs.h"
#define srcin x0
#define chrin w1
@@ -23,7 +23,6 @@
#define synd x5
#define shift x6
#define tmp x7
-#define wtmp w7
#define end x8
#define endm1 x9
@@ -31,19 +30,16 @@
#define qdata q1
#define vdata v1
#define vhas_chr v2
-#define vrepmask v3
-#define vend v4
-#define dend d4
+#define vend v3
+#define dend d3
/*
Core algorithm:
-
- For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
- per byte. For even bytes, bits 0-3 are set if the relevant byte matched the
- requested character or the byte is NUL. Bits 4-7 must be zero. Bits 4-7 are
- set likewise for odd bytes so that adjacent bytes can be merged. Since the
- bits in the syndrome reflect the order in which things occur in the original
- string, counting trailing zeros identifies exactly which byte matched. */
+ For each 16-byte chunk we calculate a 64-bit nibble mask value with four bits
+ per byte. We take 4 bits of every comparison byte with shift right and narrow
+ by 4 instruction. Since the bits in the nibble mask reflect the order in
+ which things occur in the original string, counting leading zeros identifies
+ exactly which byte matched. */
ENTRY (__memrchr_aarch64)
PTR_ARG (0)
@@ -53,12 +49,9 @@ ENTRY (__memrchr_aarch64)
cbz cntin, L(nomatch)
ld1 {vdata.16b}, [src]
dup vrepchr.16b, chrin
- mov wtmp, 0xf00f
- dup vrepmask.8h, wtmp
cmeq vhas_chr.16b, vdata.16b, vrepchr.16b
neg shift, end, lsl 2
- and vhas_chr.16b, vhas_chr.16b, vrepmask.16b
- addp vend.16b, vhas_chr.16b, vhas_chr.16b /* 128->64 */
+ shrn vend.8b, vhas_chr.8h, 4 /* 128->64 */
fmov synd, dend
lsl synd, synd, shift
cbz synd, L(start_loop)
@@ -69,34 +62,36 @@ ENTRY (__memrchr_aarch64)
csel result, result, xzr, hi
ret
+ nop
L(start_loop):
- sub tmp, end, src
- subs cntrem, cntin, tmp
+ subs cntrem, src, srcin
b.ls L(nomatch)
/* Make sure that it won't overread by a 16-byte chunk */
- add tmp, cntrem, 15
- tbnz tmp, 4, L(loop32_2)
+ sub cntrem, cntrem, 1
+ tbz cntrem, 4, L(loop32_2)
+ add src, src, 16
- .p2align 4
+ .p2align 5
L(loop32):
- ldr qdata, [src, -16]!
+ ldr qdata, [src, -32]!
cmeq vhas_chr.16b, vdata.16b, vrepchr.16b
umaxp vend.16b, vhas_chr.16b, vhas_chr.16b /* 128->64 */
fmov synd, dend
cbnz synd, L(end)
L(loop32_2):
- ldr qdata, [src, -16]!
+ ldr qdata, [src, -16]
subs cntrem, cntrem, 32
cmeq vhas_chr.16b, vdata.16b, vrepchr.16b
- b.ls L(end)
+ b.lo L(end_2)
umaxp vend.16b, vhas_chr.16b, vhas_chr.16b /* 128->64 */
fmov synd, dend
cbz synd, L(loop32)
+L(end_2):
+ sub src, src, 16
L(end):
- and vhas_chr.16b, vhas_chr.16b, vrepmask.16b
- addp vend.16b, vhas_chr.16b, vhas_chr.16b /* 128->64 */
+ shrn vend.8b, vhas_chr.8h, 4 /* 128->64 */
fmov synd, dend
add tmp, src, 15
diff --git a/contrib/arm-optimized-routines/string/aarch64/memset.S b/contrib/arm-optimized-routines/string/aarch64/memset.S
index 9fcd97579913..553b0fcaefea 100644
--- a/contrib/arm-optimized-routines/string/aarch64/memset.S
+++ b/contrib/arm-optimized-routines/string/aarch64/memset.S
@@ -1,8 +1,8 @@
/*
* memset - fill memory with a constant byte
*
- * Copyright (c) 2012-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2012-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
/* Assumptions:
@@ -11,7 +11,7 @@
*
*/
-#include "../asmdefs.h"
+#include "asmdefs.h"
#define dstin x0
#define val x1
diff --git a/contrib/arm-optimized-routines/string/aarch64/stpcpy-sve.S b/contrib/arm-optimized-routines/string/aarch64/stpcpy-sve.S
index 82dd9717b0a0..5d3f14b86026 100644
--- a/contrib/arm-optimized-routines/string/aarch64/stpcpy-sve.S
+++ b/contrib/arm-optimized-routines/string/aarch64/stpcpy-sve.S
@@ -2,7 +2,7 @@
* stpcpy - copy a string returning pointer to end.
*
* Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#define BUILD_STPCPY 1
diff --git a/contrib/arm-optimized-routines/string/aarch64/stpcpy.S b/contrib/arm-optimized-routines/string/aarch64/stpcpy.S
index 4f62aa462389..155c68d75a7b 100644
--- a/contrib/arm-optimized-routines/string/aarch64/stpcpy.S
+++ b/contrib/arm-optimized-routines/string/aarch64/stpcpy.S
@@ -2,7 +2,7 @@
* stpcpy - copy a string returning pointer to end.
*
* Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#define BUILD_STPCPY 1
diff --git a/contrib/arm-optimized-routines/string/aarch64/strchr-mte.S b/contrib/arm-optimized-routines/string/aarch64/strchr-mte.S
index dcb0e4625870..6ec08f7acc76 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strchr-mte.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strchr-mte.S
@@ -1,8 +1,8 @@
/*
* strchr - find a character in a string
*
- * Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2020-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
/* Assumptions:
@@ -11,7 +11,7 @@
* MTE compatible.
*/
-#include "../asmdefs.h"
+#include "asmdefs.h"
#define srcin x0
#define chrin w1
@@ -19,8 +19,7 @@
#define src x2
#define tmp1 x1
-#define wtmp2 w3
-#define tmp3 x3
+#define tmp2 x3
#define vrepchr v0
#define vdata v1
@@ -28,39 +27,30 @@
#define vhas_nul v2
#define vhas_chr v3
#define vrepmask v4
-#define vrepmask2 v5
-#define vend v6
-#define dend d6
+#define vend v5
+#define dend d5
/* Core algorithm.
For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
- per byte. For even bytes, bits 0-1 are set if the relevant byte matched the
- requested character, bits 2-3 are set if the byte is NUL (or matched), and
- bits 4-7 are not used and must be zero if none of bits 0-3 are set). Odd
- bytes set bits 4-7 so that adjacent bytes can be merged. Since the bits
- in the syndrome reflect the order in which things occur in the original
- string, counting trailing zeros identifies exactly which byte matched. */
+ per byte. Bits 0-1 are set if the relevant byte matched the requested
+ character, bits 2-3 are set if the byte is NUL or matched. Count trailing
+ zeroes gives the position of the matching byte if it is a multiple of 4.
+ If it is not a multiple of 4, there was no match. */
ENTRY (__strchr_aarch64_mte)
PTR_ARG (0)
bic src, srcin, 15
dup vrepchr.16b, chrin
ld1 {vdata.16b}, [src]
- mov wtmp2, 0x3003
- dup vrepmask.8h, wtmp2
+ movi vrepmask.16b, 0x33
cmeq vhas_nul.16b, vdata.16b, 0
cmeq vhas_chr.16b, vdata.16b, vrepchr.16b
- mov wtmp2, 0xf00f
- dup vrepmask2.8h, wtmp2
-
bit vhas_nul.16b, vhas_chr.16b, vrepmask.16b
- and vhas_nul.16b, vhas_nul.16b, vrepmask2.16b
- lsl tmp3, srcin, 2
- addp vend.16b, vhas_nul.16b, vhas_nul.16b /* 128->64 */
-
+ lsl tmp2, srcin, 2
+ shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */
fmov tmp1, dend
- lsr tmp1, tmp1, tmp3
+ lsr tmp1, tmp1, tmp2
cbz tmp1, L(loop)
rbit tmp1, tmp1
@@ -74,28 +64,34 @@ ENTRY (__strchr_aarch64_mte)
.p2align 4
L(loop):
- ldr qdata, [src, 16]!
+ ldr qdata, [src, 16]
+ cmeq vhas_chr.16b, vdata.16b, vrepchr.16b
+ cmhs vhas_nul.16b, vhas_chr.16b, vdata.16b
+ umaxp vend.16b, vhas_nul.16b, vhas_nul.16b
+ fmov tmp1, dend
+ cbnz tmp1, L(end)
+ ldr qdata, [src, 32]!
cmeq vhas_chr.16b, vdata.16b, vrepchr.16b
cmhs vhas_nul.16b, vhas_chr.16b, vdata.16b
umaxp vend.16b, vhas_nul.16b, vhas_nul.16b
fmov tmp1, dend
cbz tmp1, L(loop)
+ sub src, src, 16
+L(end):
#ifdef __AARCH64EB__
bif vhas_nul.16b, vhas_chr.16b, vrepmask.16b
- and vhas_nul.16b, vhas_nul.16b, vrepmask2.16b
- addp vend.16b, vhas_nul.16b, vhas_nul.16b /* 128->64 */
+ shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */
fmov tmp1, dend
#else
bit vhas_nul.16b, vhas_chr.16b, vrepmask.16b
- and vhas_nul.16b, vhas_nul.16b, vrepmask2.16b
- addp vend.16b, vhas_nul.16b, vhas_nul.16b /* 128->64 */
+ shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */
fmov tmp1, dend
rbit tmp1, tmp1
#endif
+ add src, src, 16
clz tmp1, tmp1
- /* Tmp1 is an even multiple of 2 if the target character was
- found first. Otherwise we've found the end of string. */
+ /* Tmp1 is a multiple of 4 if the target character was found. */
tst tmp1, 2
add result, src, tmp1, lsr 2
csel result, result, xzr, eq
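
(Aside: a hedged C decode of the merged syndrome used above. After the 0x33 bit-select, bits 0-1 of each 4-bit group flag a character match and bits 2-3 flag NUL, so the lowest set bit tells which terminated the scan first. Names invented; the assembly performs the same test with rbit/clz and tst tmp1, 2.)

#include <stddef.h>
#include <stdint.h>

/* Precondition: synd != 0, i.e. the chunk held a match or a NUL. */
static const char *decode_syndrome (uint64_t synd, const char *chunk)
{
  int bit = __builtin_ctzll (synd);
  if (bit & 2)                 /* bits 2-3 set: NUL came first */
    return NULL;
  return chunk + (bit >> 2);   /* 4 syndrome bits per source byte */
}
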
diff --git a/contrib/arm-optimized-routines/string/aarch64/strchr-sve.S b/contrib/arm-optimized-routines/string/aarch64/strchr-sve.S
index 13ba9f44f9c5..ff075167bfef 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strchr-sve.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strchr-sve.S
@@ -1,11 +1,11 @@
/*
* strchr/strchrnul - find a character in a string
*
- * Copyright (c) 2018-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2018-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
-#include "../asmdefs.h"
+#include "asmdefs.h"
#if __ARM_FEATURE_SVE
/* Assumptions:
diff --git a/contrib/arm-optimized-routines/string/aarch64/strchr.S b/contrib/arm-optimized-routines/string/aarch64/strchr.S
index 1063cbfd77aa..37193bd947a7 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strchr.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strchr.S
@@ -1,8 +1,8 @@
/*
* strchr - find a character in a string
*
- * Copyright (c) 2014-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2014-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
/* Assumptions:
@@ -11,7 +11,7 @@
* Neon Available.
*/
-#include "../asmdefs.h"
+#include "asmdefs.h"
/* Arguments and results. */
#define srcin x0
diff --git a/contrib/arm-optimized-routines/string/aarch64/strchrnul-mte.S b/contrib/arm-optimized-routines/string/aarch64/strchrnul-mte.S
index 1b0d0a63094c..543ee88bb285 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strchrnul-mte.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strchrnul-mte.S
@@ -1,8 +1,8 @@
/*
* strchrnul - find a character or nul in a string
*
- * Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2020-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
/* Assumptions:
@@ -11,7 +11,7 @@
* MTE compatible.
*/
-#include "../asmdefs.h"
+#include "asmdefs.h"
#define srcin x0
#define chrin w1
@@ -20,38 +20,32 @@
#define src x2
#define tmp1 x1
#define tmp2 x3
-#define tmp2w w3
#define vrepchr v0
#define vdata v1
#define qdata q1
#define vhas_nul v2
#define vhas_chr v3
-#define vrepmask v4
-#define vend v5
-#define dend d5
+#define vend v4
+#define dend d4
-/* Core algorithm:
-
- For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
- per byte. For even bytes, bits 0-3 are set if the relevant byte matched the
- requested character or the byte is NUL. Bits 4-7 must be zero. Bits 4-7 are
- set likewise for odd bytes so that adjacent bytes can be merged. Since the
- bits in the syndrome reflect the order in which things occur in the original
- string, counting trailing zeros identifies exactly which byte matched. */
+/*
+ Core algorithm:
+ For each 16-byte chunk we calculate a 64-bit nibble mask value with four bits
+ per byte. We take 4 bits of every comparison byte with shift right and narrow
+ by 4 instruction. Since the bits in the nibble mask reflect the order in
+ which things occur in the original string, counting leading zeros identifies
+ exactly which byte matched. */
ENTRY (__strchrnul_aarch64_mte)
PTR_ARG (0)
bic src, srcin, 15
dup vrepchr.16b, chrin
ld1 {vdata.16b}, [src]
- mov tmp2w, 0xf00f
- dup vrepmask.8h, tmp2w
cmeq vhas_chr.16b, vdata.16b, vrepchr.16b
cmhs vhas_chr.16b, vhas_chr.16b, vdata.16b
lsl tmp2, srcin, 2
- and vhas_chr.16b, vhas_chr.16b, vrepmask.16b
- addp vend.16b, vhas_chr.16b, vhas_chr.16b /* 128->64 */
+ shrn vend.8b, vhas_chr.8h, 4 /* 128->64 */
fmov tmp1, dend
lsr tmp1, tmp1, tmp2 /* Mask padding bits. */
cbz tmp1, L(loop)
@@ -63,15 +57,22 @@ ENTRY (__strchrnul_aarch64_mte)
.p2align 4
L(loop):
- ldr qdata, [src, 16]!
+ ldr qdata, [src, 16]
+ cmeq vhas_chr.16b, vdata.16b, vrepchr.16b
+ cmhs vhas_chr.16b, vhas_chr.16b, vdata.16b
+ umaxp vend.16b, vhas_chr.16b, vhas_chr.16b
+ fmov tmp1, dend
+ cbnz tmp1, L(end)
+ ldr qdata, [src, 32]!
cmeq vhas_chr.16b, vdata.16b, vrepchr.16b
cmhs vhas_chr.16b, vhas_chr.16b, vdata.16b
umaxp vend.16b, vhas_chr.16b, vhas_chr.16b
fmov tmp1, dend
cbz tmp1, L(loop)
-
- and vhas_chr.16b, vhas_chr.16b, vrepmask.16b
- addp vend.16b, vhas_chr.16b, vhas_chr.16b /* 128->64 */
+ sub src, src, 16
+L(end):
+ shrn vend.8b, vhas_chr.8h, 4 /* 128->64 */
+ add src, src, 16
fmov tmp1, dend
#ifndef __AARCH64EB__
rbit tmp1, tmp1
diff --git a/contrib/arm-optimized-routines/string/aarch64/strchrnul-sve.S b/contrib/arm-optimized-routines/string/aarch64/strchrnul-sve.S
index 428ff1a3d008..0005f9177514 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strchrnul-sve.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strchrnul-sve.S
@@ -2,7 +2,7 @@
* strchrnul - find a character or nul in a string
*
* Copyright (c) 2018-2019, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#define BUILD_STRCHRNUL
diff --git a/contrib/arm-optimized-routines/string/aarch64/strchrnul.S b/contrib/arm-optimized-routines/string/aarch64/strchrnul.S
index a4230d919b47..666e8d0304c1 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strchrnul.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strchrnul.S
@@ -1,8 +1,8 @@
/*
* strchrnul - find a character or nul in a string
*
- * Copyright (c) 2014-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2014-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
/* Assumptions:
@@ -11,7 +11,7 @@
* Neon Available.
*/
-#include "../asmdefs.h"
+#include "asmdefs.h"
/* Arguments and results. */
#define srcin x0
diff --git a/contrib/arm-optimized-routines/string/aarch64/strcmp-sve.S b/contrib/arm-optimized-routines/string/aarch64/strcmp-sve.S
index e6d2da5411ca..eaf909a378f1 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strcmp-sve.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strcmp-sve.S
@@ -1,11 +1,11 @@
/*
* __strcmp_aarch64_sve - compare two strings
*
- * Copyright (c) 2018-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2018-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
-#include "../asmdefs.h"
+#include "asmdefs.h"
#if __ARM_FEATURE_SVE
/* Assumptions:
diff --git a/contrib/arm-optimized-routines/string/aarch64/strcmp.S b/contrib/arm-optimized-routines/string/aarch64/strcmp.S
index 6e77845ae6ff..137a9aa06681 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strcmp.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strcmp.S
@@ -2,7 +2,7 @@
* strcmp - compare two strings
*
* Copyright (c) 2012-2022, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
@@ -12,7 +12,7 @@
* MTE compatible.
*/
-#include "../asmdefs.h"
+#include "asmdefs.h"
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
diff --git a/contrib/arm-optimized-routines/string/aarch64/strcpy-sve.S b/contrib/arm-optimized-routines/string/aarch64/strcpy-sve.S
index f515462e09ae..00e72dce4451 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strcpy-sve.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strcpy-sve.S
@@ -1,11 +1,11 @@
/*
* strcpy/stpcpy - copy a string returning pointer to start/end.
*
- * Copyright (c) 2018-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2018-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
-#include "../asmdefs.h"
+#include "asmdefs.h"
#if __ARM_FEATURE_SVE
/* Assumptions:
diff --git a/contrib/arm-optimized-routines/string/aarch64/strcpy.S b/contrib/arm-optimized-routines/string/aarch64/strcpy.S
index b99e49403be8..97ae37ea4229 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strcpy.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strcpy.S
@@ -1,8 +1,8 @@
/*
* strcpy/stpcpy - copy a string returning pointer to start/end.
*
- * Copyright (c) 2020-2022, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2020-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
/* Assumptions:
@@ -11,7 +11,7 @@
* MTE compatible.
*/
-#include "../asmdefs.h"
+#include "asmdefs.h"
#define dstin x0
#define srcin x1
@@ -22,7 +22,6 @@
#define len x4
#define synd x4
#define tmp x5
-#define wtmp w5
#define shift x5
#define data1 x6
#define dataw1 w6
@@ -32,9 +31,8 @@
#define dataq q0
#define vdata v0
#define vhas_nul v1
-#define vrepmask v2
-#define vend v3
-#define dend d3
+#define vend v2
+#define dend d2
#define dataq2 q1
#ifdef BUILD_STPCPY
@@ -45,34 +43,29 @@
# define IFSTPCPY(X,...)
#endif
-/* Core algorithm:
-
- For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
- per byte. For even bytes, bits 0-3 are set if the relevant byte matched the
- requested character or the byte is NUL. Bits 4-7 must be zero. Bits 4-7 are
- set likewise for odd bytes so that adjacent bytes can be merged. Since the
- bits in the syndrome reflect the order in which things occur in the original
- string, counting trailing zeros identifies exactly which byte matched. */
+/*
+ Core algorithm:
+ For each 16-byte chunk we calculate a 64-bit nibble mask value with four bits
+ per byte. We take 4 bits of every comparison byte with shift right and narrow
+ by 4 instruction. Since the bits in the nibble mask reflect the order in
+ which things occur in the original string, counting leading zeros identifies
+ exactly which byte matched. */
ENTRY (STRCPY)
PTR_ARG (0)
PTR_ARG (1)
bic src, srcin, 15
- mov wtmp, 0xf00f
ld1 {vdata.16b}, [src]
- dup vrepmask.8h, wtmp
cmeq vhas_nul.16b, vdata.16b, 0
lsl shift, srcin, 2
- and vhas_nul.16b, vhas_nul.16b, vrepmask.16b
- addp vend.16b, vhas_nul.16b, vhas_nul.16b
+ shrn vend.8b, vhas_nul.8h, 4
fmov synd, dend
lsr synd, synd, shift
cbnz synd, L(tail)
ldr dataq, [src, 16]!
cmeq vhas_nul.16b, vdata.16b, 0
- and vhas_nul.16b, vhas_nul.16b, vrepmask.16b
- addp vend.16b, vhas_nul.16b, vhas_nul.16b
+ shrn vend.8b, vhas_nul.8h, 4
fmov synd, dend
cbz synd, L(start_loop)
@@ -91,13 +84,10 @@ ENTRY (STRCPY)
IFSTPCPY (add result, dstin, len)
ret
- .p2align 4,,8
L(tail):
rbit synd, synd
clz len, synd
lsr len, len, 2
-
- .p2align 4
L(less16):
tbz len, 3, L(less8)
sub tmp, len, 7
@@ -130,32 +120,37 @@ L(zerobyte):
.p2align 4
L(start_loop):
- sub len, src, srcin
+ sub tmp, srcin, dstin
ldr dataq2, [srcin]
- add dst, dstin, len
+ sub dst, src, tmp
str dataq2, [dstin]
-
- .p2align 5
L(loop):
- str dataq, [dst], 16
- ldr dataq, [src, 16]!
+ str dataq, [dst], 32
+ ldr dataq, [src, 16]
+ cmeq vhas_nul.16b, vdata.16b, 0
+ umaxp vend.16b, vhas_nul.16b, vhas_nul.16b
+ fmov synd, dend
+ cbnz synd, L(loopend)
+ str dataq, [dst, -16]
+ ldr dataq, [src, 32]!
cmeq vhas_nul.16b, vdata.16b, 0
umaxp vend.16b, vhas_nul.16b, vhas_nul.16b
fmov synd, dend
cbz synd, L(loop)
-
- and vhas_nul.16b, vhas_nul.16b, vrepmask.16b
- addp vend.16b, vhas_nul.16b, vhas_nul.16b /* 128->64 */
+ add dst, dst, 16
+L(loopend):
+ shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */
fmov synd, dend
+ sub dst, dst, 31
#ifndef __AARCH64EB__
rbit synd, synd
#endif
clz len, synd
lsr len, len, 2
- sub tmp, len, 15
- ldr dataq, [src, tmp]
- str dataq, [dst, tmp]
- IFSTPCPY (add result, dst, len)
+ add dst, dst, len
+ ldr dataq, [dst, tmp]
+ str dataq, [dst]
+ IFSTPCPY (add result, dst, 15)
ret
END (STRCPY)
diff --git a/contrib/arm-optimized-routines/string/aarch64/strlen-mte.S b/contrib/arm-optimized-routines/string/aarch64/strlen-mte.S
index 7cf41d5c1eac..77235797f7c5 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strlen-mte.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strlen-mte.S
@@ -1,8 +1,8 @@
/*
* strlen - calculate the length of a string.
*
- * Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2020-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
/* Assumptions:
@@ -11,7 +11,7 @@
* MTE compatible.
*/
-#include "../asmdefs.h"
+#include "asmdefs.h"
#define srcin x0
#define result x0
@@ -19,35 +19,26 @@
#define src x1
#define synd x2
#define tmp x3
-#define wtmp w3
#define shift x4
#define data q0
#define vdata v0
#define vhas_nul v1
-#define vrepmask v2
-#define vend v3
-#define dend d3
+#define vend v2
+#define dend d2
/* Core algorithm:
-
- For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
- per byte. For even bytes, bits 0-3 are set if the relevant byte matched the
- requested character or the byte is NUL. Bits 4-7 must be zero. Bits 4-7 are
- set likewise for odd bytes so that adjacent bytes can be merged. Since the
- bits in the syndrome reflect the order in which things occur in the original
- string, counting trailing zeros identifies exactly which byte matched. */
+ Process the string in 16-byte aligned chunks. Compute a 64-bit mask with
+ four bits per byte using the shrn instruction. A count trailing zeros then
+ identifies the first zero byte. */
ENTRY (__strlen_aarch64_mte)
PTR_ARG (0)
bic src, srcin, 15
- mov wtmp, 0xf00f
ld1 {vdata.16b}, [src]
- dup vrepmask.8h, wtmp
cmeq vhas_nul.16b, vdata.16b, 0
lsl shift, srcin, 2
- and vhas_nul.16b, vhas_nul.16b, vrepmask.16b
- addp vend.16b, vhas_nul.16b, vhas_nul.16b /* 128->64 */
+ shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */
fmov synd, dend
lsr synd, synd, shift
cbz synd, L(loop)
@@ -59,19 +50,25 @@ ENTRY (__strlen_aarch64_mte)
.p2align 5
L(loop):
- ldr data, [src, 16]!
+ ldr data, [src, 16]
+ cmeq vhas_nul.16b, vdata.16b, 0
+ umaxp vend.16b, vhas_nul.16b, vhas_nul.16b
+ fmov synd, dend
+ cbnz synd, L(loop_end)
+ ldr data, [src, 32]!
cmeq vhas_nul.16b, vdata.16b, 0
umaxp vend.16b, vhas_nul.16b, vhas_nul.16b
fmov synd, dend
cbz synd, L(loop)
-
- and vhas_nul.16b, vhas_nul.16b, vrepmask.16b
- addp vend.16b, vhas_nul.16b, vhas_nul.16b /* 128->64 */
+ sub src, src, 16
+L(loop_end):
+ shrn vend.8b, vhas_nul.8h, 4 /* 128->64 */
sub result, src, srcin
fmov synd, dend
#ifndef __AARCH64EB__
rbit synd, synd
#endif
+ add result, result, 16
clz tmp, synd
add result, result, tmp, lsr 2
ret
diff --git a/contrib/arm-optimized-routines/string/aarch64/strlen-sve.S b/contrib/arm-optimized-routines/string/aarch64/strlen-sve.S
index 2392493f1a3c..12ebbdba5c93 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strlen-sve.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strlen-sve.S
@@ -1,11 +1,11 @@
/*
* __strlen_aarch64_sve - compute the length of a string
*
- * Copyright (c) 2018-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2018-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
-#include "../asmdefs.h"
+#include "asmdefs.h"
#if __ARM_FEATURE_SVE
/* Assumptions:
diff --git a/contrib/arm-optimized-routines/string/aarch64/strlen.S b/contrib/arm-optimized-routines/string/aarch64/strlen.S
index a1b164a49238..6f6f08f636b2 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strlen.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strlen.S
@@ -1,8 +1,8 @@
/*
* strlen - calculate the length of a string.
*
- * Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2020-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
/* Assumptions:
@@ -11,7 +11,7 @@
* Not MTE compatible.
*/
-#include "../asmdefs.h"
+#include "asmdefs.h"
#define srcin x0
#define len x0
@@ -36,6 +36,7 @@
#define tmp x2
#define tmpw w2
#define synd x3
+#define syndw w3
#define shift x4
/* For the first 32 bytes, NUL detection works on the principle that
@@ -110,7 +111,6 @@ ENTRY (__strlen_aarch64)
add len, len, tmp1, lsr 3
ret
- .p2align 3
/* Look for a NUL byte at offset 16..31 in the string. */
L(bytes16_31):
ldp data1, data2, [srcin, 16]
@@ -138,6 +138,7 @@ L(bytes16_31):
add len, len, tmp1, lsr 3
ret
+ nop
L(loop_entry):
bic src, srcin, 31
@@ -153,18 +154,12 @@ L(loop):
/* Low 32 bits of synd are non-zero if a NUL was found in datav1. */
cmeq maskv.16b, datav1.16b, 0
sub len, src, srcin
- tst synd, 0xffffffff
- b.ne 1f
+ cbnz syndw, 1f
cmeq maskv.16b, datav2.16b, 0
add len, len, 16
1:
/* Generate a bitmask and compute correct byte offset. */
-#ifdef __AARCH64EB__
- bic maskv.8h, 0xf0
-#else
- bic maskv.8h, 0x0f, lsl 8
-#endif
- umaxp maskv.16b, maskv.16b, maskv.16b
+ shrn maskv.8b, maskv.8h, 4
fmov synd, maskd
#ifndef __AARCH64EB__
rbit synd, synd
@@ -173,8 +168,6 @@ L(loop):
add len, len, tmp, lsr 2
ret
- .p2align 4
-
L(page_cross):
bic src, srcin, 31
mov tmpw, 0x0c03
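
(Aside: the "principle" referenced in the truncated comment above is the classic word-at-a-time zero-byte test built from REP8_01/REP8_7f constants like those in strcmp.S and strncmp.S; a hedged C rendering follows. Spurious flags can only appear above a real zero byte, so the lowest flagged byte is always exact, which is all strlen needs.)

#include <stdint.h>

#define REP8_01 0x0101010101010101ULL
#define REP8_7f 0x7f7f7f7f7f7f7f7fULL

/* Nonzero iff x contains a zero byte: equivalent to
   (x - REP8_01) & ~x & 0x8080...80, i.e. the sub/orr/bics idiom. */
static inline uint64_t has_zero_byte (uint64_t x)
{
  return (x - REP8_01) & ~(x | REP8_7f);
}
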
diff --git a/contrib/arm-optimized-routines/string/aarch64/strncmp-sve.S b/contrib/arm-optimized-routines/string/aarch64/strncmp-sve.S
index 234190e245b0..6a9e9f7b6437 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strncmp-sve.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strncmp-sve.S
@@ -1,11 +1,11 @@
/*
* strncmp - compare two strings with limit
*
- * Copyright (c) 2018-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2018-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
-#include "../asmdefs.h"
+#include "asmdefs.h"
#if __ARM_FEATURE_SVE
/* Assumptions:
diff --git a/contrib/arm-optimized-routines/string/aarch64/strncmp.S b/contrib/arm-optimized-routines/string/aarch64/strncmp.S
index 7e636b4a593d..128a10c52bb1 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strncmp.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strncmp.S
@@ -2,7 +2,7 @@
* strncmp - compare two strings
*
* Copyright (c) 2013-2022, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
/* Assumptions:
@@ -11,7 +11,7 @@
* MTE compatible.
*/
-#include "../asmdefs.h"
+#include "asmdefs.h"
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
diff --git a/contrib/arm-optimized-routines/string/aarch64/strnlen-sve.S b/contrib/arm-optimized-routines/string/aarch64/strnlen-sve.S
index 5b9ebf7763bc..6c43dc427da7 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strnlen-sve.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strnlen-sve.S
@@ -1,11 +1,11 @@
/*
* strnlen - calculate the length of a string with limit.
*
- * Copyright (c) 2019-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
-#include "../asmdefs.h"
+#include "asmdefs.h"
#if __ARM_FEATURE_SVE
/* Assumptions:
diff --git a/contrib/arm-optimized-routines/string/aarch64/strnlen.S b/contrib/arm-optimized-routines/string/aarch64/strnlen.S
index 48d2495d2082..f2090a7485a5 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strnlen.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strnlen.S
@@ -1,8 +1,8 @@
/*
* strnlen - calculate the length of a string with limit.
*
- * Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2020-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
/* Assumptions:
@@ -11,7 +11,7 @@
* MTE compatible.
*/
-#include "../asmdefs.h"
+#include "asmdefs.h"
#define srcin x0
#define cntin x1
@@ -20,39 +20,30 @@
#define src x2
#define synd x3
#define shift x4
-#define wtmp w4
#define tmp x4
#define cntrem x5
#define qdata q0
#define vdata v0
#define vhas_chr v1
-#define vrepmask v2
-#define vend v3
-#define dend d3
+#define vend v2
+#define dend d2
/*
Core algorithm:
-
- For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
- per byte. For even bytes, bits 0-3 are set if the relevant byte matched the
- requested character or the byte is NUL. Bits 4-7 must be zero. Bits 4-7 are
- set likewise for odd bytes so that adjacent bytes can be merged. Since the
- bits in the syndrome reflect the order in which things occur in the original
- string, counting trailing zeros identifies exactly which byte matched. */
+ Process the string in 16-byte aligned chunks. Compute a 64-bit mask with
+ four bits per byte using the shrn instruction. A count trailing zeros then
+ identifies the first zero byte. */
ENTRY (__strnlen_aarch64)
PTR_ARG (0)
SIZE_ARG (1)
bic src, srcin, 15
- mov wtmp, 0xf00f
cbz cntin, L(nomatch)
- ld1 {vdata.16b}, [src], 16
- dup vrepmask.8h, wtmp
+ ld1 {vdata.16b}, [src]
cmeq vhas_chr.16b, vdata.16b, 0
lsl shift, srcin, 2
- and vhas_chr.16b, vhas_chr.16b, vrepmask.16b
- addp vend.16b, vhas_chr.16b, vhas_chr.16b /* 128->64 */
+ shrn vend.8b, vhas_chr.8h, 4 /* 128->64 */
fmov synd, dend
lsr synd, synd, shift
cbz synd, L(start_loop)
@@ -64,37 +55,40 @@ L(finish):
csel result, cntin, result, ls
ret
+L(nomatch):
+ mov result, cntin
+ ret
+
L(start_loop):
sub tmp, src, srcin
+ add tmp, tmp, 17
subs cntrem, cntin, tmp
- b.ls L(nomatch)
+ b.lo L(nomatch)
/* Make sure that it won't overread by a 16-byte chunk */
- add tmp, cntrem, 15
- tbnz tmp, 4, L(loop32_2)
-
+ tbz cntrem, 4, L(loop32_2)
+ sub src, src, 16
.p2align 5
L(loop32):
- ldr qdata, [src], 16
+ ldr qdata, [src, 32]!
cmeq vhas_chr.16b, vdata.16b, 0
umaxp vend.16b, vhas_chr.16b, vhas_chr.16b /* 128->64 */
fmov synd, dend
cbnz synd, L(end)
L(loop32_2):
- ldr qdata, [src], 16
+ ldr qdata, [src, 16]
subs cntrem, cntrem, 32
cmeq vhas_chr.16b, vdata.16b, 0
- b.ls L(end)
+ b.lo L(end_2)
umaxp vend.16b, vhas_chr.16b, vhas_chr.16b /* 128->64 */
fmov synd, dend
cbz synd, L(loop32)
-
+L(end_2):
+ add src, src, 16
L(end):
- and vhas_chr.16b, vhas_chr.16b, vrepmask.16b
- addp vend.16b, vhas_chr.16b, vhas_chr.16b /* 128->64 */
- sub src, src, 16
- mov synd, vend.d[0]
+ shrn vend.8b, vhas_chr.8h, 4 /* 128->64 */
sub result, src, srcin
+ fmov synd, dend
#ifndef __AARCH64EB__
rbit synd, synd
#endif
@@ -104,9 +98,5 @@ L(end):
csel result, cntin, result, ls
ret
-L(nomatch):
- mov result, cntin
- ret
-
END (__strnlen_aarch64)
diff --git a/contrib/arm-optimized-routines/string/aarch64/strrchr-mte.S b/contrib/arm-optimized-routines/string/aarch64/strrchr-mte.S
index 1e4fb1a68f7e..bb61ab9ad4e7 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strrchr-mte.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strrchr-mte.S
@@ -1,8 +1,8 @@
/*
* strrchr - find last position of a character in a string.
*
- * Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2020-2023, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
/* Assumptions:
@@ -11,7 +11,7 @@
* MTE compatible.
*/
-#include "../asmdefs.h"
+#include "asmdefs.h"
#define srcin x0
#define chrin w1
@@ -19,7 +19,6 @@
#define src x2
#define tmp x3
-#define wtmp w3
#define synd x3
#define shift x4
#define src_match x4
@@ -31,7 +30,6 @@
#define vhas_nul v2
#define vhas_chr v3
#define vrepmask v4
-#define vrepmask2 v5
#define vend v5
#define dend d5
@@ -47,55 +45,67 @@ ENTRY (__strrchr_aarch64_mte)
PTR_ARG (0)
bic src, srcin, 15
dup vrepchr.16b, chrin
- mov wtmp, 0x3003
- dup vrepmask.8h, wtmp
- tst srcin, 15
- beq L(loop1)
-
- ld1 {vdata.16b}, [src], 16
+ movi vrepmask.16b, 0x33
+ ld1 {vdata.16b}, [src]
cmeq vhas_nul.16b, vdata.16b, 0
cmeq vhas_chr.16b, vdata.16b, vrepchr.16b
- mov wtmp, 0xf00f
- dup vrepmask2.8h, wtmp
bit vhas_nul.16b, vhas_chr.16b, vrepmask.16b
- and vhas_nul.16b, vhas_nul.16b, vrepmask2.16b
- addp vend.16b, vhas_nul.16b, vhas_nul.16b
+ shrn vend.8b, vhas_nul.8h, 4
lsl shift, srcin, 2
fmov synd, dend
lsr synd, synd, shift
lsl synd, synd, shift
ands nul_match, synd, 0xcccccccccccccccc
bne L(tail)
- cbnz synd, L(loop2)
+ cbnz synd, L(loop2_start)
- .p2align 5
+ .p2align 4
L(loop1):
- ld1 {vdata.16b}, [src], 16
+ ldr q1, [src, 16]
+ cmeq vhas_chr.16b, vdata.16b, vrepchr.16b
+ cmhs vhas_nul.16b, vhas_chr.16b, vdata.16b
+ umaxp vend.16b, vhas_nul.16b, vhas_nul.16b
+ fmov synd, dend
+ cbnz synd, L(loop1_end)
+ ldr q1, [src, 32]!
cmeq vhas_chr.16b, vdata.16b, vrepchr.16b
cmhs vhas_nul.16b, vhas_chr.16b, vdata.16b
umaxp vend.16b, vhas_nul.16b, vhas_nul.16b
fmov synd, dend
cbz synd, L(loop1)
-
+ sub src, src, 16
+L(loop1_end):
+ add src, src, 16
cmeq vhas_nul.16b, vdata.16b, 0
+#ifdef __AARCH64EB__
+ bif vhas_nul.16b, vhas_chr.16b, vrepmask.16b
+ shrn vend.8b, vhas_nul.8h, 4
+ fmov synd, dend
+ rbit synd, synd
+#else
bit vhas_nul.16b, vhas_chr.16b, vrepmask.16b
- bic vhas_nul.8h, 0x0f, lsl 8
- addp vend.16b, vhas_nul.16b, vhas_nul.16b
+ shrn vend.8b, vhas_nul.8h, 4
fmov synd, dend
+#endif
ands nul_match, synd, 0xcccccccccccccccc
- beq L(loop2)
-
+ beq L(loop2_start)
L(tail):
sub nul_match, nul_match, 1
and chr_match, synd, 0x3333333333333333
ands chr_match, chr_match, nul_match
- sub result, src, 1
+ add result, src, 15
clz tmp, chr_match
sub result, result, tmp, lsr 2
csel result, result, xzr, ne
ret
.p2align 4
+ nop
+ nop
+L(loop2_start):
+ add src, src, 16
+ bic vrepmask.8h, 0xf0
+
L(loop2):
cmp synd, 0
csel src_match, src, src_match, ne
diff --git a/contrib/arm-optimized-routines/string/aarch64/strrchr-sve.S b/contrib/arm-optimized-routines/string/aarch64/strrchr-sve.S
index d36d69af37fd..825a7384cfc1 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strrchr-sve.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strrchr-sve.S
@@ -1,11 +1,11 @@
/*
* strrchr - find the last of a character in a string
*
- * Copyright (c) 2019-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
-#include "../asmdefs.h"
+#include "asmdefs.h"
#if __ARM_FEATURE_SVE
/* Assumptions:
diff --git a/contrib/arm-optimized-routines/string/aarch64/strrchr.S b/contrib/arm-optimized-routines/string/aarch64/strrchr.S
index 56185ff534e3..bf9cb297b6cb 100644
--- a/contrib/arm-optimized-routines/string/aarch64/strrchr.S
+++ b/contrib/arm-optimized-routines/string/aarch64/strrchr.S
@@ -1,8 +1,8 @@
/*
* strrchr - find last position of a character in a string.
*
- * Copyright (c) 2014-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2014-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
/* Assumptions:
@@ -11,7 +11,7 @@
* Neon Available.
*/
-#include "../asmdefs.h"
+#include "asmdefs.h"
/* Arguments and results. */
#define srcin x0
diff --git a/contrib/arm-optimized-routines/string/arm/asmdefs.h b/contrib/arm-optimized-routines/string/arm/asmdefs.h
new file mode 100644
index 000000000000..e31188804716
--- /dev/null
+++ b/contrib/arm-optimized-routines/string/arm/asmdefs.h
@@ -0,0 +1,477 @@
+/*
+ * Macros for asm code. Arm version.
+ *
+ * Copyright (c) 2019-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
+ */
+
+#ifndef _ASMDEFS_H
+#define _ASMDEFS_H
+
+/* Check whether leaf function PAC signing has been requested in the
+ -mbranch-protect compile-time option. */
+#define LEAF_PROTECT_BIT 2
+
+#ifdef __ARM_FEATURE_PAC_DEFAULT
+# define HAVE_PAC_LEAF \
+ ((__ARM_FEATURE_PAC_DEFAULT & (1 << LEAF_PROTECT_BIT)) && 1)
+#else
+# define HAVE_PAC_LEAF 0
+#endif
+
+/* Provide default parameters for PAC-code handling in leaf-functions. */
+#if HAVE_PAC_LEAF
+# ifndef PAC_LEAF_PUSH_IP
+# define PAC_LEAF_PUSH_IP 1
+# endif
+#else /* !HAVE_PAC_LEAF */
+# undef PAC_LEAF_PUSH_IP
+# define PAC_LEAF_PUSH_IP 0
+#endif /* HAVE_PAC_LEAF */
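
(Aside: a hedged compile-time probe, not part of the header, showing the flag this block tests — per ACLE, the +leaf qualifier of -mbranch-protection=pac-ret sets bit 2 of __ARM_FEATURE_PAC_DEFAULT.)

/* Illustrative only: emits a build message when leaf signing is on. */
#if defined (__ARM_FEATURE_PAC_DEFAULT) \
    && (__ARM_FEATURE_PAC_DEFAULT & (1 << 2))
# pragma message ("leaf functions will be PAC-signed")
#endif
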
+
+#define STACK_ALIGN_ENFORCE 0
+
+/******************************************************************************
+* Implementation of the prologue and epilogue assembler macros and their
+* associated helper functions.
+*
+* These functions add support for the following:
+*
+* - M-profile branch target identification (BTI) landing-pads when compiled
+* with `-mbranch-protection=bti'.
+* - PAC-signing and verification instructions, depending on hardware support
+* and whether the PAC-signing of leaf functions has been requested via the
+* `-mbranch-protection=pac-ret+leaf' compiler argument.
+* - 8-byte stack alignment preservation at function entry, defaulting to the
+* value of STACK_ALIGN_ENFORCE.
+*
+* Notes:
+* - Prologue stack alignment is implemented by detecting a push with an odd
+* number of registers and prepending a dummy register to the list.
+* - If alignment is attempted on a list containing r0, compilation will result
+* in an error.
+* - If alignment is attempted in a list containing r1, r0 will be prepended to
+* the register list and r0 will be restored prior to function return. For
+* functions with non-void return types, this will result in the corruption of
+* the result register.
+* - Stack alignment is enforced via the following helper macro call-chain:
+*
+* {prologue|epilogue} -> _align8 -> _preprocess_reglist ->
+* _preprocess_reglist1 -> {_prologue|_epilogue}
+*
+* - Debug CFI directives are automatically added to prologues and epilogues,
+* assisted by `cfisavelist' and `cfirestorelist', respectively.
+*
+* Arguments:
+* prologue
+* --------
+* - first - If `last' is specified, this serves as the start of the
+* general-purpose register (GPR) range to push onto the stack;
+* otherwise it names the single GPR to push. If omitted, no
+* GPRs are pushed at prologue.
+* - last - If given, specifies the inclusive upper bound of the GPR range.
+* - push_ip - Determines whether IP register is to be pushed to stack at
+* prologue. When pac-signing is requested, this holds the
+* PAC key. Either 1 or 0 to push or not push, respectively.
+* Default behavior: Set to value of PAC_LEAF_PUSH_IP macro.
+* - push_lr - Determines whether to push lr to the stack on function entry.
+* Either 1 or 0 to push or not push, respectively.
+* - align8 - Whether to enforce alignment. Either 1 or 0, with 1 requesting
+* alignment.
+*
+* epilogue
+* --------
+* The epilogue should be called passing the same arguments as those passed to
+* the prologue to ensure the stack is not corrupted on function return.
+*
+* Usage examples:
+*
+* prologue push_ip=1 -> push {ip}
+* epilogue push_ip=1, align8=1 -> pop {r2, ip}
+* prologue push_ip=1, push_lr=1 -> push {ip, lr}
+* epilogue 1 -> pop {r1}
+* prologue 1, align8=1 -> push {r0, r1}
+* epilogue 1, push_ip=1 -> pop {r1, ip}
+* prologue 1, 4 -> push {r1-r4}
+* epilogue 1, 4, push_ip=1 -> pop {r1-r4, ip}
+*
+******************************************************************************/
+
+/* Emit .cfi_restore directives for a consecutive sequence of registers. */
+ .macro cfirestorelist first, last
+ .cfi_restore \last
+ .if \last-\first
+ cfirestorelist \first, \last-1
+ .endif
+ .endm
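+
+/* For illustration: the macro recurses downwards from \last, so
+   "cfirestorelist 4, 6" expands to
+       .cfi_restore 6
+       .cfi_restore 5
+       .cfi_restore 4
+   i.e. one directive per register in the popped range.  */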
+
+/* Emit .cfi_offset directives for a consecutive sequence of registers. */
+ .macro cfisavelist first, last, index=1
+ .cfi_offset \last, -4*(\index)
+ .if \last-\first
+ cfisavelist \first, \last-1, \index+1
+ .endif
+ .endm
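+
+/* For illustration: with the default index of 1, "cfisavelist 4, 6"
+   expands to
+       .cfi_offset 6, -4
+       .cfi_offset 5, -8
+       .cfi_offset 4, -12
+   matching the layout of "push {r4-r6}": the highest register sits at
+   the highest address, closest to the CFA.  */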
+
+.macro _prologue first=-1, last=-1, push_ip=PAC_LEAF_PUSH_IP, push_lr=0
+ .if \push_ip & 1 != \push_ip
+ .error "push_ip may be either 0 or 1"
+ .endif
+ .if \push_lr & 1 != \push_lr
+ .error "push_lr may be either 0 or 1"
+ .endif
+ .if \first != -1
+ .if \last == -1
+ /* Upper-bound not provided: Set upper = lower. */
+ _prologue \first, \first, \push_ip, \push_lr
+ .exitm
+ .endif
+ .endif
+#if HAVE_PAC_LEAF
+# if __ARM_FEATURE_BTI_DEFAULT
+ pacbti ip, lr, sp
+# else
+ pac ip, lr, sp
+# endif /* __ARM_FEATURE_BTI_DEFAULT */
+ .cfi_register 143, 12
+#else
+# if __ARM_FEATURE_BTI_DEFAULT
+ bti
+# endif /* __ARM_FEATURE_BTI_DEFAULT */
+#endif /* HAVE_PAC_LEAF */
+ .if \first != -1
+ .if \last != \first
+ .if \last >= 13
+ .error "SP cannot be in the save list"
+ .endif
+ .if \push_ip
+ .if \push_lr
+ /* Case 1: push register range, ip and lr registers. */
+ push {r\first-r\last, ip, lr}
+ .cfi_adjust_cfa_offset ((\last-\first)+3)*4
+ .cfi_offset 14, -4
+ .cfi_offset 143, -8
+ cfisavelist \first, \last, 3
+ .else // !\push_lr
+ /* Case 2: push register range and ip register. */
+ push {r\first-r\last, ip}
+ .cfi_adjust_cfa_offset ((\last-\first)+2)*4
+ .cfi_offset 143, -4
+ cfisavelist \first, \last, 2
+ .endif
+ .else // !\push_ip
+ .if \push_lr
+ /* Case 3: push register range and lr register. */
+ push {r\first-r\last, lr}
+ .cfi_adjust_cfa_offset ((\last-\first)+2)*4
+ .cfi_offset 14, -4
+ cfisavelist \first, \last, 2
+ .else // !\push_lr
+ /* Case 4: push register range. */
+ push {r\first-r\last}
+ .cfi_adjust_cfa_offset ((\last-\first)+1)*4
+ cfisavelist \first, \last, 1
+ .endif
+ .endif
+ .else // \last == \first
+ .if \push_ip
+ .if \push_lr
+ /* Case 5: push single GP register plus ip and lr registers. */
+ push {r\first, ip, lr}
+ .cfi_adjust_cfa_offset 12
+ .cfi_offset 14, -4
+ .cfi_offset 143, -8
+ cfisavelist \first, \first, 3
+ .else // !\push_lr
+ /* Case 6: push single GP register plus ip register. */
+ push {r\first, ip}
+ .cfi_adjust_cfa_offset 8
+ .cfi_offset 143, -4
+ cfisavelist \first, \first, 2
+ .endif
+ .else // !\push_ip
+ .if \push_lr
+ /* Case 7: push single GP register plus lr register. */
+ push {r\first, lr}
+ .cfi_adjust_cfa_offset 8
+ .cfi_offset 14, -4
+ cfisavelist \first, \first, 2
+ .else // !\push_lr
+ /* Case 8: push single GP register. */
+ push {r\first}
+ .cfi_adjust_cfa_offset 4
+ cfisavelist \first, \first, 1
+ .endif
+ .endif
+ .endif
+ .else // \first == -1
+ .if \push_ip
+ .if \push_lr
+ /* Case 9: push ip and lr registers. */
+ push {ip, lr}
+ .cfi_adjust_cfa_offset 8
+ .cfi_offset 14, -4
+ .cfi_offset 143, -8
+ .else // !\push_lr
+ /* Case 10: push ip register. */
+ push {ip}
+ .cfi_adjust_cfa_offset 4
+ .cfi_offset 143, -4
+ .endif
+ .else // !\push_ip
+ .if \push_lr
+ /* Case 11: push lr register. */
+ push {lr}
+ .cfi_adjust_cfa_offset 4
+ .cfi_offset 14, -4
+ .endif
+ .endif
+ .endif
+.endm
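+
+/* For illustration, "_prologue 4, 7, push_ip=1, push_lr=1" (Case 1)
+   expands to
+       push {r4-r7, ip, lr}
+       .cfi_adjust_cfa_offset 24
+       .cfi_offset 14, -4
+       .cfi_offset 143, -8
+       .cfi_offset 7, -12
+       .cfi_offset 6, -16
+       .cfi_offset 5, -20
+       .cfi_offset 4, -24
+   preceded by pac/pacbti and ".cfi_register 143, 12" on PAC builds.
+   143 is RA_AUTH_CODE, the pseudo register holding the return-address
+   signature, in the Arm DWARF register numbering.  */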
+
+.macro _epilogue first=-1, last=-1, push_ip=PAC_LEAF_PUSH_IP, push_lr=0
+ .if \push_ip & 1 != \push_ip
+ .error "push_ip may be either 0 or 1"
+ .endif
+ .if \push_lr & 1 != \push_lr
+ .error "push_lr may be either 0 or 1"
+ .endif
+ .if \first != -1
+ .if \last == -1
+ /* Upper-bound not provided: Set upper = lower. */
+ _epilogue \first, \first, \push_ip, \push_lr
+ .exitm
+ .endif
+ .if \last != \first
+ .if \last >= 13
+ .error "SP cannot be in the save list"
+ .endif
+ .if \push_ip
+ .if \push_lr
+ /* Case 1: pop register range, ip and lr registers. */
+ pop {r\first-r\last, ip, lr}
+ .cfi_restore 14
+ .cfi_register 143, 12
+ cfirestorelist \first, \last
+ .else // !\push_lr
+ /* Case 2: pop register range and ip register. */
+ pop {r\first-r\last, ip}
+ .cfi_register 143, 12
+ cfirestorelist \first, \last
+ .endif
+ .else // !\push_ip
+ .if \push_lr
+ /* Case 3: pop register range and lr register. */
+ pop {r\first-r\last, lr}
+ .cfi_restore 14
+ cfirestorelist \first, \last
+ .else // !\push_lr
+ /* Case 4: pop register range. */
+ pop {r\first-r\last}
+ cfirestorelist \first, \last
+ .endif
+ .endif
+ .else // \last == \first
+ .if \push_ip
+ .if \push_lr
+ /* Case 5: pop single GP register plus ip and lr registers. */
+ pop {r\first, ip, lr}
+ .cfi_restore 14
+ .cfi_register 143, 12
+ cfirestorelist \first, \first
+ .else // !\push_lr
+ /* Case 6: pop single GP register plus ip register. */
+ pop {r\first, ip}
+ .cfi_register 143, 12
+ cfirestorelist \first, \first
+ .endif
+ .else // !\push_ip
+ .if \push_lr
+ /* Case 7: pop single GP register plus lr register. */
+ pop {r\first, lr}
+ .cfi_restore 14
+ cfirestorelist \first, \first
+ .else // !\push_lr
+ /* Case 8: pop single GP register. */
+ pop {r\first}
+ cfirestorelist \first, \first
+ .endif
+ .endif
+ .endif
+ .else // \first == -1
+ .if \push_ip
+ .if \push_lr
+ /* Case 9: pop ip and lr registers. */
+ pop {ip, lr}
+ .cfi_restore 14
+ .cfi_register 143, 12
+ .else // !\push_lr
+ /* Case 10: pop ip register. */
+ pop {ip}
+ .cfi_register 143, 12
+ .endif
+ .else // !\push_ip
+ .if \push_lr
+ /* Case 11: pop lr register. */
+ pop {lr}
+ .cfi_restore 14
+ .endif
+ .endif
+ .endif
+#if HAVE_PAC_LEAF
+ aut ip, lr, sp
+#endif /* HAVE_PAC_LEAF */
+ bx lr
+.endm
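+
+/* For illustration, the matching "_epilogue 4, 7, push_ip=1, push_lr=1"
+   (Case 1) expands to
+       pop {r4-r7, ip, lr}
+       .cfi_restore 14
+       .cfi_register 143, 12
+       .cfi_restore 7
+       .cfi_restore 6
+       .cfi_restore 5
+       .cfi_restore 4
+   followed by "aut ip, lr, sp" (PAC builds only) and "bx lr".
+   ".cfi_register 143, 12" records that the return-address signature
+   (RA_AUTH_CODE) is back in ip.  */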
+
+/* Clean up expressions in 'last'. */
+.macro _preprocess_reglist1 first:req, last:req, push_ip:req, push_lr:req, reglist_op:req
+ .if \last == 0
+ \reglist_op \first, 0, \push_ip, \push_lr
+ .elseif \last == 1
+ \reglist_op \first, 1, \push_ip, \push_lr
+ .elseif \last == 2
+ \reglist_op \first, 2, \push_ip, \push_lr
+ .elseif \last == 3
+ \reglist_op \first, 3, \push_ip, \push_lr
+ .elseif \last == 4
+ \reglist_op \first, 4, \push_ip, \push_lr
+ .elseif \last == 5
+ \reglist_op \first, 5, \push_ip, \push_lr
+ .elseif \last == 6
+ \reglist_op \first, 6, \push_ip, \push_lr
+ .elseif \last == 7
+ \reglist_op \first, 7, \push_ip, \push_lr
+ .elseif \last == 8
+ \reglist_op \first, 8, \push_ip, \push_lr
+ .elseif \last == 9
+ \reglist_op \first, 9, \push_ip, \push_lr
+ .elseif \last == 10
+ \reglist_op \first, 10, \push_ip, \push_lr
+ .elseif \last == 11
+ \reglist_op \first, 11, \push_ip, \push_lr
+ .else
+ .error "last (\last) out of range"
+ .endif
+.endm
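+
+/* Why the case table: gas substitutes macro arguments textually, so a
+   register reference such as "r\last" needs \last to be a literal
+   digit.  Dispatching on the evaluated value turns an expression like
+   "2-1" into the literal "1" before it is pasted into a reglist.  */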
+
+/* Clean up expressions in 'first'. */
+.macro _preprocess_reglist first:req, last, push_ip=0, push_lr=0, reglist_op:req
+ .ifb \last
+ _preprocess_reglist \first, \first, \push_ip, \push_lr, \reglist_op
+ .else
+ .if \first > \last
+ .error "last (\last) must be at least as great as first (\first)"
+ .endif
+ .if \first == 0
+ _preprocess_reglist1 0, \last, \push_ip, \push_lr, \reglist_op
+ .elseif \first == 1
+ _preprocess_reglist1 1, \last, \push_ip, \push_lr, \reglist_op
+ .elseif \first == 2
+ _preprocess_reglist1 2, \last, \push_ip, \push_lr, \reglist_op
+ .elseif \first == 3
+ _preprocess_reglist1 3, \last, \push_ip, \push_lr, \reglist_op
+ .elseif \first == 4
+ _preprocess_reglist1 4, \last, \push_ip, \push_lr, \reglist_op
+ .elseif \first == 5
+ _preprocess_reglist1 5, \last, \push_ip, \push_lr, \reglist_op
+ .elseif \first == 6
+ _preprocess_reglist1 6, \last, \push_ip, \push_lr, \reglist_op
+ .elseif \first == 7
+ _preprocess_reglist1 7, \last, \push_ip, \push_lr, \reglist_op
+ .elseif \first == 8
+ _preprocess_reglist1 8, \last, \push_ip, \push_lr, \reglist_op
+ .elseif \first == 9
+ _preprocess_reglist1 9, \last, \push_ip, \push_lr, \reglist_op
+ .elseif \first == 10
+ _preprocess_reglist1 10, \last, \push_ip, \push_lr, \reglist_op
+ .elseif \first == 11
+ _preprocess_reglist1 11, \last, \push_ip, \push_lr, \reglist_op
+ .else
+ .error "first (\first) out of range"
+ .endif
+ .endif
+.endm
+
+.macro _align8 first, last, push_ip=0, push_lr=0, reglist_op=_prologue
+ .ifb \first
+ .ifnb \last
+ .error "can't have last (\last) without specifying first"
+ .else // \last also blank
+ .if ((\push_ip + \push_lr) % 2) == 0
+ \reglist_op first=-1, last=-1, push_ip=\push_ip, push_lr=\push_lr
+ .exitm
+ .else // ((\push_ip + \push_lr) % 2) odd
+ _align8 2, 2, \push_ip, \push_lr, \reglist_op
+ .exitm
+ .endif // ((\push_ip + \push_lr) % 2) == 0
+ .endif // .ifnb \last
+ .endif // .ifb \first
+
+ .ifb \last
+ _align8 \first, \first, \push_ip, \push_lr, \reglist_op
+ .else
+ .if \push_ip & 1 <> \push_ip
+ .error "push_ip may be 0 or 1"
+ .endif
+ .if \push_lr & 1 <> \push_lr
+ .error "push_lr may be 0 or 1"
+ .endif
+ .ifeq (\last - \first + \push_ip + \push_lr) % 2
+ .if \first == 0
+ .error "Alignment required and first register is r0"
+ .exitm
+ .endif
+ _preprocess_reglist \first-1, \last, \push_ip, \push_lr, \reglist_op
+ .else
+ _preprocess_reglist \first, \last, \push_ip, \push_lr, \reglist_op
+ .endif
+ .endif
+.endm
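+
+/* For illustration of the parity check, assuming a non-PAC build where
+   push_ip defaults to 0: "prologue 1, align8=1" saves one register and
+   nothing else, so (1 - 1 + 0 + 0) % 2 == 0 flags an odd total count;
+   r0 is prepended and the emitted push becomes "push {r0, r1}", keeping
+   the stack 8-byte aligned (cf. the usage examples above).  */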
+
+.macro prologue first, last, push_ip=PAC_LEAF_PUSH_IP, push_lr=0, align8=STACK_ALIGN_ENFORCE
+ .if \align8
+ _align8 \first, \last, \push_ip, \push_lr, _prologue
+ .else
+ _prologue first=\first, last=\last, push_ip=\push_ip, push_lr=\push_lr
+ .endif
+.endm
+
+.macro epilogue first, last, push_ip=PAC_LEAF_PUSH_IP, push_lr=0, align8=STACK_ALIGN_ENFORCE
+ .if \align8
+ _align8 \first, \last, \push_ip, \push_lr, reglist_op=_epilogue
+ .else
+ _epilogue first=\first, last=\last, push_ip=\push_ip, push_lr=\push_lr
+ .endif
+.endm
+
+#define ENTRY_ALIGN(name, alignment) \
+ .global name; \
+ .type name,%function; \
+ .align alignment; \
+ name: \
+ .fnstart; \
+ .cfi_startproc;
+
+#define ENTRY(name) ENTRY_ALIGN(name, 6)
+
+#define ENTRY_ALIAS(name) \
+ .global name; \
+ .type name,%function; \
+ name:
+
+#if defined (IS_LEAF)
+# define END_UNWIND .cantunwind;
+#else
+# define END_UNWIND
+#endif
+
+#define END(name) \
+ .cfi_endproc; \
+ END_UNWIND \
+ .fnend; \
+ .size name, .-name;
+
+#define L(l) .L ## l
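+
+/* A minimal usage sketch (hypothetical routine, for illustration only):
+
+       #define IS_LEAF            // leaf: END emits .cantunwind
+       #include "asmdefs.h"
+
+       ENTRY (__example_arm)
+       prologue 4, 5, push_ip=HAVE_PAC_LEAF
+       ...                        // body may clobber r4, r5
+       epilogue 4, 5, push_ip=HAVE_PAC_LEAF
+       END (__example_arm)
+
+   The epilogue also emits the final "bx lr" (and "aut" under PAC),
+   mirroring how strlen-armv6t2.S uses these macros later in this
+   diff.  */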
+
+#endif
diff --git a/contrib/arm-optimized-routines/string/arm/check-arch.S b/contrib/arm-optimized-routines/string/arm/check-arch.S
index 1cff9345e343..95516710fb85 100644
--- a/contrib/arm-optimized-routines/string/arm/check-arch.S
+++ b/contrib/arm-optimized-routines/string/arm/check-arch.S
@@ -1,10 +1,13 @@
/*
* check ARCH setting.
*
- * Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2020-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#if !__arm__
# error ARCH setting does not match the compiler.
#endif
+
+/* For attributes that may affect ABI. */
+#include "asmdefs.h"
diff --git a/contrib/arm-optimized-routines/string/arm/memchr.S b/contrib/arm-optimized-routines/string/arm/memchr.S
index 3f1ac4df136f..823d6013eb35 100644
--- a/contrib/arm-optimized-routines/string/arm/memchr.S
+++ b/contrib/arm-optimized-routines/string/arm/memchr.S
@@ -1,8 +1,8 @@
/*
* memchr - scan memory for a character
*
- * Copyright (c) 2010-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2010-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
/*
@@ -23,7 +23,11 @@
@ Removed unneeded cbz from align loop
.syntax unified
+#if __ARM_ARCH >= 8 && __ARM_ARCH_PROFILE == 'M'
+ /* keep config inherited from -march= */
+#else
.arch armv7-a
+#endif
@ this lets us check a flag in a 00/ff byte easily in either endianness
#ifdef __ARMEB__
@@ -32,6 +36,8 @@
#define CHARTSTMASK(c) 1<<(c*8)
#endif
.thumb
+#include "asmdefs.h"
+
@ ---------------------------------------------------------------------------
.thumb_func
@@ -39,11 +45,14 @@
.p2align 4,,15
.global __memchr_arm
.type __memchr_arm,%function
+ .fnstart
+ .cfi_startproc
__memchr_arm:
@ r0 = start of memory to scan
@ r1 = character to look for
@ r2 = length
@ returns r0 = pointer to character or NULL if not found
+ prologue
and r1,r1,#0xff @ Don't think we can trust the caller to actually pass a char
cmp r2,#16 @ If it's short don't bother with anything clever
@@ -64,6 +73,11 @@ __memchr_arm:
10:
@ At this point, we are aligned, we know we have at least 8 bytes to work with
push {r4,r5,r6,r7}
+ .cfi_adjust_cfa_offset 16
+ .cfi_rel_offset 4, 0
+ .cfi_rel_offset 5, 4
+ .cfi_rel_offset 6, 8
+ .cfi_rel_offset 7, 12
orr r1, r1, r1, lsl #8 @ expand the match word across to all bytes
orr r1, r1, r1, lsl #16
bic r4, r2, #7 @ Number of double words to work with
@@ -83,6 +97,11 @@ __memchr_arm:
bne 15b @ (Flags from the subs above) If not run out of bytes then go around again
pop {r4,r5,r6,r7}
+ .cfi_restore 7
+ .cfi_restore 6
+ .cfi_restore 5
+ .cfi_restore 4
+ .cfi_adjust_cfa_offset -16
and r1,r1,#0xff @ Get r1 back to a single character from the expansion above
and r2,r2,#7 @ Leave the count remaining as the number after the double words have been done
@@ -97,16 +116,25 @@ __memchr_arm:
bne 21b @ on r2 flags
40:
+ .cfi_remember_state
movs r0,#0 @ not found
- bx lr
+ epilogue
50:
+ .cfi_restore_state
+ .cfi_remember_state
subs r0,r0,#1 @ found
- bx lr
+ epilogue
60: @ We're here because the fast path found a hit - now we have to track down exactly which word it was
@ r0 points to the start of the double word after the one that was tested
@ r5 has the 00/ff pattern for the first word, r6 has the chained value
+ .cfi_restore_state @ Standard post-prologue state
+ .cfi_adjust_cfa_offset 16
+ .cfi_rel_offset 4, 0
+ .cfi_rel_offset 5, 4
+ .cfi_rel_offset 6, 8
+ .cfi_rel_offset 7, 12
cmp r5, #0
itte eq
moveq r5, r6 @ the end is in the 2nd word
@@ -126,7 +154,15 @@ __memchr_arm:
61:
pop {r4,r5,r6,r7}
+ .cfi_restore 7
+ .cfi_restore 6
+ .cfi_restore 5
+ .cfi_restore 4
+ .cfi_adjust_cfa_offset -16
subs r0,r0,#1
- bx lr
+ epilogue
+ .cfi_endproc
+ .cantunwind
+ .fnend
.size __memchr_arm, . - __memchr_arm
diff --git a/contrib/arm-optimized-routines/string/arm/memcpy.S b/contrib/arm-optimized-routines/string/arm/memcpy.S
index 86e64938edb1..2423cfd69061 100644
--- a/contrib/arm-optimized-routines/string/arm/memcpy.S
+++ b/contrib/arm-optimized-routines/string/arm/memcpy.S
@@ -1,8 +1,8 @@
/*
* memcpy - copy memory area
*
- * Copyright (c) 2013-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2013-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
/*
@@ -17,7 +17,7 @@
*/
-#include "../asmdefs.h"
+#include "asmdefs.h"
.syntax unified
/* This implementation requires ARM state. */
diff --git a/contrib/arm-optimized-routines/string/arm/memset.S b/contrib/arm-optimized-routines/string/arm/memset.S
index 11e927368fd1..487b9d6a8f6c 100644
--- a/contrib/arm-optimized-routines/string/arm/memset.S
+++ b/contrib/arm-optimized-routines/string/arm/memset.S
@@ -2,7 +2,7 @@
* memset - fill memory with a constant
*
* Copyright (c) 2010-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
/*
diff --git a/contrib/arm-optimized-routines/string/arm/strcmp-armv6m.S b/contrib/arm-optimized-routines/string/arm/strcmp-armv6m.S
index b75d4143db57..4d55306810ad 100644
--- a/contrib/arm-optimized-routines/string/arm/strcmp-armv6m.S
+++ b/contrib/arm-optimized-routines/string/arm/strcmp-armv6m.S
@@ -1,10 +1,12 @@
/*
* strcmp for ARMv6-M (optimized for performance, not size)
*
- * Copyright (c) 2014-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2014-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
+#include "asmdefs.h"
+
#if __ARM_ARCH == 6 && __ARM_ARCH_6M__ >= 1
.thumb_func
diff --git a/contrib/arm-optimized-routines/string/arm/strcmp.S b/contrib/arm-optimized-routines/string/arm/strcmp.S
index 51443e343058..74b3d235fb18 100644
--- a/contrib/arm-optimized-routines/string/arm/strcmp.S
+++ b/contrib/arm-optimized-routines/string/arm/strcmp.S
@@ -1,8 +1,8 @@
/*
* strcmp for ARMv7
*
- * Copyright (c) 2012-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2012-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#if __ARM_ARCH >= 7 && __ARM_ARCH_ISA_ARM >= 1
@@ -12,7 +12,7 @@
is sufficiently aligned. Use saturating arithmetic to optimize
the compares. */
-#include "../asmdefs.h"
+#include "asmdefs.h"
/* Build Options:
STRCMP_NO_PRECHECK: Don't run a quick pre-check of the first
@@ -26,6 +26,11 @@
#define STRCMP_NO_PRECHECK 0
+/* Ensure the .cantunwind directive is prepended to .fnend.
+ Leaf functions cannot throw exceptions - EHABI only supports
+ synchronous exceptions. */
+#define IS_LEAF
+
/* This version uses Thumb-2 code. */
.thumb
.syntax unified
@@ -98,8 +103,9 @@
ldrd r4, r5, [sp], #16
.cfi_restore 4
.cfi_restore 5
+ .cfi_adjust_cfa_offset -16
sub result, result, r1, lsr #24
- bx lr
+ epilogue push_ip=HAVE_PAC_LEAF
#else
/* To use the big-endian trick we'd have to reverse all three words.
that's slower than this approach. */
@@ -119,21 +125,15 @@
ldrd r4, r5, [sp], #16
.cfi_restore 4
.cfi_restore 5
+ .cfi_adjust_cfa_offset -16
sub result, result, r1
- bx lr
+ epilogue push_ip=HAVE_PAC_LEAF
#endif
.endm
- .p2align 5
-L(strcmp_start_addr):
-#if STRCMP_NO_PRECHECK == 0
-L(fastpath_exit):
- sub r0, r2, r3
- bx lr
- nop
-#endif
-ENTRY_ALIGN (__strcmp_arm, 0)
+ENTRY(__strcmp_arm)
+ prologue push_ip=HAVE_PAC_LEAF
#if STRCMP_NO_PRECHECK == 0
ldrb r2, [src1]
ldrb r3, [src2]
@@ -143,13 +143,13 @@ ENTRY_ALIGN (__strcmp_arm, 0)
bne L(fastpath_exit)
#endif
strd r4, r5, [sp, #-16]!
- .cfi_def_cfa_offset 16
- .cfi_offset 4, -16
- .cfi_offset 5, -12
+ .cfi_adjust_cfa_offset 16
+ .cfi_rel_offset 4, 0
+ .cfi_rel_offset 5, 4
orr tmp1, src1, src2
strd r6, r7, [sp, #8]
- .cfi_offset 6, -8
- .cfi_offset 7, -4
+ .cfi_rel_offset 6, 8
+ .cfi_rel_offset 7, 12
mvn const_m1, #0
lsl r2, tmp1, #29
cbz r2, L(loop_aligned8)
@@ -318,10 +318,19 @@ L(misaligned_exit):
mov result, tmp1
ldr r4, [sp], #16
.cfi_restore 4
- bx lr
+ .cfi_adjust_cfa_offset -16
+ epilogue push_ip=HAVE_PAC_LEAF
#if STRCMP_NO_PRECHECK == 0
+L(fastpath_exit):
+ .cfi_restore_state
+ .cfi_remember_state
+ sub r0, r2, r3
+ epilogue push_ip=HAVE_PAC_LEAF
+
L(aligned_m1):
+ .cfi_restore_state
+ .cfi_remember_state
add src2, src2, #4
#endif
L(src1_aligned):
@@ -368,9 +377,9 @@ L(overlap3):
/* R6/7 Not used in this sequence. */
.cfi_restore 6
.cfi_restore 7
+ .cfi_adjust_cfa_offset -16
neg result, result
- bx lr
-
+ epilogue push_ip=HAVE_PAC_LEAF
6:
.cfi_restore_state
S2LO data1, data1, #24
@@ -445,7 +454,8 @@ L(strcmp_done_equal):
/* R6/7 not used in this sequence. */
.cfi_restore 6
.cfi_restore 7
- bx lr
+ .cfi_adjust_cfa_offset -16
+ epilogue push_ip=HAVE_PAC_LEAF
L(strcmp_tail):
.cfi_restore_state
@@ -467,8 +477,9 @@ L(strcmp_tail):
/* R6/7 not used in this sequence. */
.cfi_restore 6
.cfi_restore 7
+ .cfi_adjust_cfa_offset -16
sub result, result, data2, lsr #24
- bx lr
+ epilogue push_ip=HAVE_PAC_LEAF
END (__strcmp_arm)
diff --git a/contrib/arm-optimized-routines/string/arm/strcpy.c b/contrib/arm-optimized-routines/string/arm/strcpy.c
index 02cf94ff4be0..b5728a2534f0 100644
--- a/contrib/arm-optimized-routines/string/arm/strcpy.c
+++ b/contrib/arm-optimized-routines/string/arm/strcpy.c
@@ -2,7 +2,7 @@
* strcpy
*
* Copyright (c) 2008-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#if defined (__thumb2__) && !defined (__thumb__)
diff --git a/contrib/arm-optimized-routines/string/arm/strlen-armv6t2.S b/contrib/arm-optimized-routines/string/arm/strlen-armv6t2.S
index 5ad30c941586..5eb8671bdc8b 100644
--- a/contrib/arm-optimized-routines/string/arm/strlen-armv6t2.S
+++ b/contrib/arm-optimized-routines/string/arm/strlen-armv6t2.S
@@ -1,8 +1,8 @@
/*
* strlen - calculate the length of a string
*
- * Copyright (c) 2010-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2010-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#if __ARM_ARCH >= 6 && __ARM_ARCH_ISA_THUMB == 2
@@ -13,7 +13,7 @@
*/
-#include "../asmdefs.h"
+#include "asmdefs.h"
#ifdef __ARMEB__
#define S2LO lsl
@@ -23,6 +23,11 @@
#define S2HI lsl
#endif
+/* Ensure the .cantunwind directive is prepended to .fnend.
+ Leaf functions cannot throw exceptions - EHABI only supports
+ synchronous exceptions. */
+#define IS_LEAF
+
/* This code requires Thumb. */
.thumb
.syntax unified
@@ -41,8 +46,8 @@
#define tmp2 r5
ENTRY (__strlen_armv6t2)
+ prologue 4 5 push_ip=HAVE_PAC_LEAF
pld [srcin, #0]
- strd r4, r5, [sp, #-8]!
bic src, srcin, #7
mvn const_m1, #0
ands tmp1, srcin, #7 /* (8 - bytes) to alignment. */
@@ -92,6 +97,7 @@ L(start_realigned):
beq L(loop_aligned)
L(null_found):
+ .cfi_remember_state
cmp data1a, #0
itt eq
addeq result, result, #4
@@ -100,11 +106,11 @@ L(null_found):
rev data1a, data1a
#endif
clz data1a, data1a
- ldrd r4, r5, [sp], #8
add result, result, data1a, lsr #3 /* Bits -> Bytes. */
- bx lr
+ epilogue 4 5 push_ip=HAVE_PAC_LEAF
L(misaligned8):
+ .cfi_restore_state
ldrd data1a, data1b, [src]
and tmp2, tmp1, #3
rsb result, tmp1, #0
diff --git a/contrib/arm-optimized-routines/string/bench/memcpy.c b/contrib/arm-optimized-routines/string/bench/memcpy.c
index 6bd27633e224..1468663e51cd 100644
--- a/contrib/arm-optimized-routines/string/bench/memcpy.c
+++ b/contrib/arm-optimized-routines/string/bench/memcpy.c
@@ -1,8 +1,8 @@
/*
* memcpy benchmark.
*
- * Copyright (c) 2020-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2020-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#define _GNU_SOURCE
diff --git a/contrib/arm-optimized-routines/string/bench/memset.c b/contrib/arm-optimized-routines/string/bench/memset.c
index 2d6196931307..990e23ba9a36 100644
--- a/contrib/arm-optimized-routines/string/bench/memset.c
+++ b/contrib/arm-optimized-routines/string/bench/memset.c
@@ -2,7 +2,7 @@
* memset benchmark.
*
* Copyright (c) 2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#define _GNU_SOURCE
diff --git a/contrib/arm-optimized-routines/string/bench/strlen.c b/contrib/arm-optimized-routines/string/bench/strlen.c
index b7eee6e905ab..f05d0d5b89e6 100644
--- a/contrib/arm-optimized-routines/string/bench/strlen.c
+++ b/contrib/arm-optimized-routines/string/bench/strlen.c
@@ -2,7 +2,7 @@
* strlen benchmark.
*
* Copyright (c) 2020-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#define _GNU_SOURCE
diff --git a/contrib/arm-optimized-routines/string/include/benchlib.h b/contrib/arm-optimized-routines/string/include/benchlib.h
index 0f2ce2eb6bce..f1bbea388cd2 100644
--- a/contrib/arm-optimized-routines/string/include/benchlib.h
+++ b/contrib/arm-optimized-routines/string/include/benchlib.h
@@ -2,7 +2,7 @@
* Benchmark support functions.
*
* Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <stdint.h>
diff --git a/contrib/arm-optimized-routines/string/include/stringlib.h b/contrib/arm-optimized-routines/string/include/stringlib.h
index 85e630279ceb..f41a46446888 100644
--- a/contrib/arm-optimized-routines/string/include/stringlib.h
+++ b/contrib/arm-optimized-routines/string/include/stringlib.h
@@ -1,8 +1,8 @@
/*
* Public API.
*
- * Copyright (c) 2019-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <stddef.h>
diff --git a/contrib/arm-optimized-routines/string/test/__mtag_tag_region.c b/contrib/arm-optimized-routines/string/test/__mtag_tag_region.c
index d8c02d92d626..c45fa6662a77 100644
--- a/contrib/arm-optimized-routines/string/test/__mtag_tag_region.c
+++ b/contrib/arm-optimized-routines/string/test/__mtag_tag_region.c
@@ -2,7 +2,7 @@
* __mtag_tag_region test.
*
* Copyright (c) 2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#if __ARM_FEATURE_MEMORY_TAGGING && WANT_MTE_TEST
diff --git a/contrib/arm-optimized-routines/string/test/__mtag_tag_zero_region.c b/contrib/arm-optimized-routines/string/test/__mtag_tag_zero_region.c
index 221c223a2f31..a4a7861620d1 100644
--- a/contrib/arm-optimized-routines/string/test/__mtag_tag_zero_region.c
+++ b/contrib/arm-optimized-routines/string/test/__mtag_tag_zero_region.c
@@ -2,7 +2,7 @@
* __mtag_tag_zero_region test.
*
* Copyright (c) 2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#if __ARM_FEATURE_MEMORY_TAGGING && WANT_MTE_TEST
diff --git a/contrib/arm-optimized-routines/string/test/memchr.c b/contrib/arm-optimized-routines/string/test/memchr.c
index 0ff77f5710bf..c6a94481c0ad 100644
--- a/contrib/arm-optimized-routines/string/test/memchr.c
+++ b/contrib/arm-optimized-routines/string/test/memchr.c
@@ -2,7 +2,7 @@
* memchr test.
*
* Copyright (c) 2019-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <stdint.h>
diff --git a/contrib/arm-optimized-routines/string/test/memcmp.c b/contrib/arm-optimized-routines/string/test/memcmp.c
index 7a7cf9cff35a..f9236b83a60d 100644
--- a/contrib/arm-optimized-routines/string/test/memcmp.c
+++ b/contrib/arm-optimized-routines/string/test/memcmp.c
@@ -2,7 +2,7 @@
* memcmp test.
*
* Copyright (c) 2019-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <stdint.h>
diff --git a/contrib/arm-optimized-routines/string/test/memcpy.c b/contrib/arm-optimized-routines/string/test/memcpy.c
index 21b35b990b9b..fa15a95b2bda 100644
--- a/contrib/arm-optimized-routines/string/test/memcpy.c
+++ b/contrib/arm-optimized-routines/string/test/memcpy.c
@@ -1,8 +1,8 @@
/*
* memcpy test.
*
- * Copyright (c) 2019-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <stdint.h>
diff --git a/contrib/arm-optimized-routines/string/test/memmove.c b/contrib/arm-optimized-routines/string/test/memmove.c
index 12a70574c7c5..5d509c03affa 100644
--- a/contrib/arm-optimized-routines/string/test/memmove.c
+++ b/contrib/arm-optimized-routines/string/test/memmove.c
@@ -1,8 +1,8 @@
/*
* memmove test.
*
- * Copyright (c) 2019-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <stdint.h>
diff --git a/contrib/arm-optimized-routines/string/test/memrchr.c b/contrib/arm-optimized-routines/string/test/memrchr.c
index adf96f049cc9..4171a56daefd 100644
--- a/contrib/arm-optimized-routines/string/test/memrchr.c
+++ b/contrib/arm-optimized-routines/string/test/memrchr.c
@@ -2,7 +2,7 @@
* memchr test.
*
* Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#ifndef _GNU_SOURCE
diff --git a/contrib/arm-optimized-routines/string/test/memset.c b/contrib/arm-optimized-routines/string/test/memset.c
index f1721442dbaf..5543f44bb026 100644
--- a/contrib/arm-optimized-routines/string/test/memset.c
+++ b/contrib/arm-optimized-routines/string/test/memset.c
@@ -2,7 +2,7 @@
* memset test.
*
* Copyright (c) 2019-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <stdint.h>
diff --git a/contrib/arm-optimized-routines/string/test/mte.h b/contrib/arm-optimized-routines/string/test/mte.h
index e67cbd9d2d40..40b0ecf6c194 100644
--- a/contrib/arm-optimized-routines/string/test/mte.h
+++ b/contrib/arm-optimized-routines/string/test/mte.h
@@ -2,7 +2,7 @@
* Memory tagging testing code.
*
* Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#ifndef __TEST_MTE_H
diff --git a/contrib/arm-optimized-routines/string/test/stpcpy.c b/contrib/arm-optimized-routines/string/test/stpcpy.c
index 1b61245bf8df..0300892a1f3c 100644
--- a/contrib/arm-optimized-routines/string/test/stpcpy.c
+++ b/contrib/arm-optimized-routines/string/test/stpcpy.c
@@ -1,8 +1,8 @@
/*
* stpcpy test.
*
- * Copyright (c) 2019-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#ifndef _GNU_SOURCE
diff --git a/contrib/arm-optimized-routines/string/test/strchr.c b/contrib/arm-optimized-routines/string/test/strchr.c
index f3ae982ef0ad..66180acfb57c 100644
--- a/contrib/arm-optimized-routines/string/test/strchr.c
+++ b/contrib/arm-optimized-routines/string/test/strchr.c
@@ -2,7 +2,7 @@
* strchr test.
*
* Copyright (c) 2019-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <stdint.h>
diff --git a/contrib/arm-optimized-routines/string/test/strchrnul.c b/contrib/arm-optimized-routines/string/test/strchrnul.c
index 6c30ab2123f1..aad0bf59da66 100644
--- a/contrib/arm-optimized-routines/string/test/strchrnul.c
+++ b/contrib/arm-optimized-routines/string/test/strchrnul.c
@@ -2,7 +2,7 @@
* strchrnul test.
*
* Copyright (c) 2019-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#ifndef _GNU_SOURCE
diff --git a/contrib/arm-optimized-routines/string/test/strcmp.c b/contrib/arm-optimized-routines/string/test/strcmp.c
index 0262397dec88..4aa95f4f2f1d 100644
--- a/contrib/arm-optimized-routines/string/test/strcmp.c
+++ b/contrib/arm-optimized-routines/string/test/strcmp.c
@@ -1,8 +1,8 @@
/*
* strcmp test.
*
- * Copyright (c) 2019-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <stdint.h>
diff --git a/contrib/arm-optimized-routines/string/test/strcpy.c b/contrib/arm-optimized-routines/string/test/strcpy.c
index 6de3bed590ef..af297f90396a 100644
--- a/contrib/arm-optimized-routines/string/test/strcpy.c
+++ b/contrib/arm-optimized-routines/string/test/strcpy.c
@@ -1,8 +1,8 @@
/*
* strcpy test.
*
- * Copyright (c) 2019-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <stdint.h>
diff --git a/contrib/arm-optimized-routines/string/test/stringtest.h b/contrib/arm-optimized-routines/string/test/stringtest.h
index fe855fc21736..6bb7e1fdfeca 100644
--- a/contrib/arm-optimized-routines/string/test/stringtest.h
+++ b/contrib/arm-optimized-routines/string/test/stringtest.h
@@ -2,7 +2,7 @@
* Common string test code.
*
* Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <ctype.h>
diff --git a/contrib/arm-optimized-routines/string/test/strlen.c b/contrib/arm-optimized-routines/string/test/strlen.c
index 6278380f26df..47ef3dcf0ef0 100644
--- a/contrib/arm-optimized-routines/string/test/strlen.c
+++ b/contrib/arm-optimized-routines/string/test/strlen.c
@@ -1,15 +1,14 @@
/*
* strlen test.
*
- * Copyright (c) 2019-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
-#include <sys/mman.h>
#include <limits.h>
#include "mte.h"
#include "stringlib.h"
diff --git a/contrib/arm-optimized-routines/string/test/strncmp.c b/contrib/arm-optimized-routines/string/test/strncmp.c
index f8c2167f8f1e..4bbab6f93450 100644
--- a/contrib/arm-optimized-routines/string/test/strncmp.c
+++ b/contrib/arm-optimized-routines/string/test/strncmp.c
@@ -1,8 +1,8 @@
/*
* strncmp test.
*
- * Copyright (c) 2019-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * Copyright (c) 2019-2022, Arm Limited.
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <stdint.h>
diff --git a/contrib/arm-optimized-routines/string/test/strnlen.c b/contrib/arm-optimized-routines/string/test/strnlen.c
index 0dea00eaf8e3..a800fd1993cd 100644
--- a/contrib/arm-optimized-routines/string/test/strnlen.c
+++ b/contrib/arm-optimized-routines/string/test/strnlen.c
@@ -2,7 +2,7 @@
* strnlen test.
*
* Copyright (c) 2019-2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#ifndef _GNU_SOURCE
diff --git a/contrib/arm-optimized-routines/string/test/strrchr.c b/contrib/arm-optimized-routines/string/test/strrchr.c
index fedbdc52fcc1..580ca497f8a4 100644
--- a/contrib/arm-optimized-routines/string/test/strrchr.c
+++ b/contrib/arm-optimized-routines/string/test/strrchr.c
@@ -2,7 +2,7 @@
* strrchr test.
*
* Copyright (c) 2019-2021, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#include <stdint.h>
diff --git a/contrib/arm-optimized-routines/string/x86_64/check-arch.S b/contrib/arm-optimized-routines/string/x86_64/check-arch.S
index 26ade0a0c7db..5afcf7b7ee54 100644
--- a/contrib/arm-optimized-routines/string/x86_64/check-arch.S
+++ b/contrib/arm-optimized-routines/string/x86_64/check-arch.S
@@ -2,7 +2,7 @@
* check ARCH setting.
*
* Copyright (c) 2020, Arm Limited.
- * SPDX-License-Identifier: MIT
+ * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
*/
#if !__x86_64__