aboutsummaryrefslogtreecommitdiff
path: root/lib/libc/amd64/string/strlen.S
diff options
context:
space:
mode:
Diffstat (limited to 'lib/libc/amd64/string/strlen.S')
-rw-r--r-- lib/libc/amd64/string/strlen.S | 58
1 files changed, 54 insertions, 4 deletions
diff --git a/lib/libc/amd64/string/strlen.S b/lib/libc/amd64/string/strlen.S
index 1d2428e3420e..cc248af001ac 100644
--- a/lib/libc/amd64/string/strlen.S
+++ b/lib/libc/amd64/string/strlen.S
@@ -1,10 +1,15 @@
-/*
+/*-
* Written by Mateusz Guzik <mjg@freebsd.org>
+ * Copyright (c) 2023 The FreeBSD Foundation
+ *
+ * Portions of this software were developed by Robert Clausecker
+ * <fuz@FreeBSD.org> under sponsorship from the FreeBSD Foundation.
+ *
* Public domain.
*/
#include <machine/asm.h>
-__FBSDID("$FreeBSD$");
+#include "amd64_archlevel.h"
/*
* Note: this routine was written with kernel use in mind (read: no simd),
@@ -14,6 +19,11 @@ __FBSDID("$FreeBSD$");
#define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */
+ARCHFUNCS(strlen) /* list the strlen variants in this file; NOTE(review): selection rules live in amd64_archlevel.h, not visible here */
+ ARCHFUNC(strlen, scalar) /* the variant defined by ARCHENTRY(strlen, scalar): general purpose registers only */
+ ARCHFUNC(strlen, baseline) /* the variant defined by ARCHENTRY(strlen, baseline): uses xmm registers */
+ENDARCHFUNCS(strlen)
+
/*
* strlen(string)
* %rdi
@@ -30,7 +40,7 @@ __FBSDID("$FreeBSD$");
*
* The latter contains a 32-bit variant of the same algorithm coded in assembly for i386.
*/
-ENTRY(strlen)
+ARCHENTRY(strlen, scalar)
movabsq $0xfefefefefefefeff,%r8
movabsq $0x8080808080808080,%r9
@@ -76,6 +86,46 @@ ENTRY(strlen)
leaq (%rcx,%rdi),%rax
subq %r10,%rax
ret
-END(strlen)
+ARCHEND(strlen, scalar)
+
+ARCHENTRY(strlen, baseline) /* in: %rdi = string; out: %rax = length; clobbers %rcx, %rsi, %rdi, %xmm1, flags; scans 16 bytes per compare */
+ mov %rdi, %rcx # keep the unaligned pointer; rdi is rounded down next
+ pxor %xmm1, %xmm1 # xmm1 = 0, the NUL pattern to compare against
+ and $~0xf, %rdi # align string: round rdi down to a 16 byte boundary
+ pcmpeqb (%rdi), %xmm1 # compare head (with junk before string): 0xff in each byte that is NUL
+ mov %rcx, %rsi # string pointer copy for later (final length computation)
+ and $0xf, %ecx # amount of bytes the original string pointer is past 16 byte alignment (0..15)
+ pmovmskb %xmm1, %eax # one mask bit per byte: bit i set if byte i of the chunk is NUL
+ add $32, %rdi # advance to next iteration: rdi = head chunk + 32, as the loop expects
+ shr %cl, %eax # clear out matches in junk bytes; bit 0 now corresponds to the first string byte
+ test %eax, %eax # any match? (can't use ZF from SHR as CL=0 is possible)
+ jnz 2f # NUL found within the head chunk
+
+ ALIGN_TEXT
+1: pxor %xmm1, %xmm1 # loop invariant: -16(%rdi) is the next unchecked chunk
+ pcmpeqb -16(%rdi), %xmm1 # find NUL bytes
+ pmovmskb %xmm1, %eax # eax = bitmask of NUL positions in that chunk
+ test %eax, %eax # were any NUL bytes present?
+ jnz 3f # yes; rdi is already 16 past the matching chunk
+
+ /* the same unrolled once more */
+ pxor %xmm1, %xmm1
+ pcmpeqb (%rdi), %xmm1 # this time check the chunk at (%rdi)
+ pmovmskb %xmm1, %eax
+ add $32, %rdi # advance to next iteration
+ test %eax, %eax
+ jz 1b # no NUL in either chunk: keep scanning
+
+ /* match found in loop body */
+ sub $16, %rdi # undo half the advancement: rdi = matching chunk + 16, as label 3 expects
+3: tzcnt %eax, %eax # find the first NUL byte: its index within the matching chunk
+ sub %rsi, %rdi # string length until beginning of (%rdi), i.e. 16 past the matching chunk
+ lea -16(%rdi, %rax, 1), %rax # that plus loc. of NUL byte, minus the 16 byte bias: full string length
+ ret
+
+ /* match found in head */
+2: tzcnt %eax, %eax # compute string length: mask was shifted so the lowest set bit index is the length
+ ret
+ARCHEND(strlen, baseline)
.section .note.GNU-stack,"",%progbits