aboutsummaryrefslogtreecommitdiff
path: root/sys
diff options
context:
space:
mode:
Diffstat (limited to 'sys')
-rw-r--r--sys/amd64/amd64/support.S66
-rw-r--r--sys/conf/files.amd641
2 files changed, 66 insertions, 1 deletions
diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S
index b623fba277db..4c0f7da87ef8 100644
--- a/sys/amd64/amd64/support.S
+++ b/sys/amd64/amd64/support.S
@@ -697,6 +697,72 @@ ENTRY(fillw)
ret
END(fillw)
+/*
+ * strlen(string)
+ * %rdi
+ *
+ * Uses the ((x - 0x01....01) & ~x & 0x80....80) trick.
+ *
+ * 0x01....01 is replaced with 0x0 - 0x01....01 so that it can be added
+ * with leaq.
+ *
+ * For a description see either:
+ * - "Hacker's Delight" by Henry S. Warren, Jr.
+ * - "Optimizing subroutines in assembly language: An optimization guide for x86 platforms"
+ * by Agner Fog
+ *
+ * The latter contains a 32-bit variant of the same algorithm coded in assembly for i386.
+ */
+ENTRY(strlen)
+ PUSH_FRAME_POINTER
+ movabsq $0xfefefefefefefeff,%r8 /* %r8 = 0 - 0x0101010101010101 */
+ movabsq $0x8080808080808080,%r9 /* %r9 = top bit of every byte */
+
+ movq %rdi,%r10 /* %r10 = original pointer, kept for the final subtraction */
+ movq %rdi,%rcx /* %rcx = pointer copy; its low bits become the shift count */
+ testb $7,%dil
+ jz 2f /* already 8-byte aligned: enter the loop without advancing */
+
+ /*
+ * Handle misaligned reads: align to 8 and fill
+ * the spurious bytes.
+ */
+ andq $~7,%rdi /* round down to the enclosing aligned 8-byte word */
+ movq (%rdi),%r11
+ shlq $3,%rcx /* byte offset -> bit offset */
+ movq $-1,%rdx
+ shlq %cl,%rdx /* only the low 6 bits of %cl count, i.e. (addr & 7) * 8 */
+ notq %rdx /* %rdx = 0xff in each byte located before the string start */
+ orq %rdx,%r11 /* force those bytes non-zero so they cannot match */
+
+ leaq (%r11,%r8),%rcx /* x - 0x01....01 */
+ notq %r11 /* ~x */
+ andq %r11,%rcx
+ andq %r9,%rcx /* non-zero iff the word holds a zero byte */
+ jnz 3f /* terminator found in the first word; else fall through to 1: */
+
+ /*
+ * Main loop.
+ */
+ ALIGN_TEXT
+1:
+ leaq 8(%rdi),%rdi /* advance to the next 8-byte word */
+2:
+ movq (%rdi),%r11 /* whole-word load: may read bytes past the terminator */
+ leaq (%r11,%r8),%rcx /* x - 0x01....01 */
+ notq %r11 /* ~x */
+ andq %r11,%rcx
+ andq %r9,%rcx /* non-zero iff the word holds a zero byte */
+ jz 1b /* no zero byte in this word: keep scanning */
+3:
+ bsfq %rcx,%rcx /* lowest set flag bit marks the first zero byte */
+ shrq $3,%rcx /* bit index -> byte index within the word */
+ leaq (%rcx,%rdi),%rax /* %rax = address of the terminating NUL */
+ subq %r10,%rax /* length = NUL address - original pointer */
+ POP_FRAME_POINTER
+ ret
+END(strlen)
+
/*****************************************************************************/
/* copyout and fubyte family */
/*****************************************************************************/
diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64
index 98a78a8b1ef9..395f501198f8 100644
--- a/sys/conf/files.amd64
+++ b/sys/conf/files.amd64
@@ -390,7 +390,6 @@ isa/syscons_isa.c optional sc
isa/vga_isa.c optional vga
kern/imgact_aout.c optional compat_aout
kern/link_elf_obj.c standard
-libkern/strlen.c standard
#
# IA32 binary support
#