| author    | Mateusz Guzik <mjg@FreeBSD.org> | 2021-01-31 15:46:18 +0000 |
| --- | --- | --- |
| committer | Mateusz Guzik <mjg@FreeBSD.org> | 2021-02-04 17:59:10 +0000 |
| commit    | 88a580ebeea10e7bf9eb823dbbf48349eb8da540 (patch) | |
| tree      | 35b6e9ccab07dba004e52a9332ccc6b6f913dc10 | |
| parent    | 068f2402d28bf2ddee884c83be1dff3a7631569b (diff) | |
amd64: move memcmp checks upfront
This is a tradeoff that saves jumps for smaller sizes while making the
8-16 byte range slower (roughly in line with the other cases).
Tested with glibc test suite.
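The control-flow change is easier to follow outside the label-heavy assembly. The C sketch below is purely illustrative: the goto labels mirror the diff's numeric labels and plain memcmp() calls stand in for the fixed-width compare paths, so it is not the committed code, only the ordering of the size checks.

```c
#include <stdio.h>
#include <string.h>

/*
 * Sketch only: memcmp() stands in for the fixed-width compare paths
 * (labels 100816/100408/100204 in the diff); the point is the order
 * of the size checks, not the comparisons themselves.
 */

/* Old layout: fall straight into the 8..16 path and branch downward
 * for smaller sizes, so e.g. n == 3 takes two taken jumps before it
 * reaches its path. */
static int
small_cmp_old(const void *s1, const void *s2, size_t n)
{
	const unsigned char *a = s1, *b = s2;

	if (n < 8)
		goto l100408;
	return (memcmp(a, b, n));		/* 8..16, on the fall-through */
l100408:
	if (n < 4)
		goto l100204;
	return (memcmp(a, b, n));		/* 4..7 */
l100204:
	if (n < 2)
		goto l100001;
	return (memcmp(a, b, n));		/* 2..3 */
l100001:
	if (n < 1)
		return (0);
	return ((int)a[0] - (int)b[0]);		/* single byte */
}

/* New layout: all size checks are done upfront, so n == 3 reaches the
 * 2..4 path with a single taken jump, while n == 8 now jumps to the
 * 5..8 path instead of falling through. */
static int
small_cmp_new(const void *s1, const void *s2, size_t n)
{
	const unsigned char *a = s1, *b = s2;

	if (n > 8)
		goto l100816;
	if (n > 4)
		goto l100408;
	if (n >= 2)
		goto l100204;
	if (n < 1)
		return (0);
	return ((int)a[0] - (int)b[0]);		/* single byte, fall-through */
l100816:
	return (memcmp(a, b, n));		/* 9..16 */
l100408:
	return (memcmp(a, b, n));		/* 5..8 */
l100204:
	return (memcmp(a, b, n));		/* 2..4 */
}

int
main(void)
{
	const char x[] = "sys", y[] = "sys";

	printf("%d %d\n", small_cmp_old(x, y, 3), small_cmp_new(x, y, 3));
	return (0);
}
```

The old layout keeps the 8-16 byte body on the fall-through path; the new one spends those straight-line slots on the size checks, which is the tradeoff quantified below.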
For example, size 3 (the most common size with the vfs namecache), in ops/s:
before: 407086026
after: 461391995
The regressed 8-16 range (with 8 as the example):
before: 540850489
after: 461671032
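The commit message does not describe the measurement harness. A user-space loop of roughly this shape (an assumption, not the author's setup) is one way to obtain ops/s figures like the ones above; the barrier keeps the compiler from hoisting the call out of the loop.

```c
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <time.h>

/* Hypothetical microbenchmark: not the harness used for this commit. */
int
main(void)
{
	const uint64_t iters = 100000000ULL;
	char a[16] = "vfs", b[16] = "vfs";
	struct timespec ts, te;
	volatile int sink = 0;
	double secs;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	for (uint64_t i = 0; i < iters; i++) {
		/* Compiler barrier so the call is re-issued every iteration. */
		__asm__ __volatile__("" ::: "memory");
		sink += memcmp(a, b, 3);	/* size 3, as in the figures above */
	}
	clock_gettime(CLOCK_MONOTONIC, &te);

	secs = (te.tv_sec - ts.tv_sec) + (te.tv_nsec - ts.tv_nsec) / 1e9;
	printf("%.0f ops/s (sink %d)\n", iters / secs, sink);
	return (0);
}
```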
(cherry picked from commit f1be262ec11c1c35e6485f432415b5b52adb505d)
-rw-r--r--	lib/libc/amd64/string/memcmp.S	| 50
-rw-r--r--	sys/amd64/amd64/support.S	| 52
2 files changed, 57 insertions, 45 deletions
diff --git a/lib/libc/amd64/string/memcmp.S b/lib/libc/amd64/string/memcmp.S
index 67c7df280679..0c8121f9d885 100644
--- a/lib/libc/amd64/string/memcmp.S
+++ b/lib/libc/amd64/string/memcmp.S
@@ -45,9 +45,25 @@ ENTRY(memcmp)
 	cmpq	$16,%rdx
 	ja	101632f
 
-100816:
 	cmpb	$8,%dl
-	jl	100408f
+	jg	100816f
+
+	cmpb	$4,%dl
+	jg	100408f
+
+	cmpb	$2,%dl
+	jge	100204f
+
+	cmpb	$1,%dl
+	jl	100000f
+	movzbl	(%rdi),%eax
+	movzbl	(%rsi),%r8d
+	subl	%r8d,%eax
+100000:
+	ret
+
+	ALIGN_TEXT
+100816:
 	movq	(%rdi),%r8
 	movq	(%rsi),%r9
 	cmpq	%r8,%r9
@@ -57,9 +73,8 @@ ENTRY(memcmp)
 	cmpq	%r8,%r9
 	jne	10081608f
 	ret
+	ALIGN_TEXT
 100408:
-	cmpb	$4,%dl
-	jl	100204f
 	movl	(%rdi),%r8d
 	movl	(%rsi),%r9d
 	cmpl	%r8d,%r9d
@@ -69,9 +84,8 @@ ENTRY(memcmp)
 	cmpl	%r8d,%r9d
 	jne	10040804f
 	ret
+	ALIGN_TEXT
 100204:
-	cmpb	$2,%dl
-	jl	100001f
 	movzwl	(%rdi),%r8d
 	movzwl	(%rsi),%r9d
 	cmpl	%r8d,%r9d
@@ -81,15 +95,7 @@ ENTRY(memcmp)
 	cmpl	%r8d,%r9d
 	jne	1f
 	ret
-100001:
-	cmpb	$1,%dl
-	jl	100000f
-	movzbl	(%rdi),%eax
-	movzbl	(%rsi),%r8d
-	subl	%r8d,%eax
-100000:
-	ret
-ALIGN_TEXT
+	ALIGN_TEXT
 101632:
 	cmpq	$32,%rdx
 	ja	103200f
@@ -110,7 +116,7 @@ ALIGN_TEXT
 	cmpq	%r8,%r9
 	jne	10163224f
 	ret
-ALIGN_TEXT
+	ALIGN_TEXT
 103200:
 	movq	(%rdi),%r8
 	movq	8(%rdi),%r9
@@ -140,7 +146,7 @@ ALIGN_TEXT
 	 *
 	 * Before we compute it we narrow down the range (16 -> 8 -> 4 bytes).
 	 */
-ALIGN_TEXT
+	ALIGN_TEXT
 10320016:
 	leaq	16(%rdi),%rdi
 	leaq	16(%rsi),%rsi
@@ -152,29 +158,29 @@ ALIGN_TEXT
 	leaq	8(%rdi),%rdi
 	leaq	8(%rsi),%rsi
 	jmp	80f
-ALIGN_TEXT
+	ALIGN_TEXT
 10081608:
 10163224:
 	leaq	-8(%rdi,%rdx),%rdi
 	leaq	-8(%rsi,%rdx),%rsi
 	jmp	80f
-ALIGN_TEXT
+	ALIGN_TEXT
 10163216:
 	leaq	-16(%rdi,%rdx),%rdi
 	leaq	-16(%rsi,%rdx),%rsi
 	jmp	80f
-ALIGN_TEXT
+	ALIGN_TEXT
 10163208:
 	leaq	8(%rdi),%rdi
 	leaq	8(%rsi),%rsi
 	jmp	80f
-ALIGN_TEXT
+	ALIGN_TEXT
 10040804:
 	leaq	-4(%rdi,%rdx),%rdi
 	leaq	-4(%rsi,%rdx),%rsi
 	jmp	1f
-ALIGN_TEXT
+	ALIGN_TEXT
 80:
 	movl	(%rdi),%r8d
 	movl	(%rsi),%r9d
diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S
index 49baa50ac294..b623fba277db 100644
--- a/sys/amd64/amd64/support.S
+++ b/sys/amd64/amd64/support.S
@@ -93,9 +93,26 @@ ENTRY(memcmp)
 	cmpq	$16,%rdx
 	ja	101632f
 
-100816:
 	cmpb	$8,%dl
-	jl	100408f
+	jg	100816f
+
+	cmpb	$4,%dl
+	jg	100408f
+
+	cmpb	$2,%dl
+	jge	100204f
+
+	cmpb	$1,%dl
+	jl	100000f
+	movzbl	(%rdi),%eax
+	movzbl	(%rsi),%r8d
+	subl	%r8d,%eax
+100000:
+	POP_FRAME_POINTER
+	ret
+
+	ALIGN_TEXT
+100816:
 	movq	(%rdi),%r8
 	movq	(%rsi),%r9
 	cmpq	%r8,%r9
@@ -106,9 +123,8 @@ ENTRY(memcmp)
 	jne	10081608f
 	POP_FRAME_POINTER
 	ret
+	ALIGN_TEXT
 100408:
-	cmpb	$4,%dl
-	jl	100204f
 	movl	(%rdi),%r8d
 	movl	(%rsi),%r9d
 	cmpl	%r8d,%r9d
@@ -119,9 +135,8 @@ ENTRY(memcmp)
 	jne	10040804f
 	POP_FRAME_POINTER
 	ret
+	ALIGN_TEXT
 100204:
-	cmpb	$2,%dl
-	jl	100001f
 	movzwl	(%rdi),%r8d
 	movzwl	(%rsi),%r9d
 	cmpl	%r8d,%r9d
@@ -132,16 +147,7 @@ ENTRY(memcmp)
 	jne	1f
 	POP_FRAME_POINTER
 	ret
-100001:
-	cmpb	$1,%dl
-	jl	100000f
-	movzbl	(%rdi),%eax
-	movzbl	(%rsi),%r8d
-	subl	%r8d,%eax
-100000:
-	POP_FRAME_POINTER
-	ret
-ALIGN_TEXT
+	ALIGN_TEXT
 101632:
 	cmpq	$32,%rdx
 	ja	103200f
@@ -163,7 +169,7 @@ ALIGN_TEXT
 	jne	10163224f
 	POP_FRAME_POINTER
 	ret
-ALIGN_TEXT
+	ALIGN_TEXT
 103200:
 	movq	(%rdi),%r8
 	movq	8(%rdi),%r9
@@ -194,7 +200,7 @@ ALIGN_TEXT
 	 *
 	 * Before we compute it we narrow down the range (16 -> 8 -> 4 bytes).
 	 */
-ALIGN_TEXT
+	ALIGN_TEXT
 10320016:
 	leaq	16(%rdi),%rdi
 	leaq	16(%rsi),%rsi
@@ -206,29 +212,29 @@ ALIGN_TEXT
 	leaq	8(%rdi),%rdi
 	leaq	8(%rsi),%rsi
 	jmp	80f
-ALIGN_TEXT
+	ALIGN_TEXT
 10081608:
 10163224:
 	leaq	-8(%rdi,%rdx),%rdi
 	leaq	-8(%rsi,%rdx),%rsi
 	jmp	80f
-ALIGN_TEXT
+	ALIGN_TEXT
 10163216:
 	leaq	-16(%rdi,%rdx),%rdi
 	leaq	-16(%rsi,%rdx),%rsi
 	jmp	80f
-ALIGN_TEXT
+	ALIGN_TEXT
 10163208:
 	leaq	8(%rdi),%rdi
 	leaq	8(%rsi),%rsi
 	jmp	80f
-ALIGN_TEXT
+	ALIGN_TEXT
 10040804:
 	leaq	-4(%rdi,%rdx),%rdi
 	leaq	-4(%rsi,%rdx),%rsi
 	jmp	1f
-ALIGN_TEXT
+	ALIGN_TEXT
 80:
 	movl	(%rdi),%r8d
 	movl	(%rsi),%r9d