path: root/sys/amd64/amd64/support.S
author     Mateusz Guzik <mjg@FreeBSD.org>    2018-11-30 20:58:08 +0000
committer  Mateusz Guzik <mjg@FreeBSD.org>    2018-11-30 20:58:08 +0000
commit     94243af2da71038d14884433111d8724df14e69f (patch)
tree       0a42bb13eb3a50f35e1741a9cca5530066db45f1 /sys/amd64/amd64/support.S
parent     1489776d43e7decd9a0a6ba5faae437b0159f68e (diff)
download   src-94243af2da71038d14884433111d8724df14e69f.tar.gz
           src-94243af2da71038d14884433111d8724df14e69f.zip
amd64: handle small memmove buffers with overlapping stores
Handling of sizes > 32 copied backwards will be updated later.

Reviewed by:	kib (kernel part)
Sponsored by:	The FreeBSD Foundation
Differential Revision:	https://reviews.freebsd.org/D18387
Notes:
    svn path=/head/; revision=341351
Diffstat (limited to 'sys/amd64/amd64/support.S')
-rw-r--r--    sys/amd64/amd64/support.S    |   91
1 file changed, 51 insertions(+), 40 deletions(-)
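The idea behind the change, illustrated: for a small copy, load a block anchored at the start of the source and a block anchored at its end before issuing any store; the two stores may then overlap in the middle, which is harmless because every load has already completed. Below is a minimal C sketch of one size bucket (8..16 bytes). The function name and the memcpy-based formulation are mine for illustration only; the commit implements this in assembly inside the MEMMOVE macro.

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/*
 * Illustrative sketch, not the committed code: copy len bytes,
 * 8 <= len <= 16, with two 8-byte loads followed by two 8-byte
 * stores.  Both loads happen before either store, so the copy is
 * correct even when destination and source overlap, in either
 * direction.  The fixed-size memcpy calls stand in for plain
 * 8-byte loads/stores and typically compile to single movs.
 */
static void
copy_8_to_16(unsigned char *dst, const unsigned char *src, size_t len)
{
	uint64_t head, tail;

	memcpy(&head, src, sizeof(head));
	memcpy(&tail, src + len - sizeof(tail), sizeof(tail));
	memcpy(dst, &head, sizeof(head));
	memcpy(dst + len - sizeof(tail), &tail, sizeof(tail));
}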
diff --git a/sys/amd64/amd64/support.S b/sys/amd64/amd64/support.S
index 2d519a7a3768..c5b167d17830 100644
--- a/sys/amd64/amd64/support.S
+++ b/sys/amd64/amd64/support.S
@@ -205,11 +205,19 @@ END(memcmp)
* rsi - source
* rdx - count
*
- * The macro possibly clobbers the above and: rcx, r8.
- * It does not clobber rax, r10 nor r11.
+ * The macro possibly clobbers the above and: rcx, r8, r9, r10.
+ * It does not clobber rax nor r11.
*/
.macro MEMMOVE erms overlap begin end
\begin
+
+ /*
+ * For sizes 0..32 all data is read before it is written, so there
+ * is no correctness issue with direction of copying.
+ */
+ cmpq $32,%rcx
+ jbe 101632f
+
.if \overlap == 1
movq %rdi,%r8
subq %rsi,%r8
@@ -217,13 +225,10 @@ END(memcmp)
jb 2f
.endif
- cmpq $32,%rcx
- jb 1016f
-
cmpq $256,%rcx
ja 1256f
-1032:
+103200:
movq (%rsi),%rdx
movq %rdx,(%rdi)
movq 8(%rsi),%rdx
@@ -236,56 +241,62 @@ END(memcmp)
leaq 32(%rdi),%rdi
subq $32,%rcx
cmpq $32,%rcx
- jae 1032b
+ jae 103200b
cmpb $0,%cl
- jne 1016f
+ jne 101632f
\end
ret
ALIGN_TEXT
-1016:
+101632:
cmpb $16,%cl
- jl 1008f
+ jl 100816f
movq (%rsi),%rdx
+ movq 8(%rsi),%r8
+ movq -16(%rsi,%rcx),%r9
+ movq -8(%rsi,%rcx),%r10
movq %rdx,(%rdi)
- movq 8(%rsi),%rdx
- movq %rdx,8(%rdi)
- subb $16,%cl
- jz 1000f
- leaq 16(%rsi),%rsi
- leaq 16(%rdi),%rdi
-1008:
+ movq %r8,8(%rdi)
+ movq %r9,-16(%rdi,%rcx)
+ movq %r10,-8(%rdi,%rcx)
+ \end
+ ret
+ ALIGN_TEXT
+100816:
cmpb $8,%cl
- jl 1004f
+ jl 100408f
movq (%rsi),%rdx
+ movq -8(%rsi,%rcx),%r8
movq %rdx,(%rdi)
- subb $8,%cl
- jz 1000f
- leaq 8(%rsi),%rsi
- leaq 8(%rdi),%rdi
-1004:
+ movq %r8,-8(%rdi,%rcx)
+ \end
+ ret
+ ALIGN_TEXT
+100408:
cmpb $4,%cl
- jl 1002f
+ jl 100204f
movl (%rsi),%edx
+ movl -4(%rsi,%rcx),%r8d
movl %edx,(%rdi)
- subb $4,%cl
- jz 1000f
- leaq 4(%rsi),%rsi
- leaq 4(%rdi),%rdi
-1002:
+ movl %r8d,-4(%rdi,%rcx)
+ \end
+ ret
+ ALIGN_TEXT
+100204:
cmpb $2,%cl
- jl 1001f
- movw (%rsi),%dx
+ jl 100001f
+ movzwl (%rsi),%edx
+ movzwl -2(%rsi,%rcx),%r8d
movw %dx,(%rdi)
- subb $2,%cl
- jz 1000f
- leaq 2(%rsi),%rsi
- leaq 2(%rdi),%rdi
-1001:
+ movw %r8w,-2(%rdi,%rcx)
+ \end
+ ret
+ ALIGN_TEXT
+100001:
cmpb $1,%cl
- jl 1000f
+ jl 100000f
movb (%rsi),%dl
movb %dl,(%rdi)
-1000:
+100000:
\end
ret
@@ -299,8 +310,8 @@ END(memcmp)
rep
movsq
movq %rdx,%rcx
- andb $7,%cl /* any bytes left? */
- jne 1004b
+ andl $7,%ecx /* any bytes left? */
+ jne 100408b
.endif
\end
ret
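To summarize the restructured small-size path, here is an illustrative C model of the 0..32 byte dispatch (my own sketch; the helper name is hypothetical and fixed-size memcpy stands in for the raw mov loads/stores). Each size bucket reads everything it needs from the source before writing anything, using accesses anchored at the start and at the end of the buffer, which is why the comment in the diff notes there is no correctness issue with the direction of copying. The buckets mirror the 101632/100816/100408/100204/100001 labels above, and the same path is now reused for the 0..7 byte tail left after "rep movsq" (the "jne 100408b" in the last hunk).

#include <stddef.h>
#include <stdint.h>
#include <string.h>

/*
 * Illustrative model only, not the kernel code: a C rendering of the
 * 0..32 byte memmove path.  Every bucket loads all of its source
 * bytes before the first store, so overlapping source/destination
 * buffers are handled correctly in both directions.
 */
static void
small_memmove(unsigned char *dst, const unsigned char *src, size_t len)
{
	uint64_t q0, q1, q2, q3;
	uint32_t l0, l1;
	uint16_t w0, w1;

	if (len >= 16) {		/* 16..32 bytes: label 101632 */
		memcpy(&q0, src, 8);
		memcpy(&q1, src + 8, 8);
		memcpy(&q2, src + len - 16, 8);
		memcpy(&q3, src + len - 8, 8);
		memcpy(dst, &q0, 8);
		memcpy(dst + 8, &q1, 8);
		memcpy(dst + len - 16, &q2, 8);
		memcpy(dst + len - 8, &q3, 8);
	} else if (len >= 8) {		/* 8..15 bytes: label 100816 */
		memcpy(&q0, src, 8);
		memcpy(&q1, src + len - 8, 8);
		memcpy(dst, &q0, 8);
		memcpy(dst + len - 8, &q1, 8);
	} else if (len >= 4) {		/* 4..7 bytes: label 100408 */
		memcpy(&l0, src, 4);
		memcpy(&l1, src + len - 4, 4);
		memcpy(dst, &l0, 4);
		memcpy(dst + len - 4, &l1, 4);
	} else if (len >= 2) {		/* 2..3 bytes: label 100204 */
		memcpy(&w0, src, 2);
		memcpy(&w1, src + len - 2, 2);
		memcpy(dst, &w0, 2);
		memcpy(dst + len - 2, &w1, 2);
	} else if (len == 1) {		/* label 100001 */
		dst[0] = src[0];
	}				/* len == 0: label 100000, nothing to copy */
}

Compared with the removed code, which advanced %rsi/%rdi in 16/8/4/2-byte steps and fell through the cases, each bucket now finishes with a single pair of possibly overlapping accesses and returns, trading a few doubly-written bytes for fewer branches and no pointer adjustment.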