Diffstat (limited to 'lib/libc/amd64/string/memmove.S')
-rw-r--r--  lib/libc/amd64/string/memmove.S  308
1 file changed, 308 insertions(+), 0 deletions(-)
diff --git a/lib/libc/amd64/string/memmove.S b/lib/libc/amd64/string/memmove.S
new file mode 100644
index 000000000000..7878e6e9bee6
--- /dev/null
+++ b/lib/libc/amd64/string/memmove.S
@@ -0,0 +1,308 @@
+/*-
+ * Copyright (c) 2018 The FreeBSD Foundation
+ *
+ * This software was developed by Mateusz Guzik <mjg@FreeBSD.org>
+ * under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <machine/asm.h>
+/*
+ * Note: this routine was written with kernel use in mind (read: no SIMD);
+ * it is only present in userspace as a temporary measure until something
+ * better gets imported.
+ */
+
+#define ALIGN_TEXT .p2align 4,0x90 /* 16-byte alignment, nop filled */
+
+/*
+ * memmove(dst, src, cnt)
+ * rdi, rsi, rdx
+ */
+
+/*
+ * Register state at entry is supposed to be as follows:
+ * rdi - destination
+ * rsi - source
+ * rdx - count
+ *
+ * The macro possibly clobbers the above and: rcx, r8, r9, r10.
+ * It does not clobber rax or r11.
+ */
+.macro MEMMOVE erms overlap begin end
+ \begin
+
+ /*
+ * For sizes 0..32 all data is read before it is written, so there
+ * is no correctness issue with direction of copying.
+ */
+ cmpq $32,%rcx
+ jbe 101632f
+
+.if \overlap == 1
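+ /*
+ * The unsigned comparison below answers two questions at once:
+ * dst - src wraps to a huge value when src > dst, so it is below
+ * cnt only when the buffers overlap with src < dst, the sole case
+ * that must be copied backwards.
+ */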
+ movq %rdi,%r8
+ subq %rsi,%r8
+ cmpq %rcx,%r8 /* overlapping && src < dst? */
+ jb 2f
+.endif
+
+ cmpq $256,%rcx
+ ja 1256f
+
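+ /*
+ * 33..256 bytes: copy forward in 32-byte chunks, then dispatch the
+ * remaining 0..31 bytes through the small-size ladder below.
+ */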
+ ALIGN_TEXT
+103200:
+ movq (%rsi),%rdx
+ movq %rdx,(%rdi)
+ movq 8(%rsi),%rdx
+ movq %rdx,8(%rdi)
+ movq 16(%rsi),%rdx
+ movq %rdx,16(%rdi)
+ movq 24(%rsi),%rdx
+ movq %rdx,24(%rdi)
+ leaq 32(%rsi),%rsi
+ leaq 32(%rdi),%rdi
+ subq $32,%rcx
+ cmpq $32,%rcx
+ jae 103200b
+ cmpb $0,%cl
+ jne 101632f
+ \end
+ ret
+ ALIGN_TEXT
+101632:
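+ /*
+ * 16..32 bytes: load the first and the last 16 bytes of the buffer
+ * before storing anything.  The two stores may overlap in the middle,
+ * which makes the copy safe for any direction of overlap.
+ */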
+ cmpb $16,%cl
+ jl 100816f
+ movq (%rsi),%rdx
+ movq 8(%rsi),%r8
+ movq -16(%rsi,%rcx),%r9
+ movq -8(%rsi,%rcx),%r10
+ movq %rdx,(%rdi)
+ movq %r8,8(%rdi)
+ movq %r9,-16(%rdi,%rcx)
+ movq %r10,-8(%rdi,%rcx)
+ \end
+ ret
+ ALIGN_TEXT
+100816:
+ cmpb $8,%cl
+ jl 100408f
+ movq (%rsi),%rdx
+ movq -8(%rsi,%rcx),%r8
+ movq %rdx,(%rdi)
+ movq %r8,-8(%rdi,%rcx)
+ \end
+ ret
+ ALIGN_TEXT
+100408:
+ cmpb $4,%cl
+ jl 100204f
+ movl (%rsi),%edx
+ movl -4(%rsi,%rcx),%r8d
+ movl %edx,(%rdi)
+ movl %r8d,-4(%rdi,%rcx)
+ \end
+ ret
+ ALIGN_TEXT
+100204:
+ cmpb $2,%cl
+ jl 100001f
+ movzwl (%rsi),%edx
+ movzwl -2(%rsi,%rcx),%r8d
+ movw %dx,(%rdi)
+ movw %r8w,-2(%rdi,%rcx)
+ \end
+ ret
+ ALIGN_TEXT
+100001:
+ cmpb $1,%cl
+ jl 100000f
+ movb (%rsi),%dl
+ movb %dl,(%rdi)
+100000:
+ \end
+ ret
+
+ ALIGN_TEXT
+1256:
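+ /*
+ * More than 256 bytes: use rep movs.  An aligned destination can be
+ * copied right away; otherwise jump to 100f to align it first.
+ */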
+ testb $15,%dil
+ jnz 100f
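+ /*
+ * erms selects "Enhanced REP MOVSB": on CPUs advertising the ERMS
+ * feature a bare rep movsb is the fastest bulk copy.  The non-erms
+ * variant moves quadwords and lets the ladder at 100408 finish the
+ * 0..7 byte tail.
+ */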
+.if \erms == 1
+ rep
+ movsb
+.else
+ shrq $3,%rcx /* copy by 64-bit words */
+ rep
+ movsq
+ movq %rdx,%rcx
+ andl $7,%ecx /* any bytes left? */
+ jne 100408b
+.endif
+ \end
+ ret
+100:
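+ /*
+ * Unaligned destination: stash the first 16 source bytes in r8:r9,
+ * advance dst (and src) to the next 16-byte boundary of dst while
+ * shrinking the count accordingly, bulk-copy, and only then store
+ * the stashed head.  The head was loaded before the bulk copy, so
+ * rewriting those bytes is harmless.
+ */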
+ movq (%rsi),%r8
+ movq 8(%rsi),%r9
+ movq %rdi,%r10
+ movq %rdi,%rcx
+ andq $15,%rcx
+ leaq -16(%rdx,%rcx),%rdx
+ neg %rcx
+ leaq 16(%rdi,%rcx),%rdi
+ leaq 16(%rsi,%rcx),%rsi
+ movq %rdx,%rcx
+.if \erms == 1
+ rep
+ movsb
+ movq %r8,(%r10)
+ movq %r9,8(%r10)
+.else
+ shrq $3,%rcx /* copy by 64-bit words */
+ rep
+ movsq
+ movq %r8,(%r10)
+ movq %r9,8(%r10)
+ movq %rdx,%rcx
+ andl $7,%ecx /* any bytes left? */
+ jne 100408b
+.endif
+ \end
+ ret
+
+.if \overlap == 1
+ /*
+ * Copy backwards.  The regions overlap and src < dst, so a forward
+ * copy would overwrite source bytes before they were read.
+ */
+ ALIGN_TEXT
+2:
+ cmpq $256,%rcx
+ ja 2256f
+
+ leaq -8(%rdi,%rcx),%rdi
+ leaq -8(%rsi,%rcx),%rsi
+
+ cmpq $32,%rcx
+ jb 2016f
+
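+ /*
+ * Walk backwards in 32-byte chunks.  rsi and rdi were just biased
+ * to point 8 bytes below the end of each buffer, so offsets 0, -8,
+ * -16 and -24 cover the topmost 32 bytes.
+ */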
+ ALIGN_TEXT
+2032:
+ movq (%rsi),%rdx
+ movq %rdx,(%rdi)
+ movq -8(%rsi),%rdx
+ movq %rdx,-8(%rdi)
+ movq -16(%rsi),%rdx
+ movq %rdx,-16(%rdi)
+ movq -24(%rsi),%rdx
+ movq %rdx,-24(%rdi)
+ leaq -32(%rsi),%rsi
+ leaq -32(%rdi),%rdi
+ subq $32,%rcx
+ cmpq $32,%rcx
+ jae 2032b
+ cmpb $0,%cl
+ jne 2016f
+ \end
+ ret
+ ALIGN_TEXT
+2016:
+ cmpb $16,%cl
+ jl 2008f
+ movq (%rsi),%rdx
+ movq %rdx,(%rdi)
+ movq -8(%rsi),%rdx
+ movq %rdx,-8(%rdi)
+ subb $16,%cl
+ jz 2000f
+ leaq -16(%rsi),%rsi
+ leaq -16(%rdi),%rdi
+2008:
+ cmpb $8,%cl
+ jl 2004f
+ movq (%rsi),%rdx
+ movq %rdx,(%rdi)
+ subb $8,%cl
+ jz 2000f
+ leaq -8(%rsi),%rsi
+ leaq -8(%rdi),%rdi
+2004:
+ cmpb $4,%cl
+ jl 2002f
+ movl 4(%rsi),%edx
+ movl %edx,4(%rdi)
+ subb $4,%cl
+ jz 2000f
+ leaq -4(%rsi),%rsi
+ leaq -4(%rdi),%rdi
+2002:
+ cmpb $2,%cl
+ jl 2001f
+ movw 6(%rsi),%dx
+ movw %dx,6(%rdi)
+ subb $2,%cl
+ jz 2000f
+ leaq -2(%rsi),%rsi
+ leaq -2(%rdi),%rdi
+2001:
+ cmpb $1,%cl
+ jl 2000f
+ movb 7(%rsi),%dl
+ movb %dl,7(%rdi)
+2000:
+ \end
+ ret
+ ALIGN_TEXT
+2256:
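+ /*
+ * More than 256 overlapping bytes: copy whole quadwords from the
+ * top down with rep movsq under the direction flag.  DF must be
+ * cleared again before returning, as the ABI demands, and the 0..7
+ * byte tail is finished by the ladder at 2004.
+ */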
+ std
+ leaq -8(%rdi,%rcx),%rdi
+ leaq -8(%rsi,%rcx),%rsi
+ shrq $3,%rcx
+ rep
+ movsq
+ cld
+ movq %rdx,%rcx
+ andb $7,%cl
+ jne 2004b
+ \end
+ ret
+.endif
+.endm
+
+
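+/*
+ * MEMMOVE_BEGIN stashes the destination in %rax (memmove and memcpy
+ * both return the destination pointer) and moves the count into %rcx,
+ * where the MEMMOVE macro expects it.
+ */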
+.macro MEMMOVE_BEGIN
+ movq %rdi,%rax
+ movq %rdx,%rcx
+.endm
+
+.macro MEMMOVE_END
+.endm
+
+#ifndef MEMCPY
+ENTRY(memmove)
+ MEMMOVE erms=0 overlap=1 begin=MEMMOVE_BEGIN end=MEMMOVE_END
+END(memmove)
+#else
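+/*
+ * memcpy is not required to support overlapping buffers, so the
+ * backward-copy half of the macro is compiled out via overlap=0.
+ */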
+ENTRY(memcpy)
+ MEMMOVE erms=0 overlap=0 begin=MEMMOVE_BEGIN end=MEMMOVE_END
+END(memcpy)
+#endif
+
+ .section .note.GNU-stack,"",%progbits
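
For readers following the control flow, the two load-bearing ideas can be
modeled in C.  The sketch below is illustrative only, not FreeBSD code:
copy_8_16 and model_memmove are invented names, and the real routine
handles many more size classes.  It mirrors the 8..16 byte case (label
100816), where both ends of the buffer are loaded before anything is
stored, and the single unsigned comparison (the jb 2f test) that detects
"overlapping with src < dst".

    #include <stddef.h>
    #include <stdint.h>
    #include <string.h>

    /* 8..16 bytes: load head and tail before storing either.  All loads
     * precede all stores, so overlap cannot corrupt the data. */
    static void copy_8_16(unsigned char *d, const unsigned char *s, size_t n)
    {
        uint64_t head, tail;

        memcpy(&head, s, 8);            /* bytes [0, 8) */
        memcpy(&tail, s + n - 8, 8);    /* bytes [n-8, n); may overlap head */
        memcpy(d, &head, 8);
        memcpy(d + n - 8, &tail, 8);
    }

    /* Toy dispatch: the unsigned wraparound of d - s lets one comparison
     * answer "overlapping with src < dst?", just like the assembly. */
    static void model_memmove(void *dst, const void *src, size_t n)
    {
        unsigned char *d = dst;
        const unsigned char *s = src;

        if (n >= 8 && n <= 16) {
            copy_8_16(d, s, n);
        } else if ((uintptr_t)d - (uintptr_t)s < n) {
            while (n--)                     /* overlap, src < dst: backwards */
                d[n] = s[n];
        } else {
            for (size_t i = 0; i < n; i++)  /* forwards */
                d[i] = s[i];
        }
    }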