aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLeandro Lupori <luporl@FreeBSD.org>2020-01-15 20:25:52 +0000
committerLeandro Lupori <luporl@FreeBSD.org>2020-01-15 20:25:52 +0000
commite16c18650cdc45fd7d9ff1edfe34ab32e7658299 (patch)
tree55ab06f5860b73fea4af19cfd5cf47fbcacc70c9
parent181e35008cfbe18b2cfccb08d349415d8c21d8b6 (diff)
downloadsrc-e16c18650cdc.tar.gz
src-e16c18650cdc.zip
[PPC64] memcpy/memmove/bcopy optimization
For copies shorter than 512 bytes, the data is copied using plain ld/std instructions. For 512 bytes or more, the copy is done in 3 phases. Phase 1: copy from the src buffer until it's aligned at a 16-byte boundary. Phase 2: copy as many aligned 64-byte blocks from the src buffer as possible. Phase 3: copy the remaining data, if any. In phase 2, this code uses VSX instructions when available; otherwise, it uses ldx/stdx. Submitted by: Luis Pires <lffpires_ruabrasil.org> (original version) Reviewed by: jhibbits Differential Revision: https://reviews.freebsd.org/D15118
Notes
Notes: svn path=/head/; revision=356767
-rw-r--r--lib/libc/powerpc64/string/Makefile.inc9
-rw-r--r--lib/libc/powerpc64/string/bcopy.S306
-rw-r--r--lib/libc/powerpc64/string/bcopy_resolver.c68
-rw-r--r--lib/libc/powerpc64/string/bcopy_vsx.S61
-rw-r--r--lib/libc/powerpc64/string/memcpy.S122
-rw-r--r--lib/libc/powerpc64/string/memcpy_resolver.c4
-rw-r--r--lib/libc/powerpc64/string/memcpy_vsx.S65
-rw-r--r--lib/libc/powerpc64/string/memmove.S4
-rw-r--r--lib/libc/powerpc64/string/memmove_resolver.c4
-rw-r--r--lib/libc/powerpc64/string/memmove_vsx.S5
10 files changed, 648 insertions, 0 deletions
diff --git a/lib/libc/powerpc64/string/Makefile.inc b/lib/libc/powerpc64/string/Makefile.inc
index 0e21da4286cb..7cedf0dc3d63 100644
--- a/lib/libc/powerpc64/string/Makefile.inc
+++ b/lib/libc/powerpc64/string/Makefile.inc
@@ -1,6 +1,15 @@
# $FreeBSD$
MDSRCS+= \
+ bcopy.S \
+ bcopy_vsx.S \
+ bcopy_resolver.c \
+ memcpy.S \
+ memcpy_vsx.S \
+ memcpy_resolver.c \
+ memmove.S \
+ memmove_vsx.S \
+ memmove_resolver.c \
strcpy_arch_2_05.S \
strcpy.c \
strcpy_resolver.c \
diff --git a/lib/libc/powerpc64/string/bcopy.S b/lib/libc/powerpc64/string/bcopy.S
new file mode 100644
index 000000000000..bb860c098feb
--- /dev/null
+++ b/lib/libc/powerpc64/string/bcopy.S
@@ -0,0 +1,306 @@
+/*-
+ * Copyright (c) 2018 Instituto de Pesquisas Eldorado
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of its contributors may
+ * be used to endorse or promote products derived from this software
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <machine/asm.h>
+__FBSDID("$FreeBSD$");
+
+#define BLOCK_SIZE_BITS 6 /* log2 of the phase 2 block size */
+#define BLOCK_SIZE (1 << BLOCK_SIZE_BITS) /* bytes per phase 2 block (64) */
+#define BLOCK_SIZE_MASK (BLOCK_SIZE - 1) /* byte count below one block */
+
+#define MULTI_PHASE_THRESHOLD 512 /* len >= this takes the 3-phase path */
+
+#ifndef FN_NAME /* a wrapper such as bcopy_vsx.S may preset the name */
+#ifdef MEMMOVE
+#define FN_NAME __memmove
+WEAK_REFERENCE(__memmove, memmove);
+#else
+#define FN_NAME __bcopy
+WEAK_REFERENCE(__bcopy, bcopy);
+#endif
+#endif
+
+/*
+ * r3: dst, r4: src, r5: len (memmove order; bcopy swaps r3/r4 on entry)
+ * The copy runs backward (from the end) when src < dst, else forward.
+ * Scratch values are stored below %r1 (down to -112); %r1 is not moved.
+ */
+
+ENTRY(FN_NAME)
+ cmpld %r3, %r4 /* src == dst? nothing to do */
+ beqlr-
+ cmpdi %r5, 0 /* len == 0? nothing to do */
+ beqlr-
+
+#ifdef MEMMOVE
+ std %r3, -8(%r1) /* save dst, reloaded at .Ldone */
+#else /* bcopy: swap src/dst */
+ mr %r0, %r3
+ mr %r3, %r4
+ mr %r4, %r0
+#endif
+
+ cmpldi %r5, MULTI_PHASE_THRESHOLD
+ bge .Lmulti_phase
+
+ /* align src */
+ cmpld %r4, %r3 /* forward or backward copy? (unsigned) */
+ blt .Lbackward_align
+
+ .align 5
+.Lalign:
+ andi. %r0, %r4, 15 /* src 16-byte aligned yet? */
+ beq .Lsingle_copy
+ lbz %r0, 0(%r4)
+ addi %r4, %r4, 1
+ stb %r0, 0(%r3)
+ addi %r3, %r3, 1
+ addi %r5, %r5, -1
+ cmpdi %r5, 0
+ beq- .Ldone /* len exhausted while aligning */
+ b .Lalign
+
+.Lbackward_align:
+ /* advance src and dst to end (past last byte) */
+ add %r3, %r3, %r5
+ add %r4, %r4, %r5
+ .align 5
+.Lbackward_align_loop:
+ andi. %r0, %r4, 15 /* end of src 16-byte aligned yet? */
+ beq .Lbackward_single_copy
+ lbzu %r0, -1(%r4)
+ addi %r5, %r5, -1
+ stbu %r0, -1(%r3)
+ cmpdi %r5, 0
+ beq- .Ldone /* len exhausted while aligning */
+ b .Lbackward_align_loop
+
+.Lsingle_copy:
+ /* forward copy */
+ li %r0, 1 /* 1-byte increment */
+ li %r8, 16 /* 16-byte increment */
+ li %r9, 0 /* 16-byte pre/post adjustment */
+ b .Lsingle_phase
+
+.Lbackward_single_copy:
+ /* backward copy */
+ li %r0, -1 /* 1-byte increment */
+ li %r8, -16 /* 16-byte increment */
+ li %r9, -15 /* 16-byte pre/post adjustment */
+ /* point src and dst to last byte */
+ addi %r3, %r3, -1
+ addi %r4, %r4, -1
+
+.Lsingle_phase:
+ srdi. %r6, %r5, 4 /* number of 16-bytes */
+ beq .Lsingle_1
+
+ /* pre-adjustment */
+ add %r3, %r3, %r9
+ add %r4, %r4, %r9
+
+ mtctr %r6
+ .align 5
+.Lsingle_16_loop:
+ ld %r6, 0(%r4) /* load all 16 bytes before storing any */
+ ld %r7, 8(%r4)
+ add %r4, %r4, %r8
+ std %r6, 0(%r3)
+ std %r7, 8(%r3)
+ add %r3, %r3, %r8
+ bdnz .Lsingle_16_loop
+
+ /* post-adjustment */
+ sub %r3, %r3, %r9
+ sub %r4, %r4, %r9
+
+.Lsingle_1:
+ andi. %r6, %r5, 0x0f /* number of 1-bytes */
+ beq .Ldone /* 1-bytes == 0? done */
+
+ mtctr %r6
+ .align 5
+.Lsingle_1_loop:
+ lbz %r6, 0(%r4)
+ add %r4, %r4, %r0 /* increment */
+ stb %r6, 0(%r3)
+ add %r3, %r3, %r0 /* increment */
+ bdnz .Lsingle_1_loop
+
+.Ldone:
+#ifdef MEMMOVE
+ ld %r3, -8(%r1) /* restore dst */
+#endif
+ blr
+
+
+.Lmulti_phase:
+ /* set up multi-phase copy parameters */
+
+ /* r7 = bytes before the aligned section of the buffer */
+ andi. %r6, %r4, 15
+ subfic %r7, %r6, 16 /* 16 (not 0) when src is already aligned */
+ /* r8 = bytes in and after the aligned section of the buffer */
+ sub %r8, %r5, %r7
+ /* r9 = bytes after the aligned section of the buffer */
+ andi. %r9, %r8, BLOCK_SIZE_MASK
+ /* r10 = BLOCKS in the aligned section of the buffer */
+ srdi %r10, %r8, BLOCK_SIZE_BITS
+
+ /* forward or backward copy? */
+ cmpld %r4, %r3 /* addresses: compare unsigned */
+ blt .Lbackward_multi_copy
+
+ /* set up forward copy parameters */
+ std %r7, -32(%r1) /* bytes to copy in phase 1 */
+ std %r10, -40(%r1) /* BLOCKS to copy in phase 2 */
+ std %r9, -48(%r1) /* bytes to copy in phase 3 */
+
+ li %r0, 1 /* increment for phases 1 and 3 */
+ li %r5, BLOCK_SIZE /* increment for phase 2 */
+
+ /* op offsets for phase 2 */
+ li %r7, 0
+ li %r8, 16
+ li %r9, 32
+ li %r10, 48
+
+ std %r8, -16(%r1) /* 16-byte increment (16) */
+ std %r7, -24(%r1) /* 16-byte pre/post adjustment (0) */
+
+ b .Lphase1
+
+.Lbackward_multi_copy:
+ /* set up backward copy parameters */
+ std %r9, -32(%r1) /* bytes to copy in phase 1 */
+ std %r10, -40(%r1) /* BLOCKS to copy in phase 2 */
+ std %r7, -48(%r1) /* bytes to copy in phase 3 */
+
+ li %r0, -1 /* increment for phases 1 and 3 */
+ add %r6, %r5, %r0 /* r6 = len - 1 */
+ li %r5, -BLOCK_SIZE /* increment for phase 2 */
+ /* advance src and dst to the last position */
+ add %r3, %r3, %r6
+ add %r4, %r4, %r6
+
+ /* op offsets for phase 2 */
+ li %r7, -15
+ li %r8, -31
+ li %r9, -47
+ li %r10, -63
+
+ add %r6, %r7, %r0 /* r6 = -16 */
+ std %r6, -16(%r1) /* 16-byte increment (-16) */
+ std %r7, -24(%r1) /* 16-byte pre/post adjustment (-15) */
+
+.Lphase1:
+ ld %r6, -32(%r1) /* bytes to copy in phase 1 */
+ cmpldi %r6, 0 /* r6 == 0? skip phase 1 */
+ beq+ .Lphase2
+
+ mtctr %r6
+ .align 5
+.Lphase1_loop:
+ lbz %r6, 0(%r4)
+ add %r4, %r4, %r0 /* phase 1 increment */
+ stb %r6, 0(%r3)
+ add %r3, %r3, %r0 /* phase 1 increment */
+ bdnz .Lphase1_loop
+
+.Lphase2:
+ ld %r6, -40(%r1) /* BLOCKS to copy in phase 2 */
+ cmpldi %r6, 0 /* %r6 == 0? skip phase 2 */
+ beq .Lphase3
+
+#ifdef FN_PHASE2
+FN_PHASE2
+#else
+ /* save registers */
+ std %r14, -56(%r1)
+ std %r15, -64(%r1)
+ std %r16, -72(%r1)
+ std %r17, -80(%r1)
+ std %r18, -88(%r1)
+ std %r19, -96(%r1)
+ std %r20, -104(%r1)
+ std %r21, -112(%r1)
+
+ addi %r18, %r7, 8 /* r18-r21: second doubleword of each */
+ addi %r19, %r8, 8 /* 16-byte piece (offset + 8) */
+ addi %r20, %r9, 8
+ addi %r21, %r10, 8
+
+ mtctr %r6
+ .align 5
+.Lphase2_loop:
+ ldx %r14, %r7, %r4 /* first 32 bytes of the block */
+ ldx %r15, %r18, %r4
+ ldx %r16, %r8, %r4
+ ldx %r17, %r19, %r4
+ stdx %r14, %r7, %r3
+ stdx %r15, %r18, %r3
+ stdx %r16, %r8, %r3
+ stdx %r17, %r19, %r3
+
+ ldx %r14, %r9, %r4 /* remaining 32 bytes of the block */
+ ldx %r15, %r20, %r4
+ ldx %r16, %r10, %r4
+ ldx %r17, %r21, %r4
+ stdx %r14, %r9, %r3
+ stdx %r15, %r20, %r3
+ stdx %r16, %r10, %r3
+ stdx %r17, %r21, %r3
+
+ add %r4, %r4, %r5 /* phase 2 increment */
+ add %r3, %r3, %r5 /* phase 2 increment */
+
+ bdnz .Lphase2_loop
+
+ /* restore registers */
+ ld %r14, -56(%r1)
+ ld %r15, -64(%r1)
+ ld %r16, -72(%r1)
+ ld %r17, -80(%r1)
+ ld %r18, -88(%r1)
+ ld %r19, -96(%r1)
+ ld %r20, -104(%r1)
+ ld %r21, -112(%r1)
+#endif
+
+.Lphase3:
+ /* load registers for transitioning into the single-phase logic */
+ ld %r5, -48(%r1) /* bytes to copy in phase 3 */
+ ld %r8, -16(%r1) /* 16-byte increment */
+ ld %r9, -24(%r1) /* 16-byte pre/post adjustment */
+ b .Lsingle_phase /* r0 still holds the 1-byte increment */
+
+END(FN_NAME)
+
+ .section .note.GNU-stack,"",%progbits
+
diff --git a/lib/libc/powerpc64/string/bcopy_resolver.c b/lib/libc/powerpc64/string/bcopy_resolver.c
new file mode 100644
index 000000000000..aaaf81a1c2a7
--- /dev/null
+++ b/lib/libc/powerpc64/string/bcopy_resolver.c
@@ -0,0 +1,68 @@
+/*-
+ * Copyright (c) 2018 Instituto de Pesquisas Eldorado
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of its contributors may
+ * be used to endorse or promote products derived from this software
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <machine/cpu.h>
+#include <machine/ifunc.h>
+
+#define _CAT(a,b) a##b
+#define CAT(a,b) _CAT(a,b) /* extra level so arguments are expanded first */
+#define CAT3(a,b,c) CAT(CAT(a,b),c)
+
+#ifdef MEMCOPY /* defined by memcpy_resolver.c */
+#define FN_NAME memcpy
+#define FN_RET void *
+#define FN_PARAMS (void *dst, const void *src, size_t len)
+
+#elif defined(MEMMOVE) /* defined by memmove_resolver.c */
+#define FN_NAME memmove
+#define FN_RET void *
+#define FN_PARAMS (void *dst, const void *src, size_t len)
+
+#else /* default: bcopy, which takes src before dst and returns nothing */
+#define FN_NAME bcopy
+#define FN_RET void
+#define FN_PARAMS (const void *src, void *dst, size_t len)
+#endif
+
+#define FN_NAME_NOVSX CAT(__, FN_NAME) /* e.g. __bcopy */
+#define FN_NAME_VSX CAT3(__, FN_NAME, _vsx) /* e.g. __bcopy_vsx */
+
+FN_RET FN_NAME_NOVSX FN_PARAMS;
+FN_RET FN_NAME_VSX FN_PARAMS;
+
+DEFINE_UIFUNC(, FN_RET, FN_NAME, FN_PARAMS)
+{
+ /* Without the VSX feature bit, fall back to the non-VSX routine. */
+ if ((cpu_features & PPC_FEATURE_HAS_VSX) == 0)
+ return (FN_NAME_NOVSX);
+ return (FN_NAME_VSX);
+}
diff --git a/lib/libc/powerpc64/string/bcopy_vsx.S b/lib/libc/powerpc64/string/bcopy_vsx.S
new file mode 100644
index 000000000000..fca9d3192e39
--- /dev/null
+++ b/lib/libc/powerpc64/string/bcopy_vsx.S
@@ -0,0 +1,61 @@
+/*-
+ * Copyright (c) 2018 Instituto de Pesquisas Eldorado
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of its contributors may
+ * be used to endorse or promote products derived from this software
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef FN_NAME /* memmove_vsx.S presets __memmove_vsx */
+#define FN_NAME __bcopy_vsx
+#endif
+
+/*
+ * VSX version of phase 2; bcopy.S expands FN_PHASE2 when it is defined.
+ * r3: dst, r4: src
+ * r5: block increment
+ * r6: blocks to copy
+ * r7/r8/r9/r10: 16-byte offsets to copy
+ */
+
+#define FN_PHASE2 \
+ mtctr %r6 /* CTR = blocks to copy */ ;\
+ .align 5 ;\
+.Lphase2_loop: ;\
+ lxvd2x %vs6, %r7, %r4 /* load the whole block first */ ;\
+ lxvd2x %vs7, %r8, %r4 ;\
+ lxvd2x %vs8, %r9, %r4 ;\
+ lxvd2x %vs9, %r10, %r4 ;\
+ stxvd2x %vs6, %r7, %r3 /* then store it */ ;\
+ stxvd2x %vs7, %r8, %r3 ;\
+ stxvd2x %vs8, %r9, %r3 ;\
+ stxvd2x %vs9, %r10, %r3 ;\
+ /* phase 2 increment */ ;\
+ add %r4, %r4, %r5 ;\
+ add %r3, %r3, %r5 ;\
+ \
+ bdnz .Lphase2_loop ;\
+
+#include "bcopy.S"
diff --git a/lib/libc/powerpc64/string/memcpy.S b/lib/libc/powerpc64/string/memcpy.S
new file mode 100644
index 000000000000..2ea676bf67a9
--- /dev/null
+++ b/lib/libc/powerpc64/string/memcpy.S
@@ -0,0 +1,122 @@
+/*-
+ * Copyright (c) 2018 Instituto de Pesquisas Eldorado
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of its contributors may
+ * be used to endorse or promote products derived from this software
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <machine/asm.h>
+__FBSDID("$FreeBSD$");
+
+#ifndef FN_NAME /* defaults; memcpy_vsx.S presets FN_NAME and BLOCK_BITS */
+#define FN_NAME __memcpy
+WEAK_REFERENCE(__memcpy, memcpy);
+#define BLOCK_BITS 4 /* log2 of the block size: 16-byte blocks */
+#endif
+
+#define BLOCK_BYTES (1 << BLOCK_BITS) /* bytes copied per block */
+#define BLOCK_MASK (BLOCK_BYTES - 1) /* len/address bits below one block */
+
+/*
+ * r3: dst (returned unchanged), r4: src, r5: len
+ * Copies single bytes until src is block-aligned, then whole blocks,
+ * then the leftover bytes. Always copies forward.
+ */
+ENTRY(FN_NAME)
+ cmpdi %r5, 0 /* len == 0? nothing to do */
+ beqlr-
+
+ mr %r8, %r3 /* save dst */
+
+ /* align src: one byte is always copied before the alignment test */
+.Lalignment_loop:
+ lbz %r6, 0(%r4)
+ stb %r6, 0(%r3)
+ addi %r3, %r3, 1
+ addi %r4, %r4, 1
+ addi %r5, %r5, -1
+ cmpdi %r5, 0
+ beq .Lexit /* len exhausted while aligning */
+ andi. %r0, %r4, BLOCK_MASK
+ bne .Lalignment_loop
+
+ /* r7 = remaining, non-block, bytes */
+ andi. %r7, %r5, BLOCK_MASK
+
+ /* Check if there are blocks of BLOCK_BYTES to be copied */
+ xor. %r5, %r5, %r7 /* r5 = len rounded down to whole blocks */
+ beq .Lcopy_remaining_fix_index_byte
+
+#ifdef FN_COPY_LOOP
+FN_COPY_LOOP
+#else
+ /* Copy the first block in place; the ldu/stdu loop pre-increments */
+ ld %r6, 0(%r4)
+ ld %r9, 8(%r4)
+ std %r6, 0(%r3)
+ std %r9, 8(%r3)
+ addi %r5, %r5, -BLOCK_BYTES
+ cmpdi %r5, 0 /* immediate 0; "cmpd %r5, 0" reads %r0 */
+ beq .Lcopy_remaining_fix_index_word
+
+ srdi %r5, %r5, BLOCK_BITS /* blocks still to copy */
+ mtctr %r5
+.Lcopy_word:
+ ldu %r6, 16(%r4) /* advance src to the next block, load it */
+ ld %r9, 8(%r4)
+ stdu %r6, 16(%r3) /* advance dst to the next block, store it */
+ std %r9, 8(%r3)
+ bdnz .Lcopy_word
+
+.Lcopy_remaining_fix_index_word:
+ /* Check if there are remaining bytes */
+ cmpdi %r7, 0 /* immediate 0; "cmpd %r7, 0" reads %r0 */
+ beq .Lexit
+ addi %r3, %r3, BLOCK_MASK /* r3/r4 = last byte already copied */
+ addi %r4, %r4, BLOCK_MASK
+ b .Lcopy_remaining
+#endif
+
+.Lcopy_remaining_fix_index_byte:
+ addi %r4, %r4, -1 /* back up one: lbzu/stbu pre-increment */
+ addi %r3, %r3, -1
+
+ /* Copy remaining bytes */
+.Lcopy_remaining:
+ mtctr %r7
+.Lcopy_remaining_loop:
+ lbzu %r6, 1(%r4)
+ stbu %r6, 1(%r3)
+ bdnz .Lcopy_remaining_loop
+
+.Lexit:
+ /* Restore dst */
+ mr %r3, %r8
+ blr
+
+END(FN_NAME)
+
+ .section .note.GNU-stack,"",%progbits
+
diff --git a/lib/libc/powerpc64/string/memcpy_resolver.c b/lib/libc/powerpc64/string/memcpy_resolver.c
new file mode 100644
index 000000000000..d5f4303f3069
--- /dev/null
+++ b/lib/libc/powerpc64/string/memcpy_resolver.c
@@ -0,0 +1,4 @@
+/* $FreeBSD$ */
+
+#define MEMCOPY /* makes bcopy_resolver.c build the memcpy resolver */
+#include "bcopy_resolver.c"
diff --git a/lib/libc/powerpc64/string/memcpy_vsx.S b/lib/libc/powerpc64/string/memcpy_vsx.S
new file mode 100644
index 000000000000..708eb7d9f45d
--- /dev/null
+++ b/lib/libc/powerpc64/string/memcpy_vsx.S
@@ -0,0 +1,65 @@
+/*-
+ * Copyright (c) 2018 Instituto de Pesquisas Eldorado
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * 3. Neither the name of the author nor the names of its contributors may
+ * be used to endorse or promote products derived from this software
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#define FN_NAME __memcpy_vsx
+#define BLOCK_BITS 6 /* log2 of the block size: 64-byte blocks */
+
+/*
+ * r3: dst, r4: src, r5: bytes to copy (multiple of BLOCK_BYTES)
+ * r7: bytes left after the blocks; uses r6, r9-r11, vs6-vs9 and CTR
+ */
+#define FN_COPY_LOOP \
+ /* Load CTR with number of blocks */ \
+ srdi %r5, %r5, BLOCK_BITS ;\
+ mtctr %r5 ;\
+ /* Prepare indexes to load and store data */ \
+ xor %r6, %r6, %r6 ;\
+ li %r9, 16 ;\
+ li %r10, 32 ;\
+ li %r11, 48 ;\
+.Lcopy_vsx_loop: \
+ lxvd2x %vs6, %r6, %r4 ;\
+ lxvd2x %vs7, %r9, %r4 ;\
+ lxvd2x %vs8, %r10, %r4 ;\
+ lxvd2x %vs9, %r11, %r4 ;\
+ stxvd2x %vs6, %r6, %r3 ;\
+ stxvd2x %vs7, %r9, %r3 ;\
+ stxvd2x %vs8, %r10, %r3 ;\
+ stxvd2x %vs9, %r11, %r3 ;\
+ \
+ addi %r3, %r3, BLOCK_BYTES ;\
+ addi %r4, %r4, BLOCK_BYTES ;\
+ bdnz .Lcopy_vsx_loop ;\
+ \
+ /* Check for remaining bytes (immediate 0, not %r0) */ \
+ cmpdi %r7, 0 ;\
+ beq .Lexit ;\
+
+#include "memcpy.S"
diff --git a/lib/libc/powerpc64/string/memmove.S b/lib/libc/powerpc64/string/memmove.S
new file mode 100644
index 000000000000..e0b8c6c502f1
--- /dev/null
+++ b/lib/libc/powerpc64/string/memmove.S
@@ -0,0 +1,4 @@
+/* $FreeBSD$ */
+
+#define MEMMOVE /* makes bcopy.S build __memmove, which returns dst */
+#include "bcopy.S"
diff --git a/lib/libc/powerpc64/string/memmove_resolver.c b/lib/libc/powerpc64/string/memmove_resolver.c
new file mode 100644
index 000000000000..3f5fd973bd0a
--- /dev/null
+++ b/lib/libc/powerpc64/string/memmove_resolver.c
@@ -0,0 +1,4 @@
+/* $FreeBSD$ */
+
+#define MEMMOVE /* makes bcopy_resolver.c build the memmove resolver */
+#include "bcopy_resolver.c"
diff --git a/lib/libc/powerpc64/string/memmove_vsx.S b/lib/libc/powerpc64/string/memmove_vsx.S
new file mode 100644
index 000000000000..bf8a90da468a
--- /dev/null
+++ b/lib/libc/powerpc64/string/memmove_vsx.S
@@ -0,0 +1,5 @@
+/* $FreeBSD$ */
+
+#define MEMMOVE /* memmove argument order and return value in bcopy.S */
+#define FN_NAME __memmove_vsx /* replaces the default __bcopy_vsx */
+#include "bcopy_vsx.S"