aboutsummaryrefslogtreecommitdiff
path: root/test/CodeGen/SystemZ/memcpy-01.ll
diff options
context:
space:
mode:
Diffstat (limited to 'test/CodeGen/SystemZ/memcpy-01.ll')
-rw-r--r--test/CodeGen/SystemZ/memcpy-01.ll235
1 files changed, 235 insertions, 0 deletions
diff --git a/test/CodeGen/SystemZ/memcpy-01.ll b/test/CodeGen/SystemZ/memcpy-01.ll
new file mode 100644
index 000000000000..b53ec5452e25
--- /dev/null
+++ b/test/CodeGen/SystemZ/memcpy-01.ll
@@ -0,0 +1,235 @@
+; Test memcpy using MVC.
+;
+; RUN: llc < %s -mtriple=s390x-linux-gnu | FileCheck %s
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8 *nocapture, i8 *nocapture, i32, i32, i1) nounwind
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8 *nocapture, i8 *nocapture, i64, i32, i1) nounwind
+declare void @foo(i8 *, i8 *)
+
+; Test a no-op move, i32 version.
+define void @f1(i8 *%dest, i8 *%src) {
+; CHECK-LABEL: f1:
+; CHECK-NOT: %r2
+; CHECK-NOT: %r3
+; CHECK: br %r14
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8 *%dest, i8 *%src, i32 0, i32 1,
+ i1 false)
+ ret void
+}
+
+; Test a no-op move, i64 version.
+define void @f2(i8 *%dest, i8 *%src) {
+; CHECK-LABEL: f2:
+; CHECK-NOT: %r2
+; CHECK-NOT: %r3
+; CHECK: br %r14
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 0, i32 1,
+ i1 false)
+ ret void
+}
+
+; Test a 1-byte move, i32 version.
+define void @f3(i8 *%dest, i8 *%src) {
+; CHECK-LABEL: f3:
+; CHECK: mvc 0(1,%r2), 0(%r3)
+; CHECK: br %r14
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8 *%dest, i8 *%src, i32 1, i32 1,
+ i1 false)
+ ret void
+}
+
+; Test a 1-byte move, i64 version.
+define void @f4(i8 *%dest, i8 *%src) {
+; CHECK-LABEL: f4:
+; CHECK: mvc 0(1,%r2), 0(%r3)
+; CHECK: br %r14
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1, i32 1,
+ i1 false)
+ ret void
+}
+
+; Test the upper range of a single MVC, i32 version.
+define void @f5(i8 *%dest, i8 *%src) {
+; CHECK-LABEL: f5:
+; CHECK: mvc 0(256,%r2), 0(%r3)
+; CHECK: br %r14
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8 *%dest, i8 *%src, i32 256, i32 1,
+ i1 false)
+ ret void
+}
+
+; Test the upper range of a single MVC, i64 version.
+define void @f6(i8 *%dest, i8 *%src) {
+; CHECK-LABEL: f6:
+; CHECK: mvc 0(256,%r2), 0(%r3)
+; CHECK: br %r14
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 256, i32 1,
+ i1 false)
+ ret void
+}
+
+; Test the first case that needs two MVCs.
+define void @f7(i8 *%dest, i8 *%src) {
+; CHECK-LABEL: f7:
+; CHECK: mvc 0(256,%r2), 0(%r3)
+; CHECK: mvc 256(1,%r2), 256(%r3)
+; CHECK: br %r14
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8 *%dest, i8 *%src, i32 257, i32 1,
+ i1 false)
+ ret void
+}
+
+; Test the last-but-one case that needs two MVCs.
+define void @f8(i8 *%dest, i8 *%src) {
+; CHECK-LABEL: f8:
+; CHECK: mvc 0(256,%r2), 0(%r3)
+; CHECK: mvc 256(255,%r2), 256(%r3)
+; CHECK: br %r14
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 511, i32 1,
+ i1 false)
+ ret void
+}
+
+; Test the last case that needs two MVCs.
+define void @f9(i8 *%dest, i8 *%src) {
+; CHECK-LABEL: f9:
+; CHECK: mvc 0(256,%r2), 0(%r3)
+; CHECK: mvc 256(256,%r2), 256(%r3)
+; CHECK: br %r14
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 512, i32 1,
+ i1 false)
+ ret void
+}
+
+; Test an arbitrary value that uses straight-line code.
+define void @f10(i8 *%dest, i8 *%src) {
+; CHECK-LABEL: f10:
+; CHECK: mvc 0(256,%r2), 0(%r3)
+; CHECK: mvc 256(256,%r2), 256(%r3)
+; CHECK: mvc 512(256,%r2), 512(%r3)
+; CHECK: mvc 768(256,%r2), 768(%r3)
+; CHECK: mvc 1024(255,%r2), 1024(%r3)
+; CHECK: br %r14
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1279, i32 1,
+ i1 false)
+ ret void
+}
+
+; ...and again in cases where not all parts are in range of MVC.
+define void @f11(i8 *%srcbase, i8 *%destbase) {
+; CHECK-LABEL: f11:
+; CHECK: mvc 4000(256,%r2), 3500(%r3)
+; CHECK: lay [[NEWDEST:%r[1-5]]], 4256(%r2)
+; CHECK: mvc 0(256,[[NEWDEST]]), 3756(%r3)
+; CHECK: mvc 256(256,[[NEWDEST]]), 4012(%r3)
+; CHECK: lay [[NEWSRC:%r[1-5]]], 4268(%r3)
+; CHECK: mvc 512(256,[[NEWDEST]]), 0([[NEWSRC]])
+; CHECK: mvc 768(255,[[NEWDEST]]), 256([[NEWSRC]])
+; CHECK: br %r14
+ %dest = getelementptr i8 *%srcbase, i64 4000
+ %src = getelementptr i8* %destbase, i64 3500
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1279, i32 1,
+ i1 false)
+ ret void
+}
+
+; ...and again with a destination frame base that goes out of range.
+define void @f12() {
+; CHECK-LABEL: f12:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: mvc 4076(256,%r15), 2100(%r15)
+; CHECK: lay [[NEWDEST:%r[1-5]]], 4332(%r15)
+; CHECK: mvc 0(256,[[NEWDEST]]), 2356(%r15)
+; CHECK: mvc 256(256,[[NEWDEST]]), 2612(%r15)
+; CHECK: mvc 512(256,[[NEWDEST]]), 2868(%r15)
+; CHECK: mvc 768(255,[[NEWDEST]]), 3124(%r15)
+; CHECK: brasl %r14, foo@PLT
+; CHECK: br %r14
+ %arr = alloca [6000 x i8]
+ %dest = getelementptr [6000 x i8] *%arr, i64 0, i64 3900
+ %src = getelementptr [6000 x i8] *%arr, i64 0, i64 1924
+ call void @foo(i8 *%dest, i8 *%src)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1279, i32 1,
+ i1 false)
+ call void @foo(i8 *%dest, i8 *%src)
+ ret void
+}
+
+; ...and again with a source frame base that goes out of range.
+define void @f13() {
+; CHECK-LABEL: f13:
+; CHECK: brasl %r14, foo@PLT
+; CHECK: mvc 200(256,%r15), 3826(%r15)
+; CHECK: mvc 456(256,%r15), 4082(%r15)
+; CHECK: lay [[NEWSRC:%r[1-5]]], 4338(%r15)
+; CHECK: mvc 712(256,%r15), 0([[NEWSRC]])
+; CHECK: mvc 968(256,%r15), 256([[NEWSRC]])
+; CHECK: mvc 1224(255,%r15), 512([[NEWSRC]])
+; CHECK: brasl %r14, foo@PLT
+; CHECK: br %r14
+ %arr = alloca [6000 x i8]
+ %dest = getelementptr [6000 x i8] *%arr, i64 0, i64 24
+ %src = getelementptr [6000 x i8] *%arr, i64 0, i64 3650
+ call void @foo(i8 *%dest, i8 *%src)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1279, i32 1,
+ i1 false)
+ call void @foo(i8 *%dest, i8 *%src)
+ ret void
+}
+
+; Test the last case that is done using straight-line code.
+define void @f14(i8 *%dest, i8 *%src) {
+; CHECK-LABEL: f14:
+; CHECK: mvc 0(256,%r2), 0(%r3)
+; CHECK: mvc 256(256,%r2), 256(%r3)
+; CHECK: mvc 512(256,%r2), 512(%r3)
+; CHECK: mvc 768(256,%r2), 768(%r3)
+; CHECK: mvc 1024(256,%r2), 1024(%r3)
+; CHECK: mvc 1280(256,%r2), 1280(%r3)
+; CHECK: br %r14
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1536, i32 1,
+ i1 false)
+ ret void
+}
+
+; Test the first case that is done using a loop.
+define void @f15(i8 *%dest, i8 *%src) {
+; CHECK-LABEL: f15:
+; CHECK: lghi [[COUNT:%r[0-5]]], 6
+; CHECK: [[LABEL:\.L[^:]*]]:
+; CHECK: pfd 2, 768(%r2)
+; CHECK: mvc 0(256,%r2), 0(%r3)
+; CHECK: la %r2, 256(%r2)
+; CHECK: la %r3, 256(%r3)
+; CHECK: brctg [[COUNT]], [[LABEL]]
+; CHECK: mvc 0(1,%r2), 0(%r3)
+; CHECK: br %r14
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1537, i32 1,
+ i1 false)
+ ret void
+}
+
+; ...and again with frame bases, where the base must be loaded into a
+; register before the loop.
+define void @f16() {
+; CHECK-LABEL: f16:
+; CHECK: brasl %r14, foo@PLT
+; CHECK-DAG: lghi [[COUNT:%r[0-5]]], 6
+; CHECK-DAG: la [[BASE:%r[0-5]]], 160(%r15)
+; CHECK: [[LABEL:\.L[^:]*]]:
+; CHECK: pfd 2, 2368([[BASE]])
+; CHECK: mvc 1600(256,[[BASE]]), 0([[BASE]])
+; CHECK: la [[BASE]], 256([[BASE]])
+; CHECK: brctg [[COUNT]], [[LABEL]]
+; CHECK: mvc 1600(1,[[BASE]]), 0([[BASE]])
+; CHECK: brasl %r14, foo@PLT
+; CHECK: br %r14
+ %arr = alloca [3200 x i8]
+ %dest = getelementptr [3200 x i8] *%arr, i64 0, i64 1600
+ %src = getelementptr [3200 x i8] *%arr, i64 0, i64 0
+ call void @foo(i8 *%dest, i8 *%src)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8 *%dest, i8 *%src, i64 1537, i32 1,
+ i1 false)
+ call void @foo(i8 *%dest, i8 *%src)
+ ret void
+}