diff options
| author | Dimitry Andric <dim@FreeBSD.org> | 2016-07-23 20:41:05 +0000 |
|---|---|---|
| committer | Dimitry Andric <dim@FreeBSD.org> | 2016-07-23 20:41:05 +0000 |
| commit | 01095a5d43bbfde13731688ddcf6048ebb8b7721 (patch) | |
| tree | 4def12e759965de927d963ac65840d663ef9d1ea /test/Transforms/LoopIdiom | |
| parent | f0f4822ed4b66e3579e92a89f368f8fb860e218e (diff) | |
Vendor import of llvm release_39 branch r276489:vendor/llvm/llvm-release_39-r276489
Diffstat (limited to 'test/Transforms/LoopIdiom')
| -rw-r--r-- | test/Transforms/LoopIdiom/AMDGPU/popcnt.ll | 25 | ||||
| -rw-r--r-- | test/Transforms/LoopIdiom/basic.ll | 37 | ||||
| -rw-r--r-- | test/Transforms/LoopIdiom/debug-line.ll | 6 | ||||
| -rw-r--r-- | test/Transforms/LoopIdiom/nontemporal_store.ll | 32 | ||||
| -rw-r--r-- | test/Transforms/LoopIdiom/pr28196.ll | 26 | ||||
| -rw-r--r-- | test/Transforms/LoopIdiom/struct.ll | 221 | ||||
| -rw-r--r-- | test/Transforms/LoopIdiom/struct_pattern.ll | 186 | ||||
| -rw-r--r-- | test/Transforms/LoopIdiom/unroll.ll | 80 | ||||
| -rw-r--r-- | test/Transforms/LoopIdiom/unwind.ll | 33 |
9 files changed, 642 insertions, 4 deletions
diff --git a/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll b/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll index e4301bbb06d3..e594c79a3e17 100644 --- a/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll +++ b/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll @@ -1,4 +1,4 @@ -; RUN: opt -loop-idiom -mtriple=r600-- -mcpu=SI -S < %s | FileCheck %s +; RUN: opt -loop-idiom -mtriple=amdgcn-- -S < %s | FileCheck %s ; Mostly copied from x86 version. @@ -59,6 +59,29 @@ while.end: ; preds = %while.body, %entry ret i32 %c.0.lcssa } +; CHECK-LABEL: @popcount_i128 +; CHECK: entry +; CHECK: llvm.ctpop.i128 +; CHECK: ret +define i32 @popcount_i128(i128 %a) nounwind uwtable readnone ssp { +entry: + %tobool3 = icmp eq i128 %a, 0 + br i1 %tobool3, label %while.end, label %while.body + +while.body: ; preds = %entry, %while.body + %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ] + %a.addr.04 = phi i128 [ %and, %while.body ], [ %a, %entry ] + %inc = add nsw i32 %c.05, 1 + %sub = add i128 %a.addr.04, -1 + %and = and i128 %sub, %a.addr.04 + %tobool = icmp eq i128 %and, 0 + br i1 %tobool, label %while.end, label %while.body + +while.end: ; preds = %while.body, %entry + %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ] + ret i32 %c.0.lcssa +} + ; To recognize this pattern: ;int popcount(unsigned long long a, int mydata1, int mydata2) { ; int c = 0; diff --git a/test/Transforms/LoopIdiom/basic.ll b/test/Transforms/LoopIdiom/basic.ll index 27a955175b59..4d584de9c6f7 100644 --- a/test/Transforms/LoopIdiom/basic.ll +++ b/test/Transforms/LoopIdiom/basic.ll @@ -531,3 +531,40 @@ for.cond.cleanup: ; preds = %for.body ; CHECK: call void @llvm.memcpy ; CHECK: ret } + +; Two dimensional nested loop with negative stride should be promoted to one big memset. 
+define void @test19(i8* nocapture %X) { +entry: + br label %for.cond1.preheader + +for.cond1.preheader: ; preds = %entry, %for.inc4 + %i.06 = phi i32 [ 99, %entry ], [ %dec5, %for.inc4 ] + %mul = mul nsw i32 %i.06, 100 + br label %for.body3 + +for.body3: ; preds = %for.cond1.preheader, %for.body3 + %j.05 = phi i32 [ 99, %for.cond1.preheader ], [ %dec, %for.body3 ] + %add = add nsw i32 %j.05, %mul + %idxprom = sext i32 %add to i64 + %arrayidx = getelementptr inbounds i8, i8* %X, i64 %idxprom + store i8 0, i8* %arrayidx, align 1 + %dec = add nsw i32 %j.05, -1 + %cmp2 = icmp sgt i32 %j.05, 0 + br i1 %cmp2, label %for.body3, label %for.inc4 + +for.inc4: ; preds = %for.body3 + %dec5 = add nsw i32 %i.06, -1 + %cmp = icmp sgt i32 %i.06, 0 + br i1 %cmp, label %for.cond1.preheader, label %for.end6 + +for.end6: ; preds = %for.inc4 + ret void +; CHECK-LABEL: @test19( +; CHECK: entry: +; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %X, i8 0, i64 10000, i32 1, i1 false) +; CHECK: ret void +} + +; Validate that "memset_pattern" has the proper attributes. 
+; CHECK: declare void @memset_pattern16(i8* nocapture, i8* nocapture readonly, i64) [[ATTRS:#[0-9]+]] +; CHECK: [[ATTRS]] = { argmemonly } diff --git a/test/Transforms/LoopIdiom/debug-line.ll b/test/Transforms/LoopIdiom/debug-line.ll index a85e48997548..a6a4af4e8d4e 100644 --- a/test/Transforms/LoopIdiom/debug-line.ll +++ b/test/Transforms/LoopIdiom/debug-line.ll @@ -28,11 +28,11 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone !llvm.module.flags = !{!19} -!llvm.dbg.sp = !{!0} +!llvm.dbg.cu = !{!2} -!0 = distinct !DISubprogram(name: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !18, scope: !1, type: !3) +!0 = distinct !DISubprogram(name: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !2, file: !18, scope: !1, type: !3) !1 = !DIFile(filename: "li.c", directory: "/private/tmp") -!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 127165:127174)", isOptimized: true, emissionKind: 0, file: !18, enums: !9, retainedTypes: !9) +!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 127165:127174)", isOptimized: true, emissionKind: FullDebug, file: !18, enums: !9, retainedTypes: !9) !3 = !DISubroutineType(types: !4) !4 = !{null} !5 = !DILocalVariable(name: "a", line: 2, arg: 1, scope: !0, file: !1, type: !6) diff --git a/test/Transforms/LoopIdiom/nontemporal_store.ll b/test/Transforms/LoopIdiom/nontemporal_store.ll new file mode 100644 index 000000000000..a5f8c7c451c7 --- /dev/null +++ b/test/Transforms/LoopIdiom/nontemporal_store.ll @@ -0,0 +1,32 @@ +; RUN: opt -loop-idiom < %s -S | FileCheck %s +; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(loop-idiom)' < %s -S | FileCheck %s +target 
datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +%struct.bigBlock_t = type { [256 x <4 x float>] } + +; CHECK-LABEL: @test( +; CHECK-NOT: llvm.memset +define void @test(%struct.bigBlock_t* %p) { +entry: + %0 = getelementptr inbounds %struct.bigBlock_t, %struct.bigBlock_t* %p, i64 0, i32 0, i64 0, i64 0 + br label %for.body + +for.body: ; preds = %entry, %for.body + %index.02 = phi i32 [ 0, %entry ], [ %add, %for.body ] + %dst.01 = phi float* [ %0, %entry ], [ %add.ptr2, %for.body ] + %cast.i5 = bitcast float* %dst.01 to <4 x float>* + store <4 x float> zeroinitializer, <4 x float>* %cast.i5, align 16, !nontemporal !0 + %add.ptr1 = getelementptr inbounds float, float* %dst.01, i64 4 + %cast.i = bitcast float* %add.ptr1 to <4 x float>* + store <4 x float> zeroinitializer, <4 x float>* %cast.i, align 16, !nontemporal !0 + %add.ptr2 = getelementptr inbounds float, float* %dst.01, i64 8 + %add = add nuw nsw i32 %index.02, 32 + %cmp = icmp ult i32 %add, 4096 + br i1 %cmp, label %for.body, label %for.end + +for.end: ; preds = %for.body + ret void +} + +!0 = !{i32 1} diff --git a/test/Transforms/LoopIdiom/pr28196.ll b/test/Transforms/LoopIdiom/pr28196.ll new file mode 100644 index 000000000000..10f49fbcd09f --- /dev/null +++ b/test/Transforms/LoopIdiom/pr28196.ll @@ -0,0 +1,26 @@ +; RUN: opt -loop-idiom -S < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +define void @test1() { +entry: + br label %for.body.preheader + +for.body.preheader: ; preds = %for.cond + br label %for.body + +for.body: ; preds = %for.body, %for.body.preheader + %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %add.ptr3 = getelementptr inbounds i32, i32* null, i32 %indvars.iv + %add.ptr4 = getelementptr inbounds i32, i32* %add.ptr3, i32 1 + %0 = load i32, i32* %add.ptr4, align 4 + store i32 %0, i32* %add.ptr3, align 4 + %indvars.iv.next = 
add nsw i32 %indvars.iv, 1 + %exitcond = icmp ne i32 %indvars.iv.next, 6 + br i1 %exitcond, label %for.body, label %for.body.preheader +} + +; CHECK-LABEL: define void @test1( +; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* null, i8* inttoptr (i64 4 to i8*), i64 24, i32 4, i1 false) +; CHECK-NOT: store diff --git a/test/Transforms/LoopIdiom/struct.ll b/test/Transforms/LoopIdiom/struct.ll new file mode 100644 index 000000000000..2828024952e2 --- /dev/null +++ b/test/Transforms/LoopIdiom/struct.ll @@ -0,0 +1,221 @@ +; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +target triple = "x86_64-apple-darwin10.0.0" + +%struct.foo = type { i32, i32 } +%struct.foo1 = type { i32, i32, i32 } +%struct.foo2 = type { i32, i16, i16 } + +;void bar1(foo_t *f, unsigned n) { +; for (unsigned i = 0; i < n; ++i) { +; f[i].a = 0; +; f[i].b = 0; +; } +;} +define void @bar1(%struct.foo* %f, i32 %n) nounwind ssp { +entry: + %cmp1 = icmp eq i32 %n, 0 + br i1 %cmp1, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0 + store i32 0, i32* %a, align 4 + %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1 + store i32 0, i32* %b, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp ne i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +; CHECK-LABEL: @bar1( +; CHECK: call void @llvm.memset +; CHECK-NOT: store 
+} + +;void bar2(foo_t *f, unsigned n) { +; for (unsigned i = 0; i < n; ++i) { +; f[i].b = 0; +; f[i].a = 0; +; } +;} +define void @bar2(%struct.foo* %f, i32 %n) nounwind ssp { +entry: + %cmp1 = icmp eq i32 %n, 0 + br i1 %cmp1, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1 + store i32 0, i32* %b, align 4 + %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0 + store i32 0, i32* %a, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp ne i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +; CHECK-LABEL: @bar2( +; CHECK: call void @llvm.memset +; CHECK-NOT: store +} + +;void bar3(foo_t *f, unsigned n) { +; for (unsigned i = n; i > 0; --i) { +; f[i].a = 0; +; f[i].b = 0; +; } +;} +define void @bar3(%struct.foo* nocapture %f, i32 %n) nounwind ssp { +entry: + %cmp1 = icmp eq i32 %n, 0 + br i1 %cmp1, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + %0 = zext i32 %n to i64 + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0 + store i32 0, i32* %a, align 4 + %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1 + store i32 0, i32* %b, align 4 + %1 = trunc i64 %indvars.iv to i32 + %dec = add i32 %1, -1 + %cmp = icmp eq i32 %dec, 0 + %indvars.iv.next = add nsw i64 %indvars.iv, -1 + br i1 %cmp, label 
%for.end.loopexit, label %for.body + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +; CHECK-LABEL: @bar3( +; CHECK: call void @llvm.memset +; CHECK-NOT: store +} + +;void bar4(foo_t *f, unsigned n) { +; for (unsigned i = 0; i < n; ++i) { +; f[i].a = 0; +; f[i].b = 1; +; } +;} +define void @bar4(%struct.foo* nocapture %f, i32 %n) nounwind ssp { +entry: + %cmp1 = icmp eq i32 %n, 0 + br i1 %cmp1, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0 + store i32 0, i32* %a, align 4 + %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1 + store i32 1, i32* %b, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp ne i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +; CHECK-LABEL: @bar4( +; CHECK-NOT: call void @llvm.memset +} + +;void bar5(foo1_t *f, unsigned n) { +; for (unsigned i = 0; i < n; ++i) { +; f[i].a = 0; +; f[i].b = 0; +; } +;} +define void @bar5(%struct.foo1* nocapture %f, i32 %n) nounwind ssp { +entry: + %cmp1 = icmp eq i32 %n, 0 + br i1 %cmp1, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %a = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 0 + store i32 0, i32* %a, align 4 + %b = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 
%indvars.iv, i32 1 + store i32 0, i32* %b, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp ne i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +; CHECK-LABEL: @bar5( +; CHECK-NOT: call void @llvm.memset +} + +;void bar6(foo2_t *f, unsigned n) { +; for (unsigned i = 0; i < n; ++i) { +; f[i].a = 0; +; f[i].b = 0; +; f[i].c = 0; +; } +;} +define void @bar6(%struct.foo2* nocapture %f, i32 %n) nounwind ssp { +entry: + %cmp1 = icmp eq i32 %n, 0 + br i1 %cmp1, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %a = getelementptr inbounds %struct.foo2, %struct.foo2* %f, i64 %indvars.iv, i32 0 + store i32 0, i32* %a, align 4 + %b = getelementptr inbounds %struct.foo2, %struct.foo2* %f, i64 %indvars.iv, i32 1 + store i16 0, i16* %b, align 4 + %c = getelementptr inbounds %struct.foo2, %struct.foo2* %f, i64 %indvars.iv, i32 2 + store i16 0, i16* %c, align 2 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp ne i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +; CHECK-LABEL: @bar6( +; CHECK: call void @llvm.memset +; CHECK-NOT: store +} diff --git a/test/Transforms/LoopIdiom/struct_pattern.ll b/test/Transforms/LoopIdiom/struct_pattern.ll new file mode 100644 index 000000000000..d7809b746b15 --- /dev/null +++ b/test/Transforms/LoopIdiom/struct_pattern.ll @@ -0,0 +1,186 @@ +; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s +target 
datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16 +; CHECK: @.memset_pattern.1 = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16 +; CHECK: @.memset_pattern.2 = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16 + +target triple = "x86_64-apple-darwin10.0.0" + +%struct.foo = type { i32, i32 } +%struct.foo1 = type { i32, i32, i32 } + +;void bar1(foo_t *f, unsigned n) { +; for (unsigned i = 0; i < n; ++i) { +; f[i].a = 2; +; f[i].b = 2; +; } +;} +define void @bar1(%struct.foo* %f, i32 %n) nounwind ssp { +entry: + %cmp1 = icmp eq i32 %n, 0 + br i1 %cmp1, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0 + store i32 2, i32* %a, align 4 + %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1 + store i32 2, i32* %b, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp ne i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +; CHECK-LABEL: @bar1( +; CHECK: call void @memset_pattern16 +; CHECK-NOT: store +} + +;void bar2(foo_t *f, unsigned n) { +; for (unsigned i = 0; i < n; ++i) { +; f[i].b = 2; +; f[i].a = 2; +; } +;} +define void @bar2(%struct.foo* %f, i32 %n) nounwind ssp { +entry: + %cmp1 = icmp eq i32 %n, 0 + br i1 %cmp1, label %for.end, label %for.body.preheader + 
+for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1 + store i32 2, i32* %b, align 4 + %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0 + store i32 2, i32* %a, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp ne i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +; CHECK-LABEL: @bar2( +; CHECK: call void @memset_pattern16 +; CHECK-NOT: store +} + +;void bar3(foo_t *f, unsigned n) { +; for (unsigned i = n; i > 0; --i) { +; f[i].a = 2; +; f[i].b = 2; +; } +;} +define void @bar3(%struct.foo* nocapture %f, i32 %n) nounwind ssp { +entry: + %cmp1 = icmp eq i32 %n, 0 + br i1 %cmp1, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + %0 = zext i32 %n to i64 + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0 + store i32 2, i32* %a, align 4 + %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1 + store i32 2, i32* %b, align 4 + %1 = trunc i64 %indvars.iv to i32 + %dec = add i32 %1, -1 + %cmp = icmp eq i32 %dec, 0 + %indvars.iv.next = add nsw i64 %indvars.iv, -1 + br i1 %cmp, label %for.end.loopexit, label %for.body + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +; CHECK-LABEL: @bar3( +; CHECK: call void @memset_pattern16 +; CHECK-NOT: store +} + +;void bar4(foo_t *f, unsigned 
n) { +; for (unsigned i = 0; i < n; ++i) { +; f[i].a = 0; +; f[i].b = 1; +; } +;} +define void @bar4(%struct.foo* nocapture %f, i32 %n) nounwind ssp { +entry: + %cmp1 = icmp eq i32 %n, 0 + br i1 %cmp1, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0 + store i32 0, i32* %a, align 4 + %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1 + store i32 1, i32* %b, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp ne i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +; CHECK-LABEL: @bar4( +; CHECK-NOT: call void @memset_pattern16 +} + +;void bar5(foo1_t *f, unsigned n) { +; for (unsigned i = 0; i < n; ++i) { +; f[i].a = 1; +; f[i].b = 1; +; } +;} +define void @bar5(%struct.foo1* nocapture %f, i32 %n) nounwind ssp { +entry: + %cmp1 = icmp eq i32 %n, 0 + br i1 %cmp1, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %a = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 0 + store i32 1, i32* %a, align 4 + %b = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 1 + store i32 1, i32* %b, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1 + %lftr.wideiv = trunc i64 %indvars.iv.next to i32 + %exitcond = icmp ne i32 %lftr.wideiv, %n + br i1 %exitcond, label %for.body, label %for.end.loopexit + 
+for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +; CHECK-LABEL: @bar5( +; CHECK-NOT: call void @memset_pattern16 +} diff --git a/test/Transforms/LoopIdiom/unroll.ll b/test/Transforms/LoopIdiom/unroll.ll new file mode 100644 index 000000000000..0cdfda254d78 --- /dev/null +++ b/test/Transforms/LoopIdiom/unroll.ll @@ -0,0 +1,80 @@ +; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s +target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64" + +; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16 + +target triple = "x86_64-apple-darwin10.0.0" + +;void test(int *f, unsigned n) { +; for (unsigned i = 0; i < 2 * n; i += 2) { +; f[i] = 0; +; f[i+1] = 0; +; } +;} +define void @test(i32* %f, i32 %n) nounwind ssp { +entry: + %mul = shl i32 %n, 1 + %cmp1 = icmp eq i32 %mul, 0 + br i1 %cmp1, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + %0 = zext i32 %mul to i64 + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv + store i32 0, i32* %arrayidx, align 4 + %1 = or i64 %indvars.iv, 1 + %arrayidx2 = getelementptr inbounds i32, i32* %f, i64 %1 + store i32 0, i32* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2 + %cmp = icmp ult i64 %indvars.iv.next, %0 + br i1 %cmp, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +; CHECK-LABEL: @test( +; CHECK: call void @llvm.memset +; CHECK-NOT: store +} + +;void test_pattern(int *f, unsigned n) { +; for (unsigned i = 0; i < 2 * n; i += 2) { +; f[i] = 2; +; f[i+1] = 2; +; } +;} 
+define void @test_pattern(i32* %f, i32 %n) nounwind ssp { +entry: + %mul = shl i32 %n, 1 + %cmp1 = icmp eq i32 %mul, 0 + br i1 %cmp1, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + %0 = zext i32 %mul to i64 + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv + store i32 2, i32* %arrayidx, align 4 + %1 = or i64 %indvars.iv, 1 + %arrayidx2 = getelementptr inbounds i32, i32* %f, i64 %1 + store i32 2, i32* %arrayidx2, align 4 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2 + %cmp = icmp ult i64 %indvars.iv.next, %0 + br i1 %cmp, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +; CHECK-LABEL: @test_pattern( +; CHECK: call void @memset_pattern16 +; CHECK-NOT: store +} diff --git a/test/Transforms/LoopIdiom/unwind.ll b/test/Transforms/LoopIdiom/unwind.ll new file mode 100644 index 000000000000..a132cba164bd --- /dev/null +++ b/test/Transforms/LoopIdiom/unwind.ll @@ -0,0 +1,33 @@ +; RUN: opt -loop-idiom < %s -S | FileCheck %s +target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" +target triple = "x86_64-unknown-linux-gnu" + +declare void @ff() + +define void @test(i8* noalias nocapture %base, i64 %size) #1 { +entry: + %cmp3 = icmp eq i64 %size, 0 + br i1 %cmp3, label %for.end, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.body: ; preds = %for.body.preheader, %for.body +; CHECK-LABEL: @test( +; CHECK-NOT: llvm.memset + %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ] + tail call void @ff() + %arrayidx = getelementptr inbounds i8, i8* %base, i64 %indvars.iv + store i8 0, i8* %arrayidx, align 1 + %indvars.iv.next = add i64 %indvars.iv, 1 + %exitcond = icmp ne i64 
%indvars.iv.next, %size + br i1 %exitcond, label %for.body, label %for.end.loopexit + +for.end.loopexit: ; preds = %for.body + br label %for.end + +for.end: ; preds = %for.end.loopexit, %entry + ret void +} + +attributes #1 = { uwtable } |
