aboutsummaryrefslogtreecommitdiff
path: root/test/Transforms/LoopIdiom
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2016-07-23 20:41:05 +0000
committerDimitry Andric <dim@FreeBSD.org>2016-07-23 20:41:05 +0000
commit01095a5d43bbfde13731688ddcf6048ebb8b7721 (patch)
tree4def12e759965de927d963ac65840d663ef9d1ea /test/Transforms/LoopIdiom
parentf0f4822ed4b66e3579e92a89f368f8fb860e218e (diff)
Vendor import of llvm release_39 branch r276489:vendor/llvm/llvm-release_39-r276489
Diffstat (limited to 'test/Transforms/LoopIdiom')
-rw-r--r--test/Transforms/LoopIdiom/AMDGPU/popcnt.ll25
-rw-r--r--test/Transforms/LoopIdiom/basic.ll37
-rw-r--r--test/Transforms/LoopIdiom/debug-line.ll6
-rw-r--r--test/Transforms/LoopIdiom/nontemporal_store.ll32
-rw-r--r--test/Transforms/LoopIdiom/pr28196.ll26
-rw-r--r--test/Transforms/LoopIdiom/struct.ll221
-rw-r--r--test/Transforms/LoopIdiom/struct_pattern.ll186
-rw-r--r--test/Transforms/LoopIdiom/unroll.ll80
-rw-r--r--test/Transforms/LoopIdiom/unwind.ll33
9 files changed, 642 insertions, 4 deletions
diff --git a/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll b/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll
index e4301bbb06d3..e594c79a3e17 100644
--- a/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll
+++ b/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-idiom -mtriple=r600-- -mcpu=SI -S < %s | FileCheck %s
+; RUN: opt -loop-idiom -mtriple=amdgcn-- -S < %s | FileCheck %s
; Mostly copied from x86 version.
@@ -59,6 +59,29 @@ while.end: ; preds = %while.body, %entry
ret i32 %c.0.lcssa
}
+; CHECK-LABEL: @popcount_i128
+; CHECK: entry
+; CHECK: llvm.ctpop.i128
+; CHECK: ret
+define i32 @popcount_i128(i128 %a) nounwind uwtable readnone ssp {
+entry:
+ %tobool3 = icmp eq i128 %a, 0
+ br i1 %tobool3, label %while.end, label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+ %a.addr.04 = phi i128 [ %and, %while.body ], [ %a, %entry ]
+ %inc = add nsw i32 %c.05, 1
+ %sub = add i128 %a.addr.04, -1
+ %and = and i128 %sub, %a.addr.04
+ %tobool = icmp eq i128 %and, 0
+ br i1 %tobool, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+ ret i32 %c.0.lcssa
+}
+
; To recognize this pattern:
;int popcount(unsigned long long a, int mydata1, int mydata2) {
; int c = 0;
diff --git a/test/Transforms/LoopIdiom/basic.ll b/test/Transforms/LoopIdiom/basic.ll
index 27a955175b59..4d584de9c6f7 100644
--- a/test/Transforms/LoopIdiom/basic.ll
+++ b/test/Transforms/LoopIdiom/basic.ll
@@ -531,3 +531,40 @@ for.cond.cleanup: ; preds = %for.body
; CHECK: call void @llvm.memcpy
; CHECK: ret
}
+
+; Two dimensional nested loop with negative stride should be promoted to one big memset.
+define void @test19(i8* nocapture %X) {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %entry, %for.inc4
+ %i.06 = phi i32 [ 99, %entry ], [ %dec5, %for.inc4 ]
+ %mul = mul nsw i32 %i.06, 100
+ br label %for.body3
+
+for.body3: ; preds = %for.cond1.preheader, %for.body3
+ %j.05 = phi i32 [ 99, %for.cond1.preheader ], [ %dec, %for.body3 ]
+ %add = add nsw i32 %j.05, %mul
+ %idxprom = sext i32 %add to i64
+ %arrayidx = getelementptr inbounds i8, i8* %X, i64 %idxprom
+ store i8 0, i8* %arrayidx, align 1
+ %dec = add nsw i32 %j.05, -1
+ %cmp2 = icmp sgt i32 %j.05, 0
+ br i1 %cmp2, label %for.body3, label %for.inc4
+
+for.inc4: ; preds = %for.body3
+ %dec5 = add nsw i32 %i.06, -1
+ %cmp = icmp sgt i32 %i.06, 0
+ br i1 %cmp, label %for.cond1.preheader, label %for.end6
+
+for.end6: ; preds = %for.inc4
+ ret void
+; CHECK-LABEL: @test19(
+; CHECK: entry:
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %X, i8 0, i64 10000, i32 1, i1 false)
+; CHECK: ret void
+}
+
+; Validate that "memset_pattern" has the proper attributes.
+; CHECK: declare void @memset_pattern16(i8* nocapture, i8* nocapture readonly, i64) [[ATTRS:#[0-9]+]]
+; CHECK: [[ATTRS]] = { argmemonly }
diff --git a/test/Transforms/LoopIdiom/debug-line.ll b/test/Transforms/LoopIdiom/debug-line.ll
index a85e48997548..a6a4af4e8d4e 100644
--- a/test/Transforms/LoopIdiom/debug-line.ll
+++ b/test/Transforms/LoopIdiom/debug-line.ll
@@ -28,11 +28,11 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
!llvm.module.flags = !{!19}
-!llvm.dbg.sp = !{!0}
+!llvm.dbg.cu = !{!2}
-!0 = distinct !DISubprogram(name: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !18, scope: !1, type: !3)
+!0 = distinct !DISubprogram(name: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !2, file: !18, scope: !1, type: !3)
!1 = !DIFile(filename: "li.c", directory: "/private/tmp")
-!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 127165:127174)", isOptimized: true, emissionKind: 0, file: !18, enums: !9, retainedTypes: !9)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 127165:127174)", isOptimized: true, emissionKind: FullDebug, file: !18, enums: !9, retainedTypes: !9)
!3 = !DISubroutineType(types: !4)
!4 = !{null}
!5 = !DILocalVariable(name: "a", line: 2, arg: 1, scope: !0, file: !1, type: !6)
diff --git a/test/Transforms/LoopIdiom/nontemporal_store.ll b/test/Transforms/LoopIdiom/nontemporal_store.ll
new file mode 100644
index 000000000000..a5f8c7c451c7
--- /dev/null
+++ b/test/Transforms/LoopIdiom/nontemporal_store.ll
@@ -0,0 +1,32 @@
+; RUN: opt -loop-idiom < %s -S | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(loop-idiom)' < %s -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.bigBlock_t = type { [256 x <4 x float>] }
+
+; CHECK-LABEL: @test(
+; CHECK-NOT: llvm.memset
+define void @test(%struct.bigBlock_t* %p) {
+entry:
+ %0 = getelementptr inbounds %struct.bigBlock_t, %struct.bigBlock_t* %p, i64 0, i32 0, i64 0, i64 0
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %index.02 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %dst.01 = phi float* [ %0, %entry ], [ %add.ptr2, %for.body ]
+ %cast.i5 = bitcast float* %dst.01 to <4 x float>*
+ store <4 x float> zeroinitializer, <4 x float>* %cast.i5, align 16, !nontemporal !0
+ %add.ptr1 = getelementptr inbounds float, float* %dst.01, i64 4
+ %cast.i = bitcast float* %add.ptr1 to <4 x float>*
+ store <4 x float> zeroinitializer, <4 x float>* %cast.i, align 16, !nontemporal !0
+ %add.ptr2 = getelementptr inbounds float, float* %dst.01, i64 8
+ %add = add nuw nsw i32 %index.02, 32
+ %cmp = icmp ult i32 %add, 4096
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+!0 = !{i32 1}
diff --git a/test/Transforms/LoopIdiom/pr28196.ll b/test/Transforms/LoopIdiom/pr28196.ll
new file mode 100644
index 000000000000..10f49fbcd09f
--- /dev/null
+++ b/test/Transforms/LoopIdiom/pr28196.ll
@@ -0,0 +1,26 @@
+; RUN: opt -loop-idiom -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @test1() {
+entry:
+ br label %for.body.preheader
+
+for.body.preheader: ; preds = %for.cond
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.body.preheader
+ %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %add.ptr3 = getelementptr inbounds i32, i32* null, i32 %indvars.iv
+ %add.ptr4 = getelementptr inbounds i32, i32* %add.ptr3, i32 1
+ %0 = load i32, i32* %add.ptr4, align 4
+ store i32 %0, i32* %add.ptr3, align 4
+ %indvars.iv.next = add nsw i32 %indvars.iv, 1
+ %exitcond = icmp ne i32 %indvars.iv.next, 6
+ br i1 %exitcond, label %for.body, label %for.body.preheader
+}
+
+; CHECK-LABEL: define void @test1(
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* null, i8* inttoptr (i64 4 to i8*), i64 24, i32 4, i1 false)
+; CHECK-NOT: store
diff --git a/test/Transforms/LoopIdiom/struct.ll b/test/Transforms/LoopIdiom/struct.ll
new file mode 100644
index 000000000000..2828024952e2
--- /dev/null
+++ b/test/Transforms/LoopIdiom/struct.ll
@@ -0,0 +1,221 @@
+; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+target triple = "x86_64-apple-darwin10.0.0"
+
+%struct.foo = type { i32, i32 }
+%struct.foo1 = type { i32, i32, i32 }
+%struct.foo2 = type { i32, i16, i16 }
+
+;void bar1(foo_t *f, unsigned n) {
+; for (unsigned i = 0; i < n; ++i) {
+; f[i].a = 0;
+; f[i].b = 0;
+; }
+;}
+define void @bar1(%struct.foo* %f, i32 %n) nounwind ssp {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
+ store i32 0, i32* %a, align 4
+ %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
+ store i32 0, i32* %b, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @bar1(
+; CHECK: call void @llvm.memset
+; CHECK-NOT: store
+}
+
+;void bar2(foo_t *f, unsigned n) {
+; for (unsigned i = 0; i < n; ++i) {
+; f[i].b = 0;
+; f[i].a = 0;
+; }
+;}
+define void @bar2(%struct.foo* %f, i32 %n) nounwind ssp {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
+ store i32 0, i32* %b, align 4
+ %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
+ store i32 0, i32* %a, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @bar2(
+; CHECK: call void @llvm.memset
+; CHECK-NOT: store
+}
+
+;void bar3(foo_t *f, unsigned n) {
+; for (unsigned i = n; i > 0; --i) {
+; f[i].a = 0;
+; f[i].b = 0;
+; }
+;}
+define void @bar3(%struct.foo* nocapture %f, i32 %n) nounwind ssp {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ %0 = zext i32 %n to i64
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
+ store i32 0, i32* %a, align 4
+ %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
+ store i32 0, i32* %b, align 4
+ %1 = trunc i64 %indvars.iv to i32
+ %dec = add i32 %1, -1
+ %cmp = icmp eq i32 %dec, 0
+ %indvars.iv.next = add nsw i64 %indvars.iv, -1
+ br i1 %cmp, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @bar3(
+; CHECK: call void @llvm.memset
+; CHECK-NOT: store
+}
+
+;void bar4(foo_t *f, unsigned n) {
+; for (unsigned i = 0; i < n; ++i) {
+; f[i].a = 0;
+; f[i].b = 1;
+; }
+;}
+define void @bar4(%struct.foo* nocapture %f, i32 %n) nounwind ssp {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
+ store i32 0, i32* %a, align 4
+ %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
+ store i32 1, i32* %b, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @bar4(
+; CHECK-NOT: call void @llvm.memset
+}
+
+;void bar5(foo1_t *f, unsigned n) {
+; for (unsigned i = 0; i < n; ++i) {
+; f[i].a = 0;
+; f[i].b = 0;
+; }
+;}
+define void @bar5(%struct.foo1* nocapture %f, i32 %n) nounwind ssp {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %a = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 0
+ store i32 0, i32* %a, align 4
+ %b = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 1
+ store i32 0, i32* %b, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @bar5(
+; CHECK-NOT: call void @llvm.memset
+}
+
+;void bar6(foo2_t *f, unsigned n) {
+; for (unsigned i = 0; i < n; ++i) {
+; f[i].a = 0;
+; f[i].b = 0;
+; f[i].c = 0;
+; }
+;}
+define void @bar6(%struct.foo2* nocapture %f, i32 %n) nounwind ssp {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %a = getelementptr inbounds %struct.foo2, %struct.foo2* %f, i64 %indvars.iv, i32 0
+ store i32 0, i32* %a, align 4
+ %b = getelementptr inbounds %struct.foo2, %struct.foo2* %f, i64 %indvars.iv, i32 1
+ store i16 0, i16* %b, align 4
+ %c = getelementptr inbounds %struct.foo2, %struct.foo2* %f, i64 %indvars.iv, i32 2
+ store i16 0, i16* %c, align 2
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @bar6(
+; CHECK: call void @llvm.memset
+; CHECK-NOT: store
+}
diff --git a/test/Transforms/LoopIdiom/struct_pattern.ll b/test/Transforms/LoopIdiom/struct_pattern.ll
new file mode 100644
index 000000000000..d7809b746b15
--- /dev/null
+++ b/test/Transforms/LoopIdiom/struct_pattern.ll
@@ -0,0 +1,186 @@
+; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
+; CHECK: @.memset_pattern.1 = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
+; CHECK: @.memset_pattern.2 = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
+
+target triple = "x86_64-apple-darwin10.0.0"
+
+%struct.foo = type { i32, i32 }
+%struct.foo1 = type { i32, i32, i32 }
+
+;void bar1(foo_t *f, unsigned n) {
+; for (unsigned i = 0; i < n; ++i) {
+; f[i].a = 2;
+; f[i].b = 2;
+; }
+;}
+define void @bar1(%struct.foo* %f, i32 %n) nounwind ssp {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
+ store i32 2, i32* %a, align 4
+ %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
+ store i32 2, i32* %b, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @bar1(
+; CHECK: call void @memset_pattern16
+; CHECK-NOT: store
+}
+
+;void bar2(foo_t *f, unsigned n) {
+; for (unsigned i = 0; i < n; ++i) {
+; f[i].b = 2;
+; f[i].a = 2;
+; }
+;}
+define void @bar2(%struct.foo* %f, i32 %n) nounwind ssp {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
+ store i32 2, i32* %b, align 4
+ %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
+ store i32 2, i32* %a, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @bar2(
+; CHECK: call void @memset_pattern16
+; CHECK-NOT: store
+}
+
+;void bar3(foo_t *f, unsigned n) {
+; for (unsigned i = n; i > 0; --i) {
+; f[i].a = 2;
+; f[i].b = 2;
+; }
+;}
+define void @bar3(%struct.foo* nocapture %f, i32 %n) nounwind ssp {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ %0 = zext i32 %n to i64
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
+ store i32 2, i32* %a, align 4
+ %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
+ store i32 2, i32* %b, align 4
+ %1 = trunc i64 %indvars.iv to i32
+ %dec = add i32 %1, -1
+ %cmp = icmp eq i32 %dec, 0
+ %indvars.iv.next = add nsw i64 %indvars.iv, -1
+ br i1 %cmp, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @bar3(
+; CHECK: call void @memset_pattern16
+; CHECK-NOT: store
+}
+
+;void bar4(foo_t *f, unsigned n) {
+; for (unsigned i = 0; i < n; ++i) {
+; f[i].a = 0;
+; f[i].b = 1;
+; }
+;}
+define void @bar4(%struct.foo* nocapture %f, i32 %n) nounwind ssp {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
+ store i32 0, i32* %a, align 4
+ %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
+ store i32 1, i32* %b, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @bar4(
+; CHECK-NOT: call void @memset_pattern16
+}
+
+;void bar5(foo1_t *f, unsigned n) {
+; for (unsigned i = 0; i < n; ++i) {
+; f[i].a = 1;
+; f[i].b = 1;
+; }
+;}
+define void @bar5(%struct.foo1* nocapture %f, i32 %n) nounwind ssp {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %a = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 0
+ store i32 1, i32* %a, align 4
+ %b = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 1
+ store i32 1, i32* %b, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @bar5(
+; CHECK-NOT: call void @memset_pattern16
+}
diff --git a/test/Transforms/LoopIdiom/unroll.ll b/test/Transforms/LoopIdiom/unroll.ll
new file mode 100644
index 000000000000..0cdfda254d78
--- /dev/null
+++ b/test/Transforms/LoopIdiom/unroll.ll
@@ -0,0 +1,80 @@
+; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+; CHECK @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
+
+target triple = "x86_64-apple-darwin10.0.0"
+
+;void test(int *f, unsigned n) {
+; for (unsigned i = 0; i < 2 * n; i += 2) {
+; f[i] = 0;
+; f[i+1] = 0;
+; }
+;}
+define void @test(i32* %f, i32 %n) nounwind ssp {
+entry:
+ %mul = shl i32 %n, 1
+ %cmp1 = icmp eq i32 %mul, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ %0 = zext i32 %mul to i64
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
+ store i32 0, i32* %arrayidx, align 4
+ %1 = or i64 %indvars.iv, 1
+ %arrayidx2 = getelementptr inbounds i32, i32* %f, i64 %1
+ store i32 0, i32* %arrayidx2, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
+ %cmp = icmp ult i64 %indvars.iv.next, %0
+ br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @test(
+; CHECK: call void @llvm.memset
+; CHECK-NOT: store
+}
+
+;void test_pattern(int *f, unsigned n) {
+; for (unsigned i = 0; i < 2 * n; i += 2) {
+; f[i] = 2;
+; f[i+1] = 2;
+; }
+;}
+define void @test_pattern(i32* %f, i32 %n) nounwind ssp {
+entry:
+ %mul = shl i32 %n, 1
+ %cmp1 = icmp eq i32 %mul, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ %0 = zext i32 %mul to i64
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
+ store i32 2, i32* %arrayidx, align 4
+ %1 = or i64 %indvars.iv, 1
+ %arrayidx2 = getelementptr inbounds i32, i32* %f, i64 %1
+ store i32 2, i32* %arrayidx2, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
+ %cmp = icmp ult i64 %indvars.iv.next, %0
+ br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @test_pattern(
+; CHECK: call void @memset_pattern16
+; CHECK-NOT: store
+}
diff --git a/test/Transforms/LoopIdiom/unwind.ll b/test/Transforms/LoopIdiom/unwind.ll
new file mode 100644
index 000000000000..a132cba164bd
--- /dev/null
+++ b/test/Transforms/LoopIdiom/unwind.ll
@@ -0,0 +1,33 @@
+; RUN: opt -loop-idiom < %s -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @ff()
+
+define void @test(i8* noalias nocapture %base, i64 %size) #1 {
+entry:
+ %cmp3 = icmp eq i64 %size, 0
+ br i1 %cmp3, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+; CHECK-LABEL: @test(
+; CHECK-NOT: llvm.memset
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ tail call void @ff()
+ %arrayidx = getelementptr inbounds i8, i8* %base, i64 %indvars.iv
+ store i8 0, i8* %arrayidx, align 1
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %exitcond = icmp ne i64 %indvars.iv.next, %size
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+}
+
+attributes #1 = { uwtable }