aboutsummaryrefslogtreecommitdiff
path: root/test/Transforms
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2012-12-02 13:10:19 +0000
committerDimitry Andric <dim@FreeBSD.org>2012-12-02 13:10:19 +0000
commit522600a229b950314b5f4af84eba4f3e8a0ffea1 (patch)
tree32b4679ab4b8f28e5228daafc65e9dc436935353 /test/Transforms
parent902a7b529820e6a0aa85f98f21afaeb1805a22f8 (diff)
downloadsrc-522600a229b950314b5f4af84eba4f3e8a0ffea1.tar.gz
src-522600a229b950314b5f4af84eba4f3e8a0ffea1.zip
Vendor import of llvm release_32 branch r168974 (effectively, 3.2 RC2):vendor/llvm/llvm-release_32-r168974
Notes
Notes: svn path=/vendor/llvm/dist/; revision=243789 svn path=/vendor/llvm/llvm-release_32-r168974/; revision=243790; tag=vendor/llvm/llvm-release_32-r168974
Diffstat (limited to 'test/Transforms')
-rw-r--r--test/Transforms/BBVectorize/X86/cmp-types.ll16
-rw-r--r--test/Transforms/BBVectorize/X86/loop1.ll53
-rw-r--r--test/Transforms/BBVectorize/X86/sh-rec.ll54
-rw-r--r--test/Transforms/BBVectorize/X86/sh-rec2.ll85
-rw-r--r--test/Transforms/BBVectorize/X86/sh-rec3.ll170
-rw-r--r--test/Transforms/BBVectorize/X86/sh-types.ll25
-rw-r--r--test/Transforms/BBVectorize/X86/simple-ldstr.ll29
-rw-r--r--test/Transforms/BBVectorize/X86/simple.ll103
-rw-r--r--test/Transforms/BBVectorize/X86/vs-cast.ll12
-rw-r--r--test/Transforms/BBVectorize/cycle.ll2
-rw-r--r--test/Transforms/BBVectorize/lit.local.cfg5
-rw-r--r--test/Transforms/BBVectorize/loop1.ll2
-rw-r--r--test/Transforms/BBVectorize/search-limit.ll2
-rw-r--r--test/Transforms/BBVectorize/simple-int.ll6
-rw-r--r--test/Transforms/BBVectorize/simple-ldstr-ptrs.ll53
-rw-r--r--test/Transforms/BBVectorize/simple-ldstr.ll64
-rw-r--r--test/Transforms/BBVectorize/simple-sel.ll4
-rw-r--r--test/Transforms/BBVectorize/simple.ll66
-rw-r--r--test/Transforms/ConstProp/loads.ll132
-rw-r--r--test/Transforms/CorrelatedValuePropagation/crash.ll25
-rw-r--r--test/Transforms/DeadArgElim/dbginfo.ll64
-rw-r--r--test/Transforms/DeadStoreElimination/libcalls.ll70
-rw-r--r--test/Transforms/DeadStoreElimination/simple.ll14
-rw-r--r--test/Transforms/EarlyCSE/commute.ll66
-rw-r--r--test/Transforms/GVN/crash.ll36
-rw-r--r--test/Transforms/GVN/malloc-load-removal.ll31
-rw-r--r--test/Transforms/GVN/pr14166.ll27
-rw-r--r--test/Transforms/GVN/rle.ll8
-rw-r--r--test/Transforms/GlobalOpt/blockaddress.ll20
-rw-r--r--test/Transforms/GlobalOpt/load-store-global.ll25
-rw-r--r--test/Transforms/GlobalOpt/tls.ll53
-rw-r--r--test/Transforms/IndVarSimplify/2004-04-05-InvokeCastCrash.ll4
-rw-r--r--test/Transforms/IndVarSimplify/2012-10-19-congruent-constant.ll27
-rw-r--r--test/Transforms/IndVarSimplify/crash.ll44
-rw-r--r--test/Transforms/IndVarSimplify/eliminate-comparison.ll103
-rw-r--r--test/Transforms/IndVarSimplify/no-iv-rewrite.ll1
-rw-r--r--test/Transforms/IndVarSimplify/verify-scev.ll421
-rw-r--r--test/Transforms/Inline/recursive.ll38
-rw-r--r--test/Transforms/InstCombine/2012-07-25-LoadPart.ll10
-rw-r--r--test/Transforms/InstCombine/2012-08-28-udiv_ashl.ll57
-rw-r--r--test/Transforms/InstCombine/2012-09-17-ZeroSizedAlloca.ll24
-rw-r--r--test/Transforms/InstCombine/2012-09-24-MemcpyFromGlobalCrash.ll19
-rw-r--r--test/Transforms/InstCombine/2012-10-25-vector-of-pointers.ll51
-rw-r--r--test/Transforms/InstCombine/align-addr.ll16
-rw-r--r--test/Transforms/InstCombine/alloca.ll16
-rw-r--r--test/Transforms/InstCombine/and-fcmp.ll2
-rw-r--r--test/Transforms/InstCombine/cast.ll206
-rw-r--r--test/Transforms/InstCombine/disable-simplify-libcalls.ll236
-rw-r--r--test/Transforms/InstCombine/div-shift.ll14
-rw-r--r--test/Transforms/InstCombine/fcmp.ll93
-rw-r--r--test/Transforms/InstCombine/fold-vector-select.ll153
-rw-r--r--test/Transforms/InstCombine/icmp.ll18
-rw-r--r--test/Transforms/InstCombine/memcmp-1.ll72
-rw-r--r--test/Transforms/InstCombine/memcmp-2.ll17
-rw-r--r--test/Transforms/InstCombine/memcpy-1.ll17
-rw-r--r--test/Transforms/InstCombine/memcpy-2.ll17
-rw-r--r--test/Transforms/InstCombine/memcpy-from-global.ll (renamed from test/Transforms/ScalarRepl/memcpy-from-global.ll)30
-rw-r--r--test/Transforms/InstCombine/memcpy_chk-1.ll60
-rw-r--r--test/Transforms/InstCombine/memcpy_chk-2.ll24
-rw-r--r--test/Transforms/InstCombine/memmove-1.ll17
-rw-r--r--test/Transforms/InstCombine/memmove-2.ll17
-rw-r--r--test/Transforms/InstCombine/memmove_chk-1.ll60
-rw-r--r--test/Transforms/InstCombine/memmove_chk-2.ll24
-rw-r--r--test/Transforms/InstCombine/memset-1.ll17
-rw-r--r--test/Transforms/InstCombine/memset-2.ll17
-rw-r--r--test/Transforms/InstCombine/memset_chk-1.ll61
-rw-r--r--test/Transforms/InstCombine/memset_chk-2.ll20
-rw-r--r--test/Transforms/InstCombine/memset_chk.ll18
-rw-r--r--test/Transforms/InstCombine/obfuscated_splat.ll11
-rw-r--r--test/Transforms/InstCombine/objsize.ll3
-rw-r--r--test/Transforms/InstCombine/select.ll34
-rw-r--r--test/Transforms/InstCombine/stpcpy-1.ll46
-rw-r--r--test/Transforms/InstCombine/stpcpy-2.ll22
-rw-r--r--test/Transforms/InstCombine/stpcpy_chk-1.ll96
-rw-r--r--test/Transforms/InstCombine/stpcpy_chk-2.ll21
-rw-r--r--test/Transforms/InstCombine/strcat-1.ll38
-rw-r--r--test/Transforms/InstCombine/strcat-2.ll32
-rw-r--r--test/Transforms/InstCombine/strcat-3.ll22
-rw-r--r--test/Transforms/InstCombine/strchr-1.ll54
-rw-r--r--test/Transforms/InstCombine/strchr-2.ll21
-rw-r--r--test/Transforms/InstCombine/strcmp-1.ll82
-rw-r--r--test/Transforms/InstCombine/strcmp-2.ll20
-rw-r--r--test/Transforms/InstCombine/strcpy-1.ll45
-rw-r--r--test/Transforms/InstCombine/strcpy-2.ll22
-rw-r--r--test/Transforms/InstCombine/strcpy_chk-1.ll94
-rw-r--r--test/Transforms/InstCombine/strcpy_chk-2.ll21
-rw-r--r--test/Transforms/InstCombine/strcpy_chk.ll13
-rw-r--r--test/Transforms/InstCombine/strcspn-1.ll57
-rw-r--r--test/Transforms/InstCombine/strcspn-2.ll21
-rw-r--r--test/Transforms/InstCombine/strlen-1.ll97
-rw-r--r--test/Transforms/InstCombine/strlen-2.ll18
-rw-r--r--test/Transforms/InstCombine/strncat-1.ll37
-rw-r--r--test/Transforms/InstCombine/strncat-2.ll53
-rw-r--r--test/Transforms/InstCombine/strncat-3.ll22
-rw-r--r--test/Transforms/InstCombine/strncmp-1.ll99
-rw-r--r--test/Transforms/InstCombine/strncmp-2.ll20
-rw-r--r--test/Transforms/InstCombine/strncpy-1.ll95
-rw-r--r--test/Transforms/InstCombine/strncpy-2.ll22
-rw-r--r--test/Transforms/InstCombine/strncpy_chk-1.ll66
-rw-r--r--test/Transforms/InstCombine/strncpy_chk-2.ll21
-rw-r--r--test/Transforms/InstCombine/strpbrk-1.ll68
-rw-r--r--test/Transforms/InstCombine/strpbrk-2.ll23
-rw-r--r--test/Transforms/InstCombine/strrchr-1.ll54
-rw-r--r--test/Transforms/InstCombine/strrchr-2.ll21
-rw-r--r--test/Transforms/InstCombine/strspn-1.ll56
-rw-r--r--test/Transforms/InstCombine/strstr-1.ll65
-rw-r--r--test/Transforms/InstCombine/strstr-2.ll18
-rw-r--r--test/Transforms/InstCombine/strto-1.ll82
-rw-r--r--test/Transforms/InstCombine/struct-assign-tbaa.ll44
-rw-r--r--test/Transforms/InstCombine/udiv-simplify-bug-1.ll4
-rw-r--r--test/Transforms/InstCombine/vec_demanded_elts.ll2
-rw-r--r--test/Transforms/InstCombine/vec_shuffle.ll43
-rw-r--r--test/Transforms/InstCombine/vector_gep2.ll11
-rw-r--r--test/Transforms/InstCombine/weak-symbols.ll33
-rw-r--r--test/Transforms/InstSimplify/compare.ll9
-rw-r--r--test/Transforms/Internalize/2008-05-09-AllButMain.ll58
-rw-r--r--test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll2
-rw-r--r--test/Transforms/JumpThreading/crash.ll53
-rw-r--r--test/Transforms/JumpThreading/select.ll36
-rw-r--r--test/Transforms/LICM/2003-12-11-SinkingToPHI.ll2
-rw-r--r--test/Transforms/LICM/hoisting.ll28
-rw-r--r--test/Transforms/LoopIdiom/basic.ll33
-rw-r--r--test/Transforms/LoopIdiom/crash.ll25
-rw-r--r--test/Transforms/LoopIdiom/non-canonical-loop.ll34
-rw-r--r--test/Transforms/LoopIdiom/scev-invalidation.ll74
-rw-r--r--test/Transforms/LoopRotate/multiple-exits.ll236
-rw-r--r--test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll44
-rw-r--r--test/Transforms/LoopUnroll/pr11361.ll2
-rw-r--r--test/Transforms/LoopUnroll/pr14167.ll44
-rw-r--r--test/Transforms/LoopUnswitch/2011-06-02-CritSwitch.ll2
-rw-r--r--test/Transforms/LoopVectorize/2012-10-20-infloop.ll27
-rw-r--r--test/Transforms/LoopVectorize/2012-10-22-isconsec.ll57
-rw-r--r--test/Transforms/LoopVectorize/X86/avx1.ll49
-rw-r--r--test/Transforms/LoopVectorize/X86/conversion-cost.ll48
-rw-r--r--test/Transforms/LoopVectorize/X86/cost-model.ll38
-rw-r--r--test/Transforms/LoopVectorize/X86/gcc-examples.ll62
-rw-r--r--test/Transforms/LoopVectorize/X86/lit.local.cfg6
-rw-r--r--test/Transforms/LoopVectorize/cpp-new-array.ll46
-rw-r--r--test/Transforms/LoopVectorize/flags.ll53
-rw-r--r--test/Transforms/LoopVectorize/gcc-examples.ll650
-rw-r--r--test/Transforms/LoopVectorize/increment.ll66
-rw-r--r--test/Transforms/LoopVectorize/induction_plus.ll30
-rw-r--r--test/Transforms/LoopVectorize/lit.local.cfg1
-rw-r--r--test/Transforms/LoopVectorize/non-const-n.ll38
-rw-r--r--test/Transforms/LoopVectorize/read-only.ll32
-rw-r--r--test/Transforms/LoopVectorize/reduction.ll232
-rw-r--r--test/Transforms/LoopVectorize/runtime-check.ll36
-rw-r--r--test/Transforms/LoopVectorize/scalar-select.ll37
-rw-r--r--test/Transforms/LoopVectorize/small-loop.ll33
-rw-r--r--test/Transforms/LoopVectorize/start-non-zero.ll35
-rw-r--r--test/Transforms/LoopVectorize/write-only.ll26
-rw-r--r--test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll4
-rw-r--r--test/Transforms/MemCpyOpt/align.ll24
-rw-r--r--test/Transforms/MemCpyOpt/form-memset.ll24
-rw-r--r--test/Transforms/MetaRenamer/lit.local.cfg1
-rw-r--r--test/Transforms/MetaRenamer/metarenamer.ll96
-rw-r--r--test/Transforms/ObjCARC/basic.ll4
-rw-r--r--test/Transforms/ObjCARC/nested.ll85
-rw-r--r--test/Transforms/ObjCARC/path-overflow.ll329
-rw-r--r--test/Transforms/PhaseOrdering/gdce.ll106
-rw-r--r--test/Transforms/Reassociate/crash.ll28
-rw-r--r--test/Transforms/SCCP/loadtest.ll5
-rw-r--r--test/Transforms/SROA/alignment.ll171
-rw-r--r--test/Transforms/SROA/basictest.ll1136
-rw-r--r--test/Transforms/SROA/big-endian.ll119
-rw-r--r--test/Transforms/SROA/fca.ll49
-rw-r--r--test/Transforms/SROA/lit.local.cfg1
-rw-r--r--test/Transforms/SROA/phi-and-select.ll427
-rw-r--r--test/Transforms/SROA/vector-promotion.ll267
-rw-r--r--test/Transforms/SimplifyCFG/SPARC/lit.local.cfg6
-rw-r--r--test/Transforms/SimplifyCFG/SPARC/switch_to_lookup_table.ll32
-rw-r--r--test/Transforms/SimplifyCFG/X86/lit.local.cfg6
-rw-r--r--test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll779
-rw-r--r--test/Transforms/SimplifyCFG/phi-undef-loadstore.ll28
-rw-r--r--test/Transforms/SimplifyCFG/preserve-branchweights-partial.ll37
-rw-r--r--test/Transforms/SimplifyCFG/preserve-branchweights-switch-create.ll140
-rw-r--r--test/Transforms/SimplifyCFG/preserve-branchweights.ll230
-rw-r--r--test/Transforms/SimplifyCFG/sink-common-code.ll53
-rw-r--r--test/Transforms/SimplifyLibCalls/2009-02-12-StrTo.ll14
-rw-r--r--test/Transforms/SimplifyLibCalls/FFS.ll15
-rw-r--r--test/Transforms/SimplifyLibCalls/StpCpy.ll43
-rw-r--r--test/Transforms/SimplifyLibCalls/StrCat.ll33
-rw-r--r--test/Transforms/SimplifyLibCalls/StrChr.ll26
-rw-r--r--test/Transforms/SimplifyLibCalls/StrCmp.ll65
-rw-r--r--test/Transforms/SimplifyLibCalls/StrCpy.ll37
-rw-r--r--test/Transforms/SimplifyLibCalls/StrLen.ll62
-rw-r--r--test/Transforms/SimplifyLibCalls/StrNCat.ll31
-rw-r--r--test/Transforms/SimplifyLibCalls/StrNCmp.ll78
-rw-r--r--test/Transforms/SimplifyLibCalls/StrNCpy.ll29
-rw-r--r--test/Transforms/SimplifyLibCalls/StrPBrk.ll25
-rw-r--r--test/Transforms/SimplifyLibCalls/StrRChr.ll23
-rw-r--r--test/Transforms/SimplifyLibCalls/StrSpn.ll41
-rw-r--r--test/Transforms/SimplifyLibCalls/StrStr.ll60
-rw-r--r--test/Transforms/SimplifyLibCalls/double-float-shrink.ll333
-rw-r--r--test/Transforms/SimplifyLibCalls/float-shrink-compare.ll179
-rw-r--r--test/Transforms/SimplifyLibCalls/floor.ll23
-rw-r--r--test/Transforms/SimplifyLibCalls/memcmp.ll35
-rw-r--r--test/Transforms/SimplifyLibCalls/memmove.ll12
-rw-r--r--test/Transforms/SimplifyLibCalls/memset-64.ll12
-rw-r--r--test/Transforms/SimplifyLibCalls/memset.ll12
-rw-r--r--test/Transforms/SimplifyLibCalls/weak-symbols.ll26
201 files changed, 12058 insertions, 839 deletions
diff --git a/test/Transforms/BBVectorize/X86/cmp-types.ll b/test/Transforms/BBVectorize/X86/cmp-types.ll
new file mode 100644
index 000000000000..a4fcbb6048f5
--- /dev/null
+++ b/test/Transforms/BBVectorize/X86/cmp-types.ll
@@ -0,0 +1,16 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s
+
+%"struct.btSoftBody" = type { float, float, float*, i8 }
+
+define void @test1(%"struct.btSoftBody"* %n1, %"struct.btSoftBody"* %n2) uwtable align 2 {
+entry:
+ %tobool15 = icmp ne %"struct.btSoftBody"* %n1, null
+ %cond16 = zext i1 %tobool15 to i32
+ %tobool21 = icmp ne %"struct.btSoftBody"* %n2, null
+ %cond22 = zext i1 %tobool21 to i32
+ ret void
+; CHECK: @test1
+}
+
diff --git a/test/Transforms/BBVectorize/X86/loop1.ll b/test/Transforms/BBVectorize/X86/loop1.ll
new file mode 100644
index 000000000000..493f23b09853
--- /dev/null
+++ b/test/Transforms/BBVectorize/X86/loop1.ll
@@ -0,0 +1,53 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -basicaa -loop-unroll -unroll-threshold=45 -unroll-allow-partial -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-UNRL
+; The second check covers the use of alias analysis (with loop unrolling).
+
+define void @test1(double* noalias %out, double* noalias %in1, double* noalias %in2) nounwind uwtable {
+entry:
+ br label %for.body
+; CHECK: @test1
+; CHECK-UNRL: @test1
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds double* %in1, i64 %indvars.iv
+ %0 = load double* %arrayidx, align 8
+ %arrayidx2 = getelementptr inbounds double* %in2, i64 %indvars.iv
+ %1 = load double* %arrayidx2, align 8
+ %mul = fmul double %0, %0
+ %mul3 = fmul double %0, %1
+ %add = fadd double %mul, %mul3
+ %add4 = fadd double %1, %1
+ %add5 = fadd double %add4, %0
+ %mul6 = fmul double %0, %add5
+ %add7 = fadd double %add, %mul6
+ %mul8 = fmul double %1, %1
+ %add9 = fadd double %0, %0
+ %add10 = fadd double %add9, %0
+ %mul11 = fmul double %mul8, %add10
+ %add12 = fadd double %add7, %mul11
+ %arrayidx14 = getelementptr inbounds double* %out, i64 %indvars.iv
+ store double %add12, double* %arrayidx14, align 8
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 10
+ br i1 %exitcond, label %for.end, label %for.body
+; CHECK-NOT: <2 x double>
+; CHECK-UNRL: %mul = fmul <2 x double> %2, %2
+; CHECK-UNRL: %mul3 = fmul <2 x double> %2, %3
+; CHECK-UNRL: %add = fadd <2 x double> %mul, %mul3
+; CHECK-UNRL: %add4 = fadd <2 x double> %3, %3
+; CHECK-UNRL: %add5 = fadd <2 x double> %add4, %2
+; CHECK-UNRL: %mul6 = fmul <2 x double> %2, %add5
+; CHECK-UNRL: %add7 = fadd <2 x double> %add, %mul6
+; CHECK-UNRL: %mul8 = fmul <2 x double> %3, %3
+; CHECK-UNRL: %add9 = fadd <2 x double> %2, %2
+; CHECK-UNRL: %add10 = fadd <2 x double> %add9, %2
+; CHECK-UNRL: %mul11 = fmul <2 x double> %mul8, %add10
+; CHECK-UNRL: %add12 = fadd <2 x double> %add7, %mul11
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/BBVectorize/X86/sh-rec.ll b/test/Transforms/BBVectorize/X86/sh-rec.ll
new file mode 100644
index 000000000000..1e0492c2a8c2
--- /dev/null
+++ b/test/Transforms/BBVectorize/X86/sh-rec.ll
@@ -0,0 +1,54 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s
+
+define void @ptoa() nounwind uwtable {
+entry:
+ %call = call i8* @malloc() nounwind
+ br i1 undef, label %return, label %if.end10
+
+if.end10: ; preds = %entry
+ %incdec.ptr = getelementptr inbounds i8* %call, i64 undef
+ %call17 = call i32 @ptou() nounwind
+ %incdec.ptr26.1 = getelementptr inbounds i8* %incdec.ptr, i64 -2
+ store i8 undef, i8* %incdec.ptr26.1, align 1
+ %div27.1 = udiv i32 %call17, 100
+ %rem.2 = urem i32 %div27.1, 10
+ %add2230.2 = or i32 %rem.2, 48
+ %conv25.2 = trunc i32 %add2230.2 to i8
+ %incdec.ptr26.2 = getelementptr inbounds i8* %incdec.ptr, i64 -3
+ store i8 %conv25.2, i8* %incdec.ptr26.2, align 1
+ %incdec.ptr26.3 = getelementptr inbounds i8* %incdec.ptr, i64 -4
+ store i8 undef, i8* %incdec.ptr26.3, align 1
+ %div27.3 = udiv i32 %call17, 10000
+ %rem.4 = urem i32 %div27.3, 10
+ %add2230.4 = or i32 %rem.4, 48
+ %conv25.4 = trunc i32 %add2230.4 to i8
+ %incdec.ptr26.4 = getelementptr inbounds i8* %incdec.ptr, i64 -5
+ store i8 %conv25.4, i8* %incdec.ptr26.4, align 1
+ %div27.4 = udiv i32 %call17, 100000
+ %rem.5 = urem i32 %div27.4, 10
+ %add2230.5 = or i32 %rem.5, 48
+ %conv25.5 = trunc i32 %add2230.5 to i8
+ %incdec.ptr26.5 = getelementptr inbounds i8* %incdec.ptr, i64 -6
+ store i8 %conv25.5, i8* %incdec.ptr26.5, align 1
+ %incdec.ptr26.6 = getelementptr inbounds i8* %incdec.ptr, i64 -7
+ store i8 0, i8* %incdec.ptr26.6, align 1
+ %incdec.ptr26.7 = getelementptr inbounds i8* %incdec.ptr, i64 -8
+ store i8 undef, i8* %incdec.ptr26.7, align 1
+ %div27.7 = udiv i32 %call17, 100000000
+ %rem.8 = urem i32 %div27.7, 10
+ %add2230.8 = or i32 %rem.8, 48
+ %conv25.8 = trunc i32 %add2230.8 to i8
+ %incdec.ptr26.8 = getelementptr inbounds i8* %incdec.ptr, i64 -9
+ store i8 %conv25.8, i8* %incdec.ptr26.8, align 1
+ unreachable
+
+return: ; preds = %entry
+ ret void
+; CHECK: @ptoa
+}
+
+declare noalias i8* @malloc() nounwind
+
+declare i32 @ptou()
diff --git a/test/Transforms/BBVectorize/X86/sh-rec2.ll b/test/Transforms/BBVectorize/X86/sh-rec2.ll
new file mode 100644
index 000000000000..ef2239932fa1
--- /dev/null
+++ b/test/Transforms/BBVectorize/X86/sh-rec2.ll
@@ -0,0 +1,85 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+; RUN: opt < %s -basicaa -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s
+
+%struct.gsm_state.2.8.14.15.16.17.19.22.23.25.26.28.29.31.32.33.35.36.37.38.40.41.42.44.45.47.48.50.52.53.54.56.57.58.59.60.61.62.63.66.73.83.84.89.90.91.92.93.94.95.96.99.100.101.102.103.104.106.107.114.116.121.122.129.130.135.136.137.138.139.140.141.142.143.144.147.148.149.158.159.160.161.164.165.166.167.168.169.172.179.181.182.183.188.195.200.201.202.203.204.205.208.209.210.212.213.214.215.222.223.225.226.230.231.232.233.234.235.236.237.238.239.240.241.242.243.244.352 = type { [280 x i16], i16, i64, i32, [8 x i16], [2 x [8 x i16]], i16, i16, [9 x i16], i16, i8, i8 }
+
+define void @gsm_encode(%struct.gsm_state.2.8.14.15.16.17.19.22.23.25.26.28.29.31.32.33.35.36.37.38.40.41.42.44.45.47.48.50.52.53.54.56.57.58.59.60.61.62.63.66.73.83.84.89.90.91.92.93.94.95.96.99.100.101.102.103.104.106.107.114.116.121.122.129.130.135.136.137.138.139.140.141.142.143.144.147.148.149.158.159.160.161.164.165.166.167.168.169.172.179.181.182.183.188.195.200.201.202.203.204.205.208.209.210.212.213.214.215.222.223.225.226.230.231.232.233.234.235.236.237.238.239.240.241.242.243.244.352* %s, i16* %source, i8* %c) nounwind uwtable {
+entry:
+ %xmc = alloca [52 x i16], align 16
+ %arraydecay5 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 0
+ call void @Gsm_Coder(%struct.gsm_state.2.8.14.15.16.17.19.22.23.25.26.28.29.31.32.33.35.36.37.38.40.41.42.44.45.47.48.50.52.53.54.56.57.58.59.60.61.62.63.66.73.83.84.89.90.91.92.93.94.95.96.99.100.101.102.103.104.106.107.114.116.121.122.129.130.135.136.137.138.139.140.141.142.143.144.147.148.149.158.159.160.161.164.165.166.167.168.169.172.179.181.182.183.188.195.200.201.202.203.204.205.208.209.210.212.213.214.215.222.223.225.226.230.231.232.233.234.235.236.237.238.239.240.241.242.243.244.352* %s, i16* %source, i16* undef, i16* null, i16* undef, i16* undef, i16* undef, i16* %arraydecay5) nounwind
+ %incdec.ptr136 = getelementptr inbounds i8* %c, i64 10
+ %incdec.ptr157 = getelementptr inbounds i8* %c, i64 11
+ store i8 0, i8* %incdec.ptr136, align 1
+ %arrayidx162 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 11
+ %0 = load i16* %arrayidx162, align 2
+ %conv1631 = trunc i16 %0 to i8
+ %and164 = shl i8 %conv1631, 3
+ %shl165 = and i8 %and164, 56
+ %incdec.ptr172 = getelementptr inbounds i8* %c, i64 12
+ store i8 %shl165, i8* %incdec.ptr157, align 1
+ %1 = load i16* inttoptr (i64 2 to i16*), align 2
+ %conv1742 = trunc i16 %1 to i8
+ %and175 = shl i8 %conv1742, 1
+ %incdec.ptr183 = getelementptr inbounds i8* %c, i64 13
+ store i8 %and175, i8* %incdec.ptr172, align 1
+ %incdec.ptr199 = getelementptr inbounds i8* %c, i64 14
+ store i8 0, i8* %incdec.ptr183, align 1
+ %arrayidx214 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 15
+ %incdec.ptr220 = getelementptr inbounds i8* %c, i64 15
+ store i8 0, i8* %incdec.ptr199, align 1
+ %2 = load i16* %arrayidx214, align 2
+ %conv2223 = trunc i16 %2 to i8
+ %and223 = shl i8 %conv2223, 6
+ %incdec.ptr235 = getelementptr inbounds i8* %c, i64 16
+ store i8 %and223, i8* %incdec.ptr220, align 1
+ %arrayidx240 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 19
+ %3 = load i16* %arrayidx240, align 2
+ %conv2414 = trunc i16 %3 to i8
+ %and242 = shl i8 %conv2414, 2
+ %shl243 = and i8 %and242, 28
+ %incdec.ptr251 = getelementptr inbounds i8* %c, i64 17
+ store i8 %shl243, i8* %incdec.ptr235, align 1
+ %incdec.ptr272 = getelementptr inbounds i8* %c, i64 18
+ store i8 0, i8* %incdec.ptr251, align 1
+ %arrayidx282 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 25
+ %4 = load i16* %arrayidx282, align 2
+ %conv2835 = trunc i16 %4 to i8
+ %and284 = and i8 %conv2835, 7
+ %incdec.ptr287 = getelementptr inbounds i8* %c, i64 19
+ store i8 %and284, i8* %incdec.ptr272, align 1
+ %incdec.ptr298 = getelementptr inbounds i8* %c, i64 20
+ store i8 0, i8* %incdec.ptr287, align 1
+ %incdec.ptr314 = getelementptr inbounds i8* %c, i64 21
+ store i8 0, i8* %incdec.ptr298, align 1
+ %arrayidx319 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 26
+ %5 = load i16* %arrayidx319, align 4
+ %conv3206 = trunc i16 %5 to i8
+ %and321 = shl i8 %conv3206, 4
+ %shl322 = and i8 %and321, 112
+ %incdec.ptr335 = getelementptr inbounds i8* %c, i64 22
+ store i8 %shl322, i8* %incdec.ptr314, align 1
+ %arrayidx340 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 29
+ %6 = load i16* %arrayidx340, align 2
+ %conv3417 = trunc i16 %6 to i8
+ %and342 = shl i8 %conv3417, 3
+ %shl343 = and i8 %and342, 56
+ %incdec.ptr350 = getelementptr inbounds i8* %c, i64 23
+ store i8 %shl343, i8* %incdec.ptr335, align 1
+ %incdec.ptr366 = getelementptr inbounds i8* %c, i64 24
+ store i8 0, i8* %incdec.ptr350, align 1
+ %arrayidx381 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 36
+ %incdec.ptr387 = getelementptr inbounds i8* %c, i64 25
+ store i8 0, i8* %incdec.ptr366, align 1
+ %7 = load i16* %arrayidx381, align 8
+ %conv3898 = trunc i16 %7 to i8
+ %and390 = shl i8 %conv3898, 6
+ store i8 %and390, i8* %incdec.ptr387, align 1
+ unreachable
+; CHECK: @gsm_encode
+}
+
+declare void @Gsm_Coder(%struct.gsm_state.2.8.14.15.16.17.19.22.23.25.26.28.29.31.32.33.35.36.37.38.40.41.42.44.45.47.48.50.52.53.54.56.57.58.59.60.61.62.63.66.73.83.84.89.90.91.92.93.94.95.96.99.100.101.102.103.104.106.107.114.116.121.122.129.130.135.136.137.138.139.140.141.142.143.144.147.148.149.158.159.160.161.164.165.166.167.168.169.172.179.181.182.183.188.195.200.201.202.203.204.205.208.209.210.212.213.214.215.222.223.225.226.230.231.232.233.234.235.236.237.238.239.240.241.242.243.244.352*, i16*, i16*, i16*, i16*, i16*, i16*, i16*)
+
+declare void @llvm.trap() noreturn nounwind
diff --git a/test/Transforms/BBVectorize/X86/sh-rec3.ll b/test/Transforms/BBVectorize/X86/sh-rec3.ll
new file mode 100644
index 000000000000..fd2cc8bdd91c
--- /dev/null
+++ b/test/Transforms/BBVectorize/X86/sh-rec3.ll
@@ -0,0 +1,170 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+; RUN: opt < %s -basicaa -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s
+
+%struct.gsm_state.2.8.39.44.45.55.56.57.58.59.62.63.64.65.74.75.76.77.80.87.92.93.94.95.96.97.110.111.112.113.114.128.130.135.136.137.138.139.140.141.142.143.144.145.148.149.150.151.152.169.170.177.178.179.184.185.186.187.188.201.208.209.219.220.221.223.224.225.230.231.232.233.235.236.237.238.245.246.248.249.272.274.279.280.281.282.283.286.293.298.299.314.315.316.317.318.319.320.321.322.323.324.325.326.327.328.329.330.331.332.333.334.335.336.337.338.339.340.341.342.343.344.345.346.347.348.349.350.351.352.353.565 = type { [280 x i16], i16, i64, i32, [8 x i16], [2 x [8 x i16]], i16, i16, [9 x i16], i16, i8, i8 }
+
+define void @gsm_encode(%struct.gsm_state.2.8.39.44.45.55.56.57.58.59.62.63.64.65.74.75.76.77.80.87.92.93.94.95.96.97.110.111.112.113.114.128.130.135.136.137.138.139.140.141.142.143.144.145.148.149.150.151.152.169.170.177.178.179.184.185.186.187.188.201.208.209.219.220.221.223.224.225.230.231.232.233.235.236.237.238.245.246.248.249.272.274.279.280.281.282.283.286.293.298.299.314.315.316.317.318.319.320.321.322.323.324.325.326.327.328.329.330.331.332.333.334.335.336.337.338.339.340.341.342.343.344.345.346.347.348.349.350.351.352.353.565* %s, i16* %source, i8* %c) nounwind uwtable {
+entry:
+ %LARc28 = alloca [2 x i64], align 16
+ %LARc28.sub = getelementptr inbounds [2 x i64]* %LARc28, i64 0, i64 0
+ %tmpcast = bitcast [2 x i64]* %LARc28 to [8 x i16]*
+ %Nc = alloca [4 x i16], align 2
+ %Mc = alloca [4 x i16], align 2
+ %bc = alloca [4 x i16], align 2
+ %xmc = alloca [52 x i16], align 16
+ %arraydecay = bitcast [2 x i64]* %LARc28 to i16*
+ %arraydecay1 = getelementptr inbounds [4 x i16]* %Nc, i64 0, i64 0
+ %arraydecay2 = getelementptr inbounds [4 x i16]* %bc, i64 0, i64 0
+ %arraydecay3 = getelementptr inbounds [4 x i16]* %Mc, i64 0, i64 0
+ %arraydecay5 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 0
+ call void @Gsm_Coder(%struct.gsm_state.2.8.39.44.45.55.56.57.58.59.62.63.64.65.74.75.76.77.80.87.92.93.94.95.96.97.110.111.112.113.114.128.130.135.136.137.138.139.140.141.142.143.144.145.148.149.150.151.152.169.170.177.178.179.184.185.186.187.188.201.208.209.219.220.221.223.224.225.230.231.232.233.235.236.237.238.245.246.248.249.272.274.279.280.281.282.283.286.293.298.299.314.315.316.317.318.319.320.321.322.323.324.325.326.327.328.329.330.331.332.333.334.335.336.337.338.339.340.341.342.343.344.345.346.347.348.349.350.351.352.353.565* %s, i16* %source, i16* %arraydecay, i16* %arraydecay1, i16* %arraydecay2, i16* %arraydecay3, i16* undef, i16* %arraydecay5) nounwind
+ %0 = load i64* %LARc28.sub, align 16
+ %1 = trunc i64 %0 to i32
+ %conv1 = lshr i32 %1, 2
+ %and = and i32 %conv1, 15
+ %or = or i32 %and, 208
+ %conv6 = trunc i32 %or to i8
+ %incdec.ptr = getelementptr inbounds i8* %c, i64 1
+ store i8 %conv6, i8* %c, align 1
+ %conv84 = trunc i64 %0 to i8
+ %and9 = shl i8 %conv84, 6
+ %incdec.ptr15 = getelementptr inbounds i8* %c, i64 2
+ store i8 %and9, i8* %incdec.ptr, align 1
+ %2 = lshr i64 %0, 50
+ %shr226.tr = trunc i64 %2 to i8
+ %conv25 = and i8 %shr226.tr, 7
+ %incdec.ptr26 = getelementptr inbounds i8* %c, i64 3
+ store i8 %conv25, i8* %incdec.ptr15, align 1
+ %incdec.ptr42 = getelementptr inbounds i8* %c, i64 4
+ store i8 0, i8* %incdec.ptr26, align 1
+ %arrayidx52 = getelementptr inbounds [8 x i16]* %tmpcast, i64 0, i64 7
+ %3 = load i16* %arrayidx52, align 2
+ %conv537 = trunc i16 %3 to i8
+ %and54 = and i8 %conv537, 7
+ %incdec.ptr57 = getelementptr inbounds i8* %c, i64 5
+ store i8 %and54, i8* %incdec.ptr42, align 1
+ %incdec.ptr68 = getelementptr inbounds i8* %c, i64 6
+ store i8 0, i8* %incdec.ptr57, align 1
+ %4 = load i16* %arraydecay3, align 2
+ %conv748 = trunc i16 %4 to i8
+ %and75 = shl i8 %conv748, 5
+ %shl76 = and i8 %and75, 96
+ %incdec.ptr84 = getelementptr inbounds i8* %c, i64 7
+ store i8 %shl76, i8* %incdec.ptr68, align 1
+ %arrayidx94 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 1
+ %5 = load i16* %arrayidx94, align 2
+ %conv959 = trunc i16 %5 to i8
+ %and96 = shl i8 %conv959, 1
+ %shl97 = and i8 %and96, 14
+ %or103 = or i8 %shl97, 1
+ %incdec.ptr105 = getelementptr inbounds i8* %c, i64 8
+ store i8 %or103, i8* %incdec.ptr84, align 1
+ %arrayidx115 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 4
+ %6 = bitcast i16* %arrayidx115 to i32*
+ %7 = load i32* %6, align 8
+ %conv11610 = trunc i32 %7 to i8
+ %and117 = and i8 %conv11610, 7
+ %incdec.ptr120 = getelementptr inbounds i8* %c, i64 9
+ store i8 %and117, i8* %incdec.ptr105, align 1
+ %8 = lshr i32 %7, 16
+ %and12330 = shl nuw nsw i32 %8, 5
+ %and123 = trunc i32 %and12330 to i8
+ %incdec.ptr136 = getelementptr inbounds i8* %c, i64 10
+ store i8 %and123, i8* %incdec.ptr120, align 1
+ %incdec.ptr157 = getelementptr inbounds i8* %c, i64 11
+ store i8 0, i8* %incdec.ptr136, align 1
+ %incdec.ptr172 = getelementptr inbounds i8* %c, i64 12
+ store i8 0, i8* %incdec.ptr157, align 1
+ %arrayidx173 = getelementptr inbounds [4 x i16]* %Nc, i64 0, i64 1
+ %9 = load i16* %arrayidx173, align 2
+ %conv17412 = zext i16 %9 to i32
+ %and175 = shl nuw nsw i32 %conv17412, 1
+ %arrayidx177 = getelementptr inbounds [4 x i16]* %bc, i64 0, i64 1
+ %10 = load i16* %arrayidx177, align 2
+ %conv17826 = zext i16 %10 to i32
+ %shr17913 = lshr i32 %conv17826, 1
+ %and180 = and i32 %shr17913, 1
+ %or181 = or i32 %and175, %and180
+ %conv182 = trunc i32 %or181 to i8
+ %incdec.ptr183 = getelementptr inbounds i8* %c, i64 13
+ store i8 %conv182, i8* %incdec.ptr172, align 1
+ %arrayidx188 = getelementptr inbounds [4 x i16]* %Mc, i64 0, i64 1
+ %11 = load i16* %arrayidx188, align 2
+ %conv18914 = trunc i16 %11 to i8
+ %and190 = shl i8 %conv18914, 5
+ %shl191 = and i8 %and190, 96
+ %incdec.ptr199 = getelementptr inbounds i8* %c, i64 14
+ store i8 %shl191, i8* %incdec.ptr183, align 1
+ %arrayidx209 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 14
+ %12 = load i16* %arrayidx209, align 4
+ %conv21015 = trunc i16 %12 to i8
+ %and211 = shl i8 %conv21015, 1
+ %shl212 = and i8 %and211, 14
+ %or218 = or i8 %shl212, 1
+ %incdec.ptr220 = getelementptr inbounds i8* %c, i64 15
+ store i8 %or218, i8* %incdec.ptr199, align 1
+ %arrayidx225 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 16
+ %13 = bitcast i16* %arrayidx225 to i64*
+ %14 = load i64* %13, align 16
+ %conv22616 = trunc i64 %14 to i8
+ %and227 = shl i8 %conv22616, 3
+ %shl228 = and i8 %and227, 56
+ %incdec.ptr235 = getelementptr inbounds i8* %c, i64 16
+ store i8 %shl228, i8* %incdec.ptr220, align 1
+ %15 = lshr i64 %14, 32
+ %and23832 = shl nuw nsw i64 %15, 5
+ %and238 = trunc i64 %and23832 to i8
+ %incdec.ptr251 = getelementptr inbounds i8* %c, i64 17
+ store i8 %and238, i8* %incdec.ptr235, align 1
+ %arrayidx266 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 23
+ %incdec.ptr272 = getelementptr inbounds i8* %c, i64 18
+ store i8 0, i8* %incdec.ptr251, align 1
+ %16 = load i16* %arrayidx266, align 2
+ %conv27418 = trunc i16 %16 to i8
+ %and275 = shl i8 %conv27418, 6
+ %incdec.ptr287 = getelementptr inbounds i8* %c, i64 19
+ store i8 %and275, i8* %incdec.ptr272, align 1
+ %arrayidx288 = getelementptr inbounds [4 x i16]* %Nc, i64 0, i64 2
+ %17 = load i16* %arrayidx288, align 2
+ %conv28919 = zext i16 %17 to i32
+ %and290 = shl nuw nsw i32 %conv28919, 1
+ %arrayidx292 = getelementptr inbounds [4 x i16]* %bc, i64 0, i64 2
+ %18 = load i16* %arrayidx292, align 2
+ %conv29327 = zext i16 %18 to i32
+ %shr29420 = lshr i32 %conv29327, 1
+ %and295 = and i32 %shr29420, 1
+ %or296 = or i32 %and290, %and295
+ %conv297 = trunc i32 %or296 to i8
+ %incdec.ptr298 = getelementptr inbounds i8* %c, i64 20
+ store i8 %conv297, i8* %incdec.ptr287, align 1
+ %conv30021 = trunc i16 %18 to i8
+ %and301 = shl i8 %conv30021, 7
+ %incdec.ptr314 = getelementptr inbounds i8* %c, i64 21
+ store i8 %and301, i8* %incdec.ptr298, align 1
+ %incdec.ptr335 = getelementptr inbounds i8* %c, i64 22
+ store i8 0, i8* %incdec.ptr314, align 1
+ %arrayidx340 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 29
+ %19 = load i16* %arrayidx340, align 2
+ %conv34122 = trunc i16 %19 to i8
+ %and342 = shl i8 %conv34122, 3
+ %shl343 = and i8 %and342, 56
+ %incdec.ptr350 = getelementptr inbounds i8* %c, i64 23
+ store i8 %shl343, i8* %incdec.ptr335, align 1
+ %arrayidx355 = getelementptr inbounds [52 x i16]* %xmc, i64 0, i64 32
+ %20 = bitcast i16* %arrayidx355 to i32*
+ %21 = load i32* %20, align 16
+ %conv35623 = shl i32 %21, 2
+ %shl358 = and i32 %conv35623, 28
+ %22 = lshr i32 %21, 17
+ %and363 = and i32 %22, 3
+ %or364 = or i32 %shl358, %and363
+ %conv365 = trunc i32 %or364 to i8
+ store i8 %conv365, i8* %incdec.ptr350, align 1
+ unreachable
+; CHECK: @gsm_encode
+}
+
+declare void @Gsm_Coder(%struct.gsm_state.2.8.39.44.45.55.56.57.58.59.62.63.64.65.74.75.76.77.80.87.92.93.94.95.96.97.110.111.112.113.114.128.130.135.136.137.138.139.140.141.142.143.144.145.148.149.150.151.152.169.170.177.178.179.184.185.186.187.188.201.208.209.219.220.221.223.224.225.230.231.232.233.235.236.237.238.245.246.248.249.272.274.279.280.281.282.283.286.293.298.299.314.315.316.317.318.319.320.321.322.323.324.325.326.327.328.329.330.331.332.333.334.335.336.337.338.339.340.341.342.343.344.345.346.347.348.349.350.351.352.353.565*, i16*, i16*, i16*, i16*, i16*, i16*, i16*)
+
+declare void @llvm.trap() noreturn nounwind
diff --git a/test/Transforms/BBVectorize/X86/sh-types.ll b/test/Transforms/BBVectorize/X86/sh-types.ll
new file mode 100644
index 000000000000..0bcb714d5e65
--- /dev/null
+++ b/test/Transforms/BBVectorize/X86/sh-types.ll
@@ -0,0 +1,25 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s
+
+define <4 x float> @test7(<4 x float> %A1, <4 x float> %B1, double %C1, double %C2, double %D1, double %D2) {
+ %A2 = shufflevector <4 x float> %A1, <4 x float> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
+ %B2 = shufflevector <4 x float> %B1, <4 x float> undef, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
+ %X1 = shufflevector <4 x float> %A2, <4 x float> undef, <2 x i32> <i32 0, i32 1>
+ %X2 = shufflevector <4 x float> %B2, <4 x float> undef, <2 x i32> <i32 2, i32 3>
+ %Y1 = shufflevector <2 x float> %X1, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+ %Y2 = shufflevector <2 x float> %X2, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
+
+ %M1 = fsub double %C1, %D1
+ %M2 = fsub double %C2, %D2
+ %N1 = fmul double %M1, %C1
+ %N2 = fmul double %M2, %C2
+ %Z1 = fadd double %N1, %D1
+ %Z2 = fadd double %N2, %D2
+
+ %R = fmul <4 x float> %Y1, %Y2
+ ret <4 x float> %R
+; CHECK: @test7
+; CHECK-NOT: <8 x float>
+; CHECK: ret <4 x float>
+}
+
diff --git a/test/Transforms/BBVectorize/X86/simple-ldstr.ll b/test/Transforms/BBVectorize/X86/simple-ldstr.ll
new file mode 100644
index 000000000000..0124399bad9d
--- /dev/null
+++ b/test/Transforms/BBVectorize/X86/simple-ldstr.ll
@@ -0,0 +1,29 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+
+; Simple 3-pair chain with loads and stores
+define void @test1(double* %a, double* %b, double* %c) nounwind uwtable readonly {
+entry:
+ %i0 = load double* %a, align 8
+ %i1 = load double* %b, align 8
+ %mul = fmul double %i0, %i1
+ %arrayidx3 = getelementptr inbounds double* %a, i64 1
+ %i3 = load double* %arrayidx3, align 8
+ %arrayidx4 = getelementptr inbounds double* %b, i64 1
+ %i4 = load double* %arrayidx4, align 8
+ %mul5 = fmul double %i3, %i4
+ store double %mul, double* %c, align 8
+ %arrayidx5 = getelementptr inbounds double* %c, i64 1
+ store double %mul5, double* %arrayidx5, align 8
+ ret void
+; CHECK: @test1
+; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
+; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
+; CHECK: %i0 = load <2 x double>* %i0.v.i0, align 8
+; CHECK: %i1 = load <2 x double>* %i1.v.i0, align 8
+; CHECK: %mul = fmul <2 x double> %i0, %i1
+; CHECK: %0 = bitcast double* %c to <2 x double>*
+; CHECK: store <2 x double> %mul, <2 x double>* %0, align 8
+; CHECK: ret void
+}
+
diff --git a/test/Transforms/BBVectorize/X86/simple.ll b/test/Transforms/BBVectorize/X86/simple.ll
new file mode 100644
index 000000000000..0113e38bb1c9
--- /dev/null
+++ b/test/Transforms/BBVectorize/X86/simple.ll
@@ -0,0 +1,103 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
+
+; Basic depth-3 chain
+define double @test1(double %A1, double %A2, double %B1, double %B2) {
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+ %Y1 = fmul double %X1, %A1
+ %Y2 = fmul double %X2, %A2
+ %Z1 = fadd double %Y1, %B1
+ %Z2 = fadd double %Y2, %B2
+ %R = fmul double %Z1, %Z2
+ ret double %R
+; CHECK: @test1
+; CHECK-NOT: fmul <2 x double>
+; CHECK: ret double %R
+}
+
+; Basic chain
+define double @test1a(double %A1, double %A2, double %B1, double %B2) {
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+ %Y1 = fmul double %X1, %A1
+ %Y2 = fmul double %X2, %A2
+ %Z1 = fadd double %Y1, %B1
+ %Z2 = fadd double %Y2, %B2
+ %W1 = fadd double %Y1, %Z1
+ %W2 = fadd double %Y2, %Z2
+ %V1 = fadd double %W1, %Z1
+ %V2 = fadd double %W2, %Z2
+ %Q1 = fadd double %W1, %V1
+ %Q2 = fadd double %W2, %V2
+ %S1 = fadd double %W1, %Q1
+ %S2 = fadd double %W2, %Q2
+ %R = fmul double %S1, %S2
+ ret double %R
+; CHECK: @test1a
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
+; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
+; CHECK: %W1 = fadd <2 x double> %Y1, %Z1
+; CHECK: %V1 = fadd <2 x double> %W1, %Z1
+; CHECK: %Q1 = fadd <2 x double> %W1, %V1
+; CHECK: %S1 = fadd <2 x double> %W1, %Q1
+; CHECK: %S1.v.r1 = extractelement <2 x double> %S1, i32 0
+; CHECK: %S1.v.r2 = extractelement <2 x double> %S1, i32 1
+; CHECK: %R = fmul double %S1.v.r1, %S1.v.r2
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain (last pair permuted)
+define double @test2(double %A1, double %A2, double %B1, double %B2) {
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+ %Y1 = fmul double %X1, %A1
+ %Y2 = fmul double %X2, %A2
+ %Z1 = fadd double %Y2, %B1
+ %Z2 = fadd double %Y1, %B2
+ %R = fmul double %Z1, %Z2
+ ret double %R
+; CHECK: @test2
+; CHECK-NOT: fmul <2 x double>
+; CHECK: ret double %R
+}
+
+; Basic depth-4 chain (internal permutation)
+define double @test4(double %A1, double %A2, double %B1, double %B2) {
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+ %Y1 = fmul double %X1, %A1
+ %Y2 = fmul double %X2, %A2
+ %Z1 = fadd double %Y2, %B1
+ %Z2 = fadd double %Y1, %B2
+ %W1 = fadd double %Y2, %Z1
+ %W2 = fadd double %Y1, %Z2
+ %R = fmul double %Z1, %Z2
+ ret double %R
+; CHECK: @test4
+; CHECK-NOT: fmul <2 x double>
+; CHECK: ret double %R
+}
+
+; Basic chain with shuffles
+define <8 x i8> @test6(<8 x i8> %A1, <8 x i8> %A2, <8 x i8> %B1, <8 x i8> %B2) {
+ %X1 = sub <8 x i8> %A1, %B1
+ %X2 = sub <8 x i8> %A2, %B2
+ %Y1 = mul <8 x i8> %X1, %A1
+ %Y2 = mul <8 x i8> %X2, %A2
+ %Z1 = add <8 x i8> %Y1, %B1
+ %Z2 = add <8 x i8> %Y2, %B2
+ %Q1 = shufflevector <8 x i8> %Z1, <8 x i8> %Z2, <8 x i32> <i32 15, i32 8, i32 6, i32 1, i32 13, i32 10, i32 4, i32 3>
+ %Q2 = shufflevector <8 x i8> %Z2, <8 x i8> %Z2, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 2, i32 4, i32 4, i32 1>
+ %R = mul <8 x i8> %Q1, %Q2
+ ret <8 x i8> %R
+; CHECK: @test6
+; CHECK-NOT: sub <16 x i8>
+; CHECK: ret <8 x i8>
+}
+
diff --git a/test/Transforms/BBVectorize/X86/vs-cast.ll b/test/Transforms/BBVectorize/X86/vs-cast.ll
new file mode 100644
index 000000000000..be3efca925b8
--- /dev/null
+++ b/test/Transforms/BBVectorize/X86/vs-cast.ll
@@ -0,0 +1,12 @@
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+; RUN: opt < %s -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7 -bb-vectorize -S | FileCheck %s
+
+define void @main() nounwind uwtable {
+entry:
+ %0 = bitcast <2 x i64> undef to i128
+ %1 = bitcast <2 x i64> undef to i128
+ ret void
+; CHECK: @main
+}
+
diff --git a/test/Transforms/BBVectorize/cycle.ll b/test/Transforms/BBVectorize/cycle.ll
index 32a91ceee007..e8e82ce02479 100644
--- a/test/Transforms/BBVectorize/cycle.ll
+++ b/test/Transforms/BBVectorize/cycle.ll
@@ -107,6 +107,6 @@ done:
ret void
; CHECK: @test1
; CHECK: go:
-; CHECK-NEXT: %conv.v.i0.1 = insertelement <2 x i32> undef, i32 %n.0, i32 0
+; CHECK: %conv.v.i0.1 = insertelement <2 x i32> undef, i32 %n.0, i32 0
; FIXME: When tree pruning is deterministic, include the entire output.
}
diff --git a/test/Transforms/BBVectorize/lit.local.cfg b/test/Transforms/BBVectorize/lit.local.cfg
index 19eebc0ac7ac..a8ad0f1a28b2 100644
--- a/test/Transforms/BBVectorize/lit.local.cfg
+++ b/test/Transforms/BBVectorize/lit.local.cfg
@@ -1 +1,6 @@
config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+ config.unsupported = True
+
diff --git a/test/Transforms/BBVectorize/loop1.ll b/test/Transforms/BBVectorize/loop1.ll
index bebc91ad91a0..c22ea5852a1b 100644
--- a/test/Transforms/BBVectorize/loop1.ll
+++ b/test/Transforms/BBVectorize/loop1.ll
@@ -42,8 +42,8 @@ for.body: ; preds = %for.body, %entry
; CHECK: %mul = fmul double %0, %0
; CHECK: %mul3 = fmul double %0, %1
; CHECK: %add = fadd double %mul, %mul3
-; CHECK: %add4.v.i1.1 = insertelement <2 x double> undef, double %1, i32 0
; CHECK: %mul8 = fmul double %1, %1
+; CHECK: %add4.v.i1.1 = insertelement <2 x double> undef, double %1, i32 0
; CHECK: %add4.v.i1.2 = insertelement <2 x double> %add4.v.i1.1, double %0, i32 1
; CHECK: %add4 = fadd <2 x double> %add4.v.i1.2, %add4.v.i1.2
; CHECK: %add5.v.i1.1 = insertelement <2 x double> undef, double %0, i32 0
diff --git a/test/Transforms/BBVectorize/search-limit.ll b/test/Transforms/BBVectorize/search-limit.ll
index d9945b563077..aeaf98865bc9 100644
--- a/test/Transforms/BBVectorize/search-limit.ll
+++ b/test/Transforms/BBVectorize/search-limit.ll
@@ -7,8 +7,8 @@ define double @test1(double %A1, double %A2, double %B1, double %B2) {
; CHECK-SL4: @test1
; CHECK-SL4-NOT: <2 x double>
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
diff --git a/test/Transforms/BBVectorize/simple-int.ll b/test/Transforms/BBVectorize/simple-int.ll
index 68449771436e..ae1d63bfd852 100644
--- a/test/Transforms/BBVectorize/simple-int.ll
+++ b/test/Transforms/BBVectorize/simple-int.ll
@@ -17,8 +17,8 @@ define double @test1(double %A1, double %A2, double %B1, double %B2, double %C1,
ret double %R
; CHECK: @test1
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1.v.i2.1 = insertelement <2 x double> undef, double %C1, i32 0
@@ -43,8 +43,8 @@ define double @test2(double %A1, double %A2, double %B1, double %B2) {
ret double %R
; CHECK: @test2
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x double> @llvm.cos.v2f64(<2 x double> %X1)
@@ -68,8 +68,8 @@ define double @test3(double %A1, double %A2, double %B1, double %B2, i32 %P) {
ret double %R
; CHECK: @test3
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
; CHECK: %Y1 = call <2 x double> @llvm.powi.v2f64(<2 x double> %X1, i32 %P)
diff --git a/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll b/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll
index f992d4154779..d46f7692b6d3 100644
--- a/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll
+++ b/test/Transforms/BBVectorize/simple-ldstr-ptrs.ll
@@ -2,6 +2,9 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -instcombine -gvn -S | FileCheck %s
; RUN: opt < %s -bb-vectorize -bb-vectorize-req-chain-depth=3 -bb-vectorize-aligned-only -instcombine -gvn -S | FileCheck %s -check-prefix=CHECK-AO
+; FIXME: re-enable this once pointer vectors work properly
+; XFAIL: *
+
; Simple 3-pair chain also with loads and stores (using ptrs and gep)
define double @test1(i64* %a, i64* %b, i64* %c) nounwind uwtable readonly {
entry:
@@ -79,3 +82,53 @@ entry:
; CHECK-AO-NOT: <2 x
}
+; Simple 3-pair chain with loads and stores (using ptrs and gep)
+; using pointer vectors.
+define void @test3(<2 x i64*>* %a, <2 x i64*>* %b, <2 x i64*>* %c) nounwind uwtable readonly {
+entry:
+ %i0 = load <2 x i64*>* %a, align 8
+ %i1 = load <2 x i64*>* %b, align 8
+ %arrayidx3 = getelementptr inbounds <2 x i64*>* %a, i64 1
+ %i3 = load <2 x i64*>* %arrayidx3, align 8
+ %arrayidx4 = getelementptr inbounds <2 x i64*>* %b, i64 1
+ %i4 = load <2 x i64*>* %arrayidx4, align 8
+ %j1 = extractelement <2 x i64*> %i1, i32 0
+ %j4 = extractelement <2 x i64*> %i4, i32 0
+ %o1 = load i64* %j1, align 8
+ %o4 = load i64* %j4, align 8
+ %j0 = extractelement <2 x i64*> %i0, i32 0
+ %j3 = extractelement <2 x i64*> %i3, i32 0
+ %ptr0 = getelementptr inbounds i64* %j0, i64 %o1
+ %ptr3 = getelementptr inbounds i64* %j3, i64 %o4
+ %qtr0 = insertelement <2 x i64*> undef, i64* %ptr0, i32 0
+ %rtr0 = insertelement <2 x i64*> %qtr0, i64* %ptr0, i32 1
+ %qtr3 = insertelement <2 x i64*> undef, i64* %ptr3, i32 0
+ %rtr3 = insertelement <2 x i64*> %qtr3, i64* %ptr3, i32 1
+ store <2 x i64*> %rtr0, <2 x i64*>* %c, align 8
+ %arrayidx5 = getelementptr inbounds <2 x i64*>* %c, i64 1
+ store <2 x i64*> %rtr3, <2 x i64*>* %arrayidx5, align 8
+ ret void
+; CHECK: @test3
+; CHECK: %i0.v.i0 = bitcast <2 x i64*>* %a to <4 x i64*>*
+; CHECK: %i1 = load <2 x i64*>* %b, align 8
+; CHECK: %i0 = load <4 x i64*>* %i0.v.i0, align 8
+; CHECK: %arrayidx4 = getelementptr inbounds <2 x i64*>* %b, i64 1
+; CHECK: %i4 = load <2 x i64*>* %arrayidx4, align 8
+; CHECK: %j1 = extractelement <2 x i64*> %i1, i32 0
+; CHECK: %j4 = extractelement <2 x i64*> %i4, i32 0
+; CHECK: %o1 = load i64* %j1, align 8
+; CHECK: %o4 = load i64* %j4, align 8
+; CHECK: %ptr0.v.i1.1 = insertelement <2 x i64> undef, i64 %o1, i32 0
+; CHECK: %ptr0.v.i1.2 = insertelement <2 x i64> %ptr0.v.i1.1, i64 %o4, i32 1
+; CHECK: %ptr0.v.i0 = shufflevector <4 x i64*> %i0, <4 x i64*> undef, <2 x i32> <i32 0, i32 2>
+; CHECK: %ptr0 = getelementptr inbounds <2 x i64*> %ptr0.v.i0, <2 x i64> %ptr0.v.i1.2
+; CHECK: %rtr0 = shufflevector <2 x i64*> %ptr0, <2 x i64*> undef, <2 x i32> zeroinitializer
+; CHECK: %rtr3 = shufflevector <2 x i64*> %ptr0, <2 x i64*> undef, <2 x i32> <i32 1, i32 1>
+; CHECK: %0 = bitcast <2 x i64*>* %c to <4 x i64*>*
+; CHECK: %1 = shufflevector <2 x i64*> %rtr0, <2 x i64*> %rtr3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK: store <4 x i64*> %1, <4 x i64*>* %0, align 8
+; CHECK: ret void
+; CHECK-AO: @test3
+; CHECK-AO-NOT: <4 x
+}
+
diff --git a/test/Transforms/BBVectorize/simple-ldstr.ll b/test/Transforms/BBVectorize/simple-ldstr.ll
index a5397eeb1f96..7dd77c933f6d 100644
--- a/test/Transforms/BBVectorize/simple-ldstr.ll
+++ b/test/Transforms/BBVectorize/simple-ldstr.ll
@@ -94,13 +94,13 @@ entry:
; CHECK-AO: @test3
; CHECK-AO: %i0 = load double* %a, align 8
; CHECK-AO: %i1 = load double* %b, align 8
-; CHECK-AO: %mul.v.i1.1 = insertelement <2 x double> undef, double %i1, i32 0
-; CHECK-AO: %mul.v.i0.1 = insertelement <2 x double> undef, double %i0, i32 0
; CHECK-AO: %arrayidx3 = getelementptr inbounds double* %a, i64 1
; CHECK-AO: %i3 = load double* %arrayidx3, align 8
; CHECK-AO: %arrayidx4 = getelementptr inbounds double* %b, i64 1
; CHECK-AO: %i4 = load double* %arrayidx4, align 8
+; CHECK-AO: %mul.v.i1.1 = insertelement <2 x double> undef, double %i1, i32 0
; CHECK-AO: %mul.v.i1.2 = insertelement <2 x double> %mul.v.i1.1, double %i4, i32 1
+; CHECK-AO: %mul.v.i0.1 = insertelement <2 x double> undef, double %i0, i32 0
; CHECK-AO: %mul.v.i0.2 = insertelement <2 x double> %mul.v.i0.1, double %i3, i32 1
; CHECK-AO: %mul = fmul <2 x double> %mul.v.i0.2, %mul.v.i1.2
; CHECK-AO: %mulf = fptrunc <2 x double> %mul to <2 x float>
@@ -108,3 +108,63 @@ entry:
; CHECK-AO: store <2 x float> %mulf, <2 x float>* %0, align 8
; CHECK-AO: ret void
}
+
+; Simple 3-pair chain with loads and stores (unreachable)
+define void @test4(i1 %bool, double* %a, double* %b, double* %c) nounwind uwtable readonly {
+entry:
+ br i1 %bool, label %if.then1, label %if.end
+
+if.then1:
+ unreachable
+ br label %if.then
+
+if.then:
+ %i0 = load double* %a, align 8
+ %i1 = load double* %b, align 8
+ %mul = fmul double %i0, %i1
+ %arrayidx3 = getelementptr inbounds double* %a, i64 1
+ %i3 = load double* %arrayidx3, align 8
+ %arrayidx4 = getelementptr inbounds double* %b, i64 1
+ %i4 = load double* %arrayidx4, align 8
+ %mul5 = fmul double %i3, %i4
+ store double %mul, double* %c, align 8
+ %arrayidx5 = getelementptr inbounds double* %c, i64 1
+ store double %mul5, double* %arrayidx5, align 8
+ br label %if.end
+
+if.end:
+ ret void
+; CHECK: @test4
+; CHECK-NOT: <2 x double>
+; CHECK-AO: @test4
+; CHECK-AO-NOT: <2 x double>
+}
+
+; Simple 3-pair chain with loads and stores
+define void @test5(double* %a, double* %b, double* %c) nounwind uwtable readonly {
+entry:
+ %i0 = load double* %a, align 8
+ %i1 = load double* %b, align 8
+ %mul = fmul double %i0, %i1
+ %arrayidx3 = getelementptr inbounds double* %a, i64 1
+ %i3 = load double* %arrayidx3, align 8
+ %arrayidx4 = getelementptr inbounds double* %b, i64 1
+ %i4 = load double* %arrayidx4, align 8
+ %mul5 = fmul double %i3, %i4
+ %arrayidx5 = getelementptr inbounds double* %c, i64 1
+ store double %mul5, double* %arrayidx5, align 8
+ store double %mul, double* %c, align 4
+ ret void
+; CHECK: @test5
+; CHECK: %i0.v.i0 = bitcast double* %a to <2 x double>*
+; CHECK: %i1.v.i0 = bitcast double* %b to <2 x double>*
+; CHECK: %i0 = load <2 x double>* %i0.v.i0, align 8
+; CHECK: %i1 = load <2 x double>* %i1.v.i0, align 8
+; CHECK: %mul = fmul <2 x double> %i0, %i1
+; CHECK: %0 = bitcast double* %c to <2 x double>*
+; CHECK: store <2 x double> %mul, <2 x double>* %0, align 4
+; CHECK: ret void
+; CHECK-AO: @test5
+; CHECK-AO-NOT: <2 x double>
+}
+
diff --git a/test/Transforms/BBVectorize/simple-sel.ll b/test/Transforms/BBVectorize/simple-sel.ll
index 325792a5dca1..15ecb597025a 100644
--- a/test/Transforms/BBVectorize/simple-sel.ll
+++ b/test/Transforms/BBVectorize/simple-sel.ll
@@ -6,8 +6,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
define double @test1(double %A1, double %A2, double %B1, double %B2, i1 %C1, i1 %C2) {
; CHECK: @test1
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
@@ -33,8 +33,8 @@ define double @test2(double %A1, double %A2, double %B1, double %B2) {
; CHECK: @test2
; CHECK-NB: @test2
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
diff --git a/test/Transforms/BBVectorize/simple.ll b/test/Transforms/BBVectorize/simple.ll
index 88eb9c90f7ee..3527ae75b457 100644
--- a/test/Transforms/BBVectorize/simple.ll
+++ b/test/Transforms/BBVectorize/simple.ll
@@ -5,8 +5,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
define double @test1(double %A1, double %A2, double %B1, double %B2) {
; CHECK: @test1
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
@@ -29,8 +29,8 @@ define double @test1(double %A1, double %A2, double %B1, double %B2) {
define double @test2(double %A1, double %A2, double %B1, double %B2) {
; CHECK: @test2
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
@@ -40,12 +40,13 @@ define double @test2(double %A1, double %A2, double %B1, double %B2) {
; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
%Z1 = fadd double %Y2, %B1
%Z2 = fadd double %Y1, %B2
-; CHECK: %Z1.v.i0 = shufflevector <2 x double> %Y1, <2 x double> undef, <2 x i32> <i32 1, i32 0>
-; CHECK: %Z1 = fadd <2 x double> %Z1.v.i0, %X1.v.i1.2
+; CHECK: %Z1.v.i1.1 = insertelement <2 x double> undef, double %B2, i32 0
+; CHECK: %Z1.v.i1.2 = insertelement <2 x double> %Z1.v.i1.1, double %B1, i32 1
+; CHECK: %Z2 = fadd <2 x double> %Y1, %Z1.v.i1.2
%R = fmul double %Z1, %Z2
-; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
-; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
-; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+; CHECK: %Z2.v.r1 = extractelement <2 x double> %Z2, i32 0
+; CHECK: %Z2.v.r2 = extractelement <2 x double> %Z2, i32 1
+; CHECK: %R = fmul double %Z2.v.r2, %Z2.v.r1
ret double %R
; CHECK: ret double %R
}
@@ -54,8 +55,8 @@ define double @test2(double %A1, double %A2, double %B1, double %B2) {
define double @test3(double %A1, double %A2, double %B1, double %B2) {
; CHECK: @test3
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
@@ -79,8 +80,8 @@ define double @test3(double %A1, double %A2, double %B1, double %B2) {
define double @test4(double %A1, double %A2, double %B1, double %B2) {
; CHECK: @test4
; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
-; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
%X1 = fsub double %A1, %B1
%X2 = fsub double %A2, %B2
@@ -148,4 +149,51 @@ define <8 x i8> @test6(<8 x i8> %A1, <8 x i8> %A2, <8 x i8> %B1, <8 x i8> %B2) {
; CHECK: ret <8 x i8> %R
}
+; Basic depth-3 chain (flipped order)
+define double @test7(double %A1, double %A2, double %B1, double %B2) {
+; CHECK: @test7
+; CHECK: %X1.v.i1.1 = insertelement <2 x double> undef, double %B1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x double> %X1.v.i1.1, double %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x double> undef, double %A1, i32 0
+; CHECK: %X1.v.i0.2 = insertelement <2 x double> %X1.v.i0.1, double %A2, i32 1
+ %X1 = fsub double %A1, %B1
+ %X2 = fsub double %A2, %B2
+; CHECK: %X1 = fsub <2 x double> %X1.v.i0.2, %X1.v.i1.2
+ %Y1 = fmul double %X1, %A1
+ %Y2 = fmul double %X2, %A2
+; CHECK: %Y1 = fmul <2 x double> %X1, %X1.v.i0.2
+ %Z2 = fadd double %Y2, %B2
+ %Z1 = fadd double %Y1, %B1
+; CHECK: %Z1 = fadd <2 x double> %Y1, %X1.v.i1.2
+ %R = fmul double %Z1, %Z2
+; CHECK: %Z1.v.r1 = extractelement <2 x double> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x double> %Z1, i32 1
+; CHECK: %R = fmul double %Z1.v.r1, %Z1.v.r2
+ ret double %R
+; CHECK: ret double %R
+}
+
+; Basic depth-3 chain (subclass data)
+define i64 @test8(i64 %A1, i64 %A2, i64 %B1, i64 %B2) {
+; CHECK: @test8
+; CHECK: %X1.v.i1.1 = insertelement <2 x i64> undef, i64 %B1, i32 0
+; CHECK: %X1.v.i1.2 = insertelement <2 x i64> %X1.v.i1.1, i64 %B2, i32 1
+; CHECK: %X1.v.i0.1 = insertelement <2 x i64> undef, i64 %A1, i32 0
+; CHECK: %X1.v.i0.2 = insertelement <2 x i64> %X1.v.i0.1, i64 %A2, i32 1
+ %X1 = sub nsw i64 %A1, %B1
+ %X2 = sub i64 %A2, %B2
+; CHECK: %X1 = sub <2 x i64> %X1.v.i0.2, %X1.v.i1.2
+ %Y1 = mul i64 %X1, %A1
+ %Y2 = mul i64 %X2, %A2
+; CHECK: %Y1 = mul <2 x i64> %X1, %X1.v.i0.2
+ %Z1 = add i64 %Y1, %B1
+ %Z2 = add i64 %Y2, %B2
+; CHECK: %Z1 = add <2 x i64> %Y1, %X1.v.i1.2
+ %R = mul i64 %Z1, %Z2
+; CHECK: %Z1.v.r1 = extractelement <2 x i64> %Z1, i32 0
+; CHECK: %Z1.v.r2 = extractelement <2 x i64> %Z1, i32 1
+; CHECK: %R = mul i64 %Z1.v.r1, %Z1.v.r2
+ ret i64 %R
+; CHECK: ret i64 %R
+}
diff --git a/test/Transforms/ConstProp/loads.ll b/test/Transforms/ConstProp/loads.ll
index 74d80aa18729..6794288a0ef2 100644
--- a/test/Transforms/ConstProp/loads.ll
+++ b/test/Transforms/ConstProp/loads.ll
@@ -1,17 +1,24 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+; RUN: opt < %s -default-data-layout="e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64" -instcombine -S | FileCheck %s --check-prefix=LE
+; RUN: opt < %s -default-data-layout="E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64" -instcombine -S | FileCheck %s --check-prefix=BE
+; {{ 0xDEADBEEF, 0xBA }, 0xCAFEBABE}
@g1 = constant {{i32,i8},i32} {{i32,i8} { i32 -559038737, i8 186 }, i32 -889275714 }
@g2 = constant double 1.0
+; { 0x7B, 0x06B1BFF8 }
@g3 = constant {i64, i64} { i64 123, i64 112312312 }
; Simple load
define i32 @test1() {
%r = load i32* getelementptr ({{i32,i8},i32}* @g1, i32 0, i32 0, i32 0)
ret i32 %r
-; CHECK: @test1
-; CHECK: ret i32 -559038737
+
+; 0xDEADBEEF
+; LE: @test1
+; LE: ret i32 -559038737
+
+; 0xDEADBEEF
+; BE: @test1
+; BE: ret i32 -559038737
}
; PR3152
@@ -20,8 +27,13 @@ define i16 @test2() {
%r = load i16* bitcast(i32* getelementptr ({{i32,i8},i32}* @g1, i32 0, i32 0, i32 0) to i16*)
ret i16 %r
-; CHECK: @test2
-; CHECK: ret i16 -16657
+; 0xBEEF
+; LE: @test2
+; LE: ret i16 -16657
+
+; 0xDEAD
+; BE: @test2
+; BE: ret i16 -8531
}
; Load of second 16 bits of 32-bit value.
@@ -29,16 +41,27 @@ define i16 @test3() {
%r = load i16* getelementptr(i16* bitcast(i32* getelementptr ({{i32,i8},i32}* @g1, i32 0, i32 0, i32 0) to i16*), i32 1)
ret i16 %r
-; CHECK: @test3
-; CHECK: ret i16 -8531
+; 0xDEAD
+; LE: @test3
+; LE: ret i16 -8531
+
+; 0xBEEF
+; BE: @test3
+; BE: ret i16 -16657
}
; Load of 8 bit field + tail padding.
define i16 @test4() {
%r = load i16* getelementptr(i16* bitcast(i32* getelementptr ({{i32,i8},i32}* @g1, i32 0, i32 0, i32 0) to i16*), i32 2)
ret i16 %r
-; CHECK: @test4
-; CHECK: ret i16 186
+
+; 0x00BA
+; LE: @test4
+; LE: ret i16 186
+
+; 0xBA00
+; BE: @test4
+; BE: ret i16 -17920
}
; Load of double bits.
@@ -46,8 +69,13 @@ define i64 @test6() {
%r = load i64* bitcast(double* @g2 to i64*)
ret i64 %r
-; CHECK: @test6
-; CHECK: ret i64 4607182418800017408
+; 0x3FF_0000000000000
+; LE: @test6
+; LE: ret i64 4607182418800017408
+
+; 0x3FF_0000000000000
+; BE: @test6
+; BE: ret i64 4607182418800017408
}
; Load of double bits.
@@ -55,8 +83,13 @@ define i16 @test7() {
%r = load i16* bitcast(double* @g2 to i16*)
ret i16 %r
-; CHECK: @test7
-; CHECK: ret i16 0
+; 0x0000
+; LE: @test7
+; LE: ret i16 0
+
+; 0x3FF0
+; BE: @test7
+; BE: ret i16 16368
}
; Double load.
@@ -64,8 +97,11 @@ define double @test8() {
%r = load double* bitcast({{i32,i8},i32}* @g1 to double*)
ret double %r
-; CHECK: @test8
-; CHECK: ret double 0xBADEADBEEF
+; LE: @test8
+; LE: ret double 0xBADEADBEEF
+
+; BE: @test8
+; BE: ret double 0xDEADBEEFBA000000
}
@@ -74,8 +110,13 @@ define i128 @test9() {
%r = load i128* bitcast({i64, i64}* @g3 to i128*)
ret i128 %r
-; CHECK: @test9
-; CHECK: ret i128 2071796475790618158476296315
+; 0x00000000_06B1BFF8_00000000_0000007B
+; LE: @test9
+; LE: ret i128 2071796475790618158476296315
+
+; 0x00000000_0000007B_00000000_06B1BFF8
+; BE: @test9
+; BE: ret i128 2268949521066387161080
}
; vector load.
@@ -83,21 +124,30 @@ define <2 x i64> @test10() {
%r = load <2 x i64>* bitcast({i64, i64}* @g3 to <2 x i64>*)
ret <2 x i64> %r
-; CHECK: @test10
-; CHECK: ret <2 x i64> <i64 123, i64 112312312>
+; LE: @test10
+; LE: ret <2 x i64> <i64 123, i64 112312312>
+
+; BE: @test10
+; BE: ret <2 x i64> <i64 123, i64 112312312>
}
; PR5287
+; { 0xA1, 0x08 }
@g4 = internal constant { i8, i8 } { i8 -95, i8 8 }
define i16 @test11() nounwind {
entry:
%a = load i16* bitcast ({ i8, i8 }* @g4 to i16*)
ret i16 %a
-
-; CHECK: @test11
-; CHECK: ret i16 2209
+
+; 0x08A1
+; LE: @test11
+; LE: ret i16 2209
+
+; 0xA108
+; BE: @test11
+; BE: ret i16 -24312
}
@@ -107,8 +157,14 @@ entry:
define i16 @test12() {
%a = load i16* getelementptr inbounds ([3 x i16]* bitcast ([6 x i8]* @test12g to [3 x i16]*), i32 0, i64 1)
ret i16 %a
-; CHECK: @test12
-; CHECK: ret i16 98
+
+; 0x0062
+; LE: @test12
+; LE: ret i16 98
+
+; 0x6200
+; BE: @test12
+; BE: ret i16 25088
}
@@ -117,8 +173,12 @@ define i16 @test12() {
define i1 @test13() {
%A = load i1* bitcast (i8* @g5 to i1*)
ret i1 %A
-; CHECK: @test13
-; CHECK: ret i1 false
+
+; LE: @test13
+; LE: ret i1 false
+
+; BE: @test13
+; BE: ret i1 false
}
@g6 = constant [2 x i8*] [i8* inttoptr (i64 1 to i8*), i8* inttoptr (i64 2 to i8*)]
@@ -126,14 +186,22 @@ define i64 @test14() nounwind {
entry:
%tmp = load i64* bitcast ([2 x i8*]* @g6 to i64*)
ret i64 %tmp
-; CHECK: @test14
-; CHECK: ret i64 1
+
+; LE: @test14
+; LE: ret i64 1
+
+; BE: @test14
+; BE: ret i64 1
}
define i64 @test15() nounwind {
entry:
%tmp = load i64* bitcast (i8** getelementptr inbounds ([2 x i8*]* @g6, i32 0, i64 1) to i64*)
ret i64 %tmp
-; CHECK: @test15
-; CHECK: ret i64 2
+
+; LE: @test15
+; LE: ret i64 2
+
+; BE: @test15
+; BE: ret i64 2
}
diff --git a/test/Transforms/CorrelatedValuePropagation/crash.ll b/test/Transforms/CorrelatedValuePropagation/crash.ll
index 80c43d0f1da5..9723d18252a7 100644
--- a/test/Transforms/CorrelatedValuePropagation/crash.ll
+++ b/test/Transforms/CorrelatedValuePropagation/crash.ll
@@ -35,3 +35,28 @@ srf.exit.i:
func_29.exit:
ret void
}
+
+; PR13972
+define void @test3() nounwind {
+for.body:
+ br label %return
+
+for.cond.i: ; preds = %if.else.i, %for.body.i
+ %e.2.i = phi i32 [ %e.2.i, %if.else.i ], [ -8, %for.body.i ]
+ br i1 undef, label %return, label %for.body.i
+
+for.body.i: ; preds = %for.cond.i
+ switch i32 %e.2.i, label %for.cond3.i [
+ i32 -3, label %if.else.i
+ i32 0, label %for.cond.i
+ ]
+
+for.cond3.i: ; preds = %for.cond3.i, %for.body.i
+ br label %for.cond3.i
+
+if.else.i: ; preds = %for.body.i
+ br label %for.cond.i
+
+return: ; preds = %for.cond.i, %for.body
+ ret void
+}
diff --git a/test/Transforms/DeadArgElim/dbginfo.ll b/test/Transforms/DeadArgElim/dbginfo.ll
new file mode 100644
index 000000000000..dcbfaaa3d77b
--- /dev/null
+++ b/test/Transforms/DeadArgElim/dbginfo.ll
@@ -0,0 +1,64 @@
+; RUN: opt %s -deadargelim -S | FileCheck %s
+; PR14016
+
+; Check that debug info metadata for subprograms stores pointers to
+; updated LLVM functions.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@x = global i32 0, align 4
+
+define void @_Z3runv() uwtable {
+entry:
+ call void @_ZN12_GLOBAL__N_18dead_argEPv(i8* null), !dbg !10
+ call void (...)* @_ZN12_GLOBAL__N_111dead_varargEz(), !dbg !12
+ ret void, !dbg !13
+}
+
+; Argument will be deleted
+define internal void @_ZN12_GLOBAL__N_18dead_argEPv(i8* %foo) nounwind uwtable {
+entry:
+ %0 = load i32* @x, align 4, !dbg !14
+ %inc = add nsw i32 %0, 1, !dbg !14
+ store i32 %inc, i32* @x, align 4, !dbg !14
+ ret void, !dbg !16
+}
+
+; Vararg will be deleted
+define internal void @_ZN12_GLOBAL__N_111dead_varargEz(...) nounwind uwtable {
+entry:
+ %0 = load i32* @x, align 4, !dbg !17
+ %inc = add nsw i32 %0, 1, !dbg !17
+ store i32 %inc, i32* @x, align 4, !dbg !17
+ ret void, !dbg !19
+}
+
+!llvm.dbg.cu = !{!0}
+
+!0 = metadata !{i32 786449, i32 0, i32 4, metadata !"test.cc", metadata !"/home/samsonov/tmp/clang-di", metadata !"clang version 3.2 (trunk 165305)", i1 true, i1 false, metadata !"", i32 0, metadata !1, metadata !1, metadata !3, metadata !1} ; [ DW_TAG_compile_unit ] [/home/samsonov/tmp/clang-di/test.cc] [DW_LANG_C_plus_plus]
+!1 = metadata !{metadata !2}
+!2 = metadata !{i32 0}
+!3 = metadata !{metadata !4}
+!4 = metadata !{metadata !5, metadata !8, metadata !9}
+!5 = metadata !{i32 786478, i32 0, metadata !6, metadata !"run", metadata !"run", metadata !"", metadata !6, i32 8, metadata !7, i1 false, i1 true, i32 0, i32 0, null, i32 256, i1 false, void ()* @_Z3runv, null, null, metadata !1, i32 8} ; [ DW_TAG_subprogram ] [line 8] [def] [run]
+!6 = metadata !{i32 786473, metadata !"test.cc", metadata !"/home/samsonov/tmp/clang-di", null} ; [ DW_TAG_file_type ]
+!7 = metadata !{i32 786453, i32 0, metadata !"", i32 0, i32 0, i64 0, i64 0, i64 0, i32 0, null, metadata !2, i32 0, i32 0} ; [ DW_TAG_subroutine_type ] [line 0, size 0, align 0, offset 0] [from ]
+!8 = metadata !{i32 786478, i32 0, metadata !6, metadata !"dead_vararg", metadata !"dead_vararg", metadata !"", metadata !6, i32 5, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (...)* @_ZN12_GLOBAL__N_111dead_varargEz, null, null, metadata !1, i32 5} ; [ DW_TAG_subprogram ] [line 5] [local] [def] [dead_vararg]
+
+; CHECK: metadata !"dead_vararg"{{.*}}void ()* @_ZN12_GLOBAL__N_111dead_varargEz
+
+!9 = metadata !{i32 786478, i32 0, metadata !6, metadata !"dead_arg", metadata !"dead_arg", metadata !"", metadata !6, i32 4, metadata !7, i1 true, i1 true, i32 0, i32 0, null, i32 256, i1 false, void (i8*)* @_ZN12_GLOBAL__N_18dead_argEPv, null, null, metadata !1, i32 4} ; [ DW_TAG_subprogram ] [line 4] [local] [def] [dead_arg]
+
+; CHECK: metadata !"dead_arg"{{.*}}void ()* @_ZN12_GLOBAL__N_18dead_argEPv
+
+!10 = metadata !{i32 8, i32 14, metadata !11, null}
+!11 = metadata !{i32 786443, metadata !5, i32 8, i32 12, metadata !6, i32 0} ; [ DW_TAG_lexical_block ] [/home/samsonov/tmp/clang-di/test.cc]
+!12 = metadata !{i32 8, i32 27, metadata !11, null}
+!13 = metadata !{i32 8, i32 42, metadata !11, null}
+!14 = metadata !{i32 4, i32 28, metadata !15, null}
+!15 = metadata !{i32 786443, metadata !9, i32 4, i32 26, metadata !6, i32 2} ; [ DW_TAG_lexical_block ] [/home/samsonov/tmp/clang-di/test.cc]
+!16 = metadata !{i32 4, i32 33, metadata !15, null}
+!17 = metadata !{i32 5, i32 25, metadata !18, null}
+!18 = metadata !{i32 786443, metadata !8, i32 5, i32 23, metadata !6, i32 1} ; [ DW_TAG_lexical_block ] [/home/samsonov/tmp/clang-di/test.cc]
+!19 = metadata !{i32 5, i32 30, metadata !18, null}
diff --git a/test/Transforms/DeadStoreElimination/libcalls.ll b/test/Transforms/DeadStoreElimination/libcalls.ll
new file mode 100644
index 000000000000..4639c0bc9628
--- /dev/null
+++ b/test/Transforms/DeadStoreElimination/libcalls.ll
@@ -0,0 +1,70 @@
+; RUN: opt -S -basicaa -dse < %s | FileCheck %s
+
+declare i8* @strcpy(i8* %dest, i8* %src) nounwind
+define void @test1(i8* %src) {
+; CHECK: @test1
+ %B = alloca [16 x i8]
+ %dest = getelementptr inbounds [16 x i8]* %B, i64 0, i64 0
+; CHECK-NOT: @strcpy
+ %call = call i8* @strcpy(i8* %dest, i8* %src)
+; CHECK: ret void
+ ret void
+}
+
+declare i8* @strncpy(i8* %dest, i8* %src, i32 %n) nounwind
+define void @test2(i8* %src) {
+; CHECK: @test2
+ %B = alloca [16 x i8]
+ %dest = getelementptr inbounds [16 x i8]* %B, i64 0, i64 0
+; CHECK-NOT: @strncpy
+ %call = call i8* @strncpy(i8* %dest, i8* %src, i32 12)
+; CHECK: ret void
+ ret void
+}
+
+declare i8* @strcat(i8* %dest, i8* %src) nounwind
+define void @test3(i8* %src) {
+; CHECK: @test3
+ %B = alloca [16 x i8]
+ %dest = getelementptr inbounds [16 x i8]* %B, i64 0, i64 0
+; CHECK-NOT: @strcat
+ %call = call i8* @strcat(i8* %dest, i8* %src)
+; CHECK: ret void
+ ret void
+}
+
+declare i8* @strncat(i8* %dest, i8* %src, i32 %n) nounwind
+define void @test4(i8* %src) {
+; CHECK: @test4
+ %B = alloca [16 x i8]
+ %dest = getelementptr inbounds [16 x i8]* %B, i64 0, i64 0
+; CHECK-NOT: @strncat
+ %call = call i8* @strncat(i8* %dest, i8* %src, i32 12)
+; CHECK: ret void
+ ret void
+}
+
+define void @test5(i8* nocapture %src) {
+; CHECK: @test5
+ %dest = alloca [100 x i8], align 16
+ %arraydecay = getelementptr inbounds [100 x i8]* %dest, i64 0, i64 0
+ %call = call i8* @strcpy(i8* %arraydecay, i8* %src)
+; CHECK: %call = call i8* @strcpy
+ %arrayidx = getelementptr inbounds i8* %call, i64 10
+ store i8 97, i8* %arrayidx, align 1
+ ret void
+}
+
+declare void @user(i8* %p)
+define void @test6(i8* %src) {
+; CHECK: @test6
+ %B = alloca [16 x i8]
+ %dest = getelementptr inbounds [16 x i8]* %B, i64 0, i64 0
+; CHECK: @strcpy
+ %call = call i8* @strcpy(i8* %dest, i8* %src)
+; CHECK: @user
+ call void @user(i8* %dest)
+; CHECK: ret void
+ ret void
+}
+
diff --git a/test/Transforms/DeadStoreElimination/simple.ll b/test/Transforms/DeadStoreElimination/simple.ll
index 7a8cdd531b55..e0eb90af9437 100644
--- a/test/Transforms/DeadStoreElimination/simple.ll
+++ b/test/Transforms/DeadStoreElimination/simple.ll
@@ -310,3 +310,17 @@ define void @test24([2 x i32]* %a, i32 %b, i32 %c) nounwind {
store i32 %c, i32* %4, align 4
ret void
}
+
+; Check another case like PR13547 where strdup is not like malloc.
+; CHECK: @test25
+; CHECK: load i8
+; CHECK: store i8 0
+; CHECK: store i8 %tmp
+define i8* @test25(i8* %p) nounwind {
+ %p.4 = getelementptr i8* %p, i64 4
+ %tmp = load i8* %p.4, align 1
+ store i8 0, i8* %p.4, align 1
+ %q = call i8* @strdup(i8* %p) nounwind optsize
+ store i8 %tmp, i8* %p.4, align 1
+ ret i8* %q
+}
diff --git a/test/Transforms/EarlyCSE/commute.ll b/test/Transforms/EarlyCSE/commute.ll
new file mode 100644
index 000000000000..f84a7dd1aae9
--- /dev/null
+++ b/test/Transforms/EarlyCSE/commute.ll
@@ -0,0 +1,66 @@
+; RUN: opt < %s -S -early-cse | FileCheck %s
+
+; CHECK: @test1
+define void @test1(float %A, float %B, float* %PA, float* %PB) {
+ ; CHECK-NEXT: fadd
+ ; CHECK-NEXT: store
+ ; CHECK-NEXT: store
+ ; CHECK-NEXT: ret
+ %C = fadd float %A, %B
+ store float %C, float* %PA
+ %D = fadd float %B, %A
+ store float %D, float* %PB
+ ret void
+}
+
+; CHECK: @test2
+define void @test2(float %A, float %B, i1* %PA, i1* %PB) {
+ ; CHECK-NEXT: fcmp
+ ; CHECK-NEXT: store
+ ; CHECK-NEXT: store
+ ; CHECK-NEXT: ret
+ %C = fcmp eq float %A, %B
+ store i1 %C, i1* %PA
+ %D = fcmp eq float %B, %A
+ store i1 %D, i1* %PB
+ ret void
+}
+
+; CHECK: @test3
+define void @test3(float %A, float %B, i1* %PA, i1* %PB) {
+ ; CHECK-NEXT: fcmp
+ ; CHECK-NEXT: store
+ ; CHECK-NEXT: store
+ ; CHECK-NEXT: ret
+ %C = fcmp uge float %A, %B
+ store i1 %C, i1* %PA
+ %D = fcmp ule float %B, %A
+ store i1 %D, i1* %PB
+ ret void
+}
+
+; CHECK: @test4
+define void @test4(i32 %A, i32 %B, i1* %PA, i1* %PB) {
+ ; CHECK-NEXT: icmp
+ ; CHECK-NEXT: store
+ ; CHECK-NEXT: store
+ ; CHECK-NEXT: ret
+ %C = icmp eq i32 %A, %B
+ store i1 %C, i1* %PA
+ %D = icmp eq i32 %B, %A
+ store i1 %D, i1* %PB
+ ret void
+}
+
+; CHECK: @test5
+define void @test5(i32 %A, i32 %B, i1* %PA, i1* %PB) {
+ ; CHECK-NEXT: icmp
+ ; CHECK-NEXT: store
+ ; CHECK-NEXT: store
+ ; CHECK-NEXT: ret
+ %C = icmp sgt i32 %A, %B
+ store i1 %C, i1* %PA
+ %D = icmp slt i32 %B, %A
+ store i1 %D, i1* %PB
+ ret void
+}
diff --git a/test/Transforms/GVN/crash.ll b/test/Transforms/GVN/crash.ll
index 31eae256c6ef..4a8c8e4589c8 100644
--- a/test/Transforms/GVN/crash.ll
+++ b/test/Transforms/GVN/crash.ll
@@ -163,3 +163,39 @@ entry:
ret i8 %1
}
+
+; Test that a GEP in an unreachable block with the following form doesn't crash
+; GVN:
+;
+; %x = gep %some.type %x, ...
+
+%struct.type = type { i64, i32, i32 }
+
+define fastcc void @func() nounwind uwtable ssp align 2 {
+entry:
+ br label %reachable.bb
+
+;; Unreachable code.
+
+unreachable.bb:
+ %gep.val = getelementptr inbounds %struct.type* %gep.val, i64 1
+ br i1 undef, label %u2.bb, label %u1.bb
+
+u1.bb:
+ %tmp1 = getelementptr inbounds %struct.type* %gep.val, i64 0, i32 0
+ store i64 -1, i64* %tmp1, align 8
+ br label %unreachable.bb
+
+u2.bb:
+ %0 = load i32* undef, align 4
+ %conv.i.i.i.i.i = zext i32 %0 to i64
+ br label %u2.bb
+
+;; Reachable code.
+
+reachable.bb:
+ br label %r1.bb
+
+r1.bb:
+ br label %u2.bb
+}
diff --git a/test/Transforms/GVN/malloc-load-removal.ll b/test/Transforms/GVN/malloc-load-removal.ll
new file mode 100644
index 000000000000..66b6929d3038
--- /dev/null
+++ b/test/Transforms/GVN/malloc-load-removal.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -basicaa -gvn < %s | FileCheck %s
+; RUN: opt -S -basicaa -gvn -disable-simplify-libcalls < %s | FileCheck %s -check-prefix=CHECK_NO_LIBCALLS
+; PR13694
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+declare i8* @malloc(i64) nounwind
+
+define noalias i8* @test() nounwind uwtable ssp {
+entry:
+ %call = tail call i8* @malloc(i64 100) nounwind
+ %0 = load i8* %call, align 1
+ %tobool = icmp eq i8 %0, 0
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ store i8 0, i8* %call, align 1
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ ret i8* %call
+
+; CHECK: @test
+; CHECK-NOT: load
+; CHECK-NOT: icmp
+
+; CHECK_NO_LIBCALLS: @test
+; CHECK_NO_LIBCALLS: load
+; CHECK_NO_LIBCALLS: icmp
+}
diff --git a/test/Transforms/GVN/pr14166.ll b/test/Transforms/GVN/pr14166.ll
new file mode 100644
index 000000000000..9f47e464265b
--- /dev/null
+++ b/test/Transforms/GVN/pr14166.ll
@@ -0,0 +1,27 @@
+; RUN: opt -gvn -S < %s | FileCheck %s
+target datalayout = "e-p:32:32:32"
+target triple = "i386-pc-linux-gnu"
+define <2 x i32> @test1() {
+ %v1 = alloca <2 x i32>
+ call void @anything(<2 x i32>* %v1)
+ %v2 = load <2 x i32>* %v1
+ %v3 = inttoptr <2 x i32> %v2 to <2 x i8*>
+ %v4 = bitcast <2 x i32>* %v1 to <2 x i8*>*
+ store <2 x i8*> %v3, <2 x i8*>* %v4
+ %v5 = load <2 x i32>* %v1
+ ret <2 x i32> %v5
+; CHECK: @test1
+; CHECK: %v1 = alloca <2 x i32>
+; CHECK: call void @anything(<2 x i32>* %v1)
+; CHECK: %v2 = load <2 x i32>* %v1
+; CHECK: %v3 = inttoptr <2 x i32> %v2 to <2 x i8*>
+; CHECK: %v4 = bitcast <2 x i32>* %v1 to <2 x i8*>*
+; CHECK: store <2 x i8*> %v3, <2 x i8*>* %v4
+; CHECK: %1 = ptrtoint <2 x i8*> %v3 to <2 x i32>
+; CHECK: %2 = bitcast <2 x i32> %1 to i64
+; CHECK: %3 = bitcast i64 %2 to <2 x i32>
+; CHECK: ret <2 x i32> %3
+}
+
+declare void @anything(<2 x i32>*)
+
diff --git a/test/Transforms/GVN/rle.ll b/test/Transforms/GVN/rle.ll
index e7641691264c..72fa819d1c73 100644
--- a/test/Transforms/GVN/rle.ll
+++ b/test/Transforms/GVN/rle.ll
@@ -1,7 +1,5 @@
-; RUN: opt < %s -basicaa -gvn -S -die | FileCheck %s
-
-; 32-bit little endian target.
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+; RUN: opt < %s -default-data-layout="e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-n8:16:32" -basicaa -gvn -S -die | FileCheck %s
+; RUN: opt < %s -default-data-layout="E-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-n32" -basicaa -gvn -S -die | FileCheck %s
;; Trivial RLE test.
define i32 @test0(i32 %V, i32* %P) {
@@ -318,7 +316,7 @@ define i8 @coerce_offset_nonlocal0(i32* %P, i1 %cond) {
%P4 = getelementptr i8* %P3, i32 2
br i1 %cond, label %T, label %F
T:
- store i32 42, i32* %P
+ store i32 57005, i32* %P
br label %Cont
F:
diff --git a/test/Transforms/GlobalOpt/blockaddress.ll b/test/Transforms/GlobalOpt/blockaddress.ll
new file mode 100644
index 000000000000..13da76299d5d
--- /dev/null
+++ b/test/Transforms/GlobalOpt/blockaddress.ll
@@ -0,0 +1,20 @@
+; RUN: opt < %s -globalopt -S | FileCheck %s
+
+@x = internal global i8* zeroinitializer
+
+define void @f() {
+; CHECK: @f
+
+; Check that we don't hit an assert in Constant::IsThreadDependent()
+; when storing this blockaddress into a global.
+
+ store i8* blockaddress(@g, %here), i8** @x, align 8
+ ret void
+}
+
+define void @g() {
+; CHECK: @g
+
+here:
+ ret void
+}
diff --git a/test/Transforms/GlobalOpt/load-store-global.ll b/test/Transforms/GlobalOpt/load-store-global.ll
index f824b2c11cbf..25a53370fa09 100644
--- a/test/Transforms/GlobalOpt/load-store-global.ll
+++ b/test/Transforms/GlobalOpt/load-store-global.ll
@@ -1,15 +1,38 @@
-; RUN: opt < %s -globalopt -S | not grep G
+; RUN: opt < %s -globalopt -S | FileCheck %s
@G = internal global i32 17 ; <i32*> [#uses=3]
+; CHECK-NOT: @G
define void @foo() {
%V = load i32* @G ; <i32> [#uses=1]
store i32 %V, i32* @G
ret void
+; CHECK: @foo
+; CHECK-NEXT: ret void
}
define i32 @bar() {
%X = load i32* @G ; <i32> [#uses=1]
ret i32 %X
+; CHECK: @bar
+; CHECK-NEXT: ret i32 17
+}
+
+@a = internal global i64* null, align 8
+; CHECK-NOT: @a
+
+; PR13968
+define void @qux() nounwind {
+ %b = bitcast i64** @a to i8*
+ %g = getelementptr i64** @a, i32 1
+ %cmp = icmp ne i8* null, %b
+ %cmp2 = icmp eq i8* null, %b
+ %cmp3 = icmp eq i64** null, %g
+ store i64* inttoptr (i64 1 to i64*), i64** @a, align 8
+ %l = load i64** @a, align 8
+ ret void
+; CHECK: @qux
+; CHECK-NOT: store
+; CHECK-NOT: load
}
diff --git a/test/Transforms/GlobalOpt/tls.ll b/test/Transforms/GlobalOpt/tls.ll
new file mode 100644
index 000000000000..7a410e5ed20b
--- /dev/null
+++ b/test/Transforms/GlobalOpt/tls.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -globalopt -S | FileCheck %s
+
+declare void @wait()
+declare void @signal()
+declare void @start_thread(void ()*)
+
+@x = internal thread_local global [100 x i32] zeroinitializer, align 16
+@ip = internal global i32* null, align 8
+
+; PR14309: GlobalOpt would think that the value of @ip is always the address of
+; x[1]. However, that address is different for different threads so @ip cannot
+; be replaced with a constant.
+
+define i32 @f() {
+entry:
+ ; Set @ip to point to x[1] for thread 1.
+ store i32* getelementptr inbounds ([100 x i32]* @x, i64 0, i64 1), i32** @ip, align 8
+
+ ; Run g on a new thread.
+ tail call void @start_thread(void ()* @g) nounwind
+ tail call void @wait() nounwind
+
+ ; Reset x[1] for thread 1.
+ store i32 0, i32* getelementptr inbounds ([100 x i32]* @x, i64 0, i64 1), align 4
+
+ ; Read the value of @ip, which now points at x[1] for thread 2.
+ %0 = load i32** @ip, align 8
+
+ %1 = load i32* %0, align 4
+ ret i32 %1
+
+; CHECK: @f
+; Make sure that the load from @ip hasn't been removed.
+; CHECK: load i32** @ip
+; CHECK: ret
+}
+
+define internal void @g() nounwind uwtable {
+entry:
+ ; Set @ip to point to x[1] for thread 2.
+ store i32* getelementptr inbounds ([100 x i32]* @x, i64 0, i64 1), i32** @ip, align 8
+
+ ; Store 50 in x[1] for thread 2.
+ store i32 50, i32* getelementptr inbounds ([100 x i32]* @x, i64 0, i64 1), align 4
+
+ tail call void @signal() nounwind
+ ret void
+
+; CHECK: @g
+; Make sure that the store to @ip hasn't been removed.
+; CHECK: store {{.*}} @ip
+; CHECK: ret
+}
diff --git a/test/Transforms/IndVarSimplify/2004-04-05-InvokeCastCrash.ll b/test/Transforms/IndVarSimplify/2004-04-05-InvokeCastCrash.ll
index 708a961272b5..0c88e83975c1 100644
--- a/test/Transforms/IndVarSimplify/2004-04-05-InvokeCastCrash.ll
+++ b/test/Transforms/IndVarSimplify/2004-04-05-InvokeCastCrash.ll
@@ -39,11 +39,11 @@
%"struct.llvm::SymbolTable" = type opaque
%"struct.llvm::SymbolTableListTraits<llvm::Argument,llvm::Function,llvm::Function,llvm::ilist_traits<llvm::Argument> >" = type { %"struct.llvm::Function"*, %"struct.llvm::Function"* }
%"struct.llvm::SymbolTableListTraits<llvm::Instruction,llvm::BasicBlock,llvm::Function,llvm::ilist_traits<llvm::Instruction> >" = type { %"struct.llvm::Function"*, %"struct.llvm::BasicBlock"* }
- %"struct.llvm::TargetData" = type { %"struct.llvm::FunctionPass", i1, i8, i8, i8, i8, i8, i8, i8, i8 }
+ %"struct.llvm::DataLayout" = type { %"struct.llvm::FunctionPass", i1, i8, i8, i8, i8, i8, i8, i8, i8 }
%"struct.llvm::TargetFrameInfo" = type { i32 (...)**, i32, i32, i32 }
%"struct.llvm::TargetInstrDescriptor" = type { i8*, i32, i32, i32, i1, i32, i32, i32, i32, i32, i32*, i32* }
%"struct.llvm::TargetInstrInfo" = type { i32 (...)**, %"struct.llvm::TargetInstrDescriptor"*, i32, i32 }
- %"struct.llvm::TargetMachine" = type { i32 (...)**, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >", %"struct.llvm::TargetData", %"struct.llvm::IntrinsicLowering"* }
+ %"struct.llvm::TargetMachine" = type { i32 (...)**, %"struct.std::basic_string<char,std::char_traits<char>,std::allocator<char> >", %"struct.llvm::DataLayout", %"struct.llvm::IntrinsicLowering"* }
%"struct.llvm::TargetRegClassInfo" = type { i32 (...)**, i32, i32, i32 }
%"struct.llvm::TargetRegInfo" = type { i32 (...)**, %"struct.std::vector<const llvm::TargetRegClassInfo*,std::allocator<const llvm::TargetRegClassInfo*> >", %"struct.llvm::TargetMachine"* }
%"struct.llvm::Type" = type { %"struct.llvm::Value", i32, i32, i1, i32, %"struct.llvm::Type"*, %"struct.std::vector<llvm::PATypeHandle,std::allocator<llvm::PATypeHandle> >" }
diff --git a/test/Transforms/IndVarSimplify/2012-10-19-congruent-constant.ll b/test/Transforms/IndVarSimplify/2012-10-19-congruent-constant.ll
new file mode 100644
index 000000000000..5c478669d298
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/2012-10-19-congruent-constant.ll
@@ -0,0 +1,27 @@
+; RUN: opt -S -indvars < %s | FileCheck %s
+
+; PR12627
+define void @test1(i32 %x) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %phi1 = phi i1 [ false, %entry ], [ %cmpa, %for.body ]
+ %phi2 = phi i1 [ false, %entry ], [ %cmpb, %for.body ]
+ %i.07 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ tail call void @aux(i1 %phi1, i1 %phi2) nounwind
+ %cmpa = icmp sgt i32 %i.07, 200
+ %cmpb = icmp sgt i32 %i.07, 100
+ %inc = add nsw i32 %i.07, 1
+ %exitcond = icmp eq i32 %inc, 100
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+
+; CHECK: @test1
+; CHECK-NOT: phi i1
+; CHECK: call void @aux(i1 false, i1 false)
+}
+
+declare void @aux(i1, i1)
diff --git a/test/Transforms/IndVarSimplify/crash.ll b/test/Transforms/IndVarSimplify/crash.ll
index 3335be781dfc..1b702a3b1a3c 100644
--- a/test/Transforms/IndVarSimplify/crash.ll
+++ b/test/Transforms/IndVarSimplify/crash.ll
@@ -87,3 +87,47 @@ entry:
main.f.exit: ; preds = %"3.i"
unreachable
}
+
+
+; PR13967
+
+define void @f() nounwind ssp {
+bb:
+ br label %bb4
+
+bb4:
+ %tmp = phi i64 [ %tmp5, %bb7 ], [ undef, %bb ]
+ %tmp5 = add nsw i64 %tmp, 1
+ %extract.t1 = trunc i64 %tmp5 to i32
+ br i1 false, label %bb6, label %bb7
+
+bb6:
+ br label %bb7
+
+bb7:
+ %.off0 = phi i32 [ undef, %bb6 ], [ %extract.t1, %bb4 ]
+ %tmp8 = icmp eq i32 %.off0, 0
+ br i1 %tmp8, label %bb9, label %bb4
+
+bb9:
+ ret void
+}
+
+; PR12536
+define void @fn1() noreturn nounwind {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.end, %entry
+ %b.0 = phi i32 [ undef, %entry ], [ %conv, %for.end ]
+ br label %for.cond1
+
+for.cond1: ; preds = %for.cond1, %for.cond
+ %c.0 = phi i32 [ %b.0, %for.cond1 ], [ 0, %for.cond ]
+ br i1 undef, label %for.cond1, label %for.end
+
+for.end: ; preds = %for.cond1
+ %cmp2 = icmp slt i32 %c.0, 1
+ %conv = zext i1 %cmp2 to i32
+ br label %for.cond
+}
diff --git a/test/Transforms/IndVarSimplify/eliminate-comparison.ll b/test/Transforms/IndVarSimplify/eliminate-comparison.ll
index 953bbdff5c62..5dca71264665 100644
--- a/test/Transforms/IndVarSimplify/eliminate-comparison.ll
+++ b/test/Transforms/IndVarSimplify/eliminate-comparison.ll
@@ -106,3 +106,106 @@ loop:
return:
ret void
}
+
+; PR14432
+; Indvars should not turn the second loop into an infinite one.
+
+; CHECK: @func_11
+; CHECK: %tmp5 = icmp slt i32 %__key6.0, 10
+; CHECK-NOT: br i1 true, label %noassert68, label %unrolledend
+
+define i32 @func_11() nounwind uwtable {
+entry:
+ br label %forcond
+
+forcond: ; preds = %noassert, %entry
+ %__key6.0 = phi i32 [ 2, %entry ], [ %tmp37, %noassert ]
+ %tmp5 = icmp slt i32 %__key6.0, 10
+ br i1 %tmp5, label %noassert, label %forcond38.preheader
+
+forcond38.preheader: ; preds = %forcond
+ br label %forcond38
+
+noassert: ; preds = %forbody
+ %tmp13 = sdiv i32 -32768, %__key6.0
+ %tmp2936 = shl i32 %tmp13, 24
+ %sext23 = shl i32 %tmp13, 24
+ %tmp32 = icmp eq i32 %tmp2936, %sext23
+ %tmp37 = add i32 %__key6.0, 1
+ br i1 %tmp32, label %forcond, label %assert33
+
+assert33: ; preds = %noassert
+ tail call void @llvm.trap()
+ unreachable
+
+forcond38: ; preds = %noassert68, %forcond38.preheader
+ %__key8.0 = phi i32 [ %tmp81, %noassert68 ], [ 2, %forcond38.preheader ]
+ %tmp46 = icmp slt i32 %__key8.0, 10
+ br i1 %tmp46, label %noassert68, label %unrolledend
+
+noassert68: ; preds = %forbody39
+ %tmp57 = sdiv i32 -32768, %__key8.0
+ %sext34 = shl i32 %tmp57, 16
+ %sext21 = shl i32 %tmp57, 16
+ %tmp76 = icmp eq i32 %sext34, %sext21
+ %tmp81 = add i32 %__key8.0, 1
+ br i1 %tmp76, label %forcond38, label %assert77
+
+assert77: ; preds = %noassert68
+ tail call void @llvm.trap()
+ unreachable
+
+unrolledend: ; preds = %forcond38
+ ret i32 0
+}
+
+declare void @llvm.trap() noreturn nounwind
+
+; In this case the second loop only has a single iteration, fold the header away
+; CHECK: @func_12
+; CHECK: %tmp5 = icmp slt i32 %__key6.0, 10
+; CHECK: br i1 true, label %noassert68, label %unrolledend
+define i32 @func_12() nounwind uwtable {
+entry:
+ br label %forcond
+
+forcond: ; preds = %noassert, %entry
+ %__key6.0 = phi i32 [ 2, %entry ], [ %tmp37, %noassert ]
+ %tmp5 = icmp slt i32 %__key6.0, 10
+ br i1 %tmp5, label %noassert, label %forcond38.preheader
+
+forcond38.preheader: ; preds = %forcond
+ br label %forcond38
+
+noassert: ; preds = %forbody
+ %tmp13 = sdiv i32 -32768, %__key6.0
+ %tmp2936 = shl i32 %tmp13, 24
+ %sext23 = shl i32 %tmp13, 24
+ %tmp32 = icmp eq i32 %tmp2936, %sext23
+ %tmp37 = add i32 %__key6.0, 1
+ br i1 %tmp32, label %forcond, label %assert33
+
+assert33: ; preds = %noassert
+ tail call void @llvm.trap()
+ unreachable
+
+forcond38: ; preds = %noassert68, %forcond38.preheader
+ %__key8.0 = phi i32 [ %tmp81, %noassert68 ], [ 2, %forcond38.preheader ]
+ %tmp46 = icmp slt i32 %__key8.0, 10
+ br i1 %tmp46, label %noassert68, label %unrolledend
+
+noassert68: ; preds = %forbody39
+ %tmp57 = sdiv i32 -32768, %__key8.0
+ %sext34 = shl i32 %tmp57, 16
+ %sext21 = shl i32 %tmp57, 16
+ %tmp76 = icmp ne i32 %sext34, %sext21
+ %tmp81 = add i32 %__key8.0, 1
+ br i1 %tmp76, label %forcond38, label %assert77
+
+assert77: ; preds = %noassert68
+ tail call void @llvm.trap()
+ unreachable
+
+unrolledend: ; preds = %forcond38
+ ret i32 0
+}
diff --git a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
index bfdd000e38eb..507f695e67c5 100644
--- a/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
+++ b/test/Transforms/IndVarSimplify/no-iv-rewrite.ll
@@ -199,7 +199,6 @@ entry:
; back to the loop iv.
;
; CHECK: loop:
-; CHECK: phi i32
; CHECK-NOT: phi
; CHECK: exit:
loop:
diff --git a/test/Transforms/IndVarSimplify/verify-scev.ll b/test/Transforms/IndVarSimplify/verify-scev.ll
new file mode 100644
index 000000000000..019f5830d520
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/verify-scev.ll
@@ -0,0 +1,421 @@
+; RUN: opt < %s -S -indvars -verify-scev
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+define void @test1() nounwind uwtable ssp {
+entry:
+ br i1 undef, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ br i1 false, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ br i1 undef, label %for.end11, label %for.body3
+
+for.body3: ; preds = %for.end
+ unreachable
+
+for.end11: ; preds = %for.end
+ br i1 undef, label %while.body, label %while.end
+
+while.body: ; preds = %for.end11
+ unreachable
+
+while.end: ; preds = %for.end11
+ br i1 undef, label %if.end115, label %for.cond109
+
+for.cond109: ; preds = %while.end
+ unreachable
+
+if.end115: ; preds = %while.end
+ br i1 undef, label %while.body119.lr.ph.lr.ph, label %for.cond612
+
+while.body119.lr.ph.lr.ph: ; preds = %if.end115
+ br i1 undef, label %for.cond612, label %if.end123.us
+
+if.end123.us: ; preds = %while.body119.lr.ph.lr.ph
+ br label %for.cond132.us
+
+for.cond132.us: ; preds = %for.cond132.us, %if.end123.us
+ br i1 undef, label %if.then136.us, label %for.cond132.us
+
+if.then136.us: ; preds = %for.cond132.us
+ br i1 undef, label %while.end220, label %while.body211
+
+while.body211: ; preds = %while.body211, %if.then136.us
+ br i1 undef, label %while.end220, label %while.body211
+
+while.end220: ; preds = %while.body211, %if.then136.us
+ br label %for.cond246.outer
+
+for.cond246.outer: ; preds = %for.inc558, %for.cond394.preheader, %if.then274, %for.cond404.preheader, %while.end220
+ br label %for.cond246
+
+for.cond246: ; preds = %for.cond372.loopexit, %for.cond246.outer
+ br i1 undef, label %for.end562, label %if.end250
+
+if.end250: ; preds = %for.cond246
+ br i1 undef, label %if.end256, label %for.end562
+
+if.end256: ; preds = %if.end250
+ %cmp272 = icmp eq i32 undef, undef
+ br i1 %cmp272, label %if.then274, label %for.cond404.preheader
+
+for.cond404.preheader: ; preds = %if.end256
+ br i1 undef, label %for.cond246.outer, label %for.body409.lr.ph
+
+for.body409.lr.ph: ; preds = %for.cond404.preheader
+ br label %for.body409
+
+if.then274: ; preds = %if.end256
+ br i1 undef, label %for.cond246.outer, label %if.end309
+
+if.end309: ; preds = %if.then274
+ br i1 undef, label %for.cond372.loopexit, label %for.body361
+
+for.body361: ; preds = %for.body361, %if.end309
+ br i1 undef, label %for.cond372.loopexit, label %for.body361
+
+for.cond372.loopexit: ; preds = %for.body361, %if.end309
+ br i1 undef, label %for.cond394.preheader, label %for.cond246
+
+for.cond394.preheader: ; preds = %for.cond372.loopexit
+ br i1 undef, label %for.cond246.outer, label %for.body397
+
+for.body397: ; preds = %for.cond394.preheader
+ unreachable
+
+for.body409: ; preds = %for.inc558, %for.body409.lr.ph
+ %k.029 = phi i32 [ 1, %for.body409.lr.ph ], [ %inc559, %for.inc558 ]
+ br i1 undef, label %if.then412, label %if.else433
+
+if.then412: ; preds = %for.body409
+ br label %if.end440
+
+if.else433: ; preds = %for.body409
+ br label %if.end440
+
+if.end440: ; preds = %if.else433, %if.then412
+ br i1 undef, label %for.inc558, label %if.end461
+
+if.end461: ; preds = %if.end440
+ br i1 undef, label %for.cond528.loopexit, label %for.body517
+
+for.body517: ; preds = %for.body517, %if.end461
+ br i1 undef, label %for.cond528.loopexit, label %for.body517
+
+for.cond528.loopexit: ; preds = %for.body517, %if.end461
+ br label %for.inc558
+
+for.inc558: ; preds = %for.cond528.loopexit, %if.end440
+ %inc559 = add nsw i32 %k.029, 1
+ %cmp407 = icmp sgt i32 %inc559, undef
+ br i1 %cmp407, label %for.cond246.outer, label %for.body409
+
+for.end562: ; preds = %if.end250, %for.cond246
+ unreachable
+
+for.cond612: ; preds = %while.body119.lr.ph.lr.ph, %if.end115
+ unreachable
+}
+
+define void @test2() nounwind uwtable ssp {
+entry:
+ br i1 undef, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ br i1 undef, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ br i1 undef, label %for.end11, label %for.body3
+
+for.body3: ; preds = %for.end
+ unreachable
+
+for.end11: ; preds = %for.end
+ br i1 undef, label %while.body, label %while.end
+
+while.body: ; preds = %for.end11
+ unreachable
+
+while.end: ; preds = %for.end11
+ br i1 undef, label %if.end115, label %for.cond109
+
+for.cond109: ; preds = %while.end
+ unreachable
+
+if.end115: ; preds = %while.end
+ br i1 undef, label %while.body119.lr.ph.lr.ph, label %for.cond612
+
+while.body119.lr.ph.lr.ph: ; preds = %if.end115
+ br i1 undef, label %for.cond612, label %if.end123.us
+
+if.end123.us: ; preds = %while.body119.lr.ph.lr.ph
+ br label %for.cond132.us
+
+for.cond132.us: ; preds = %for.cond132.us, %if.end123.us
+ br i1 undef, label %if.then136.us, label %for.cond132.us
+
+if.then136.us: ; preds = %for.cond132.us
+ br i1 undef, label %while.end220, label %while.body211
+
+while.body211: ; preds = %while.body211, %if.then136.us
+ br i1 undef, label %while.end220, label %while.body211
+
+while.end220: ; preds = %while.body211, %if.then136.us
+ br label %for.cond246.outer
+
+for.cond246.outer: ; preds = %for.inc558, %for.cond394.preheader, %if.then274, %for.cond404.preheader, %while.end220
+ br label %for.cond246
+
+for.cond246: ; preds = %for.cond372.loopexit, %for.cond246.outer
+ br i1 undef, label %for.end562, label %if.end250
+
+if.end250: ; preds = %for.cond246
+ br i1 undef, label %if.end256, label %for.end562
+
+if.end256: ; preds = %if.end250
+ %0 = load i32* undef, align 4
+ br i1 undef, label %if.then274, label %for.cond404.preheader
+
+for.cond404.preheader: ; preds = %if.end256
+ %add406 = add i32 0, %0
+ br i1 undef, label %for.cond246.outer, label %for.body409.lr.ph
+
+for.body409.lr.ph: ; preds = %for.cond404.preheader
+ br label %for.body409
+
+if.then274: ; preds = %if.end256
+ br i1 undef, label %for.cond246.outer, label %if.end309
+
+if.end309: ; preds = %if.then274
+ br i1 undef, label %for.cond372.loopexit, label %for.body361
+
+for.body361: ; preds = %for.body361, %if.end309
+ br i1 undef, label %for.cond372.loopexit, label %for.body361
+
+for.cond372.loopexit: ; preds = %for.body361, %if.end309
+ br i1 undef, label %for.cond394.preheader, label %for.cond246
+
+for.cond394.preheader: ; preds = %for.cond372.loopexit
+ br i1 undef, label %for.cond246.outer, label %for.body397
+
+for.body397: ; preds = %for.cond394.preheader
+ unreachable
+
+for.body409: ; preds = %for.inc558, %for.body409.lr.ph
+ %k.029 = phi i32 [ 1, %for.body409.lr.ph ], [ %inc559, %for.inc558 ]
+ br i1 undef, label %if.then412, label %if.else433
+
+if.then412: ; preds = %for.body409
+ br label %if.end440
+
+if.else433: ; preds = %for.body409
+ br label %if.end440
+
+if.end440: ; preds = %if.else433, %if.then412
+ br i1 undef, label %for.inc558, label %if.end461
+
+if.end461: ; preds = %if.end440
+ br i1 undef, label %for.cond528.loopexit, label %for.body517
+
+for.body517: ; preds = %for.body517, %if.end461
+ br i1 undef, label %for.cond528.loopexit, label %for.body517
+
+for.cond528.loopexit: ; preds = %for.body517, %if.end461
+ br label %for.inc558
+
+for.inc558: ; preds = %for.cond528.loopexit, %if.end440
+ %inc559 = add nsw i32 %k.029, 1
+ %cmp407 = icmp sgt i32 %inc559, %add406
+ br i1 %cmp407, label %for.cond246.outer, label %for.body409
+
+for.end562: ; preds = %if.end250, %for.cond246
+ unreachable
+
+for.cond612: ; preds = %while.body119.lr.ph.lr.ph, %if.end115
+ unreachable
+}
+
+define void @test3() nounwind uwtable ssp {
+entry:
+ br i1 undef, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %entry
+ br i1 undef, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ br i1 undef, label %for.end11, label %for.body3
+
+for.body3: ; preds = %for.end
+ unreachable
+
+for.end11: ; preds = %for.end
+ br i1 undef, label %while.body, label %while.end
+
+while.body: ; preds = %for.end11
+ unreachable
+
+while.end: ; preds = %for.end11
+ br i1 undef, label %if.end115, label %for.cond109
+
+for.cond109: ; preds = %while.end
+ unreachable
+
+if.end115: ; preds = %while.end
+ br i1 undef, label %while.body119.lr.ph.lr.ph, label %for.cond612
+
+while.body119.lr.ph.lr.ph: ; preds = %if.end115
+ br i1 undef, label %for.cond612, label %if.end123.us
+
+if.end123.us: ; preds = %while.body119.lr.ph.lr.ph
+ br label %for.cond132.us
+
+for.cond132.us: ; preds = %for.cond132.us, %if.end123.us
+ br i1 undef, label %if.then136.us, label %for.cond132.us
+
+if.then136.us: ; preds = %for.cond132.us
+ br i1 undef, label %while.end220, label %while.body211
+
+while.body211: ; preds = %while.body211, %if.then136.us
+ br i1 undef, label %while.end220, label %while.body211
+
+while.end220: ; preds = %while.body211, %if.then136.us
+ br label %for.cond246.outer
+
+for.cond246.outer: ; preds = %for.inc558, %for.cond394.preheader, %if.then274, %for.cond404.preheader, %while.end220
+ br label %for.cond246
+
+for.cond246: ; preds = %for.cond372.loopexit, %for.cond246.outer
+ br i1 undef, label %for.end562, label %if.end250
+
+if.end250: ; preds = %for.cond246
+ br i1 undef, label %if.end256, label %for.end562
+
+if.end256: ; preds = %if.end250
+ br i1 undef, label %if.then274, label %for.cond404.preheader
+
+for.cond404.preheader: ; preds = %if.end256
+ br i1 undef, label %for.cond246.outer, label %for.body409.lr.ph
+
+for.body409.lr.ph: ; preds = %for.cond404.preheader
+ br label %for.body409
+
+if.then274: ; preds = %if.end256
+ br i1 undef, label %for.cond246.outer, label %if.end309
+
+if.end309: ; preds = %if.then274
+ br i1 undef, label %for.cond372.loopexit, label %for.body361
+
+for.body361: ; preds = %for.body361, %if.end309
+ br i1 undef, label %for.cond372.loopexit, label %for.body361
+
+for.cond372.loopexit: ; preds = %for.body361, %if.end309
+ br i1 undef, label %for.cond394.preheader, label %for.cond246
+
+for.cond394.preheader: ; preds = %for.cond372.loopexit
+ br i1 undef, label %for.cond246.outer, label %for.body397
+
+for.body397: ; preds = %for.cond394.preheader
+ unreachable
+
+for.body409: ; preds = %for.inc558, %for.body409.lr.ph
+ br i1 undef, label %if.then412, label %if.else433
+
+if.then412: ; preds = %for.body409
+ br label %if.end440
+
+if.else433: ; preds = %for.body409
+ br label %if.end440
+
+if.end440: ; preds = %if.else433, %if.then412
+ br i1 undef, label %for.inc558, label %if.end461
+
+if.end461: ; preds = %if.end440
+ br i1 undef, label %for.cond528.loopexit, label %for.body517
+
+for.body517: ; preds = %for.body517, %if.end461
+ br i1 undef, label %for.cond528.loopexit, label %for.body517
+
+for.cond528.loopexit: ; preds = %for.body517, %if.end461
+ br label %for.inc558
+
+for.inc558: ; preds = %for.cond528.loopexit, %if.end440
+ br i1 undef, label %for.cond246.outer, label %for.body409
+
+for.end562: ; preds = %if.end250, %for.cond246
+ unreachable
+
+for.cond612: ; preds = %while.body119.lr.ph.lr.ph, %if.end115
+ unreachable
+}
+
+define void @test4() nounwind uwtable ssp {
+entry:
+ br i1 undef, label %if.end8, label %if.else
+
+if.else: ; preds = %entry
+ br label %if.end8
+
+if.end8: ; preds = %if.else, %entry
+ br i1 undef, label %if.end26, label %if.else22
+
+if.else22: ; preds = %if.end8
+ br label %if.end26
+
+if.end26: ; preds = %if.else22, %if.end8
+ br i1 undef, label %if.end35, label %if.else31
+
+if.else31: ; preds = %if.end26
+ br label %if.end35
+
+if.end35: ; preds = %if.else31, %if.end26
+ br i1 undef, label %for.end226, label %for.body.lr.ph
+
+for.body.lr.ph: ; preds = %if.end35
+ br label %for.body48
+
+for.body48: ; preds = %for.inc221, %for.body.lr.ph
+ br i1 undef, label %for.inc221, label %for.body65.lr.ph
+
+for.body65.lr.ph: ; preds = %for.body48
+ %0 = load i32* undef, align 4
+ br label %for.body65.us
+
+for.body65.us: ; preds = %for.inc219.us, %for.body65.lr.ph
+ %k.09.us = phi i32 [ %inc.us, %for.inc219.us ], [ 1, %for.body65.lr.ph ]
+ %idxprom66.us = sext i32 %k.09.us to i64
+ br i1 undef, label %for.inc219.us, label %if.end72.us
+
+if.end72.us: ; preds = %for.body65.us
+ br i1 undef, label %if.end93.us, label %if.then76.us
+
+if.then76.us: ; preds = %if.end72.us
+ br label %if.end93.us
+
+if.end93.us: ; preds = %if.then76.us, %if.end72.us
+ br i1 undef, label %if.end110.us, label %for.inc219.us
+
+if.end110.us: ; preds = %if.end93.us
+ br i1 undef, label %for.inc219.us, label %for.body142.us
+
+for.body142.us: ; preds = %for.cond139.loopexit.us, %if.end110.us
+ br label %for.cond152.us
+
+for.cond152.us: ; preds = %for.cond152.us, %for.body142.us
+ br i1 undef, label %for.cond139.loopexit.us, label %for.cond152.us
+
+for.inc219.us: ; preds = %for.cond139.loopexit.us, %if.end110.us, %if.end93.us, %for.body65.us
+ %inc.us = add nsw i32 %k.09.us, 1
+ %cmp64.us = icmp sgt i32 %inc.us, %0
+ br i1 %cmp64.us, label %for.inc221, label %for.body65.us
+
+for.cond139.loopexit.us: ; preds = %for.cond152.us
+ br i1 undef, label %for.inc219.us, label %for.body142.us
+
+for.inc221: ; preds = %for.inc219.us, %for.body48
+ br label %for.body48
+
+for.end226: ; preds = %if.end35
+ ret void
+}
diff --git a/test/Transforms/Inline/recursive.ll b/test/Transforms/Inline/recursive.ll
new file mode 100644
index 000000000000..5fe8d1639ca3
--- /dev/null
+++ b/test/Transforms/Inline/recursive.ll
@@ -0,0 +1,38 @@
+; RUN: opt %s -inline -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+target triple = "i386-apple-darwin10.0"
+
+; rdar://10853263
+
+; Make sure that the callee is still here.
+; CHECK: define i32 @callee
+define i32 @callee(i32 %param) {
+ %yyy = alloca [100000 x i8]
+ %r = bitcast [100000 x i8]* %yyy to i8*
+ call void @foo2(i8* %r)
+ ret i32 4
+}
+
+; CHECK: define i32 @caller
+; CHECK-NEXT: entry:
+; CHECK-NOT: alloca
+; CHECK: ret
+define i32 @caller(i32 %param) {
+entry:
+ %t = call i32 @foo(i32 %param)
+ %cmp = icmp eq i32 %t, -1
+ br i1 %cmp, label %exit, label %cont
+
+cont:
+ %r = call i32 @caller(i32 %t)
+ %f = call i32 @callee(i32 %r)
+ br label %cont
+exit:
+ ret i32 4
+}
+
+declare void @foo2(i8* %in)
+
+declare i32 @foo(i32 %param)
+
diff --git a/test/Transforms/InstCombine/2012-07-25-LoadPart.ll b/test/Transforms/InstCombine/2012-07-25-LoadPart.ll
index 73e5a6653e80..18aab7f27efd 100644
--- a/test/Transforms/InstCombine/2012-07-25-LoadPart.ll
+++ b/test/Transforms/InstCombine/2012-07-25-LoadPart.ll
@@ -1,12 +1,14 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
+; RUN: opt < %s -default-data-layout="e-p:32:32:32" -instcombine -S | FileCheck %s --check-prefix=LE
+; RUN: opt < %s -default-data-layout="E-p:32:32:32" -instcombine -S | FileCheck %s --check-prefix=BE
; PR13442
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
-
@test = constant [4 x i32] [i32 1, i32 2, i32 3, i32 4]
define i64 @foo() {
%ret = load i64* bitcast (i8* getelementptr (i8* bitcast ([4 x i32]* @test to i8*), i64 2) to i64*), align 1
ret i64 %ret
- ; CHECK: ret i64 844424930263040
+ ; 0x00030000_00020000 in [01 00/00 00 02 00 00 00 03 00/00 00 04 00 00 00]
+ ; LE: ret i64 844424930263040
+ ; 0x00000200_00000300 in [00 00/00 01 00 00 00 02 00 00/00 03 00 00 00 04]
+ ; BE: ret i64 281474976841728
}
diff --git a/test/Transforms/InstCombine/2012-08-28-udiv_ashl.ll b/test/Transforms/InstCombine/2012-08-28-udiv_ashl.ll
new file mode 100644
index 000000000000..4efaf8c17255
--- /dev/null
+++ b/test/Transforms/InstCombine/2012-08-28-udiv_ashl.ll
@@ -0,0 +1,57 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; rdar://12182093
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; CHECK: @udiv400
+; CHECK: udiv i32 %x, 400
+; CHECK: ret
+define i32 @udiv400(i32 %x) {
+entry:
+ %div = lshr i32 %x, 2
+ %div1 = udiv i32 %div, 100
+ ret i32 %div1
+}
+
+
+; CHECK: @udiv400_no
+; CHECK: ashr
+; CHECK: div
+; CHECK: ret
+define i32 @udiv400_no(i32 %x) {
+entry:
+ %div = ashr i32 %x, 2
+ %div1 = udiv i32 %div, 100
+ ret i32 %div1
+}
+
+; CHECK: @sdiv400_yes
+; CHECK: udiv i32 %x, 400
+; CHECK: ret
+define i32 @sdiv400_yes(i32 %x) {
+entry:
+ %div = lshr i32 %x, 2
+ ; The sign bits of both operands are zero (i.e. we can prove they are
+ ; unsigned inputs), turn this into a udiv.
+ ; Next, optimize this just like sdiv.
+ %div1 = sdiv i32 %div, 100
+ ret i32 %div1
+}
+
+
+; CHECK: @udiv_i80
+; CHECK: udiv i80 %x, 400
+; CHECK: ret
+define i80 @udiv_i80(i80 %x) {
+ %div = lshr i80 %x, 2
+ %div1 = udiv i80 %div, 100
+ ret i80 %div1
+}
+
+define i32 @no_crash_notconst_udiv(i32 %x, i32 %notconst) {
+ %div = lshr i32 %x, %notconst
+ %div1 = udiv i32 %div, 100
+ ret i32 %div1
+}
diff --git a/test/Transforms/InstCombine/2012-09-17-ZeroSizedAlloca.ll b/test/Transforms/InstCombine/2012-09-17-ZeroSizedAlloca.ll
new file mode 100644
index 000000000000..ba025e92b010
--- /dev/null
+++ b/test/Transforms/InstCombine/2012-09-17-ZeroSizedAlloca.ll
@@ -0,0 +1,24 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+; When merging zero sized alloca check that requested alignments of the allocas
+; are obeyed.
+
+@x = global i8* null, align 8
+@y = global i8* null, align 8
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; CHECK: @f
+; CHECK-NEXT: alloca [0 x i8], align 1024
+; CHECK-NOT: alloca
+; CHECK: ret void
+define void @f() {
+ %1 = alloca [0 x i8], align 1
+ %2 = alloca [0 x i8], align 1024
+ %3 = getelementptr inbounds [0 x i8]* %1, i64 0, i64 0
+ %4 = getelementptr inbounds [0 x i8]* %2, i64 0, i64 0
+ store i8* %3, i8** @x, align 8
+ store i8* %4, i8** @y, align 8
+ ret void
+}
diff --git a/test/Transforms/InstCombine/2012-09-24-MemcpyFromGlobalCrash.ll b/test/Transforms/InstCombine/2012-09-24-MemcpyFromGlobalCrash.ll
new file mode 100644
index 000000000000..4cd60b42fbe1
--- /dev/null
+++ b/test/Transforms/InstCombine/2012-09-24-MemcpyFromGlobalCrash.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Check we don't crash due to lack of target data.
+
+@G = constant [100 x i8] zeroinitializer
+
+declare void @bar(i8*)
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+define void @test() {
+; CHECK: @test
+; CHECK: llvm.memcpy
+; CHECK: ret void
+ %A = alloca [100 x i8]
+ %a = getelementptr inbounds [100 x i8]* %A, i64 0, i64 0
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* getelementptr inbounds ([100 x i8]* @G, i64 0, i32 0), i64 100, i32 4, i1 false)
+ call void @bar(i8* %a) readonly
+ ret void
+}
diff --git a/test/Transforms/InstCombine/2012-10-25-vector-of-pointers.ll b/test/Transforms/InstCombine/2012-10-25-vector-of-pointers.ll
new file mode 100644
index 000000000000..20ea28268742
--- /dev/null
+++ b/test/Transforms/InstCombine/2012-10-25-vector-of-pointers.ll
@@ -0,0 +1,51 @@
+; RUN: opt < %s -instcombine -S
+
+; Make sure that we don't crash when optimizing the vectors of pointers.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+%struct.hoge = type { double*, double*, double*, double** }
+
+define void @widget(%struct.hoge* nocapture %arg) nounwind uwtable ssp {
+bb:
+ %tmp = getelementptr inbounds %struct.hoge* %arg, i64 0, i32 0
+ br i1 undef, label %bb1, label %bb17
+
+bb1: ; preds = %bb
+ br i1 undef, label %bb2, label %bb3
+
+bb2: ; preds = %bb1
+ br label %bb17
+
+bb3: ; preds = %bb1
+ %tmp4 = bitcast double** %tmp to <2 x double*>*
+ %tmp5 = load <2 x double*>* %tmp4, align 8
+ %tmp6 = ptrtoint <2 x double*> %tmp5 to <2 x i64>
+ %tmp7 = sub <2 x i64> zeroinitializer, %tmp6
+ %tmp8 = ashr exact <2 x i64> %tmp7, <i64 3, i64 3>
+ %tmp9 = extractelement <2 x i64> %tmp8, i32 0
+ %tmp10 = add nsw i64 undef, %tmp9
+ br i1 undef, label %bb11, label %bb12
+
+bb11: ; preds = %bb3
+ br label %bb13
+
+bb12: ; preds = %bb3
+ br label %bb13
+
+bb13: ; preds = %bb12, %bb11
+ br i1 undef, label %bb16, label %bb14
+
+bb14: ; preds = %bb13
+ br i1 undef, label %bb16, label %bb15
+
+bb15: ; preds = %bb14
+ br label %bb16
+
+bb16: ; preds = %bb15, %bb14, %bb13
+ unreachable
+
+bb17: ; preds = %bb2, %bb
+ ret void
+}
diff --git a/test/Transforms/InstCombine/align-addr.ll b/test/Transforms/InstCombine/align-addr.ll
index 27916b986030..4ea1bd9beb3b 100644
--- a/test/Transforms/InstCombine/align-addr.ll
+++ b/test/Transforms/InstCombine/align-addr.ll
@@ -58,3 +58,19 @@ define double @test2(double* %p, double %n) nounwind {
store double %n, double* %p
ret double %t
}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
+declare void @use(i8*)
+
+%struct.s = type { i32, i32, i32, i32 }
+
+define void @test3(%struct.s* sret %a4) {
+; Check that the alignment is bumped up the alignment of the sret type.
+; CHECK: @test3
+ %a4.cast = bitcast %struct.s* %a4 to i8*
+ call void @llvm.memset.p0i8.i64(i8* %a4.cast, i8 0, i64 16, i32 1, i1 false)
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %a4.cast, i8 0, i64 16, i32 4, i1 false)
+ call void @use(i8* %a4.cast)
+ ret void
+}
diff --git a/test/Transforms/InstCombine/alloca.ll b/test/Transforms/InstCombine/alloca.ll
index 50e03479f650..68a671cec88a 100644
--- a/test/Transforms/InstCombine/alloca.ll
+++ b/test/Transforms/InstCombine/alloca.ll
@@ -94,3 +94,19 @@ entry:
tail call void @f(i32* %b)
ret void
}
+
+; PR14371
+%opaque_type = type opaque
+%real_type = type { { i32, i32* } }
+
+@opaque_global = external constant %opaque_type, align 4
+
+define void @test7() {
+entry:
+ %0 = alloca %real_type, align 4
+ %1 = bitcast %real_type* %0 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %1, i8* bitcast (%opaque_type* @opaque_global to i8*), i32 8, i32 1, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/InstCombine/and-fcmp.ll b/test/Transforms/InstCombine/and-fcmp.ll
index 838c2f73fb91..40c44c09a8c0 100644
--- a/test/Transforms/InstCombine/and-fcmp.ll
+++ b/test/Transforms/InstCombine/and-fcmp.ll
@@ -10,7 +10,7 @@ define zeroext i8 @t1(float %x, float %y) nounwind {
; CHECK: fcmp oeq float %x, %y
; CHECK-NOT: fcmp ueq float %x, %y
; CHECK-NOT: fcmp ord float %x, %y
-; CHECK-NOW: and
+; CHECK-NOT: and
}
define zeroext i8 @t2(float %x, float %y) nounwind {
diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll
index 56e5ca3ff720..b4eb69d4363d 100644
--- a/test/Transforms/InstCombine/cast.ll
+++ b/test/Transforms/InstCombine/cast.ll
@@ -694,3 +694,209 @@ define i1 @test67(i1 %a, i32 %b) {
; CHECK: @test67
; CHECK: ret i1 false
}
+
+%s = type { i32, i32, i32 }
+
+define %s @test68(%s *%p, i64 %i) {
+; CHECK: @test68
+ %o = mul i64 %i, 12
+ %q = bitcast %s* %p to i8*
+ %pp = getelementptr inbounds i8* %q, i64 %o
+; CHECK-NEXT: getelementptr %s*
+ %r = bitcast i8* %pp to %s*
+ %l = load %s* %r
+; CHECK-NEXT: load %s*
+ ret %s %l
+; CHECK-NEXT: ret %s
+}
+
+define double @test69(double *%p, i64 %i) {
+; CHECK: @test69
+ %o = shl nsw i64 %i, 3
+ %q = bitcast double* %p to i8*
+ %pp = getelementptr inbounds i8* %q, i64 %o
+; CHECK-NEXT: getelementptr inbounds double*
+ %r = bitcast i8* %pp to double*
+ %l = load double* %r
+; CHECK-NEXT: load double*
+ ret double %l
+; CHECK-NEXT: ret double
+}
+
+define %s @test70(%s *%p, i64 %i) {
+; CHECK: @test70
+ %o = mul nsw i64 %i, 36
+; CHECK-NEXT: mul nsw i64 %i, 3
+ %q = bitcast %s* %p to i8*
+ %pp = getelementptr inbounds i8* %q, i64 %o
+; CHECK-NEXT: getelementptr inbounds %s*
+ %r = bitcast i8* %pp to %s*
+ %l = load %s* %r
+; CHECK-NEXT: load %s*
+ ret %s %l
+; CHECK-NEXT: ret %s
+}
+
+define double @test71(double *%p, i64 %i) {
+; CHECK: @test71
+ %o = shl i64 %i, 5
+; CHECK-NEXT: shl i64 %i, 2
+ %q = bitcast double* %p to i8*
+ %pp = getelementptr i8* %q, i64 %o
+; CHECK-NEXT: getelementptr double*
+ %r = bitcast i8* %pp to double*
+ %l = load double* %r
+; CHECK-NEXT: load double*
+ ret double %l
+; CHECK-NEXT: ret double
+}
+
+define double @test72(double *%p, i32 %i) {
+; CHECK: @test72
+ %so = mul nsw i32 %i, 8
+ %o = sext i32 %so to i64
+; CHECK-NEXT: sext i32 %i to i64
+ %q = bitcast double* %p to i8*
+ %pp = getelementptr inbounds i8* %q, i64 %o
+; CHECK-NEXT: getelementptr inbounds double*
+ %r = bitcast i8* %pp to double*
+ %l = load double* %r
+; CHECK-NEXT: load double*
+ ret double %l
+; CHECK-NEXT: ret double
+}
+
+define double @test73(double *%p, i128 %i) {
+; CHECK: @test73
+ %lo = mul nsw i128 %i, 8
+ %o = trunc i128 %lo to i64
+; CHECK-NEXT: trunc i128 %i to i64
+ %q = bitcast double* %p to i8*
+ %pp = getelementptr inbounds i8* %q, i64 %o
+; CHECK-NEXT: getelementptr double*
+ %r = bitcast i8* %pp to double*
+ %l = load double* %r
+; CHECK-NEXT: load double*
+ ret double %l
+; CHECK-NEXT: ret double
+}
+
+define double @test74(double *%p, i64 %i) {
+; CHECK: @test74
+ %q = bitcast double* %p to i64*
+ %pp = getelementptr inbounds i64* %q, i64 %i
+; CHECK-NEXT: getelementptr inbounds double*
+ %r = bitcast i64* %pp to double*
+ %l = load double* %r
+; CHECK-NEXT: load double*
+ ret double %l
+; CHECK-NEXT: ret double
+}
+
+define i32* @test75(i32* %p, i32 %x) {
+; CHECK: @test75
+ %y = shl i32 %x, 3
+; CHECK-NEXT: shl i32 %x, 3
+ %z = sext i32 %y to i64
+; CHECK-NEXT: sext i32 %y to i64
+ %q = bitcast i32* %p to i8*
+ %r = getelementptr i8* %q, i64 %z
+ %s = bitcast i8* %r to i32*
+ ret i32* %s
+}
+
+define %s @test76(%s *%p, i64 %i, i64 %j) {
+; CHECK: @test76
+ %o = mul i64 %i, 12
+ %o2 = mul nsw i64 %o, %j
+; CHECK-NEXT: %o2 = mul i64 %i, %j
+ %q = bitcast %s* %p to i8*
+ %pp = getelementptr inbounds i8* %q, i64 %o2
+; CHECK-NEXT: getelementptr %s* %p, i64 %o2
+ %r = bitcast i8* %pp to %s*
+ %l = load %s* %r
+; CHECK-NEXT: load %s*
+ ret %s %l
+; CHECK-NEXT: ret %s
+}
+
+define %s @test77(%s *%p, i64 %i, i64 %j) {
+; CHECK: @test77
+ %o = mul nsw i64 %i, 36
+ %o2 = mul nsw i64 %o, %j
+; CHECK-NEXT: %o = mul nsw i64 %i, 3
+; CHECK-NEXT: %o2 = mul nsw i64 %o, %j
+ %q = bitcast %s* %p to i8*
+ %pp = getelementptr inbounds i8* %q, i64 %o2
+; CHECK-NEXT: getelementptr inbounds %s* %p, i64 %o2
+ %r = bitcast i8* %pp to %s*
+ %l = load %s* %r
+; CHECK-NEXT: load %s*
+ ret %s %l
+; CHECK-NEXT: ret %s
+}
+
+define %s @test78(%s *%p, i64 %i, i64 %j, i32 %k, i32 %l, i128 %m, i128 %n) {
+; CHECK: @test78
+ %a = mul nsw i32 %k, 36
+; CHECK-NEXT: mul nsw i32 %k, 3
+ %b = mul nsw i32 %a, %l
+; CHECK-NEXT: mul nsw i32 %a, %l
+ %c = sext i32 %b to i128
+; CHECK-NEXT: sext i32 %b to i128
+ %d = mul nsw i128 %c, %m
+; CHECK-NEXT: mul nsw i128 %c, %m
+ %e = mul i128 %d, %n
+; CHECK-NEXT: mul i128 %d, %n
+ %f = trunc i128 %e to i64
+; CHECK-NEXT: trunc i128 %e to i64
+ %g = mul nsw i64 %f, %i
+; CHECK-NEXT: mul i64 %f, %i
+ %h = mul nsw i64 %g, %j
+; CHECK-NEXT: mul i64 %g, %j
+ %q = bitcast %s* %p to i8*
+ %pp = getelementptr inbounds i8* %q, i64 %h
+; CHECK-NEXT: getelementptr %s* %p, i64 %h
+ %r = bitcast i8* %pp to %s*
+ %load = load %s* %r
+; CHECK-NEXT: load %s*
+ ret %s %load
+; CHECK-NEXT: ret %s
+}
+
+define %s @test79(%s *%p, i64 %i, i32 %j) {
+; CHECK: @test79
+ %a = mul nsw i64 %i, 36
+; CHECK: mul nsw i64 %i, 36
+ %b = trunc i64 %a to i32
+ %c = mul i32 %b, %j
+ %q = bitcast %s* %p to i8*
+; CHECK: bitcast
+ %pp = getelementptr inbounds i8* %q, i32 %c
+ %r = bitcast i8* %pp to %s*
+ %l = load %s* %r
+ ret %s %l
+}
+
+define double @test80([100 x double]* %p, i32 %i) {
+; CHECK: @test80
+ %tmp = mul nsw i32 %i, 8
+; CHECK-NEXT: sext i32 %i to i64
+ %q = bitcast [100 x double]* %p to i8*
+ %pp = getelementptr i8* %q, i32 %tmp
+; CHECK-NEXT: getelementptr [100 x double]*
+ %r = bitcast i8* %pp to double*
+ %l = load double* %r
+; CHECK-NEXT: load double*
+ ret double %l
+; CHECK-NEXT: ret double
+}
+
+define double @test81(double *%p, float %f) {
+ %i = fptosi float %f to i64
+ %q = bitcast double* %p to i8*
+ %pp = getelementptr i8* %q, i64 %i
+ %r = bitcast i8* %pp to double*
+ %l = load double* %r
+ ret double %l
+}
diff --git a/test/Transforms/InstCombine/disable-simplify-libcalls.ll b/test/Transforms/InstCombine/disable-simplify-libcalls.ll
new file mode 100644
index 000000000000..d81e9ae5bd73
--- /dev/null
+++ b/test/Transforms/InstCombine/disable-simplify-libcalls.ll
@@ -0,0 +1,236 @@
+; Test that -disable-simplify-libcalls is wired up correctly.
+;
+; RUN: opt < %s -instcombine -disable-simplify-libcalls -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+@.str = constant [1 x i8] zeroinitializer, align 1
+@.str1 = constant [13 x i8] c"hello, world\00", align 1
+@.str2 = constant [4 x i8] c"foo\00", align 1
+@.str3 = constant [4 x i8] c"bar\00", align 1
+@.str4 = constant [6 x i8] c"123.4\00", align 1
+@.str5 = constant [5 x i8] c"1234\00", align 1
+@empty = constant [1 x i8] c"\00", align 1
+
+declare double @ceil(double)
+declare double @copysign(double, double)
+declare double @cos(double)
+declare double @fabs(double)
+declare double @floor(double)
+declare i8* @strcat(i8*, i8*)
+declare i8* @strncat(i8*, i8*, i32)
+declare i8* @strchr(i8*, i32)
+declare i8* @strrchr(i8*, i32)
+declare i32 @strcmp(i8*, i8*)
+declare i32 @strncmp(i8*, i8*, i64)
+declare i8* @strcpy(i8*, i8*)
+declare i8* @stpcpy(i8*, i8*)
+declare i8* @strncpy(i8*, i8*, i64)
+declare i64 @strlen(i8*)
+declare i8* @strpbrk(i8*, i8*)
+declare i64 @strspn(i8*, i8*)
+declare double @strtod(i8*, i8**)
+declare float @strtof(i8*, i8**)
+declare x86_fp80 @strtold(i8*, i8**)
+declare i64 @strtol(i8*, i8**, i32)
+declare i64 @strtoll(i8*, i8**, i32)
+declare i64 @strtoul(i8*, i8**, i32)
+declare i64 @strtoull(i8*, i8**, i32)
+declare i64 @strcspn(i8*, i8*)
+
+define double @t1(double %x) {
+; CHECK: @t1
+ %ret = call double @ceil(double %x)
+ ret double %ret
+; CHECK: call double @ceil
+}
+
+define double @t2(double %x, double %y) {
+; CHECK: @t2
+ %ret = call double @copysign(double %x, double %y)
+ ret double %ret
+; CHECK: call double @copysign
+}
+
+define double @t3(double %x) {
+; CHECK: @t3
+ %call = call double @cos(double %x)
+ ret double %call
+; CHECK: call double @cos
+}
+
+define double @t4(double %x) {
+; CHECK: @t4
+ %ret = call double @fabs(double %x)
+ ret double %ret
+; CHECK: call double @fabs
+}
+
+define double @t5(double %x) {
+; CHECK: @t5
+ %ret = call double @floor(double %x)
+ ret double %ret
+; CHECK: call double @floor
+}
+
+define i8* @t6(i8* %x) {
+; CHECK: @t6
+ %empty = getelementptr [1 x i8]* @empty, i32 0, i32 0
+ %ret = call i8* @strcat(i8* %x, i8* %empty)
+ ret i8* %ret
+; CHECK: call i8* @strcat
+}
+
+define i8* @t7(i8* %x) {
+; CHECK: @t7
+ %empty = getelementptr [1 x i8]* @empty, i32 0, i32 0
+ %ret = call i8* @strncat(i8* %x, i8* %empty, i32 1)
+ ret i8* %ret
+; CHECK: call i8* @strncat
+}
+
+define i8* @t8() {
+; CHECK: @t8
+ %x = getelementptr inbounds [13 x i8]* @.str1, i32 0, i32 0
+ %ret = call i8* @strchr(i8* %x, i32 119)
+ ret i8* %ret
+; CHECK: call i8* @strchr
+}
+
+define i8* @t9() {
+; CHECK: @t9
+ %x = getelementptr inbounds [13 x i8]* @.str1, i32 0, i32 0
+ %ret = call i8* @strrchr(i8* %x, i32 119)
+ ret i8* %ret
+; CHECK: call i8* @strrchr
+}
+
+define i32 @t10() {
+; CHECK: @t10
+ %x = getelementptr inbounds [4 x i8]* @.str2, i32 0, i32 0
+ %y = getelementptr inbounds [4 x i8]* @.str3, i32 0, i32 0
+ %ret = call i32 @strcmp(i8* %x, i8* %y)
+ ret i32 %ret
+; CHECK: call i32 @strcmp
+}
+
+define i32 @t11() {
+; CHECK: @t11
+ %x = getelementptr inbounds [4 x i8]* @.str2, i32 0, i32 0
+ %y = getelementptr inbounds [4 x i8]* @.str3, i32 0, i32 0
+ %ret = call i32 @strncmp(i8* %x, i8* %y, i64 3)
+ ret i32 %ret
+; CHECK: call i32 @strncmp
+}
+
+define i8* @t12(i8* %x) {
+; CHECK: @t12
+ %y = getelementptr inbounds [4 x i8]* @.str2, i32 0, i32 0
+ %ret = call i8* @strcpy(i8* %x, i8* %y)
+ ret i8* %ret
+; CHECK: call i8* @strcpy
+}
+
+define i8* @t13(i8* %x) {
+; CHECK: @t13
+ %y = getelementptr inbounds [4 x i8]* @.str2, i32 0, i32 0
+ %ret = call i8* @stpcpy(i8* %x, i8* %y)
+ ret i8* %ret
+; CHECK: call i8* @stpcpy
+}
+
+define i8* @t14(i8* %x) {
+; CHECK: @t14
+ %y = getelementptr inbounds [4 x i8]* @.str2, i32 0, i32 0
+ %ret = call i8* @strncpy(i8* %x, i8* %y, i64 3)
+ ret i8* %ret
+; CHECK: call i8* @strncpy
+}
+
+define i64 @t15() {
+; CHECK: @t15
+ %x = getelementptr inbounds [4 x i8]* @.str2, i32 0, i32 0
+ %ret = call i64 @strlen(i8* %x)
+ ret i64 %ret
+; CHECK: call i64 @strlen
+}
+
+define i8* @t16(i8* %x) {
+; CHECK: @t16
+ %y = getelementptr inbounds [1 x i8]* @.str, i32 0, i32 0
+ %ret = call i8* @strpbrk(i8* %x, i8* %y)
+ ret i8* %ret
+; CHECK: call i8* @strpbrk
+}
+
+define i64 @t17(i8* %x) {
+; CHECK: @t17
+ %y = getelementptr inbounds [1 x i8]* @.str, i32 0, i32 0
+ %ret = call i64 @strspn(i8* %x, i8* %y)
+ ret i64 %ret
+; CHECK: call i64 @strspn
+}
+
+define double @t18(i8** %y) {
+; CHECK: @t18
+ %x = getelementptr inbounds [6 x i8]* @.str4, i64 0, i64 0
+ %ret = call double @strtod(i8* %x, i8** %y)
+ ret double %ret
+; CHECK: call double @strtod
+}
+
+define float @t19(i8** %y) {
+; CHECK: @t19
+ %x = getelementptr inbounds [6 x i8]* @.str4, i64 0, i64 0
+ %ret = call float @strtof(i8* %x, i8** %y)
+ ret float %ret
+; CHECK: call float @strtof
+}
+
+define x86_fp80 @t20(i8** %y) {
+; CHECK: @t20
+ %x = getelementptr inbounds [6 x i8]* @.str4, i64 0, i64 0
+ %ret = call x86_fp80 @strtold(i8* %x, i8** %y)
+ ret x86_fp80 %ret
+; CHECK: call x86_fp80 @strtold
+}
+
+define i64 @t21(i8** %y) {
+; CHECK: @t21
+ %x = getelementptr inbounds [5 x i8]* @.str5, i64 0, i64 0
+ %ret = call i64 @strtol(i8* %x, i8** %y, i32 10)
+ ret i64 %ret
+; CHECK: call i64 @strtol
+}
+
+define i64 @t22(i8** %y) {
+; CHECK: @t22
+ %x = getelementptr inbounds [5 x i8]* @.str5, i64 0, i64 0
+ %ret = call i64 @strtoll(i8* %x, i8** %y, i32 10)
+ ret i64 %ret
+; CHECK: call i64 @strtoll
+}
+
+define i64 @t23(i8** %y) {
+; CHECK: @t23
+ %x = getelementptr inbounds [5 x i8]* @.str5, i64 0, i64 0
+ %ret = call i64 @strtoul(i8* %x, i8** %y, i32 10)
+ ret i64 %ret
+; CHECK: call i64 @strtoul
+}
+
+define i64 @t24(i8** %y) {
+; CHECK: @t24
+ %x = getelementptr inbounds [5 x i8]* @.str5, i64 0, i64 0
+ %ret = call i64 @strtoull(i8* %x, i8** %y, i32 10)
+ ret i64 %ret
+; CHECK: call i64 @strtoull
+}
+
+define i64 @t25(i8* %y) {
+; CHECK: @t25
+ %x = getelementptr [1 x i8]* @empty, i32 0, i32 0
+ %ret = call i64 @strcspn(i8* %x, i8* %y)
+ ret i64 %ret
+; CHECK: call i64 @strcspn
+}
diff --git a/test/Transforms/InstCombine/div-shift.ll b/test/Transforms/InstCombine/div-shift.ll
index a07f3ea94914..e0372ebac184 100644
--- a/test/Transforms/InstCombine/div-shift.ll
+++ b/test/Transforms/InstCombine/div-shift.ll
@@ -21,3 +21,17 @@ define i64 @t2(i64 %x, i32 %y) nounwind {
%3 = udiv i64 %x, %2
ret i64 %3
}
+
+; PR13250
+define i64 @t3(i64 %x, i32 %y) nounwind {
+; CHECK: t3
+; CHECK-NOT: udiv
+; CHECK-NEXT: %1 = add i32 %y, 2
+; CHECK-NEXT: %2 = zext i32 %1 to i64
+; CHECK-NEXT: %3 = lshr i64 %x, %2
+; CHECK-NEXT: ret i64 %3
+ %1 = shl i32 4, %y
+ %2 = zext i32 %1 to i64
+ %3 = udiv i64 %x, %2
+ ret i64 %3
+}
diff --git a/test/Transforms/InstCombine/fcmp.ll b/test/Transforms/InstCombine/fcmp.ll
index d08cbf574a23..376fa079d24c 100644
--- a/test/Transforms/InstCombine/fcmp.ll
+++ b/test/Transforms/InstCombine/fcmp.ll
@@ -54,9 +54,8 @@ define i1 @test7(float %x) nounwind readnone ssp noredzone {
%ext = fpext float %x to ppc_fp128
%cmp = fcmp ogt ppc_fp128 %ext, 0xM00000000000000000000000000000000
ret i1 %cmp
-; Can't convert ppc_fp128
; CHECK: @test7
-; CHECK-NEXT: fpext float %x to ppc_fp128
+; CHECK-NEXT: fcmp ogt float %x, 0.000000e+00
}
define float @test8(float %x) nounwind readnone optsize ssp {
@@ -69,3 +68,93 @@ define float @test8(float %x) nounwind readnone optsize ssp {
; CHECK: @test8
; CHECK-NEXT: fcmp olt float %x, 0.000000e+00
}
+
+declare double @fabs(double) nounwind readnone
+
+define i32 @test9(double %a) nounwind {
+ %call = tail call double @fabs(double %a) nounwind
+ %cmp = fcmp olt double %call, 0.000000e+00
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+; CHECK: @test9
+; CHECK-NOT: fabs
+; CHECK: ret i32 0
+}
+
+define i32 @test10(double %a) nounwind {
+ %call = tail call double @fabs(double %a) nounwind
+ %cmp = fcmp ole double %call, 0.000000e+00
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+; CHECK: @test10
+; CHECK-NOT: fabs
+; CHECK: fcmp oeq double %a, 0.000000e+00
+}
+
+define i32 @test11(double %a) nounwind {
+ %call = tail call double @fabs(double %a) nounwind
+ %cmp = fcmp ogt double %call, 0.000000e+00
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+; CHECK: @test11
+; CHECK-NOT: fabs
+; CHECK: fcmp one double %a, 0.000000e+00
+}
+
+define i32 @test12(double %a) nounwind {
+ %call = tail call double @fabs(double %a) nounwind
+ %cmp = fcmp oge double %call, 0.000000e+00
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+; CHECK: @test12
+; CHECK-NOT: fabs
+; CHECK: fcmp ord double %a, 0.000000e+00
+}
+
+define i32 @test13(double %a) nounwind {
+ %call = tail call double @fabs(double %a) nounwind
+ %cmp = fcmp une double %call, 0.000000e+00
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+; CHECK: @test13
+; CHECK-NOT: fabs
+; CHECK: fcmp une double %a, 0.000000e+00
+}
+
+define i32 @test14(double %a) nounwind {
+ %call = tail call double @fabs(double %a) nounwind
+ %cmp = fcmp oeq double %call, 0.000000e+00
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+; CHECK: @test14
+; CHECK-NOT: fabs
+; CHECK: fcmp oeq double %a, 0.000000e+00
+}
+
+define i32 @test15(double %a) nounwind {
+ %call = tail call double @fabs(double %a) nounwind
+ %cmp = fcmp one double %call, 0.000000e+00
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+; CHECK: @test15
+; CHECK-NOT: fabs
+; CHECK: fcmp one double %a, 0.000000e+00
+}
+
+define i32 @test16(double %a) nounwind {
+ %call = tail call double @fabs(double %a) nounwind
+ %cmp = fcmp ueq double %call, 0.000000e+00
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+; CHECK: @test16
+; CHECK-NOT: fabs
+; CHECK: fcmp ueq double %a, 0.000000e+00
+}
+
+; Don't crash.
+define i32 @test17(double %a, double (double)* %p) nounwind {
+ %call = tail call double %p(double %a) nounwind
+ %cmp = fcmp ueq double %call, 0.000000e+00
+ %conv = zext i1 %cmp to i32
+ ret i32 %conv
+}
diff --git a/test/Transforms/InstCombine/fold-vector-select.ll b/test/Transforms/InstCombine/fold-vector-select.ll
index 3f22522a6ce4..2cb970bf4177 100644
--- a/test/Transforms/InstCombine/fold-vector-select.ll
+++ b/test/Transforms/InstCombine/fold-vector-select.ll
@@ -1,13 +1,148 @@
; RUN: opt < %s -instcombine -S | not grep select
-define void @foo(<4 x i32> *%A, <4 x i32> *%B, <4 x i32> *%C, <4 x i32> *%D) {
- %r = select <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> <i32 1, i32 2, i32 3, i32 4>, <4 x i32> zeroinitializer
- %g = select <4 x i1> <i1 false, i1 false, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 3, i32 6, i32 9, i32 1>
- %b = select <4 x i1> <i1 false, i1 true, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 7, i32 1, i32 4, i32 9>
- %a = select <4 x i1> zeroinitializer, <4 x i32> zeroinitializer, <4 x i32> <i32 3, i32 2, i32 8, i32 5>
- store <4 x i32> %r, <4 x i32>* %A
- store <4 x i32> %g, <4 x i32>* %B
- store <4 x i32> %b, <4 x i32>* %C
- store <4 x i32> %a, <4 x i32>* %D
+define void @foo(<4 x i32> *%A, <4 x i32> *%B, <4 x i32> *%C, <4 x i32> *%D,
+ <4 x i32> *%E, <4 x i32> *%F, <4 x i32> *%G, <4 x i32> *%H,
+ <4 x i32> *%I, <4 x i32> *%J, <4 x i32> *%K, <4 x i32> *%L,
+ <4 x i32> *%M, <4 x i32> *%N, <4 x i32> *%O, <4 x i32> *%P,
+ <4 x i32> *%Q, <4 x i32> *%R, <4 x i32> *%S, <4 x i32> *%T,
+ <4 x i32> *%U, <4 x i32> *%V, <4 x i32> *%W, <4 x i32> *%X,
+ <4 x i32> *%Y, <4 x i32> *%Z, <4 x i32> *%BA, <4 x i32> *%BB,
+ <4 x i32> *%BC, <4 x i32> *%BD, <4 x i32> *%BE, <4 x i32> *%BF,
+ <4 x i32> *%BG, <4 x i32> *%BH, <4 x i32> *%BI, <4 x i32> *%BJ,
+ <4 x i32> *%BK, <4 x i32> *%BL, <4 x i32> *%BM, <4 x i32> *%BN,
+ <4 x i32> *%BO, <4 x i32> *%BP, <4 x i32> *%BQ, <4 x i32> *%BR,
+ <4 x i32> *%BS, <4 x i32> *%BT, <4 x i32> *%BU, <4 x i32> *%BV,
+ <4 x i32> *%BW, <4 x i32> *%BX, <4 x i32> *%BY, <4 x i32> *%BZ,
+ <4 x i32> *%CA, <4 x i32> *%CB, <4 x i32> *%CC, <4 x i32> *%CD,
+ <4 x i32> *%CE, <4 x i32> *%CF, <4 x i32> *%CG, <4 x i32> *%CH,
+ <4 x i32> *%CI, <4 x i32> *%CJ, <4 x i32> *%CK, <4 x i32> *%CL) {
+ %a = select <4 x i1> <i1 false, i1 false, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 9, i32 87, i32 57, i32 8>
+ %b = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 44, i32 99, i32 49, i32 29>
+ %c = select <4 x i1> <i1 false, i1 true, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 15, i32 18, i32 53, i32 84>
+ %d = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 29, i32 82, i32 45, i32 16>
+ %e = select <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 11, i32 15, i32 32, i32 99>
+ %f = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 19, i32 86, i32 29, i32 33>
+ %g = select <4 x i1> <i1 false, i1 true, i1 true, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 44, i32 10, i32 26, i32 45>
+ %h = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x i32> zeroinitializer, <4 x i32> <i32 88, i32 70, i32 90, i32 48>
+ %i = select <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 30, i32 53, i32 42, i32 12>
+ %j = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 46, i32 24, i32 93, i32 26>
+ %k = select <4 x i1> <i1 false, i1 true, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 33, i32 99, i32 15, i32 57>
+ %l = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 51, i32 60, i32 60, i32 50>
+ %m = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 50, i32 12, i32 7, i32 45>
+ %n = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 15, i32 65, i32 36, i32 36>
+ %o = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 54, i32 0, i32 17, i32 78>
+ %p = select <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> zeroinitializer, <4 x i32> <i32 56, i32 13, i32 64, i32 48>
+ %q = select <4 x i1> <i1 false, i1 false, i1 false, i1 false>, <4 x i32> <i32 52, i32 69, i32 88, i32 11>, <4 x i32> zeroinitializer
+ %r = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i32> <i32 5, i32 87, i32 68, i32 14>, <4 x i32> zeroinitializer
+ %s = select <4 x i1> <i1 false, i1 true, i1 false, i1 false>, <4 x i32> <i32 47, i32 17, i32 66, i32 63>, <4 x i32> zeroinitializer
+ %t = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i32> <i32 64, i32 25, i32 73, i32 81>, <4 x i32> zeroinitializer
+ %u = select <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x i32> <i32 51, i32 41, i32 61, i32 63>, <4 x i32> zeroinitializer
+ %v = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> <i32 39, i32 59, i32 17, i32 0>, <4 x i32> zeroinitializer
+ %w = select <4 x i1> <i1 false, i1 true, i1 true, i1 false>, <4 x i32> <i32 91, i32 99, i32 97, i32 29>, <4 x i32> zeroinitializer
+ %x = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x i32> <i32 89, i32 45, i32 89, i32 10>, <4 x i32> zeroinitializer
+ %y = select <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x i32> <i32 25, i32 70, i32 21, i32 27>, <4 x i32> zeroinitializer
+ %z = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> <i32 40, i32 12, i32 27, i32 88>, <4 x i32> zeroinitializer
+ %ba = select <4 x i1> <i1 false, i1 true, i1 false, i1 true>, <4 x i32> <i32 36, i32 35, i32 90, i32 23>, <4 x i32> zeroinitializer
+ %bb = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> <i32 83, i32 3, i32 64, i32 82>, <4 x i32> zeroinitializer
+ %bc = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x i32> <i32 15, i32 72, i32 2, i32 54>, <4 x i32> zeroinitializer
+ %bd = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i32> <i32 32, i32 47, i32 100, i32 84>, <4 x i32> zeroinitializer
+ %be = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x i32> <i32 92, i32 57, i32 82, i32 1>, <4 x i32> zeroinitializer
+ %bf = select <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> <i32 42, i32 14, i32 22, i32 89>, <4 x i32> zeroinitializer
+ %bg = select <4 x i1> <i1 false, i1 false, i1 false, i1 false>, <4 x i32> <i32 33, i32 10, i32 67, i32 66>, <4 x i32> <i32 42, i32 91, i32 47, i32 40>
+ %bh = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i32> <i32 8, i32 13, i32 48, i32 0>, <4 x i32> <i32 84, i32 66, i32 87, i32 84>
+ %bi = select <4 x i1> <i1 false, i1 true, i1 false, i1 false>, <4 x i32> <i32 85, i32 96, i32 1, i32 94>, <4 x i32> <i32 54, i32 57, i32 7, i32 92>
+ %bj = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i32> <i32 55, i32 21, i32 92, i32 68>, <4 x i32> <i32 51, i32 61, i32 62, i32 39>
+ %bk = select <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x i32> <i32 42, i32 18, i32 77, i32 74>, <4 x i32> <i32 82, i32 33, i32 30, i32 7>
+ %bl = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> <i32 80, i32 92, i32 61, i32 84>, <4 x i32> <i32 43, i32 89, i32 92, i32 6>
+ %bm = select <4 x i1> <i1 false, i1 true, i1 true, i1 false>, <4 x i32> <i32 49, i32 14, i32 62, i32 62>, <4 x i32> <i32 35, i32 33, i32 92, i32 59>
+ %bn = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x i32> <i32 3, i32 97, i32 49, i32 18>, <4 x i32> <i32 56, i32 64, i32 19, i32 75>
+ %bo = select <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x i32> <i32 91, i32 57, i32 0, i32 1>, <4 x i32> <i32 43, i32 63, i32 64, i32 11>
+ %bp = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> <i32 41, i32 65, i32 18, i32 11>, <4 x i32> <i32 86, i32 26, i32 31, i32 3>
+ %bq = select <4 x i1> <i1 false, i1 true, i1 false, i1 true>, <4 x i32> <i32 31, i32 46, i32 32, i32 68>, <4 x i32> <i32 100, i32 59, i32 62, i32 6>
+ %br = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> <i32 76, i32 67, i32 87, i32 7>, <4 x i32> <i32 63, i32 48, i32 97, i32 24>
+ %bs = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x i32> <i32 83, i32 89, i32 19, i32 4>, <4 x i32> <i32 21, i32 2, i32 40, i32 21>
+ %bt = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i32> <i32 45, i32 76, i32 81, i32 100>, <4 x i32> <i32 65, i32 26, i32 100, i32 46>
+ %bu = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x i32> <i32 16, i32 75, i32 31, i32 17>, <4 x i32> <i32 37, i32 66, i32 86, i32 65>
+ %bv = select <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> <i32 13, i32 25, i32 43, i32 59>, <4 x i32> <i32 82, i32 78, i32 60, i32 52>
+ %bw = select <4 x i1> <i1 false, i1 false, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ %bx = select <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ %by = select <4 x i1> <i1 false, i1 true, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ %bz = select <4 x i1> <i1 true, i1 true, i1 false, i1 false>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ %ca = select <4 x i1> <i1 false, i1 false, i1 true, i1 false>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ %cb = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ %cc = select <4 x i1> <i1 false, i1 true, i1 true, i1 false>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ %cd = select <4 x i1> <i1 true, i1 true, i1 true, i1 false>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ %ce = select <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ %cf = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ %cg = select <4 x i1> <i1 false, i1 true, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ %ch = select <4 x i1> <i1 true, i1 true, i1 false, i1 true>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ %ci = select <4 x i1> <i1 false, i1 false, i1 true, i1 true>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ %cj = select <4 x i1> <i1 true, i1 false, i1 true, i1 true>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ %ck = select <4 x i1> <i1 false, i1 true, i1 true, i1 true>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ %cl = select <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x i32> zeroinitializer, <4 x i32> zeroinitializer
+ store <4 x i32> %a, <4 x i32>* %A
+ store <4 x i32> %b, <4 x i32>* %B
+ store <4 x i32> %c, <4 x i32>* %C
+ store <4 x i32> %d, <4 x i32>* %D
+ store <4 x i32> %e, <4 x i32>* %E
+ store <4 x i32> %f, <4 x i32>* %F
+ store <4 x i32> %g, <4 x i32>* %G
+ store <4 x i32> %h, <4 x i32>* %H
+ store <4 x i32> %i, <4 x i32>* %I
+ store <4 x i32> %j, <4 x i32>* %J
+ store <4 x i32> %k, <4 x i32>* %K
+ store <4 x i32> %l, <4 x i32>* %L
+ store <4 x i32> %m, <4 x i32>* %M
+ store <4 x i32> %n, <4 x i32>* %N
+ store <4 x i32> %o, <4 x i32>* %O
+ store <4 x i32> %p, <4 x i32>* %P
+ store <4 x i32> %q, <4 x i32>* %Q
+ store <4 x i32> %r, <4 x i32>* %R
+ store <4 x i32> %s, <4 x i32>* %S
+ store <4 x i32> %t, <4 x i32>* %T
+ store <4 x i32> %u, <4 x i32>* %U
+ store <4 x i32> %v, <4 x i32>* %V
+ store <4 x i32> %w, <4 x i32>* %W
+ store <4 x i32> %x, <4 x i32>* %X
+ store <4 x i32> %y, <4 x i32>* %Y
+ store <4 x i32> %z, <4 x i32>* %Z
+ store <4 x i32> %ba, <4 x i32>* %BA
+ store <4 x i32> %bb, <4 x i32>* %BB
+ store <4 x i32> %bc, <4 x i32>* %BC
+ store <4 x i32> %bd, <4 x i32>* %BD
+ store <4 x i32> %be, <4 x i32>* %BE
+ store <4 x i32> %bf, <4 x i32>* %BF
+ store <4 x i32> %bg, <4 x i32>* %BG
+ store <4 x i32> %bh, <4 x i32>* %BH
+ store <4 x i32> %bi, <4 x i32>* %BI
+ store <4 x i32> %bj, <4 x i32>* %BJ
+ store <4 x i32> %bk, <4 x i32>* %BK
+ store <4 x i32> %bl, <4 x i32>* %BL
+ store <4 x i32> %bm, <4 x i32>* %BM
+ store <4 x i32> %bn, <4 x i32>* %BN
+ store <4 x i32> %bo, <4 x i32>* %BO
+ store <4 x i32> %bp, <4 x i32>* %BP
+ store <4 x i32> %bq, <4 x i32>* %BQ
+ store <4 x i32> %br, <4 x i32>* %BR
+ store <4 x i32> %bs, <4 x i32>* %BS
+ store <4 x i32> %bt, <4 x i32>* %BT
+ store <4 x i32> %bu, <4 x i32>* %BU
+ store <4 x i32> %bv, <4 x i32>* %BV
+ store <4 x i32> %bw, <4 x i32>* %BW
+ store <4 x i32> %bx, <4 x i32>* %BX
+ store <4 x i32> %by, <4 x i32>* %BY
+ store <4 x i32> %bz, <4 x i32>* %BZ
+ store <4 x i32> %ca, <4 x i32>* %CA
+ store <4 x i32> %cb, <4 x i32>* %CB
+ store <4 x i32> %cc, <4 x i32>* %CC
+ store <4 x i32> %cd, <4 x i32>* %CD
+ store <4 x i32> %ce, <4 x i32>* %CE
+ store <4 x i32> %cf, <4 x i32>* %CF
+ store <4 x i32> %cg, <4 x i32>* %CG
+ store <4 x i32> %ch, <4 x i32>* %CH
+ store <4 x i32> %ci, <4 x i32>* %CI
+ store <4 x i32> %cj, <4 x i32>* %CJ
+ store <4 x i32> %ck, <4 x i32>* %CK
+ store <4 x i32> %cl, <4 x i32>* %CL
ret void
}
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index eaff87d695ed..8e064a4f2fc9 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -659,3 +659,21 @@ define i1 @test64(i8 %a, i32 %b) nounwind {
; CHECK-NEXT: %c = icmp eq i8 %1, %a
; CHECK-NEXT: ret i1 %c
}
+
+define i1 @test65(i64 %A, i64 %B) {
+ %s1 = add i64 %A, %B
+ %s2 = add i64 %A, %B
+ %cmp = icmp eq i64 %s1, %s2
+; CHECK: @test65
+; CHECK-NEXT: ret i1 true
+ ret i1 %cmp
+}
+
+define i1 @test66(i64 %A, i64 %B) {
+ %s1 = add i64 %A, %B
+ %s2 = add i64 %B, %A
+ %cmp = icmp eq i64 %s1, %s2
+; CHECK: @test66
+; CHECK-NEXT: ret i1 true
+ ret i1 %cmp
+}
diff --git a/test/Transforms/InstCombine/memcmp-1.ll b/test/Transforms/InstCombine/memcmp-1.ll
new file mode 100644
index 000000000000..4238c5f8fb15
--- /dev/null
+++ b/test/Transforms/InstCombine/memcmp-1.ll
@@ -0,0 +1,72 @@
+; Test that the memcmp library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@foo = constant [4 x i8] c"foo\00"
+@hel = constant [4 x i8] c"hel\00"
+@hello_u = constant [8 x i8] c"hello_u\00"
+
+declare i32 @memcmp(i8*, i8*, i32)
+
+; Check memcmp(mem, mem, size) -> 0.
+
+define i32 @test_simplify1(i8* %mem, i32 %size) {
+; CHECK: @test_simplify1
+ %ret = call i32 @memcmp(i8* %mem, i8* %mem, i32 %size)
+ ret i32 %ret
+; CHECK: ret i32 0
+}
+
+; Check memcmp(mem1, mem2, 0) -> 0.
+
+define i32 @test_simplify2(i8* %mem1, i8* %mem2) {
+; CHECK: @test_simplify2
+ %ret = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 0)
+ ret i32 %ret
+; CHECK: ret i32 0
+}
+
+;; Check memcmp(mem1, mem2, 1) -> *(unsigned char*)mem1 - *(unsigned char*)mem2.
+
+define i32 @test_simplify3(i8* %mem1, i8* %mem2) {
+; CHECK: @test_simplify3
+ %ret = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 1)
+; CHECK: [[LOAD1:%[a-z]+]] = load i8* %mem1, align 1
+; CHECK: [[ZEXT1:%[a-z]+]] = zext i8 [[LOAD1]] to i32
+; CHECK: [[LOAD2:%[a-z]+]] = load i8* %mem2, align 1
+; CHECK: [[ZEXT2:%[a-z]+]] = zext i8 [[LOAD2]] to i32
+; CHECK: [[RET:%[a-z]+]] = sub i32 [[ZEXT1]], [[ZEXT2]]
+ ret i32 %ret
+; CHECK: ret i32 [[RET]]
+}
+
+; Check memcmp(mem1, mem2, size) -> cnst, where all arguments are constants.
+
+define i32 @test_simplify4() {
+; CHECK: @test_simplify4
+ %mem1 = getelementptr [4 x i8]* @hel, i32 0, i32 0
+ %mem2 = getelementptr [8 x i8]* @hello_u, i32 0, i32 0
+ %ret = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 3)
+ ret i32 %ret
+; CHECK: ret i32 0
+}
+
+define i32 @test_simplify5() {
+; CHECK: @test_simplify5
+ %mem1 = getelementptr [4 x i8]* @hel, i32 0, i32 0
+ %mem2 = getelementptr [4 x i8]* @foo, i32 0, i32 0
+ %ret = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 3)
+ ret i32 %ret
+; CHECK: ret i32 {{[0-9]+}}
+}
+
+define i32 @test_simplify6() {
+; CHECK: @test_simplify6
+ %mem1 = getelementptr [4 x i8]* @foo, i32 0, i32 0
+ %mem2 = getelementptr [4 x i8]* @hel, i32 0, i32 0
+ %ret = call i32 @memcmp(i8* %mem1, i8* %mem2, i32 3)
+ ret i32 %ret
+; CHECK: ret i32 {{-[0-9]+}}
+}
diff --git a/test/Transforms/InstCombine/memcmp-2.ll b/test/Transforms/InstCombine/memcmp-2.ll
new file mode 100644
index 000000000000..3796117bc24c
--- /dev/null
+++ b/test/Transforms/InstCombine/memcmp-2.ll
@@ -0,0 +1,17 @@
+; Test that the memcmp library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i32* @memcmp(i8*, i8*, i32)
+
+; Check that memcmp functions with the wrong prototype aren't simplified.
+
+define i32* @test_no_simplify1(i8* %mem, i32 %size) {
+; CHECK: @test_no_simplify1
+ %ret = call i32* @memcmp(i8* %mem, i8* %mem, i32 %size)
+; CHECK-NEXT: call i32* @memcmp
+ ret i32* %ret
+; CHECK-NEXT: ret i32* %ret
+}
diff --git a/test/Transforms/InstCombine/memcpy-1.ll b/test/Transforms/InstCombine/memcpy-1.ll
new file mode 100644
index 000000000000..65b79ad03df4
--- /dev/null
+++ b/test/Transforms/InstCombine/memcpy-1.ll
@@ -0,0 +1,17 @@
+; Test that the memcpy library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i8* @memcpy(i8*, i8*, i32)
+
+; Check memcpy(mem1, mem2, size) -> llvm.memcpy(mem1, mem2, size, 1).
+
+define i8* @test_simplify1(i8* %mem1, i8* %mem2, i32 %size) {
+; CHECK: @test_simplify1
+ %ret = call i8* @memcpy(i8* %mem1, i8* %mem2, i32 %size)
+; CHECK: call void @llvm.memcpy
+ ret i8* %ret
+; CHECK: ret i8* %mem1
+}
diff --git a/test/Transforms/InstCombine/memcpy-2.ll b/test/Transforms/InstCombine/memcpy-2.ll
new file mode 100644
index 000000000000..4a8a02018f5e
--- /dev/null
+++ b/test/Transforms/InstCombine/memcpy-2.ll
@@ -0,0 +1,17 @@
+; Test that the memcpy library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i8 @memcpy(i8*, i8*, i32)
+
+; Check that memcpy functions with the wrong prototype aren't simplified.
+
+define i8 @test_no_simplify1(i8* %mem1, i8* %mem2, i32 %size) {
+; CHECK: @test_no_simplify1
+ %ret = call i8 @memcpy(i8* %mem1, i8* %mem2, i32 %size)
+; CHECK: call i8 @memcpy
+ ret i8 %ret
+; CHECK: ret i8 %ret
+}
diff --git a/test/Transforms/ScalarRepl/memcpy-from-global.ll b/test/Transforms/InstCombine/memcpy-from-global.ll
index 5557a8fd8754..83c893e17dd6 100644
--- a/test/Transforms/ScalarRepl/memcpy-from-global.ll
+++ b/test/Transforms/InstCombine/memcpy-from-global.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
+; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
@C.0.1248 = internal constant [128 x float] [ float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 
1.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float -1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float -1.000000e+00, float 1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float -1.000000e+00, float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00 ], align 32 ; <[128 x float]*> [#uses=1]
@@ -6,13 +6,11 @@ define float @test1(i32 %hash, float %x, float %y, float %z, float %w) {
entry:
%lookupTable = alloca [128 x float], align 16 ; <[128 x float]*> [#uses=5]
%lookupTable1 = bitcast [128 x float]* %lookupTable to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.i32( i8* %lookupTable1, i8* bitcast ([128 x float]* @C.0.1248 to i8*), i32 512, i32 16 )
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %lookupTable1, i8* bitcast ([128 x float]* @C.0.1248 to i8*), i64 512, i32 16, i1 false)
; CHECK: @test1
; CHECK-NOT: alloca
; CHECK-NOT: call{{.*}}@llvm.memcpy
-; CHECK: %lookupTable1 = bitcast [128 x float]* @C.0.1248 to i8*
-; CHECK-NOT: call{{.*}}@llvm.memcpy
%tmp3 = shl i32 %hash, 2 ; <i32> [#uses=1]
%tmp5 = and i32 %tmp3, 124 ; <i32> [#uses=4]
@@ -38,10 +36,6 @@ entry:
ret float %tmp43
}
-declare void @llvm.memcpy.i32(i8*, i8*, i32, i32)
-
-
-
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
%T = type { i8, [123 x i8] }
@@ -59,10 +53,11 @@ define void @test2() {
; CHECK: @test2
; %A alloca is deleted
-; CHECK-NEXT: %B = alloca %T
+; CHECK-NEXT: alloca [124 x i8]
+; CHECK-NEXT: getelementptr inbounds [124 x i8]*
; use @G instead of %A
-; CHECK-NEXT: %a = bitcast %T* @G to i8*
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* %{{.*}}, i8* getelementptr inbounds (%T* @G, i64 0, i32 0)
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false)
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %b, i8* %a, i64 124, i32 4, i1 false)
call void @bar(i8* %b)
@@ -79,8 +74,7 @@ define void @test3() {
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false)
call void @bar(i8* %a) readonly
; CHECK: @test3
-; CHECK-NEXT: %a = bitcast %T* @G to i8*
-; CHECK-NEXT: call void @bar(i8* %a)
+; CHECK-NEXT: call void @bar(i8* getelementptr inbounds (%T* @G, i64 0, i32 0))
ret void
}
@@ -90,8 +84,7 @@ define void @test4() {
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false)
call void @baz(i8* byval %a)
; CHECK: @test4
-; CHECK-NEXT: %a = bitcast %T* @G to i8*
-; CHECK-NEXT: call void @baz(i8* byval %a)
+; CHECK-NEXT: call void @baz(i8* byval getelementptr inbounds (%T* @G, i64 0, i32 0))
ret void
}
@@ -103,8 +96,7 @@ define void @test5() {
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%T* @G to i8*), i64 124, i32 4, i1 false)
call void @baz(i8* byval %a)
; CHECK: @test5
-; CHECK-NEXT: %a = bitcast %T* @G to i8*
-; CHECK-NEXT: call void @baz(i8* byval %a)
+; CHECK-NEXT: call void @baz(i8* byval getelementptr inbounds (%T* @G, i64 0, i32 0))
ret void
}
@@ -118,8 +110,7 @@ define void @test6() {
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast ([2 x %U]* @H to i8*), i64 20, i32 16, i1 false)
call void @bar(i8* %a) readonly
; CHECK: @test6
-; CHECK-NEXT: %a = bitcast
-; CHECK-NEXT: call void @bar(i8* %a)
+; CHECK-NEXT: call void @bar(i8* bitcast ([2 x %U]* @H to i8*))
ret void
}
@@ -129,8 +120,7 @@ define void @test7() {
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a, i8* bitcast (%U* getelementptr ([2 x %U]* @H, i64 0, i32 0) to i8*), i64 20, i32 4, i1 false)
call void @bar(i8* %a) readonly
; CHECK: @test7
-; CHECK-NEXT: %a = bitcast
-; CHECK-NEXT: call void @bar(i8* %a)
+; CHECK-NEXT: call void @bar(i8* bitcast ([2 x %U]* @H to i8*))
ret void
}
diff --git a/test/Transforms/InstCombine/memcpy_chk-1.ll b/test/Transforms/InstCombine/memcpy_chk-1.ll
new file mode 100644
index 000000000000..7c7d91808a37
--- /dev/null
+++ b/test/Transforms/InstCombine/memcpy_chk-1.ll
@@ -0,0 +1,60 @@
+; Test lib call simplification of __memcpy_chk calls with various values
+; for dstlen and len.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+%struct.T1 = type { [100 x i32], [100 x i32], [1024 x i8] }
+%struct.T2 = type { [100 x i32], [100 x i32], [1024 x i8] }
+%struct.T3 = type { [100 x i32], [100 x i32], [2048 x i8] }
+
+@t1 = common global %struct.T1 zeroinitializer
+@t2 = common global %struct.T2 zeroinitializer
+@t3 = common global %struct.T3 zeroinitializer
+
+; Check cases where dstlen >= len.
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+ %dst = bitcast %struct.T1* @t1 to i8*
+ %src = bitcast %struct.T2* @t2 to i8*
+
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64
+ call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824, i64 1824)
+ ret void
+}
+
+define void @test_simplify2() {
+; CHECK: @test_simplify2
+ %dst = bitcast %struct.T1* @t1 to i8*
+ %src = bitcast %struct.T3* @t3 to i8*
+
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64
+ call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824, i64 2848)
+ ret void
+}
+
+; Check cases where dstlen < len.
+
+define void @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+ %dst = bitcast %struct.T3* @t3 to i8*
+ %src = bitcast %struct.T1* @t1 to i8*
+
+; CHECK-NEXT: call i8* @__memcpy_chk
+ call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 2848, i64 1824)
+ ret void
+}
+
+define void @test_no_simplify2() {
+; CHECK: @test_no_simplify2
+ %dst = bitcast %struct.T1* @t1 to i8*
+ %src = bitcast %struct.T2* @t2 to i8*
+
+; CHECK-NEXT: call i8* @__memcpy_chk
+ call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1024, i64 0)
+ ret void
+}
+
+declare i8* @__memcpy_chk(i8*, i8*, i64, i64)
diff --git a/test/Transforms/InstCombine/memcpy_chk-2.ll b/test/Transforms/InstCombine/memcpy_chk-2.ll
new file mode 100644
index 000000000000..aa43029d47fc
--- /dev/null
+++ b/test/Transforms/InstCombine/memcpy_chk-2.ll
@@ -0,0 +1,24 @@
+; Test that lib call simplification doesn't simplify __memcpy_chk calls
+; with the wrong prototype.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+%struct.T1 = type { [100 x i32], [100 x i32], [1024 x i8] }
+%struct.T2 = type { [100 x i32], [100 x i32], [1024 x i8] }
+
+@t1 = common global %struct.T1 zeroinitializer
+@t2 = common global %struct.T2 zeroinitializer
+
+define void @test_no_simplify() {
+; CHECK: @test_no_simplify
+ %dst = bitcast %struct.T1* @t1 to i8*
+ %src = bitcast %struct.T2* @t2 to i8*
+
+; CHECK-NEXT: call i8* @__memcpy_chk
+ call i8* @__memcpy_chk(i8* %dst, i8* %src, i64 1824)
+ ret void
+}
+
+declare i8* @__memcpy_chk(i8*, i8*, i64)
diff --git a/test/Transforms/InstCombine/memmove-1.ll b/test/Transforms/InstCombine/memmove-1.ll
new file mode 100644
index 000000000000..53f2f116c777
--- /dev/null
+++ b/test/Transforms/InstCombine/memmove-1.ll
@@ -0,0 +1,17 @@
+; Test that the memmove library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i8* @memmove(i8*, i8*, i32)
+
+; Check memmove(mem1, mem2, size) -> llvm.memmove(mem1, mem2, size, 1).
+
+define i8* @test_simplify1(i8* %mem1, i8* %mem2, i32 %size) {
+; CHECK: @test_simplify1
+ %ret = call i8* @memmove(i8* %mem1, i8* %mem2, i32 %size)
+; CHECK: call void @llvm.memmove
+ ret i8* %ret
+; CHECK: ret i8* %mem1
+}
diff --git a/test/Transforms/InstCombine/memmove-2.ll b/test/Transforms/InstCombine/memmove-2.ll
new file mode 100644
index 000000000000..23887bce31d8
--- /dev/null
+++ b/test/Transforms/InstCombine/memmove-2.ll
@@ -0,0 +1,17 @@
+; Test that the memmove library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i8 @memmove(i8*, i8*, i32)
+
+; Check that memmove functions with the wrong prototype aren't simplified.
+
+define i8 @test_no_simplify1(i8* %mem1, i8* %mem2, i32 %size) {
+; CHECK: @test_no_simplify1
+ %ret = call i8 @memmove(i8* %mem1, i8* %mem2, i32 %size)
+; CHECK: call i8 @memmove
+ ret i8 %ret
+; CHECK: ret i8 %ret
+}
diff --git a/test/Transforms/InstCombine/memmove_chk-1.ll b/test/Transforms/InstCombine/memmove_chk-1.ll
new file mode 100644
index 000000000000..f9ff9a103a30
--- /dev/null
+++ b/test/Transforms/InstCombine/memmove_chk-1.ll
@@ -0,0 +1,60 @@
+; Test lib call simplification of __memmove_chk calls with various values
+; for dstlen and len.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+%struct.T1 = type { [100 x i32], [100 x i32], [1024 x i8] }
+%struct.T2 = type { [100 x i32], [100 x i32], [1024 x i8] }
+%struct.T3 = type { [100 x i32], [100 x i32], [2048 x i8] }
+
+@t1 = common global %struct.T1 zeroinitializer
+@t2 = common global %struct.T2 zeroinitializer
+@t3 = common global %struct.T3 zeroinitializer
+
+; Check cases where dstlen >= len.
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+ %dst = bitcast %struct.T1* @t1 to i8*
+ %src = bitcast %struct.T2* @t2 to i8*
+
+; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64
+ call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824, i64 1824)
+ ret void
+}
+
+define void @test_simplify2() {
+; CHECK: @test_simplify2
+ %dst = bitcast %struct.T1* @t1 to i8*
+ %src = bitcast %struct.T3* @t3 to i8*
+
+; CHECK-NEXT: call void @llvm.memmove.p0i8.p0i8.i64
+ call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824, i64 2848)
+ ret void
+}
+
+; Check cases where dstlen < len.
+
+define void @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+ %dst = bitcast %struct.T3* @t3 to i8*
+ %src = bitcast %struct.T1* @t1 to i8*
+
+; CHECK-NEXT: call i8* @__memmove_chk
+ call i8* @__memmove_chk(i8* %dst, i8* %src, i64 2848, i64 1824)
+ ret void
+}
+
+define void @test_no_simplify2() {
+; CHECK: @test_no_simplify2
+ %dst = bitcast %struct.T1* @t1 to i8*
+ %src = bitcast %struct.T2* @t2 to i8*
+
+; CHECK-NEXT: call i8* @__memmove_chk
+ call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1024, i64 0)
+ ret void
+}
+
+declare i8* @__memmove_chk(i8*, i8*, i64, i64)
diff --git a/test/Transforms/InstCombine/memmove_chk-2.ll b/test/Transforms/InstCombine/memmove_chk-2.ll
new file mode 100644
index 000000000000..f0a915fde2e9
--- /dev/null
+++ b/test/Transforms/InstCombine/memmove_chk-2.ll
@@ -0,0 +1,24 @@
+; Test that lib call simplification doesn't simplify __memmove_chk calls
+; with the wrong prototype.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+%struct.T1 = type { [100 x i32], [100 x i32], [1024 x i8] }
+%struct.T2 = type { [100 x i32], [100 x i32], [1024 x i8] }
+
+@t1 = common global %struct.T1 zeroinitializer
+@t2 = common global %struct.T2 zeroinitializer
+
+define void @test_no_simplify() {
+; CHECK: @test_no_simplify
+ %dst = bitcast %struct.T1* @t1 to i8*
+ %src = bitcast %struct.T2* @t2 to i8*
+
+; CHECK-NEXT: call i8* @__memmove_chk
+ call i8* @__memmove_chk(i8* %dst, i8* %src, i64 1824)
+ ret void
+}
+
+declare i8* @__memmove_chk(i8*, i8*, i64)
diff --git a/test/Transforms/InstCombine/memset-1.ll b/test/Transforms/InstCombine/memset-1.ll
new file mode 100644
index 000000000000..48b433e137c0
--- /dev/null
+++ b/test/Transforms/InstCombine/memset-1.ll
@@ -0,0 +1,17 @@
+; Test that the memset library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i8* @memset(i8*, i32, i32)
+
+; Check memset(mem1, val, size) -> llvm.memset(mem1, val, size, 1).
+
+define i8* @test_simplify1(i8* %mem, i32 %val, i32 %size) {
+; CHECK: @test_simplify1
+ %ret = call i8* @memset(i8* %mem, i32 %val, i32 %size)
+; CHECK: call void @llvm.memset
+ ret i8* %ret
+; CHECK: ret i8* %mem
+}
diff --git a/test/Transforms/InstCombine/memset-2.ll b/test/Transforms/InstCombine/memset-2.ll
new file mode 100644
index 000000000000..8a9033302d04
--- /dev/null
+++ b/test/Transforms/InstCombine/memset-2.ll
@@ -0,0 +1,17 @@
+; Test that the memset library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i8 @memset(i8*, i32, i32)
+
+; Check that memset functions with the wrong prototype aren't simplified.
+
+define i8 @test_no_simplify1(i8* %mem, i32 %val, i32 %size) {
+; CHECK: @test_no_simplify1
+ %ret = call i8 @memset(i8* %mem, i32 %val, i32 %size)
+; CHECK: call i8 @memset
+ ret i8 %ret
+; CHECK: ret i8 %ret
+}
diff --git a/test/Transforms/InstCombine/memset_chk-1.ll b/test/Transforms/InstCombine/memset_chk-1.ll
new file mode 100644
index 000000000000..be4c1cfccdb2
--- /dev/null
+++ b/test/Transforms/InstCombine/memset_chk-1.ll
@@ -0,0 +1,61 @@
+; Test lib call simplification of __memset_chk calls with various values
+; for dstlen and len.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; rdar://7719085
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+%struct.T = type { [100 x i32], [100 x i32], [1024 x i8] }
+@t = common global %struct.T zeroinitializer
+
+; Check cases where dstlen >= len.
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+ %dst = bitcast %struct.T* @t to i8*
+
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64
+ call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 1824)
+ ret void
+}
+
+define void @test_simplify2() {
+; CHECK: @test_simplify2
+ %dst = bitcast %struct.T* @t to i8*
+
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64
+ call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 3648)
+ ret void
+}
+
+define void @test_simplify3() {
+; CHECK: @test_simplify3
+ %dst = bitcast %struct.T* @t to i8*
+
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64
+ call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 -1)
+ ret void
+}
+
+; Check cases where dstlen < len.
+
+define void @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+ %dst = bitcast %struct.T* @t to i8*
+
+; CHECK-NEXT: call i8* @__memset_chk
+ call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 400)
+ ret void
+}
+
+define void @test_no_simplify2() {
+; CHECK: @test_no_simplify2
+ %dst = bitcast %struct.T* @t to i8*
+
+; CHECK-NEXT: call i8* @__memset_chk
+ call i8* @__memset_chk(i8* %dst, i32 0, i64 1824, i64 0)
+ ret void
+}
+
+declare i8* @__memset_chk(i8*, i32, i64, i64)
diff --git a/test/Transforms/InstCombine/memset_chk-2.ll b/test/Transforms/InstCombine/memset_chk-2.ll
new file mode 100644
index 000000000000..60fbf163c212
--- /dev/null
+++ b/test/Transforms/InstCombine/memset_chk-2.ll
@@ -0,0 +1,20 @@
+; Test that lib call simplification doesn't simplify __memset_chk calls
+; with the wrong prototype.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+%struct.T = type { [100 x i32], [100 x i32], [1024 x i8] }
+@t = common global %struct.T zeroinitializer
+
+define void @test_no_simplify() {
+; CHECK: @test_no_simplify
+ %dst = bitcast %struct.T* @t to i8*
+
+; CHECK-NEXT: call i8* @__memset_chk
+ call i8* @__memset_chk(i8* %dst, i32 0, i64 1824)
+ ret void
+}
+
+declare i8* @__memset_chk(i8*, i32, i64)
diff --git a/test/Transforms/InstCombine/memset_chk.ll b/test/Transforms/InstCombine/memset_chk.ll
deleted file mode 100644
index 58ecda582fd1..000000000000
--- a/test/Transforms/InstCombine/memset_chk.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
-; rdar://7719085
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-
-%struct.data = type { [100 x i32], [100 x i32], [1024 x i8] }
-
-define i32 @t() nounwind ssp {
-; CHECK: @t
-; CHECK: @llvm.memset.p0i8.i64
-entry:
- %0 = alloca %struct.data, align 8 ; <%struct.data*> [#uses=1]
- %1 = bitcast %struct.data* %0 to i8* ; <i8*> [#uses=1]
- %2 = call i8* @__memset_chk(i8* %1, i32 0, i64 1824, i64 1824) nounwind ; <i8*> [#uses=0]
- ret i32 0
-}
-
-declare i8* @__memset_chk(i8*, i32, i64, i64) nounwind
diff --git a/test/Transforms/InstCombine/obfuscated_splat.ll b/test/Transforms/InstCombine/obfuscated_splat.ll
new file mode 100644
index 000000000000..c25dade168a4
--- /dev/null
+++ b/test/Transforms/InstCombine/obfuscated_splat.ll
@@ -0,0 +1,11 @@
+; RUN: opt -instcombine -S %s | FileCheck %s
+
+define void @test(<4 x float> *%in_ptr, <4 x float> *%out_ptr) {
+ %A = load <4 x float>* %in_ptr, align 16
+ %B = shufflevector <4 x float> %A, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 undef, i32 undef>
+ %C = shufflevector <4 x float> %B, <4 x float> %A, <4 x i32> <i32 0, i32 1, i32 4, i32 undef>
+ %D = shufflevector <4 x float> %C, <4 x float> %A, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
+; CHECK: %D = shufflevector <4 x float> %A, <4 x float> undef, <4 x i32> zeroinitializer
+ store <4 x float> %D, <4 x float> *%out_ptr
+ ret void
+}
diff --git a/test/Transforms/InstCombine/objsize.ll b/test/Transforms/InstCombine/objsize.ll
index d7e292155cd7..31a3cb46e459 100644
--- a/test/Transforms/InstCombine/objsize.ll
+++ b/test/Transforms/InstCombine/objsize.ll
@@ -247,7 +247,8 @@ entry:
; technically reachable, but this malformed IR may appear as a result of constant propagation
xpto:
- %gep = getelementptr i8* %gep, i32 1
+ %gep2 = getelementptr i8* %gep, i32 1
+ %gep = getelementptr i8* %gep2, i32 1
%o = call i32 @llvm.objectsize.i32(i8* %gep, i1 true)
; CHECK: ret i32 undef
ret i32 %o
diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll
index 4baae2618dde..cc3aacdce3c8 100644
--- a/test/Transforms/InstCombine/select.ll
+++ b/test/Transforms/InstCombine/select.ll
@@ -829,3 +829,37 @@ define i1 @test63(i1 %A, i1 %B) {
; CHECK: %C = or i1 %B, %not
; CHECK: ret i1 %C
}
+
+; PR14131
+define void @test64(i32 %p, i16 %b) noreturn nounwind {
+entry:
+ %p.addr.0.insert.mask = and i32 %p, -65536
+ %conv2 = and i32 %p, 65535
+ br i1 undef, label %lor.rhs, label %lor.end
+
+lor.rhs:
+ %p.addr.0.extract.trunc = trunc i32 %p.addr.0.insert.mask to i16
+ %phitmp = zext i16 %p.addr.0.extract.trunc to i32
+ br label %lor.end
+
+lor.end:
+ %t.1 = phi i32 [ 0, %entry ], [ %phitmp, %lor.rhs ]
+ %conv6 = zext i16 %b to i32
+ %div = udiv i32 %conv6, %t.1
+ %tobool8 = icmp eq i32 %div, 0
+ %cmp = icmp eq i32 %t.1, 0
+ %cmp12 = icmp ult i32 %conv2, 2
+ %cmp.sink = select i1 %tobool8, i1 %cmp12, i1 %cmp
+ br i1 %cmp.sink, label %cond.end17, label %cond.false16
+
+cond.false16:
+ br label %cond.end17
+
+cond.end17:
+ br label %while.body
+
+while.body:
+ br label %while.body
+; CHECK: @test64
+; CHECK-NOT: select
+}
diff --git a/test/Transforms/InstCombine/stpcpy-1.ll b/test/Transforms/InstCombine/stpcpy-1.ll
new file mode 100644
index 000000000000..8b6bb0e0d509
--- /dev/null
+++ b/test/Transforms/InstCombine/stpcpy-1.ll
@@ -0,0 +1,46 @@
+; Test that the stpcpy library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+;
+; This transformation requires the pointer size, as it assumes that size_t is
+; the size of a pointer.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+
+@hello = constant [6 x i8] c"hello\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+@b = common global [32 x i8] zeroinitializer, align 1
+
+declare i8* @stpcpy(i8*, i8*)
+
+define i8* @test_simplify1() {
+; CHECK: @test_simplify1
+
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+
+ %ret = call i8* @stpcpy(i8* %dst, i8* %src)
+; CHECK: @llvm.memcpy.p0i8.p0i8.i32
+; CHECK-NEXT: getelementptr inbounds ([32 x i8]* @a, i32 0, i32 5)
+ ret i8* %ret
+}
+
+define i8* @test_simplify2() {
+; CHECK: @test_simplify2
+
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+
+ %ret = call i8* @stpcpy(i8* %dst, i8* %dst)
+; CHECK: [[LEN:%[a-z]+]] = call i32 @strlen
+; CHECK-NEXT: getelementptr inbounds [32 x i8]* @a, i32 0, i32 [[LEN]]
+ ret i8* %ret
+}
+
+define i8* @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [32 x i8]* @b, i32 0, i32 0
+
+ %ret = call i8* @stpcpy(i8* %dst, i8* %src)
+; CHECK: call i8* @stpcpy
+ ret i8* %ret
+}
diff --git a/test/Transforms/InstCombine/stpcpy-2.ll b/test/Transforms/InstCombine/stpcpy-2.ll
new file mode 100644
index 000000000000..2e92c0895ed4
--- /dev/null
+++ b/test/Transforms/InstCombine/stpcpy-2.ll
@@ -0,0 +1,22 @@
+; Test that the stpcpy library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+;
+; This transformation requires the pointer size, as it assumes that size_t is
+; the size of a pointer.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+
+@hello = constant [6 x i8] c"hello\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+
+declare i16* @stpcpy(i8*, i8*)
+
+define void @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+
+ call i16* @stpcpy(i8* %dst, i8* %src)
+; CHECK: call i16* @stpcpy
+ ret void
+}
diff --git a/test/Transforms/InstCombine/stpcpy_chk-1.ll b/test/Transforms/InstCombine/stpcpy_chk-1.ll
new file mode 100644
index 000000000000..05603918c642
--- /dev/null
+++ b/test/Transforms/InstCombine/stpcpy_chk-1.ll
@@ -0,0 +1,96 @@
+; Test lib call simplification of __stpcpy_chk calls with various values
+; for src, dst, and slen.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@a = common global [60 x i8] zeroinitializer, align 1
+@b = common global [60 x i8] zeroinitializer, align 1
+@.str = private constant [12 x i8] c"abcdefghijk\00"
+
+; Check cases where slen >= strlen (src).
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32
+ call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 60)
+ ret void
+}
+
+define void @test_simplify2() {
+; CHECK: @test_simplify2
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32
+ call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 12)
+ ret void
+}
+
+define void @test_simplify3() {
+; CHECK: @test_simplify3
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32
+ call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 -1)
+ ret void
+}
+
+; Check cases where there are no string constants.
+
+define void @test_simplify4() {
+; CHECK: @test_simplify4
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0
+
+; CHECK-NEXT: call i8* @stpcpy
+ call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 -1)
+ ret void
+}
+
+; Check case where the string length is not constant.
+
+define i8* @test_simplify5() {
+; CHECK: @test_simplify5
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
+
+; CHECK: @__memcpy_chk
+ %len = call i32 @llvm.objectsize.i32(i8* %dst, i1 false)
+ %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 %len)
+; CHECK: ret i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 11)
+ ret i8* %ret
+}
+
+; Check case where the source and destination are the same.
+
+define i8* @test_simplify6() {
+; CHECK: @test_simplify6
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+
+; CHECK: [[LEN:%[a-z]+]] = call i32 @strlen
+; CHECK-NEXT: getelementptr inbounds [60 x i8]* @a, i32 0, i32 [[LEN]]
+ %len = call i32 @llvm.objectsize.i32(i8* %dst, i1 false)
+ %ret = call i8* @__stpcpy_chk(i8* %dst, i8* %dst, i32 %len)
+ ret i8* %ret
+}
+
+; Check case where slen < strlen (src).
+
+define void @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0
+
+; CHECK-NEXT: call i8* @__stpcpy_chk
+ call i8* @__stpcpy_chk(i8* %dst, i8* %src, i32 8)
+ ret void
+}
+
+declare i8* @__stpcpy_chk(i8*, i8*, i32) nounwind
+declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly
diff --git a/test/Transforms/InstCombine/stpcpy_chk-2.ll b/test/Transforms/InstCombine/stpcpy_chk-2.ll
new file mode 100644
index 000000000000..46c2139276e2
--- /dev/null
+++ b/test/Transforms/InstCombine/stpcpy_chk-2.ll
@@ -0,0 +1,21 @@
+; Test that lib call simplification doesn't simplify __stpcpy_chk calls
+; with the wrong prototype.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@a = common global [60 x i16] zeroinitializer, align 1
+@.str = private constant [8 x i8] c"abcdefg\00"
+
+define void @test_no_simplify() {
+; CHECK: @test_no_simplify
+ %dst = getelementptr inbounds [60 x i16]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call i16* @__strcpy_chk
+ call i16* @__strcpy_chk(i16* %dst, i8* %src, i32 8)
+ ret void
+}
+
+declare i16* @__strcpy_chk(i16*, i8*, i32)
diff --git a/test/Transforms/InstCombine/strcat-1.ll b/test/Transforms/InstCombine/strcat-1.ll
new file mode 100644
index 000000000000..3c05d6b06fa0
--- /dev/null
+++ b/test/Transforms/InstCombine/strcat-1.ll
@@ -0,0 +1,38 @@
+; Test that the strcat libcall simplifier works correctly per the
+; bug found in PR3661.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@null = constant [1 x i8] zeroinitializer
+@null_hello = constant [7 x i8] c"\00hello\00"
+
+declare i8* @strcat(i8*, i8*)
+declare i32 @puts(i8*)
+
+define i32 @main() {
+; CHECK: @main
+; CHECK-NOT: call i8* @strcat
+; CHECK: call i32 @puts
+
+ %target = alloca [1024 x i8]
+ %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0
+ store i8 0, i8* %arg1
+
+ ; rslt1 = strcat(target, "hello\00")
+ %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0
+ %rslt1 = call i8* @strcat(i8* %arg1, i8* %arg2)
+
+ ; rslt2 = strcat(rslt1, "\00")
+ %arg3 = getelementptr [1 x i8]* @null, i32 0, i32 0
+ %rslt2 = call i8* @strcat(i8* %rslt1, i8* %arg3)
+
+ ; rslt3 = strcat(rslt2, "\00hello\00")
+ %arg4 = getelementptr [7 x i8]* @null_hello, i32 0, i32 0
+ %rslt3 = call i8* @strcat(i8* %rslt2, i8* %arg4)
+
+ call i32 @puts( i8* %rslt3 )
+ ret i32 0
+}
diff --git a/test/Transforms/InstCombine/strcat-2.ll b/test/Transforms/InstCombine/strcat-2.ll
new file mode 100644
index 000000000000..379ee7495317
--- /dev/null
+++ b/test/Transforms/InstCombine/strcat-2.ll
@@ -0,0 +1,32 @@
+; Test that the strcat libcall simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@empty = constant [1 x i8] c"\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+
+declare i8* @strcat(i8*, i8*)
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+; CHECK-NOT: call i8* @strcat
+; CHECK: ret void
+
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+ call i8* @strcat(i8* %dst, i8* %src)
+ ret void
+}
+
+define void @test_simplify2() {
+; CHECK: @test_simplify2
+; CHECK-NEXT: ret void
+
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [1 x i8]* @empty, i32 0, i32 0
+ call i8* @strcat(i8* %dst, i8* %src)
+ ret void
+}
diff --git a/test/Transforms/InstCombine/strcat-3.ll b/test/Transforms/InstCombine/strcat-3.ll
new file mode 100644
index 000000000000..15aff2f1aa28
--- /dev/null
+++ b/test/Transforms/InstCombine/strcat-3.ll
@@ -0,0 +1,22 @@
+; Test that the strcat libcall simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@empty = constant [1 x i8] c"\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+
+declare i16* @strcat(i8*, i8*)
+
+define void @test_nosimplify1() {
+; CHECK: @test_nosimplify1
+; CHECK: call i16* @strcat
+; CHECK: ret void
+
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+ call i16* @strcat(i8* %dst, i8* %src)
+ ret void
+}
diff --git a/test/Transforms/InstCombine/strchr-1.ll b/test/Transforms/InstCombine/strchr-1.ll
new file mode 100644
index 000000000000..5efab9ec4bee
--- /dev/null
+++ b/test/Transforms/InstCombine/strchr-1.ll
@@ -0,0 +1,54 @@
+; Test that the strchr library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [14 x i8] c"hello world\5Cn\00"
+@null = constant [1 x i8] zeroinitializer
+@chp = global i8* zeroinitializer
+
+declare i8* @strchr(i8*, i32)
+
+define void @test_simplify1() {
+; CHECK: store i8* getelementptr inbounds ([14 x i8]* @hello, i32 0, i32 6)
+; CHECK-NOT: call i8* @strchr
+; CHECK: ret void
+
+ %str = getelementptr [14 x i8]* @hello, i32 0, i32 0
+ %dst = call i8* @strchr(i8* %str, i32 119)
+ store i8* %dst, i8** @chp
+ ret void
+}
+
+define void @test_simplify2() {
+; CHECK: store i8* null, i8** @chp, align 4
+; CHECK-NOT: call i8* @strchr
+; CHECK: ret void
+
+ %str = getelementptr [1 x i8]* @null, i32 0, i32 0
+ %dst = call i8* @strchr(i8* %str, i32 119)
+ store i8* %dst, i8** @chp
+ ret void
+}
+
+define void @test_simplify3() {
+; CHECK: store i8* getelementptr inbounds ([14 x i8]* @hello, i32 0, i32 13)
+; CHECK-NOT: call i8* @strchr
+; CHECK: ret void
+
+ %src = getelementptr [14 x i8]* @hello, i32 0, i32 0
+ %dst = call i8* @strchr(i8* %src, i32 0)
+ store i8* %dst, i8** @chp
+ ret void
+}
+
+define void @test_simplify4(i32 %chr) {
+; CHECK: call i8* @memchr
+; CHECK-NOT: call i8* @strchr
+; CHECK: ret void
+
+ %src = getelementptr [14 x i8]* @hello, i32 0, i32 0
+ %dst = call i8* @strchr(i8* %src, i32 %chr)
+ store i8* %dst, i8** @chp
+ ret void
+}
diff --git a/test/Transforms/InstCombine/strchr-2.ll b/test/Transforms/InstCombine/strchr-2.ll
new file mode 100644
index 000000000000..35bbd23e6d4d
--- /dev/null
+++ b/test/Transforms/InstCombine/strchr-2.ll
@@ -0,0 +1,21 @@
+; Test that the strchr libcall simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [14 x i8] c"hello world\5Cn\00"
+@chr = global i8 zeroinitializer
+
+declare i8 @strchr(i8*, i32)
+
+define void @test_nosimplify1() {
+; CHECK: test_nosimplify1
+; CHECK: call i8 @strchr
+; CHECK: ret void
+
+ %str = getelementptr [14 x i8]* @hello, i32 0, i32 0
+ %dst = call i8 @strchr(i8* %str, i32 119)
+ store i8 %dst, i8* @chr
+ ret void
+}
diff --git a/test/Transforms/InstCombine/strcmp-1.ll b/test/Transforms/InstCombine/strcmp-1.ll
new file mode 100644
index 000000000000..0679246e0915
--- /dev/null
+++ b/test/Transforms/InstCombine/strcmp-1.ll
@@ -0,0 +1,82 @@
+; Test that the strcmp library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@hell = constant [5 x i8] c"hell\00"
+@bell = constant [5 x i8] c"bell\00"
+@null = constant [1 x i8] zeroinitializer
+
+declare i32 @strcmp(i8*, i8*)
+
+; strcmp("", x) -> -*x
+define i32 @test1(i8* %str2) {
+; CHECK: @test1
+; CHECK: %strcmpload = load i8* %str
+; CHECK: %1 = zext i8 %strcmpload to i32
+; CHECK: %2 = sub i32 0, %1
+; CHECK: ret i32 %2
+
+ %str1 = getelementptr inbounds [1 x i8]* @null, i32 0, i32 0
+ %temp1 = call i32 @strcmp(i8* %str1, i8* %str2)
+ ret i32 %temp1
+
+}
+
+; strcmp(x, "") -> *x
+define i32 @test2(i8* %str1) {
+; CHECK: @test2
+; CHECK: %strcmpload = load i8* %str
+; CHECK: %1 = zext i8 %strcmpload to i32
+; CHECK: ret i32 %1
+
+ %str2 = getelementptr inbounds [1 x i8]* @null, i32 0, i32 0
+ %temp1 = call i32 @strcmp(i8* %str1, i8* %str2)
+ ret i32 %temp1
+}
+
+; strcmp(x, y) -> cnst
+define i32 @test3() {
+; CHECK: @test3
+; CHECK: ret i32 -1
+
+ %str1 = getelementptr inbounds [5 x i8]* @hell, i32 0, i32 0
+ %str2 = getelementptr inbounds [6 x i8]* @hello, i32 0, i32 0
+ %temp1 = call i32 @strcmp(i8* %str1, i8* %str2)
+ ret i32 %temp1
+}
+
+define i32 @test4() {
+; CHECK: @test4
+; CHECK: ret i32 1
+
+ %str1 = getelementptr inbounds [5 x i8]* @hell, i32 0, i32 0
+ %str2 = getelementptr inbounds [1 x i8]* @null, i32 0, i32 0
+ %temp1 = call i32 @strcmp(i8* %str1, i8* %str2)
+ ret i32 %temp1
+}
+
+; strcmp(x, y) -> memcmp(x, y, <known length>)
+; (This transform is rather difficult to trigger in a useful manner)
+define i32 @test5(i1 %b) {
+; CHECK: @test5
+; CHECK: %memcmp = call i32 @memcmp(i8* getelementptr inbounds ([6 x i8]* @hello, i32 0, i32 0), i8* %str2, i32 5)
+; CHECK: ret i32 %memcmp
+
+ %str1 = getelementptr inbounds [6 x i8]* @hello, i32 0, i32 0
+ %temp1 = getelementptr inbounds [5 x i8]* @hell, i32 0, i32 0
+ %temp2 = getelementptr inbounds [5 x i8]* @bell, i32 0, i32 0
+ %str2 = select i1 %b, i8* %temp1, i8* %temp2
+ %temp3 = call i32 @strcmp(i8* %str1, i8* %str2)
+ ret i32 %temp3
+}
+
+; strcmp(x,x) -> 0
+define i32 @test6(i8* %str) {
+; CHECK: @test6
+; CHECK: ret i32 0
+
+ %temp1 = call i32 @strcmp(i8* %str, i8* %str)
+ ret i32 %temp1
+}
diff --git a/test/Transforms/InstCombine/strcmp-2.ll b/test/Transforms/InstCombine/strcmp-2.ll
new file mode 100644
index 000000000000..20518960f302
--- /dev/null
+++ b/test/Transforms/InstCombine/strcmp-2.ll
@@ -0,0 +1,20 @@
+; Test that the strcmp library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@hell = constant [5 x i8] c"hell\00"
+
+declare i16 @strcmp(i8*, i8*)
+
+define i16 @test_nosimplify() {
+; CHECK: @test_nosimplify
+; CHECK: call i16 @strcmp
+; CHECK: ret i16 %temp1
+
+ %str1 = getelementptr inbounds [5 x i8]* @hell, i32 0, i32 0
+ %str2 = getelementptr inbounds [6 x i8]* @hello, i32 0, i32 0
+ %temp1 = call i16 @strcmp(i8* %str1, i8* %str2)
+ ret i16 %temp1
+}
diff --git a/test/Transforms/InstCombine/strcpy-1.ll b/test/Transforms/InstCombine/strcpy-1.ll
new file mode 100644
index 000000000000..b6cf048b2a81
--- /dev/null
+++ b/test/Transforms/InstCombine/strcpy-1.ll
@@ -0,0 +1,45 @@
+; Test that the strcpy library call simplifier works correctly.
+; rdar://6839935
+; RUN: opt < %s -instcombine -S | FileCheck %s
+;
+; This transformation requires the pointer size, as it assumes that size_t is
+; the size of a pointer.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+
+@hello = constant [6 x i8] c"hello\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+@b = common global [32 x i8] zeroinitializer, align 1
+
+declare i8* @strcpy(i8*, i8*)
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+
+ call i8* @strcpy(i8* %dst, i8* %src)
+; CHECK: @llvm.memcpy.p0i8.p0i8.i32
+ ret void
+}
+
+define i8* @test_simplify2() {
+; CHECK: @test_simplify2
+
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+
+ %ret = call i8* @strcpy(i8* %dst, i8* %dst)
+; CHECK: ret i8* getelementptr inbounds ([32 x i8]* @a, i32 0, i32 0)
+ ret i8* %ret
+}
+
+define i8* @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [32 x i8]* @b, i32 0, i32 0
+
+ %ret = call i8* @strcpy(i8* %dst, i8* %src)
+; CHECK: call i8* @strcpy
+ ret i8* %ret
+}
diff --git a/test/Transforms/InstCombine/strcpy-2.ll b/test/Transforms/InstCombine/strcpy-2.ll
new file mode 100644
index 000000000000..779e9fdd9598
--- /dev/null
+++ b/test/Transforms/InstCombine/strcpy-2.ll
@@ -0,0 +1,22 @@
+; Test that the strcpy library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+;
+; This transformation requires the pointer size, as it assumes that size_t is
+; the size of a pointer.
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
+
+@hello = constant [6 x i8] c"hello\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+
+declare i16* @strcpy(i8*, i8*)
+
+define void @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+
+ call i16* @strcpy(i8* %dst, i8* %src)
+; CHECK: call i16* @strcpy
+ ret void
+}
diff --git a/test/Transforms/InstCombine/strcpy_chk-1.ll b/test/Transforms/InstCombine/strcpy_chk-1.ll
new file mode 100644
index 000000000000..3e48f4fd3057
--- /dev/null
+++ b/test/Transforms/InstCombine/strcpy_chk-1.ll
@@ -0,0 +1,94 @@
+; Test lib call simplification of __strcpy_chk calls with various values
+; for src, dst, and slen.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@a = common global [60 x i8] zeroinitializer, align 1
+@b = common global [60 x i8] zeroinitializer, align 1
+@.str = private constant [12 x i8] c"abcdefghijk\00"
+
+; Check cases where slen >= strlen (src).
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32
+ call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 60)
+ ret void
+}
+
+define void @test_simplify2() {
+; CHECK: @test_simplify2
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32
+ call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 12)
+ ret void
+}
+
+define void @test_simplify3() {
+; CHECK: @test_simplify3
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32
+ call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 -1)
+ ret void
+}
+
+; Check cases where there are no string constants.
+
+define void @test_simplify4() {
+; CHECK: @test_simplify4
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0
+
+; CHECK-NEXT: call i8* @strcpy
+ call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 -1)
+ ret void
+}
+
+; Check case where the string length is not constant.
+
+define void @test_simplify5() {
+; CHECK: @test_simplify5
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
+
+; CHECK: @__memcpy_chk
+ %len = call i32 @llvm.objectsize.i32(i8* %dst, i1 false)
+ call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 %len)
+ ret void
+}
+
+; Check case where the source and destination are the same.
+
+define i8* @test_simplify6() {
+; CHECK: @test_simplify6
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+
+; CHECK: getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0)
+ %len = call i32 @llvm.objectsize.i32(i8* %dst, i1 false)
+ %ret = call i8* @__strcpy_chk(i8* %dst, i8* %dst, i32 %len)
+ ret i8* %ret
+}
+
+; Check case where slen < strlen (src).
+
+define void @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0
+
+; CHECK-NEXT: call i8* @__strcpy_chk
+ call i8* @__strcpy_chk(i8* %dst, i8* %src, i32 8)
+ ret void
+}
+
+declare i8* @__strcpy_chk(i8*, i8*, i32) nounwind
+declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly
diff --git a/test/Transforms/InstCombine/strcpy_chk-2.ll b/test/Transforms/InstCombine/strcpy_chk-2.ll
new file mode 100644
index 000000000000..d76ea5d068bc
--- /dev/null
+++ b/test/Transforms/InstCombine/strcpy_chk-2.ll
@@ -0,0 +1,21 @@
+; Test that lib call simplification doesn't simplify __strcpy_chk calls
+; with the wrong prototype.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@a = common global [60 x i16] zeroinitializer, align 1
+@.str = private constant [8 x i8] c"abcdefg\00"
+
+define void @test_no_simplify() {
+; CHECK: @test_no_simplify
+ %dst = getelementptr inbounds [60 x i16]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [8 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call i16* @__strcpy_chk
+ call i16* @__strcpy_chk(i16* %dst, i8* %src, i32 8)
+ ret void
+}
+
+declare i16* @__strcpy_chk(i16*, i8*, i32)
diff --git a/test/Transforms/InstCombine/strcpy_chk.ll b/test/Transforms/InstCombine/strcpy_chk.ll
deleted file mode 100644
index 8835a0ba467c..000000000000
--- a/test/Transforms/InstCombine/strcpy_chk.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-@a = common global [60 x i8] zeroinitializer, align 1 ; <[60 x i8]*> [#uses=1]
-@.str = private constant [8 x i8] c"abcdefg\00" ; <[8 x i8]*> [#uses=1]
-
-define i8* @foo() nounwind {
-; CHECK: @foo
-; CHECK-NEXT: call i8* @strcpy
- %call = call i8* @__strcpy_chk(i8* getelementptr inbounds ([60 x i8]* @a, i32 0, i32 0), i8* getelementptr inbounds ([8 x i8]* @.str, i32 0, i32 0), i32 60) ; <i8*> [#uses=1]
- ret i8* %call
-}
-
-declare i8* @__strcpy_chk(i8*, i8*, i32) nounwind
diff --git a/test/Transforms/InstCombine/strcspn-1.ll b/test/Transforms/InstCombine/strcspn-1.ll
new file mode 100644
index 000000000000..60fad897b2c8
--- /dev/null
+++ b/test/Transforms/InstCombine/strcspn-1.ll
@@ -0,0 +1,57 @@
+; Test that the strcspn library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+@abcba = constant [6 x i8] c"abcba\00"
+@abc = constant [4 x i8] c"abc\00"
+@null = constant [1 x i8] zeroinitializer
+
+declare i64 @strcspn(i8*, i8*)
+
+; Check strcspn(s, "") -> strlen(s).
+
+define i64 @test_simplify1(i8* %str) {
+; CHECK: @test_simplify1
+ %pat = getelementptr [1 x i8]* @null, i32 0, i32 0
+
+ %ret = call i64 @strcspn(i8* %str, i8* %pat)
+; CHECK-NEXT: [[VAR:%[a-z]+]] = call i64 @strlen(i8* %str)
+ ret i64 %ret
+; CHECK-NEXT: ret i64 [[VAR]]
+}
+
+; Check strcspn("", s) -> 0.
+
+define i64 @test_simplify2(i8* %pat) {
+; CHECK: @test_simplify2
+ %str = getelementptr [1 x i8]* @null, i32 0, i32 0
+
+ %ret = call i64 @strcspn(i8* %str, i8* %pat)
+ ret i64 %ret
+; CHECK-NEXT: ret i64 0
+}
+
+; Check strcspn(s1, s2), where s1 and s2 are constants.
+
+define i64 @test_simplify3() {
+; CHECK: @test_simplify3
+ %str = getelementptr [6 x i8]* @abcba, i32 0, i32 0
+ %pat = getelementptr [4 x i8]* @abc, i32 0, i32 0
+
+ %ret = call i64 @strcspn(i8* %str, i8* %pat)
+ ret i64 %ret
+; CHECK-NEXT: ret i64 0
+}
+
+; Check cases that shouldn't be simplified.
+
+define i64 @test_no_simplify1(i8* %str, i8* %pat) {
+; CHECK: @test_no_simplify1
+
+ %ret = call i64 @strcspn(i8* %str, i8* %pat)
+; CHECK-NEXT: %ret = call i64 @strcspn(i8* %str, i8* %pat)
+ ret i64 %ret
+; CHECK-NEXT: ret i64 %ret
+}
diff --git a/test/Transforms/InstCombine/strcspn-2.ll b/test/Transforms/InstCombine/strcspn-2.ll
new file mode 100644
index 000000000000..4e2393686c7d
--- /dev/null
+++ b/test/Transforms/InstCombine/strcspn-2.ll
@@ -0,0 +1,21 @@
+; Test that the strcspn library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+@null = constant [1 x i8] zeroinitializer
+
+declare double @strcspn(i8*, i8*)
+
+; Check that strcspn functions with the wrong prototype aren't simplified.
+
+define double @test_no_simplify1(i8* %pat) {
+; CHECK: @test_no_simplify1
+ %str = getelementptr [1 x i8]* @null, i32 0, i32 0
+
+ %ret = call double @strcspn(i8* %str, i8* %pat)
+; CHECK-NEXT: call double @strcspn
+ ret double %ret
+; CHECK-NEXT: ret double %ret
+}
diff --git a/test/Transforms/InstCombine/strlen-1.ll b/test/Transforms/InstCombine/strlen-1.ll
new file mode 100644
index 000000000000..6d7464a4cc80
--- /dev/null
+++ b/test/Transforms/InstCombine/strlen-1.ll
@@ -0,0 +1,97 @@
+; Test that the strlen library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@null = constant [1 x i8] zeroinitializer
+@null_hello = constant [7 x i8] c"\00hello\00"
+@nullstring = constant i8 0
+@a = common global [32 x i8] zeroinitializer, align 1
+
+declare i32 @strlen(i8*)
+
+; Check strlen(string constant) -> integer constant.
+
+define i32 @test_simplify1() {
+; CHECK: @test_simplify1
+ %hello_p = getelementptr [6 x i8]* @hello, i32 0, i32 0
+ %hello_l = call i32 @strlen(i8* %hello_p)
+ ret i32 %hello_l
+; CHECK-NEXT: ret i32 5
+}
+
+define i32 @test_simplify2() {
+; CHECK: @test_simplify2
+ %null_p = getelementptr [1 x i8]* @null, i32 0, i32 0
+ %null_l = call i32 @strlen(i8* %null_p)
+ ret i32 %null_l
+; CHECK-NEXT: ret i32 0
+}
+
+define i32 @test_simplify3() {
+; CHECK: @test_simplify3
+ %null_hello_p = getelementptr [7 x i8]* @null_hello, i32 0, i32 0
+ %null_hello_l = call i32 @strlen(i8* %null_hello_p)
+ ret i32 %null_hello_l
+; CHECK-NEXT: ret i32 0
+}
+
+define i32 @test_simplify4() {
+; CHECK: @test_simplify4
+ %len = tail call i32 @strlen(i8* @nullstring) nounwind
+ ret i32 %len
+; CHECK-NEXT: ret i32 0
+}
+
+; Check strlen(x) == 0 --> *x == 0.
+
+define i1 @test_simplify5() {
+; CHECK: @test_simplify5
+ %hello_p = getelementptr [6 x i8]* @hello, i32 0, i32 0
+ %hello_l = call i32 @strlen(i8* %hello_p)
+ %eq_hello = icmp eq i32 %hello_l, 0
+ ret i1 %eq_hello
+; CHECK-NEXT: ret i1 false
+}
+
+define i1 @test_simplify6() {
+; CHECK: @test_simplify6
+ %null_p = getelementptr [1 x i8]* @null, i32 0, i32 0
+ %null_l = call i32 @strlen(i8* %null_p)
+ %eq_null = icmp eq i32 %null_l, 0
+ ret i1 %eq_null
+; CHECK-NEXT: ret i1 true
+}
+
+; Check strlen(x) != 0 --> *x != 0.
+
+define i1 @test_simplify7() {
+; CHECK: @test_simplify7
+ %hello_p = getelementptr [6 x i8]* @hello, i32 0, i32 0
+ %hello_l = call i32 @strlen(i8* %hello_p)
+ %ne_hello = icmp ne i32 %hello_l, 0
+ ret i1 %ne_hello
+; CHECK-NEXT: ret i1 true
+}
+
+define i1 @test_simplify8() {
+; CHECK: @test_simplify8
+ %null_p = getelementptr [1 x i8]* @null, i32 0, i32 0
+ %null_l = call i32 @strlen(i8* %null_p)
+ %ne_null = icmp ne i32 %null_l, 0
+ ret i1 %ne_null
+; CHECK-NEXT: ret i1 false
+}
+
+; Check cases that shouldn't be simplified.
+
+define i32 @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+ %a_p = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %a_l = call i32 @strlen(i8* %a_p)
+; CHECK-NEXT: %a_l = call i32 @strlen
+ ret i32 %a_l
+; CHECK-NEXT: ret i32 %a_l
+}
diff --git a/test/Transforms/InstCombine/strlen-2.ll b/test/Transforms/InstCombine/strlen-2.ll
new file mode 100644
index 000000000000..c4fd54c06db9
--- /dev/null
+++ b/test/Transforms/InstCombine/strlen-2.ll
@@ -0,0 +1,18 @@
+; Test that the strlen library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+
+declare i32 @strlen(i8*, i32)
+
+define i32 @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+ %hello_p = getelementptr [6 x i8]* @hello, i32 0, i32 0
+ %hello_l = call i32 @strlen(i8* %hello_p, i32 187)
+; CHECK-NEXT: %hello_l = call i32 @strlen
+ ret i32 %hello_l
+; CHECK-NEXT: ret i32 %hello_l
+}
diff --git a/test/Transforms/InstCombine/strncat-1.ll b/test/Transforms/InstCombine/strncat-1.ll
new file mode 100644
index 000000000000..ad2a18b1465d
--- /dev/null
+++ b/test/Transforms/InstCombine/strncat-1.ll
@@ -0,0 +1,37 @@
+; Test that the strncat libcall simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@null = constant [1 x i8] zeroinitializer
+@null_hello = constant [7 x i8] c"\00hello\00"
+
+declare i8* @strncat(i8*, i8*, i32)
+declare i32 @puts(i8*)
+
+define i32 @main() {
+; CHECK: @main
+; CHECK-NOT: call i8* @strncat
+; CHECK: call i32 @puts
+
+ %target = alloca [1024 x i8]
+ %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0
+ store i8 0, i8* %arg1
+
+ ; rslt1 = strncat(target, "hello\00")
+ %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0
+ %rslt1 = call i8* @strncat(i8* %arg1, i8* %arg2, i32 6)
+
+ ; rslt2 = strncat(rslt1, "\00")
+ %arg3 = getelementptr [1 x i8]* @null, i32 0, i32 0
+ %rslt2 = call i8* @strncat(i8* %rslt1, i8* %arg3, i32 42)
+
+ ; rslt3 = strncat(rslt2, "\00hello\00")
+ %arg4 = getelementptr [7 x i8]* @null_hello, i32 0, i32 0
+ %rslt3 = call i8* @strncat(i8* %rslt2, i8* %arg4, i32 42)
+
+ call i32 @puts(i8* %rslt3)
+ ret i32 0
+}
diff --git a/test/Transforms/InstCombine/strncat-2.ll b/test/Transforms/InstCombine/strncat-2.ll
new file mode 100644
index 000000000000..c56deacd39bb
--- /dev/null
+++ b/test/Transforms/InstCombine/strncat-2.ll
@@ -0,0 +1,53 @@
+; Test that the strncat libcall simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@empty = constant [1 x i8] c"\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+
+declare i8* @strncat(i8*, i8*, i32)
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+; CHECK-NOT: call i8* @strncat
+; CHECK: ret void
+
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+ call i8* @strncat(i8* %dst, i8* %src, i32 13)
+ ret void
+}
+
+define void @test_simplify2() {
+; CHECK: @test_simplify2
+; CHECK-NEXT: ret void
+
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [1 x i8]* @empty, i32 0, i32 0
+ call i8* @strncat(i8* %dst, i8* %src, i32 13)
+ ret void
+}
+
+define void @test_simplify3() {
+; CHECK: @test_simplify3
+; CHECK-NEXT: ret void
+
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+ call i8* @strncat(i8* %dst, i8* %src, i32 0)
+ ret void
+}
+
+define void @test_nosimplify1() {
+; CHECK: @test_nosimplify1
+; CHECK: call i8* @strncat
+; CHECK: ret void
+
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+ call i8* @strncat(i8* %dst, i8* %src, i32 1)
+ ret void
+}
diff --git a/test/Transforms/InstCombine/strncat-3.ll b/test/Transforms/InstCombine/strncat-3.ll
new file mode 100644
index 000000000000..3cd797168705
--- /dev/null
+++ b/test/Transforms/InstCombine/strncat-3.ll
@@ -0,0 +1,22 @@
+; Test that the strncat libcall simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@empty = constant [1 x i8] c"\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+
+declare i16* @strncat(i8*, i8*, i32)
+
+define void @test_nosimplify1() {
+; CHECK: @test_nosimplify1
+; CHECK: call i16* @strncat
+; CHECK: ret void
+
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+ call i16* @strncat(i8* %dst, i8* %src, i32 13)
+ ret void
+}
diff --git a/test/Transforms/InstCombine/strncmp-1.ll b/test/Transforms/InstCombine/strncmp-1.ll
new file mode 100644
index 000000000000..187c2fa50e82
--- /dev/null
+++ b/test/Transforms/InstCombine/strncmp-1.ll
@@ -0,0 +1,99 @@
+; Test that the strncmp library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@hell = constant [5 x i8] c"hell\00"
+@bell = constant [5 x i8] c"bell\00"
+@null = constant [1 x i8] zeroinitializer
+
+declare i32 @strncmp(i8*, i8*, i32)
+
+; strncmp("", x, n) -> -*x
+define i32 @test1(i8* %str2) {
+; CHECK: @test1
+; CHECK: %strcmpload = load i8* %str
+; CHECK: %1 = zext i8 %strcmpload to i32
+; CHECK: %2 = sub i32 0, %1
+; CHECK: ret i32 %2
+
+ %str1 = getelementptr inbounds [1 x i8]* @null, i32 0, i32 0
+ %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 10)
+ ret i32 %temp1
+}
+
+; strncmp(x, "", n) -> *x
+define i32 @test2(i8* %str1) {
+; CHECK: @test2
+; CHECK: %strcmpload = load i8* %str1
+; CHECK: %1 = zext i8 %strcmpload to i32
+; CHECK: ret i32 %1
+
+ %str2 = getelementptr inbounds [1 x i8]* @null, i32 0, i32 0
+ %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 10)
+ ret i32 %temp1
+}
+
+; strncmp(x, y, n) -> cnst
+define i32 @test3() {
+; CHECK: @test3
+; CHECK: ret i32 -1
+
+ %str1 = getelementptr inbounds [5 x i8]* @hell, i32 0, i32 0
+ %str2 = getelementptr inbounds [6 x i8]* @hello, i32 0, i32 0
+ %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 10)
+ ret i32 %temp1
+}
+
+define i32 @test4() {
+; CHECK: @test4
+; CHECK: ret i32 1
+
+ %str1 = getelementptr inbounds [5 x i8]* @hell, i32 0, i32 0
+ %str2 = getelementptr inbounds [1 x i8]* @null, i32 0, i32 0
+ %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 10)
+ ret i32 %temp1
+}
+
+define i32 @test5() {
+; CHECK: @test5
+; CHECK: ret i32 0
+
+ %str1 = getelementptr inbounds [5 x i8]* @hell, i32 0, i32 0
+ %str2 = getelementptr inbounds [6 x i8]* @hello, i32 0, i32 0
+ %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 4)
+ ret i32 %temp1
+}
+
+; strncmp(x,y,1) -> memcmp(x,y,1)
+define i32 @test6(i8* %str1, i8* %str2) {
+; CHECK: @test6
+; CHECK: [[LOAD1:%[a-z]+]] = load i8* %str1, align 1
+; CHECK: [[ZEXT1:%[a-z]+]] = zext i8 [[LOAD1]] to i32
+; CHECK: [[LOAD2:%[a-z]+]] = load i8* %str2, align 1
+; CHECK: [[ZEXT2:%[a-z]+]] = zext i8 [[LOAD2]] to i32
+; CHECK: [[RET:%[a-z]+]] = sub i32 [[ZEXT1]], [[ZEXT2]]
+; CHECK: ret i32 [[RET]]
+
+ %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 1)
+ ret i32 %temp1
+}
+
+; strncmp(x,y,0) -> 0
+define i32 @test7(i8* %str1, i8* %str2) {
+; CHECK: @test7
+; CHECK: ret i32 0
+
+ %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 0)
+ ret i32 %temp1
+}
+
+; strncmp(x,x,n) -> 0
+define i32 @test8(i8* %str, i32 %n) {
+; CHECK: @test8
+; CHECK: ret i32 0
+
+ %temp1 = call i32 @strncmp(i8* %str, i8* %str, i32 %n)
+ ret i32 %temp1
+}
diff --git a/test/Transforms/InstCombine/strncmp-2.ll b/test/Transforms/InstCombine/strncmp-2.ll
new file mode 100644
index 000000000000..3fc43a6fd4f5
--- /dev/null
+++ b/test/Transforms/InstCombine/strncmp-2.ll
@@ -0,0 +1,20 @@
+; Test that the strncmp library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@hell = constant [5 x i8] c"hell\00"
+
+declare i16 @strncmp(i8*, i8*, i32)
+
+define i16 @test_nosimplify() {
+; CHECK: @test_nosimplify
+; CHECK: call i16 @strncmp
+; CHECK: ret i16 %temp1
+
+ %str1 = getelementptr inbounds [5 x i8]* @hell, i32 0, i32 0
+ %str2 = getelementptr inbounds [6 x i8]* @hello, i32 0, i32 0
+ %temp1 = call i16 @strncmp(i8* %str1, i8* %str2, i32 10)
+ ret i16 %temp1
+}
diff --git a/test/Transforms/InstCombine/strncpy-1.ll b/test/Transforms/InstCombine/strncpy-1.ll
new file mode 100644
index 000000000000..3ce2b9b5eecc
--- /dev/null
+++ b/test/Transforms/InstCombine/strncpy-1.ll
@@ -0,0 +1,95 @@
+; Test that the strncpy library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@null = constant [1 x i8] zeroinitializer
+@null_hello = constant [7 x i8] c"\00hello\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+@b = common global [32 x i8] zeroinitializer, align 1
+
+declare i8* @strncpy(i8*, i8*, i32)
+declare i32 @puts(i8*)
+
+; Check a bunch of strncpy invocations together.
+
+define i32 @test_simplify1() {
+; CHECK: @test_simplify1
+; CHECK-NOT: call i8* @strncpy
+; CHECK: call i32 @puts
+ %target = alloca [1024 x i8]
+ %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0
+ store i8 0, i8* %arg1
+
+ %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0
+ %rslt1 = call i8* @strncpy(i8* %arg1, i8* %arg2, i32 6)
+
+ %arg3 = getelementptr [1 x i8]* @null, i32 0, i32 0
+ %rslt2 = call i8* @strncpy(i8* %rslt1, i8* %arg3, i32 42)
+
+ %arg4 = getelementptr [7 x i8]* @null_hello, i32 0, i32 0
+ %rslt3 = call i8* @strncpy(i8* %rslt2, i8* %arg4, i32 42)
+
+ call i32 @puts( i8* %rslt3 )
+ ret i32 0
+}
+
+; Check strncpy(x, "", y) -> memset(x, '\0', y, 1).
+
+define void @test_simplify2() {
+; CHECK: @test_simplify2
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [1 x i8]* @null, i32 0, i32 0
+
+ call i8* @strncpy(i8* %dst, i8* %src, i32 32)
+; CHECK: call void @llvm.memset.p0i8.i32
+ ret void
+}
+
+; Check strncpy(x, y, 0) -> x.
+
+define i8* @test_simplify3() {
+; CHECK: @test_simplify3
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+
+ %ret = call i8* @strncpy(i8* %dst, i8* %src, i32 0)
+ ret i8* %ret
+; CHECK: ret i8* getelementptr inbounds ([32 x i8]* @a, i32 0, i32 0)
+}
+
+; Check strncpy(x, s, c) -> memcpy(x, s, c, 1) [s and c are constant].
+
+define void @test_simplify4() {
+; CHECK: @test_simplify4
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+
+ call i8* @strncpy(i8* %dst, i8* %src, i32 6)
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i32
+ ret void
+}
+
+; Check cases that shouldn't be simplified.
+
+define void @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [32 x i8]* @b, i32 0, i32 0
+
+ call i8* @strncpy(i8* %dst, i8* %src, i32 32)
+; CHECK: call i8* @strncpy
+ ret void
+}
+
+define void @test_no_simplify2() {
+; CHECK: @test_no_simplify2
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+
+ call i8* @strncpy(i8* %dst, i8* %src, i32 8)
+; CHECK: call i8* @strncpy
+ ret void
+}
diff --git a/test/Transforms/InstCombine/strncpy-2.ll b/test/Transforms/InstCombine/strncpy-2.ll
new file mode 100644
index 000000000000..ac28ea655009
--- /dev/null
+++ b/test/Transforms/InstCombine/strncpy-2.ll
@@ -0,0 +1,22 @@
+; Test that the strncpy library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [6 x i8] c"hello\00"
+@a = common global [32 x i8] zeroinitializer, align 1
+
+declare i16* @strncpy(i8*, i8*, i32)
+
+; Check that 'strncpy' functions with the wrong prototype aren't simplified.
+
+define void @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+ %dst = getelementptr [32 x i8]* @a, i32 0, i32 0
+ %src = getelementptr [6 x i8]* @hello, i32 0, i32 0
+
+ call i16* @strncpy(i8* %dst, i8* %src, i32 6)
+; CHECK: call i16* @strncpy
+ ret void
+}
diff --git a/test/Transforms/InstCombine/strncpy_chk-1.ll b/test/Transforms/InstCombine/strncpy_chk-1.ll
new file mode 100644
index 000000000000..aadff4268ec2
--- /dev/null
+++ b/test/Transforms/InstCombine/strncpy_chk-1.ll
@@ -0,0 +1,66 @@
+; Test lib call simplification of __strncpy_chk calls with various values
+; for len and dstlen.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@a = common global [60 x i8] zeroinitializer, align 1
+@b = common global [60 x i8] zeroinitializer, align 1
+@.str = private constant [12 x i8] c"abcdefghijk\00"
+
+; Check cases where dstlen >= len
+
+define void @test_simplify1() {
+; CHECK: @test_simplify1
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32
+ call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 12, i32 60)
+ ret void
+}
+
+define void @test_simplify2() {
+; CHECK: @test_simplify2
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32
+ call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 12, i32 12)
+ ret void
+}
+
+define void @test_simplify3() {
+; CHECK: @test_simplify3
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0
+
+; CHECK-NEXT: call i8* @strncpy
+ call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 12, i32 60)
+ ret void
+}
+
+; Check cases where dstlen < len
+
+define void @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [12 x i8]* @.str, i32 0, i32 0
+
+; CHECK-NEXT: call i8* @__strncpy_chk
+ call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 8, i32 4)
+ ret void
+}
+
+define void @test_no_simplify2() {
+; CHECK: @test_no_simplify2
+ %dst = getelementptr inbounds [60 x i8]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [60 x i8]* @b, i32 0, i32 0
+
+; CHECK-NEXT: call i8* @__strncpy_chk
+ call i8* @__strncpy_chk(i8* %dst, i8* %src, i32 8, i32 0)
+ ret void
+}
+
+declare i8* @__strncpy_chk(i8*, i8*, i32, i32)
diff --git a/test/Transforms/InstCombine/strncpy_chk-2.ll b/test/Transforms/InstCombine/strncpy_chk-2.ll
new file mode 100644
index 000000000000..a0f132ebf63b
--- /dev/null
+++ b/test/Transforms/InstCombine/strncpy_chk-2.ll
@@ -0,0 +1,21 @@
+; Test that lib call simplification doesn't simplify __strncpy_chk calls
+; with the wrong prototype.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@a = common global [60 x i16] zeroinitializer, align 1
+@b = common global [60 x i16] zeroinitializer, align 1
+
+define void @test_no_simplify() {
+; CHECK: @test_no_simplify
+ %dst = getelementptr inbounds [60 x i16]* @a, i32 0, i32 0
+ %src = getelementptr inbounds [60 x i16]* @b, i32 0, i32 0
+
+; CHECK-NEXT: call i16* @__strncpy_chk
+ call i16* @__strncpy_chk(i16* %dst, i16* %src, i32 60, i32 60)
+ ret void
+}
+
+declare i16* @__strncpy_chk(i16*, i16*, i32, i32)
diff --git a/test/Transforms/InstCombine/strpbrk-1.ll b/test/Transforms/InstCombine/strpbrk-1.ll
new file mode 100644
index 000000000000..a5d0d86501b1
--- /dev/null
+++ b/test/Transforms/InstCombine/strpbrk-1.ll
@@ -0,0 +1,68 @@
+; Test that the strpbrk library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [12 x i8] c"hello world\00"
+@w = constant [2 x i8] c"w\00"
+@null = constant [1 x i8] zeroinitializer
+
+declare i8* @strpbrk(i8*, i8*)
+
+; Check strpbrk(s, "") -> NULL.
+
+define i8* @test_simplify1(i8* %str) {
+; CHECK: @test_simplify1
+ %pat = getelementptr [1 x i8]* @null, i32 0, i32 0
+
+ %ret = call i8* @strpbrk(i8* %str, i8* %pat)
+ ret i8* %ret
+; CHECK-NEXT: ret i8* null
+}
+
+; Check strpbrk("", s) -> NULL.
+
+define i8* @test_simplify2(i8* %pat) {
+; CHECK: @test_simplify2
+ %str = getelementptr [1 x i8]* @null, i32 0, i32 0
+
+ %ret = call i8* @strpbrk(i8* %str, i8* %pat)
+ ret i8* %ret
+; CHECK-NEXT: ret i8* null
+}
+
+; Check strpbrk(s1, s2), where s1 and s2 are constants.
+
+define i8* @test_simplify3() {
+; CHECK: @test_simplify3
+ %str = getelementptr [12 x i8]* @hello, i32 0, i32 0
+ %pat = getelementptr [2 x i8]* @w, i32 0, i32 0
+
+ %ret = call i8* @strpbrk(i8* %str, i8* %pat)
+ ret i8* %ret
+; CHECK-NEXT: ret i8* getelementptr inbounds ([12 x i8]* @hello, i32 0, i32 6)
+}
+
+; Check strpbrk(s, "a") -> strchr(s, 'a').
+
+define i8* @test_simplify4(i8* %str) {
+; CHECK: @test_simplify4
+ %pat = getelementptr [2 x i8]* @w, i32 0, i32 0
+
+ %ret = call i8* @strpbrk(i8* %str, i8* %pat)
+; CHECK-NEXT: [[VAR:%[a-z]+]] = call i8* @strchr(i8* %str, i32 119)
+ ret i8* %ret
+; CHECK-NEXT: ret i8* [[VAR]]
+}
+
+; Check cases that shouldn't be simplified.
+
+define i8* @test_no_simplify1(i8* %str, i8* %pat) {
+; CHECK: @test_no_simplify1
+
+ %ret = call i8* @strpbrk(i8* %str, i8* %pat)
+; CHECK-NEXT: %ret = call i8* @strpbrk(i8* %str, i8* %pat)
+ ret i8* %ret
+; CHECK-NEXT: ret i8* %ret
+}
diff --git a/test/Transforms/InstCombine/strpbrk-2.ll b/test/Transforms/InstCombine/strpbrk-2.ll
new file mode 100644
index 000000000000..31ac2905df2c
--- /dev/null
+++ b/test/Transforms/InstCombine/strpbrk-2.ll
@@ -0,0 +1,23 @@
+; Test that the strpbrk library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [12 x i8] c"hello world\00"
+@w = constant [2 x i8] c"w\00"
+
+declare i16* @strpbrk(i8*, i8*)
+
+; Check that 'strpbrk' functions with the wrong prototype aren't simplified.
+
+define i16* @test_no_simplify1() {
+; CHECK: @test_no_simplify1
+ %str = getelementptr [12 x i8]* @hello, i32 0, i32 0
+ %pat = getelementptr [2 x i8]* @w, i32 0, i32 0
+
+ %ret = call i16* @strpbrk(i8* %str, i8* %pat)
+; CHECK-NEXT: %ret = call i16* @strpbrk
+ ret i16* %ret
+; CHECK-NEXT: ret i16* %ret
+}
diff --git a/test/Transforms/InstCombine/strrchr-1.ll b/test/Transforms/InstCombine/strrchr-1.ll
new file mode 100644
index 000000000000..854ce45bffb2
--- /dev/null
+++ b/test/Transforms/InstCombine/strrchr-1.ll
@@ -0,0 +1,54 @@
+; Test that the strrchr library call simplifier works correctly.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [14 x i8] c"hello world\5Cn\00"
+@null = constant [1 x i8] zeroinitializer
+@chp = global i8* zeroinitializer
+
+declare i8* @strrchr(i8*, i32)
+
+define void @test_simplify1() {
+; CHECK: store i8* getelementptr inbounds ([14 x i8]* @hello, i32 0, i32 6)
+; CHECK-NOT: call i8* @strrchr
+; CHECK: ret void
+
+ %str = getelementptr [14 x i8]* @hello, i32 0, i32 0
+ %dst = call i8* @strrchr(i8* %str, i32 119)
+ store i8* %dst, i8** @chp
+ ret void
+}
+
+define void @test_simplify2() {
+; CHECK: store i8* null, i8** @chp, align 4
+; CHECK-NOT: call i8* @strrchr
+; CHECK: ret void
+
+ %str = getelementptr [1 x i8]* @null, i32 0, i32 0
+ %dst = call i8* @strrchr(i8* %str, i32 119)
+ store i8* %dst, i8** @chp
+ ret void
+}
+
+define void @test_simplify3() {
+; CHECK: store i8* getelementptr inbounds ([14 x i8]* @hello, i32 0, i32 13)
+; CHECK-NOT: call i8* @strrchr
+; CHECK: ret void
+
+ %src = getelementptr [14 x i8]* @hello, i32 0, i32 0
+ %dst = call i8* @strrchr(i8* %src, i32 0)
+ store i8* %dst, i8** @chp
+ ret void
+}
+
+define void @test_nosimplify1(i32 %chr) {
+; CHECK: @test_nosimplify1
+; CHECK: call i8* @strrchr
+; CHECK: ret void
+
+ %src = getelementptr [14 x i8]* @hello, i32 0, i32 0
+ %dst = call i8* @strrchr(i8* %src, i32 %chr)
+ store i8* %dst, i8** @chp
+ ret void
+}
diff --git a/test/Transforms/InstCombine/strrchr-2.ll b/test/Transforms/InstCombine/strrchr-2.ll
new file mode 100644
index 000000000000..1974f6ca6033
--- /dev/null
+++ b/test/Transforms/InstCombine/strrchr-2.ll
@@ -0,0 +1,21 @@
+; Test that the strrchr libcall simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@hello = constant [14 x i8] c"hello world\5Cn\00"
+@chr = global i8 zeroinitializer
+
+declare i8 @strrchr(i8*, i32)
+
+define void @test_nosimplify1() {
+; CHECK: test_nosimplify1
+; CHECK: call i8 @strrchr
+; CHECK: ret void
+
+ %str = getelementptr [14 x i8]* @hello, i32 0, i32 0
+ %dst = call i8 @strrchr(i8* %str, i32 119)
+ store i8 %dst, i8* @chr
+ ret void
+}
diff --git a/test/Transforms/InstCombine/strspn-1.ll b/test/Transforms/InstCombine/strspn-1.ll
new file mode 100644
index 000000000000..393f88735bd4
--- /dev/null
+++ b/test/Transforms/InstCombine/strspn-1.ll
@@ -0,0 +1,56 @@
+; Test that the strspn library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+@abcba = constant [6 x i8] c"abcba\00"
+@abc = constant [4 x i8] c"abc\00"
+@null = constant [1 x i8] zeroinitializer
+
+declare i64 @strspn(i8*, i8*)
+
+; Check strspn(s, "") -> 0.
+
+define i64 @test_simplify1(i8* %str) {
+; CHECK: @test_simplify1
+ %pat = getelementptr [1 x i8]* @null, i32 0, i32 0
+
+ %ret = call i64 @strspn(i8* %str, i8* %pat)
+ ret i64 %ret
+; CHECK-NEXT: ret i64 0
+}
+
+; Check strspn("", s) -> 0.
+
+define i64 @test_simplify2(i8* %pat) {
+; CHECK: @test_simplify2
+ %str = getelementptr [1 x i8]* @null, i32 0, i32 0
+
+ %ret = call i64 @strspn(i8* %str, i8* %pat)
+ ret i64 %ret
+; CHECK-NEXT: ret i64 0
+}
+
+; Check strspn(s1, s2), where s1 and s2 are constants.
+
+define i64 @test_simplify3() {
+; CHECK: @test_simplify3
+ %str = getelementptr [6 x i8]* @abcba, i32 0, i32 0
+ %pat = getelementptr [4 x i8]* @abc, i32 0, i32 0
+
+ %ret = call i64 @strspn(i8* %str, i8* %pat)
+ ret i64 %ret
+; CHECK-NEXT: ret i64 5
+}
+
+; Check cases that shouldn't be simplified.
+
+define i64 @test_no_simplify1(i8* %str, i8* %pat) {
+; CHECK: @test_no_simplify1
+
+ %ret = call i64 @strspn(i8* %str, i8* %pat)
+; CHECK-NEXT: %ret = call i64 @strspn(i8* %str, i8* %pat)
+ ret i64 %ret
+; CHECK-NEXT: ret i64 %ret
+}
diff --git a/test/Transforms/InstCombine/strstr-1.ll b/test/Transforms/InstCombine/strstr-1.ll
new file mode 100644
index 000000000000..81f52718747d
--- /dev/null
+++ b/test/Transforms/InstCombine/strstr-1.ll
@@ -0,0 +1,65 @@
+; Test that the strstr library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+@.str = private constant [1 x i8] zeroinitializer
+@.str1 = private constant [2 x i8] c"a\00"
+@.str2 = private constant [6 x i8] c"abcde\00"
+@.str3 = private constant [4 x i8] c"bcd\00"
+
+declare i8* @strstr(i8*, i8*)
+
+; Check strstr(str, "") -> str.
+
+define i8* @test_simplify1(i8* %str) {
+; CHECK: @test_simplify1
+ %pat = getelementptr inbounds [1 x i8]* @.str, i32 0, i32 0
+ %ret = call i8* @strstr(i8* %str, i8* %pat)
+ ret i8* %ret
+; CHECK-NEXT: ret i8* %str
+}
+
+; Check strstr(str, "a") -> strchr(str, 'a').
+
+define i8* @test_simplify2(i8* %str) {
+; CHECK: @test_simplify2
+ %pat = getelementptr inbounds [2 x i8]* @.str1, i32 0, i32 0
+ %ret = call i8* @strstr(i8* %str, i8* %pat)
+ ret i8* %ret
+; CHECK-NEXT: @strchr(i8* %str, i32 97)
+}
+
+; Check strstr("abcde", "bcd") -> "abcde" + 1.
+
+define i8* @test_simplify3() {
+; CHECK: @test_simplify3
+ %str = getelementptr inbounds [6 x i8]* @.str2, i32 0, i32 0
+ %pat = getelementptr inbounds [4 x i8]* @.str3, i32 0, i32 0
+ %ret = call i8* @strstr(i8* %str, i8* %pat)
+ ret i8* %ret
+; CHECK-NEXT: getelementptr inbounds ([6 x i8]* @.str2, i64 0, i64 1)
+}
+
+; Check strstr(str, str) -> str.
+
+define i8* @test_simplify4(i8* %str) {
+; CHECK: @test_simplify4
+ %ret = call i8* @strstr(i8* %str, i8* %str)
+ ret i8* %ret
+; CHECK-NEXT: ret i8* %str
+}
+
+; Check strstr(str, pat) == str -> strncmp(str, pat, strlen(str)) == 0.
+
+define i1 @test_simplify5(i8* %str, i8* %pat) {
+; CHECK: @test_simplify5
+ %ret = call i8* @strstr(i8* %str, i8* %pat)
+ %cmp = icmp eq i8* %ret, %str
+ ret i1 %cmp
+; CHECK: [[LEN:%[a-z]+]] = call {{i[0-9]+}} @strlen(i8* %pat)
+; CHECK: [[NCMP:%[a-z]+]] = call {{i[0-9]+}} @strncmp(i8* %str, i8* %pat, {{i[0-9]+}} [[LEN]])
+; CHECK: icmp eq {{i[0-9]+}} [[NCMP]], 0
+; CHECK: ret i1
+}
diff --git a/test/Transforms/InstCombine/strstr-2.ll b/test/Transforms/InstCombine/strstr-2.ll
new file mode 100644
index 000000000000..5092f9b4f803
--- /dev/null
+++ b/test/Transforms/InstCombine/strstr-2.ll
@@ -0,0 +1,18 @@
+; Test that the strstr library call simplifier works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+@null = private constant [1 x i8] zeroinitializer
+
+declare i8 @strstr(i8*, i8*)
+
+define i8 @test_no_simplify1(i8* %str) {
+; CHECK: @test_no_simplify1
+ %pat = getelementptr inbounds [1 x i8]* @null, i32 0, i32 0
+ %ret = call i8 @strstr(i8* %str, i8* %pat)
+; CHECK-NEXT: call i8 @strstr
+ ret i8 %ret
+; CHECK-NEXT: ret i8 %ret
+}
diff --git a/test/Transforms/InstCombine/strto-1.ll b/test/Transforms/InstCombine/strto-1.ll
new file mode 100644
index 000000000000..16c0c67970db
--- /dev/null
+++ b/test/Transforms/InstCombine/strto-1.ll
@@ -0,0 +1,82 @@
+; Test that the strto* library call simplifiers works correctly.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
+
+declare i64 @strtol(i8* %s, i8** %endptr, i32 %base)
+; CHECK: declare i64 @strtol(i8*, i8**, i32)
+
+declare double @strtod(i8* %s, i8** %endptr, i32 %base)
+; CHECK: declare double @strtod(i8*, i8**, i32)
+
+declare float @strtof(i8* %s, i8** %endptr, i32 %base)
+; CHECK: declare float @strtof(i8*, i8**, i32)
+
+declare i64 @strtoul(i8* %s, i8** %endptr, i32 %base)
+; CHECK: declare i64 @strtoul(i8*, i8**, i32)
+
+declare i64 @strtoll(i8* %s, i8** %endptr, i32 %base)
+; CHECK: declare i64 @strtoll(i8*, i8**, i32)
+
+declare double @strtold(i8* %s, i8** %endptr)
+; CHECK: declare double @strtold(i8*, i8**)
+
+declare i64 @strtoull(i8* %s, i8** %endptr, i32 %base)
+; CHECK: declare i64 @strtoull(i8*, i8**, i32)
+
+define void @test_simplify1(i8* %x, i8** %endptr) {
+; CHECK: @test_simplify1
+ call i64 @strtol(i8* %x, i8** null, i32 10)
+; CHECK-NEXT: call i64 @strtol(i8* nocapture %x, i8** null, i32 10)
+ ret void
+}
+
+define void @test_simplify2(i8* %x, i8** %endptr) {
+; CHECK: @test_simplify2
+ call double @strtod(i8* %x, i8** null, i32 10)
+; CHECK-NEXT: call double @strtod(i8* nocapture %x, i8** null, i32 10)
+ ret void
+}
+
+define void @test_simplify3(i8* %x, i8** %endptr) {
+; CHECK: @test_simplify3
+ call float @strtof(i8* %x, i8** null, i32 10)
+; CHECK-NEXT: call float @strtof(i8* nocapture %x, i8** null, i32 10)
+ ret void
+}
+
+define void @test_simplify4(i8* %x, i8** %endptr) {
+; CHECK: @test_simplify4
+ call i64 @strtoul(i8* %x, i8** null, i32 10)
+; CHECK-NEXT: call i64 @strtoul(i8* nocapture %x, i8** null, i32 10)
+ ret void
+}
+
+define void @test_simplify5(i8* %x, i8** %endptr) {
+; CHECK: @test_simplify5
+ call i64 @strtoll(i8* %x, i8** null, i32 10)
+; CHECK-NEXT: call i64 @strtoll(i8* nocapture %x, i8** null, i32 10)
+ ret void
+}
+
+define void @test_simplify6(i8* %x, i8** %endptr) {
+; CHECK: @test_simplify6
+ call double @strtold(i8* %x, i8** null)
+; CHECK-NEXT: call double @strtold(i8* nocapture %x, i8** null)
+ ret void
+}
+
+define void @test_simplify7(i8* %x, i8** %endptr) {
+; CHECK: @test_simplify7
+ call i64 @strtoull(i8* %x, i8** null, i32 10)
+; CHECK-NEXT: call i64 @strtoull(i8* nocapture %x, i8** null, i32 10)
+ ret void
+}
+
+define void @test_no_simplify1(i8* %x, i8** %endptr) {
+; CHECK: @test_no_simplify1
+ call i64 @strtol(i8* %x, i8** %endptr, i32 10)
+; CHECK-NEXT: call i64 @strtol(i8* %x, i8** %endptr, i32 10)
+ ret void
+}
diff --git a/test/Transforms/InstCombine/struct-assign-tbaa.ll b/test/Transforms/InstCombine/struct-assign-tbaa.ll
new file mode 100644
index 000000000000..33a771e6d8b6
--- /dev/null
+++ b/test/Transforms/InstCombine/struct-assign-tbaa.ll
@@ -0,0 +1,44 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+; Verify that instcombine preserves TBAA tags when converting a memcpy into
+; a scalar load and store.
+
+%struct.test1 = type { float }
+
+; CHECK: @test
+; CHECK: %2 = load float* %0, align 4, !tbaa !0
+; CHECK: store float %2, float* %1, align 4, !tbaa !0
+; CHECK: ret
+define void @test1(%struct.test1* nocapture %a, %struct.test1* nocapture %b) {
+entry:
+ %0 = bitcast %struct.test1* %a to i8*
+ %1 = bitcast %struct.test1* %b to i8*
+ tail call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 4, i32 4, i1 false), !tbaa.struct !3
+ ret void
+}
+
+%struct.test2 = type { i32 (i8*, i32*, double*)** }
+
+define i32 (i8*, i32*, double*)*** @test2() {
+; CHECK: @test2
+; CHECK-NOT: memcpy
+; CHECK: ret
+ %tmp = alloca %struct.test2, align 8
+ %tmp1 = bitcast %struct.test2* %tmp to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* undef, i64 8, i32 8, i1 false), !tbaa.struct !4
+ %tmp2 = getelementptr %struct.test2* %tmp, i32 0, i32 0
+ %tmp3 = load i32 (i8*, i32*, double*)*** %tmp2
+ ret i32 (i8*, i32*, double*)*** %tmp2
+}
+
+; CHECK: !0 = metadata !{metadata !"float", metadata !1}
+
+!0 = metadata !{metadata !"Simple C/C++ TBAA"}
+!1 = metadata !{metadata !"omnipotent char", metadata !0}
+!2 = metadata !{metadata !"float", metadata !0}
+!3 = metadata !{i64 0, i64 4, metadata !2}
+!4 = metadata !{i64 0, i64 8, null}
diff --git a/test/Transforms/InstCombine/udiv-simplify-bug-1.ll b/test/Transforms/InstCombine/udiv-simplify-bug-1.ll
index d95e8f835908..74f2fdd7cc63 100644
--- a/test/Transforms/InstCombine/udiv-simplify-bug-1.ll
+++ b/test/Transforms/InstCombine/udiv-simplify-bug-1.ll
@@ -6,9 +6,9 @@
; The udiv instructions shouldn't be optimized away, and the
; sext instructions should be optimized to zext.
-define i64 @bar(i32 %x) nounwind {
+define i64 @bar(i32 %x, i32 %g) nounwind {
%y = lshr i32 %x, 30
- %r = udiv i32 %y, 3
+ %r = udiv i32 %y, %g
%z = sext i32 %r to i64
ret i64 %z
}
diff --git a/test/Transforms/InstCombine/vec_demanded_elts.ll b/test/Transforms/InstCombine/vec_demanded_elts.ll
index 0019a57627cb..2d90750a2f1e 100644
--- a/test/Transforms/InstCombine/vec_demanded_elts.ll
+++ b/test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -196,7 +196,7 @@ define <4 x float> @test_select(float %f, float %g) {
; CHECK-NOT: insertelement
; CHECK: %a3 = insertelement <4 x float> %a0, float 3.000000e+00, i32 3
; CHECK-NOT: insertelement
-; CHECK: %ret = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x float> %a3, <4 x float> <float undef, float 4.000000e+00, float 5.000000e+00, float undef>
+; CHECK: shufflevector <4 x float> %a3, <4 x float> <float undef, float 4.000000e+00, float 5.000000e+00, float undef>, <4 x i32> <i32 0, i32 5, i32 6, i32 3>
%a0 = insertelement <4 x float> undef, float %f, i32 0
%a1 = insertelement <4 x float> %a0, float 1.000000e+00, i32 1
%a2 = insertelement <4 x float> %a1, float 2.000000e+00, i32 2
diff --git a/test/Transforms/InstCombine/vec_shuffle.ll b/test/Transforms/InstCombine/vec_shuffle.ll
index 8f78c2e6bd50..14f532195d7c 100644
--- a/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/test/Transforms/InstCombine/vec_shuffle.ll
@@ -153,3 +153,46 @@ define <8 x i8> @test12a(<8 x i8> %tmp6, <8 x i8> %tmp2) nounwind {
ret <8 x i8> %tmp3
}
+; We should form a shuffle out of a select with constant condition.
+define <4 x i16> @test13a(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK: @test13a
+; CHECK-NEXT: shufflevector <4 x i16> %lhs, <4 x i16> %rhs, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
+; CHECK-NEXT: ret
+ %A = select <4 x i1> <i1 true, i1 false, i1 true, i1 false>,
+ <4 x i16> %lhs, <4 x i16> %rhs
+ ret <4 x i16> %A
+}
+
+define <4 x i16> @test13b(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK: @test13b
+; CHECK-NEXT: ret <4 x i16> %lhs
+ %A = select <4 x i1> <i1 true, i1 undef, i1 true, i1 true>,
+ <4 x i16> %lhs, <4 x i16> %rhs
+ ret <4 x i16> %A
+}
+
+define <4 x i16> @test13c(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK: @test13c
+; CHECK-NEXT: shufflevector <4 x i16> %lhs, <4 x i16> %rhs, <4 x i32> <i32 0, i32 undef, i32 2, i32 7>
+; CHECK-NEXT: ret
+ %A = select <4 x i1> <i1 true, i1 undef, i1 true, i1 false>,
+ <4 x i16> %lhs, <4 x i16> %rhs
+ ret <4 x i16> %A
+}
+
+define <4 x i16> @test13d(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK: @test13d
+; CHECK: select
+; CHECK-NEXT: ret
+ %A = select <4 x i1> <i1 true, i1 icmp ugt (<4 x i16>(<4 x i16>, <4 x i16>)* @test13a, <4 x i16>(<4 x i16>, <4 x i16>)* @test13b), i1 true, i1 false>,
+ <4 x i16> %lhs, <4 x i16> %rhs
+ ret <4 x i16> %A
+}
+
+define <4 x i16> @test13e(<4 x i16> %lhs, <4 x i16> %rhs) {
+; CHECK: @test13e
+; CHECK-NEXT: ret <4 x i16> %rhs
+ %A = select <4 x i1> <i1 false, i1 false, i1 false, i1 false>,
+ <4 x i16> %lhs, <4 x i16> %rhs
+ ret <4 x i16> %A
+}
diff --git a/test/Transforms/InstCombine/vector_gep2.ll b/test/Transforms/InstCombine/vector_gep2.ll
new file mode 100644
index 000000000000..20165b110016
--- /dev/null
+++ b/test/Transforms/InstCombine/vector_gep2.ll
@@ -0,0 +1,11 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define <2 x i8*> @testa(<2 x i8*> %a) {
+; CHECK: @testa
+ %g = getelementptr <2 x i8*> %a, <2 x i32> <i32 0, i32 1>
+; CHECK: getelementptr <2 x i8*> %a, <2 x i64> <i64 0, i64 1>
+ ret <2 x i8*> %g
+}
diff --git a/test/Transforms/InstCombine/weak-symbols.ll b/test/Transforms/InstCombine/weak-symbols.ll
new file mode 100644
index 000000000000..0039b5962f74
--- /dev/null
+++ b/test/Transforms/InstCombine/weak-symbols.ll
@@ -0,0 +1,33 @@
+; PR4738 - Test that the library call simplifier doesn't assume anything about
+; weak symbols.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+@real_init = weak_odr constant [2 x i8] c"y\00"
+@fake_init = weak constant [2 x i8] c"y\00"
+@.str = private constant [2 x i8] c"y\00"
+
+define i32 @foo() nounwind {
+; CHECK: define i32 @foo
+; CHECK: call i32 @strcmp
+; CHECK: ret i32 %temp1
+
+entry:
+ %str1 = getelementptr inbounds [2 x i8]* @fake_init, i64 0, i64 0
+ %str2 = getelementptr inbounds [2 x i8]* @.str, i64 0, i64 0
+ %temp1 = call i32 @strcmp(i8* %str1, i8* %str2) nounwind readonly
+ ret i32 %temp1
+}
+
+define i32 @bar() nounwind {
+; CHECK: define i32 @bar
+; CHECK: ret i32 0
+
+entry:
+ %str1 = getelementptr inbounds [2 x i8]* @real_init, i64 0, i64 0
+ %str2 = getelementptr inbounds [2 x i8]* @.str, i64 0, i64 0
+ %temp1 = call i32 @strcmp(i8* %str1, i8* %str2) nounwind readonly
+ ret i32 %temp1
+}
+
+declare i32 @strcmp(i8*, i8*) nounwind readonly
diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll
index ced74bd4be9b..ce2bb799c813 100644
--- a/test/Transforms/InstSimplify/compare.ll
+++ b/test/Transforms/InstSimplify/compare.ll
@@ -266,6 +266,15 @@ define i1 @add5(i32 %x, i32 %y) {
; CHECK: ret i1 true
}
+define i1 @add6(i64 %A, i64 %B) {
+; CHECK: @add6
+ %s1 = add i64 %A, %B
+ %s2 = add i64 %B, %A
+ %cmp = icmp eq i64 %s1, %s2
+ ret i1 %cmp
+; CHECK: ret i1 true
+}
+
define i1 @addpowtwo(i32 %x, i32 %y) {
; CHECK: @addpowtwo
%l = lshr i32 %x, 1
diff --git a/test/Transforms/Internalize/2008-05-09-AllButMain.ll b/test/Transforms/Internalize/2008-05-09-AllButMain.ll
index a85e834582d7..c07abb0c6365 100644
--- a/test/Transforms/Internalize/2008-05-09-AllButMain.ll
+++ b/test/Transforms/Internalize/2008-05-09-AllButMain.ll
@@ -1,27 +1,55 @@
-; No arguments means internalize all but main
-; RUN: opt < %s -internalize -S | grep internal | count 4
+; No arguments means internalize everything
+; RUN: opt < %s -internalize -S | FileCheck --check-prefix=NOARGS %s
+
; Internalize all but foo and j
-; RUN: opt < %s -internalize -internalize-public-api-list foo -internalize-public-api-list j -S | grep internal | count 3
-; Non existent files should be treated as if they were empty (so internalize all but main)
-; RUN: opt < %s -internalize -internalize-public-api-file /nonexistent/file 2> /dev/null -S | grep internal | count 4
-; RUN: opt < %s -internalize -internalize-public-api-list bar -internalize-public-api-list foo -internalize-public-api-file /nonexistent/file 2> /dev/null -S | grep internal | count 3
+; RUN: opt < %s -internalize -internalize-public-api-list foo -internalize-public-api-list j -S | FileCheck --check-prefix=LIST %s
+
+; Non existent files should be treated as if they were empty (so internalize
+; everything)
+; RUN: opt < %s -internalize -internalize-public-api-file /nonexistent/file 2> /dev/null -S | FileCheck --check-prefix=EMPTYFILE %s
+
+; RUN: opt < %s -S -internalize -internalize-public-api-list bar -internalize-public-api-list foo -internalize-public-api-file /nonexistent/file 2> /dev/null | FileCheck --check-prefix=LIST2 %s
+
; -file and -list options should be merged, the .apifile contains foo and j
-; RUN: opt < %s -internalize -internalize-public-api-list bar -internalize-public-api-file %s.apifile -S | grep internal | count 2
+; RUN: opt < %s -internalize -internalize-public-api-list bar -internalize-public-api-file %s.apifile -S | FileCheck --check-prefix=MERGE %s
+
+; NOARGS: @i = internal global
+; LIST: @i = internal global
+; EMPTYFILE: @i = internal global
+; LIST2: @i = internal global
+; MERGE: @i = internal global
+@i = global i32 0
-@i = weak global i32 0 ; <i32*> [#uses=0]
-@j = weak global i32 0 ; <i32*> [#uses=0]
+; NOARGS: @j = internal global
+; LIST: @j = global
+; EMPTYFILE: @j = internal global
+; LIST2: @j = internal global
+; MERGE: @j = global
+@j = global i32 0
-define void @main(...) {
-entry:
+; NOARGS: define internal void @main
+; LIST: define internal void @main
+; EMPTYFILE: define internal void @main
+; LIST2: define internal void @main
+; MERGE: define internal void @main
+define void @main() {
ret void
}
-define void @foo(...) {
-entry:
+; NOARGS: define internal void @foo
+; LIST: define void @foo
+; EMPTYFILE: define internal void @foo
+; LIST2: define void @foo
+; MERGE: define void @foo
+define void @foo() {
ret void
}
-define void @bar(...) {
-entry:
+; NOARGS: define internal void @bar
+; LIST: define internal void @bar
+; EMPTYFILE: define internal void @bar
+; LIST2: define void @bar
+; MERGE: define void @bar
+define void @bar() {
ret void
}
diff --git a/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll b/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll
index 7b18a04e1160..47cf3f0373e4 100644
--- a/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll
+++ b/test/Transforms/Internalize/2009-01-05-InternalizeAliases.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -internalize -S | grep internal | count 3
+; RUN: opt < %s -internalize -internalize-public-api-list main -S | grep internal | count 3
@A = global i32 0
@B = alias i32* @A
diff --git a/test/Transforms/JumpThreading/crash.ll b/test/Transforms/JumpThreading/crash.ll
index b9c03544db81..2fe87464c117 100644
--- a/test/Transforms/JumpThreading/crash.ll
+++ b/test/Transforms/JumpThreading/crash.ll
@@ -511,3 +511,56 @@ lbl_260: ; preds = %for.cond, %entry
if.end: ; preds = %for.cond
ret void
}
+
+define void @PR14233(i1 %cmp, i1 %cmp2, i1 %cmp3, i1 %cmp4) {
+entry:
+ br i1 %cmp, label %cond.true, label %cond.false
+
+cond.true:
+ br label %if.end
+
+cond.false:
+ br label %if.end
+
+if.end:
+ %A = phi i64 [ 0, %cond.true ], [ 1, %cond.false ]
+ br i1 %cmp2, label %bb, label %if.end2
+
+bb:
+ br label %if.end2
+
+if.end2:
+ %B = phi i64 [ ptrtoint (i8* ()* @PR14233.f1 to i64), %bb ], [ %A, %if.end ]
+ %cmp.ptr = icmp eq i64 %B, ptrtoint (i8* ()* @PR14233.f2 to i64)
+ br i1 %cmp.ptr, label %cond.true2, label %if.end3
+
+cond.true2:
+ br i1 %cmp3, label %bb2, label %ur
+
+bb2:
+ br i1 %cmp4, label %if.end4, label %if.end3
+
+if.end4:
+ unreachable
+
+if.end3:
+ %cmp.ptr2 = icmp eq i64 %B, ptrtoint (i8* ()* @PR14233.f2 to i64)
+ br i1 %cmp.ptr2, label %ur, label %if.then601
+
+if.then601:
+ %C = icmp eq i64 %B, 0
+ br i1 %C, label %bb3, label %bb4
+
+bb3:
+ unreachable
+
+bb4:
+ unreachable
+
+ur:
+ unreachable
+}
+
+declare i8* @PR14233.f1()
+
+declare i8* @PR14233.f2()
diff --git a/test/Transforms/JumpThreading/select.ll b/test/Transforms/JumpThreading/select.ll
index 8a81857736a7..9676efec9df2 100644
--- a/test/Transforms/JumpThreading/select.ll
+++ b/test/Transforms/JumpThreading/select.ll
@@ -121,3 +121,39 @@ L4:
call void @quux()
br label %L0
}
+
+; Make sure the edge value of %0 from entry to L2 includes 0 and L3 is
+; reachable.
+; CHECK: test_switch_default
+; CHECK: entry:
+; CHECK: load
+; CHECK: switch
+; CHECK: [[THREADED:[A-Za-z.0-9]+]]:
+; CHECK: store
+; CHECK: br
+; CHECK: L2:
+; CHECK: icmp
+define void @test_switch_default(i32* nocapture %status) nounwind {
+entry:
+ %0 = load i32* %status, align 4
+ switch i32 %0, label %L2 [
+ i32 5061, label %L1
+ i32 0, label %L2
+ ]
+
+L1:
+ store i32 10025, i32* %status, align 4
+ br label %L2
+
+L2:
+ %1 = load i32* %status, align 4
+ %cmp57.i = icmp eq i32 %1, 0
+ br i1 %cmp57.i, label %L3, label %L4
+
+L3:
+ store i32 10000, i32* %status, align 4
+ br label %L4
+
+L4:
+ ret void
+}
diff --git a/test/Transforms/LICM/2003-12-11-SinkingToPHI.ll b/test/Transforms/LICM/2003-12-11-SinkingToPHI.ll
index 67c3951d74e4..fe8d44531322 100644
--- a/test/Transforms/LICM/2003-12-11-SinkingToPHI.ll
+++ b/test/Transforms/LICM/2003-12-11-SinkingToPHI.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -licm | lli
+; RUN: opt < %s -licm | lli %defaultjit
define i32 @main() {
entry:
diff --git a/test/Transforms/LICM/hoisting.ll b/test/Transforms/LICM/hoisting.ll
index 6f28d53af66e..98f93345e3c3 100644
--- a/test/Transforms/LICM/hoisting.ll
+++ b/test/Transforms/LICM/hoisting.ll
@@ -29,7 +29,7 @@ Out: ; preds = %LoopTail
}
-declare void @foo2(i32)
+declare void @foo2(i32) nounwind
;; It is ok and desirable to hoist this potentially trapping instruction.
@@ -64,3 +64,29 @@ Out: ; preds = %Loop
%C = sub i32 %A, %B ; <i32> [#uses=1]
ret i32 %C
}
+
+; CHECK: @test4
+; CHECK: call
+; CHECK: sdiv
+; CHECK: ret
+define i32 @test4(i32 %x, i32 %y) nounwind uwtable ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %n.01 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ call void @foo_may_call_exit(i32 0)
+ %div = sdiv i32 %x, %y
+ %add = add nsw i32 %n.01, %div
+ %inc = add nsw i32 %i.02, 1
+ %cmp = icmp slt i32 %inc, 10000
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ %n.0.lcssa = phi i32 [ %add, %for.body ]
+ ret i32 %n.0.lcssa
+}
+
+declare void @foo_may_call_exit(i32)
+
diff --git a/test/Transforms/LoopIdiom/basic.ll b/test/Transforms/LoopIdiom/basic.ll
index 46ab7e5542b6..06a5bd90864d 100644
--- a/test/Transforms/LoopIdiom/basic.ll
+++ b/test/Transforms/LoopIdiom/basic.ll
@@ -383,4 +383,37 @@ for.end: ; preds = %for.inc
}
+define void @PR14241(i32* %s, i64 %size) {
+; Ensure that we don't form a memcpy for strided loops. Briefly, when we taught
+; LoopIdiom about memmove and strided loops, this got miscompiled into a memcpy
+; instead of a memmove. If we get the memmove transform back, this will catch
+; regressions.
+;
+; CHECK: @PR14241
+entry:
+ %end.idx = add i64 %size, -1
+ %end.ptr = getelementptr inbounds i32* %s, i64 %end.idx
+ br label %while.body
+; CHECK-NOT: memcpy
+;
+; FIXME: When we regain the ability to form a memmove here, this test should be
+; reversed and turned into a positive assertion.
+; CHECK-NOT: memmove
+
+while.body:
+ %phi.ptr = phi i32* [ %s, %entry ], [ %next.ptr, %while.body ]
+ %src.ptr = getelementptr inbounds i32* %phi.ptr, i64 1
+ %val = load i32* %src.ptr, align 4
+; CHECK: load
+ %dst.ptr = getelementptr inbounds i32* %phi.ptr, i64 0
+ store i32 %val, i32* %dst.ptr, align 4
+; CHECK: store
+ %next.ptr = getelementptr inbounds i32* %phi.ptr, i64 1
+ %cmp = icmp eq i32* %next.ptr, %end.ptr
+ br i1 %cmp, label %exit, label %while.body
+
+exit:
+ ret void
+; CHECK: ret void
+}
diff --git a/test/Transforms/LoopIdiom/crash.ll b/test/Transforms/LoopIdiom/crash.ll
new file mode 100644
index 000000000000..969adbcd7635
--- /dev/null
+++ b/test/Transforms/LoopIdiom/crash.ll
@@ -0,0 +1,25 @@
+; RUN: opt -basicaa -loop-idiom -S < %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; Don't crash inside DependenceAnalysis
+; PR14219
+define void @test1(i64* %iwork, i64 %x) {
+bb0:
+ %mul116 = mul nsw i64 %x, %x
+ %incdec.ptr6.sum175 = add i64 42, %x
+ %arrayidx135 = getelementptr inbounds i64* %iwork, i64 %incdec.ptr6.sum175
+ br label %bb1
+bb1:
+ %storemerge4226 = phi i64 [ 0, %bb0 ], [ %inc139, %bb1 ]
+ store i64 1, i64* %arrayidx135, align 8
+ %incdec.ptr6.sum176 = add i64 %mul116, %storemerge4226
+ %arrayidx137 = getelementptr inbounds i64* %iwork, i64 %incdec.ptr6.sum176
+ store i64 1, i64* %arrayidx137, align 8
+ %inc139 = add nsw i64 %storemerge4226, 1
+ %cmp131 = icmp sgt i64 %storemerge4226, 42
+ br i1 %cmp131, label %bb2, label %bb1
+bb2:
+ ret void
+}
+
diff --git a/test/Transforms/LoopIdiom/non-canonical-loop.ll b/test/Transforms/LoopIdiom/non-canonical-loop.ll
new file mode 100644
index 000000000000..a6a4f9227f9a
--- /dev/null
+++ b/test/Transforms/LoopIdiom/non-canonical-loop.ll
@@ -0,0 +1,34 @@
+; RUN: opt -S -loop-idiom < %s
+; Don't crash
+; PR13892
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @test(i32* %currMB) nounwind uwtable {
+entry:
+ br i1 undef, label %start.exit, label %if.then.i
+
+if.then.i: ; preds = %entry
+ unreachable
+
+start.exit: ; preds = %entry
+ indirectbr i8* undef, [label %0, label %for.bodyprime]
+
+; <label>:0 ; preds = %start.exit
+ unreachable
+
+for.bodyprime: ; preds = %for.bodyprime, %start.exit
+ %i.057375 = phi i32 [ 0, %start.exit ], [ %1, %for.bodyprime ]
+ %arrayidx8prime = getelementptr inbounds i32* %currMB, i32 %i.057375
+ store i32 0, i32* %arrayidx8prime, align 4
+ %1 = add i32 %i.057375, 1
+ %cmp5prime = icmp slt i32 %1, 4
+ br i1 %cmp5prime, label %for.bodyprime, label %for.endprime
+
+for.endprime: ; preds = %for.bodyprime
+ br label %for.body23prime
+
+for.body23prime: ; preds = %for.body23prime, %for.endprime
+ br label %for.body23prime
+}
diff --git a/test/Transforms/LoopIdiom/scev-invalidation.ll b/test/Transforms/LoopIdiom/scev-invalidation.ll
new file mode 100644
index 000000000000..a244d9a280b9
--- /dev/null
+++ b/test/Transforms/LoopIdiom/scev-invalidation.ll
@@ -0,0 +1,74 @@
+; RUN: opt -S -indvars -loop-idiom < %s
+; PR14214
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @quote_arg() nounwind {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %backslashes.0 = phi i32 [ undef, %entry ], [ %backslashes.2, %for.inc ]
+ %p.0 = phi i8* [ undef, %entry ], [ %incdec.ptr3, %for.inc ]
+ %q.0 = phi i8* [ undef, %entry ], [ %q.2, %for.inc ]
+ %0 = load i8* %p.0, align 1
+ switch i8 %0, label %while.cond.preheader [
+ i8 0, label %for.cond4.preheader
+ i8 92, label %for.inc
+ ]
+
+while.cond.preheader: ; preds = %for.cond
+ %tobool210 = icmp eq i32 %backslashes.0, 0
+ br i1 %tobool210, label %for.inc.loopexit, label %while.body.lr.ph
+
+while.body.lr.ph: ; preds = %while.cond.preheader
+ %1 = add i32 %backslashes.0, -1
+ %2 = zext i32 %1 to i64
+ br label %while.body
+
+for.cond4.preheader: ; preds = %for.cond
+ %tobool57 = icmp eq i32 %backslashes.0, 0
+ br i1 %tobool57, label %for.end10, label %for.body6.lr.ph
+
+for.body6.lr.ph: ; preds = %for.cond4.preheader
+ br label %for.body6
+
+while.body: ; preds = %while.body.lr.ph, %while.body
+ %q.112 = phi i8* [ %q.0, %while.body.lr.ph ], [ %incdec.ptr, %while.body ]
+ %backslashes.111 = phi i32 [ %backslashes.0, %while.body.lr.ph ], [ %dec, %while.body ]
+ %incdec.ptr = getelementptr inbounds i8* %q.112, i64 1
+ store i8 92, i8* %incdec.ptr, align 1
+ %dec = add nsw i32 %backslashes.111, -1
+ %tobool2 = icmp eq i32 %dec, 0
+ br i1 %tobool2, label %while.cond.for.inc.loopexit_crit_edge, label %while.body
+
+while.cond.for.inc.loopexit_crit_edge: ; preds = %while.body
+ %scevgep.sum = add i64 %2, 1
+ %scevgep13 = getelementptr i8* %q.0, i64 %scevgep.sum
+ br label %for.inc.loopexit
+
+for.inc.loopexit: ; preds = %while.cond.for.inc.loopexit_crit_edge, %while.cond.preheader
+ %q.1.lcssa = phi i8* [ %scevgep13, %while.cond.for.inc.loopexit_crit_edge ], [ %q.0, %while.cond.preheader ]
+ br label %for.inc
+
+for.inc: ; preds = %for.inc.loopexit, %for.cond
+ %backslashes.2 = phi i32 [ %backslashes.0, %for.cond ], [ 0, %for.inc.loopexit ]
+ %q.2 = phi i8* [ %q.0, %for.cond ], [ %q.1.lcssa, %for.inc.loopexit ]
+ %incdec.ptr3 = getelementptr inbounds i8* %p.0, i64 1
+ br label %for.cond
+
+for.body6: ; preds = %for.body6.lr.ph, %for.body6
+ %q.39 = phi i8* [ %q.0, %for.body6.lr.ph ], [ %incdec.ptr7, %for.body6 ]
+ %backslashes.38 = phi i32 [ %backslashes.0, %for.body6.lr.ph ], [ %dec9, %for.body6 ]
+ %incdec.ptr7 = getelementptr inbounds i8* %q.39, i64 1
+ store i8 92, i8* %incdec.ptr7, align 1
+ %dec9 = add nsw i32 %backslashes.38, -1
+ %tobool5 = icmp eq i32 %dec9, 0
+ br i1 %tobool5, label %for.cond4.for.end10_crit_edge, label %for.body6
+
+for.cond4.for.end10_crit_edge: ; preds = %for.body6
+ br label %for.end10
+
+for.end10: ; preds = %for.cond4.for.end10_crit_edge, %for.cond4.preheader
+ ret i32 undef
+}
diff --git a/test/Transforms/LoopRotate/multiple-exits.ll b/test/Transforms/LoopRotate/multiple-exits.ll
new file mode 100644
index 000000000000..675d71f60da4
--- /dev/null
+++ b/test/Transforms/LoopRotate/multiple-exits.ll
@@ -0,0 +1,236 @@
+; RUN: opt -S -loop-rotate < %s -verify-loop-info -verify-dom-info | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+; PR7447
+define i32 @test1([100 x i32]* nocapture %a) nounwind readonly {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.cond1, %entry
+ %sum.0 = phi i32 [ 0, %entry ], [ %sum.1, %for.cond1 ]
+ %i.0 = phi i1 [ true, %entry ], [ false, %for.cond1 ]
+ br i1 %i.0, label %for.cond1, label %return
+
+for.cond1: ; preds = %for.cond, %land.rhs
+ %sum.1 = phi i32 [ %add, %land.rhs ], [ %sum.0, %for.cond ]
+ %i.1 = phi i32 [ %inc, %land.rhs ], [ 0, %for.cond ]
+ %cmp2 = icmp ult i32 %i.1, 100
+ br i1 %cmp2, label %land.rhs, label %for.cond
+
+land.rhs: ; preds = %for.cond1
+ %conv = zext i32 %i.1 to i64
+ %arrayidx = getelementptr inbounds [100 x i32]* %a, i64 0, i64 %conv
+ %0 = load i32* %arrayidx, align 4
+ %add = add i32 %0, %sum.1
+ %cmp4 = icmp ugt i32 %add, 1000
+ %inc = add i32 %i.1, 1
+ br i1 %cmp4, label %return, label %for.cond1
+
+return: ; preds = %for.cond, %land.rhs
+ %retval.0 = phi i32 [ 1000, %land.rhs ], [ %sum.0, %for.cond ]
+ ret i32 %retval.0
+
+; CHECK: @test1
+; CHECK: for.cond1.preheader:
+; CHECK: %sum.04 = phi i32 [ 0, %entry ], [ %sum.1.lcssa, %for.cond.loopexit ]
+; CHECK: br label %for.cond1
+
+; CHECK: for.cond1:
+; CHECK: %sum.1 = phi i32 [ %add, %land.rhs ], [ %sum.04, %for.cond1.preheader ]
+; CHECK: %i.1 = phi i32 [ %inc, %land.rhs ], [ 0, %for.cond1.preheader ]
+; CHECK: %cmp2 = icmp ult i32 %i.1, 100
+; CHECK: br i1 %cmp2, label %land.rhs, label %for.cond.loopexit
+}
+
+define void @test2(i32 %x) nounwind {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %if.end, %entry
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %if.end ]
+ %cmp = icmp eq i32 %i.0, %x
+ br i1 %cmp, label %return.loopexit, label %for.body
+
+for.body: ; preds = %for.cond
+ %call = tail call i32 @foo(i32 %i.0) nounwind
+ %tobool = icmp eq i32 %call, 0
+ br i1 %tobool, label %if.end, label %a
+
+if.end: ; preds = %for.body
+ %call1 = tail call i32 @foo(i32 42) nounwind
+ %inc = add i32 %i.0, 1
+ br label %for.cond
+
+a: ; preds = %for.body
+ %call2 = tail call i32 @bar(i32 1) nounwind
+ br label %return
+
+return.loopexit: ; preds = %for.cond
+ br label %return
+
+return: ; preds = %return.loopexit, %a
+ ret void
+
+; CHECK: @test2
+; CHECK: if.end:
+; CHECK: %inc = add i32 %i.02, 1
+; CHECK: %cmp = icmp eq i32 %inc, %x
+; CHECK: br i1 %cmp, label %for.cond.return.loopexit_crit_edge, label %for.body
+}
+
+declare i32 @foo(i32)
+
+declare i32 @bar(i32)
+
+@_ZTIi = external constant i8*
+
+; Verify dominators.
+define void @test3(i32 %x) {
+entry:
+ %cmp2 = icmp eq i32 0, %x
+ br i1 %cmp2, label %try.cont.loopexit, label %for.body.lr.ph
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.inc
+ %i.03 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.inc ]
+ invoke void @_Z3fooi(i32 %i.03)
+ to label %for.inc unwind label %lpad
+
+for.inc: ; preds = %for.body
+ %inc = add i32 %i.03, 1
+ %cmp = icmp eq i32 %inc, %x
+ br i1 %cmp, label %for.cond.try.cont.loopexit_crit_edge, label %for.body
+
+lpad: ; preds = %for.body
+ %0 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* bitcast (i8** @_ZTIi to i8*)
+ %1 = extractvalue { i8*, i32 } %0, 0
+ %2 = extractvalue { i8*, i32 } %0, 1
+ %3 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*)) nounwind
+ %matches = icmp eq i32 %2, %3
+ br i1 %matches, label %catch, label %eh.resume
+
+catch: ; preds = %lpad
+ %4 = tail call i8* @__cxa_begin_catch(i8* %1) nounwind
+ br i1 true, label %invoke.cont2.loopexit, label %for.body.i.lr.ph
+
+for.body.i.lr.ph: ; preds = %catch
+ br label %for.body.i
+
+for.body.i: ; preds = %for.body.i.lr.ph, %for.inc.i
+ %i.0.i1 = phi i32 [ 0, %for.body.i.lr.ph ], [ %inc.i, %for.inc.i ]
+ invoke void @_Z3fooi(i32 %i.0.i1)
+ to label %for.inc.i unwind label %lpad.i
+
+for.inc.i: ; preds = %for.body.i
+ %inc.i = add i32 %i.0.i1, 1
+ %cmp.i = icmp eq i32 %inc.i, 0
+ br i1 %cmp.i, label %for.cond.i.invoke.cont2.loopexit_crit_edge, label %for.body.i
+
+lpad.i: ; preds = %for.body.i
+ %5 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ catch i8* bitcast (i8** @_ZTIi to i8*)
+ %6 = extractvalue { i8*, i32 } %5, 0
+ %7 = extractvalue { i8*, i32 } %5, 1
+ %matches.i = icmp eq i32 %7, %3
+ br i1 %matches.i, label %catch.i, label %lpad1.body
+
+catch.i: ; preds = %lpad.i
+ %8 = tail call i8* @__cxa_begin_catch(i8* %6) nounwind
+ invoke void @test3(i32 0)
+ to label %invoke.cont2.i unwind label %lpad1.i
+
+invoke.cont2.i: ; preds = %catch.i
+ tail call void @__cxa_end_catch() nounwind
+ br label %invoke.cont2
+
+lpad1.i: ; preds = %catch.i
+ %9 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ %10 = extractvalue { i8*, i32 } %9, 0
+ %11 = extractvalue { i8*, i32 } %9, 1
+ tail call void @__cxa_end_catch() nounwind
+ br label %lpad1.body
+
+for.cond.i.invoke.cont2.loopexit_crit_edge: ; preds = %for.inc.i
+ br label %invoke.cont2.loopexit
+
+invoke.cont2.loopexit: ; preds = %for.cond.i.invoke.cont2.loopexit_crit_edge, %catch
+ br label %invoke.cont2
+
+invoke.cont2: ; preds = %invoke.cont2.loopexit, %invoke.cont2.i
+ tail call void @__cxa_end_catch() nounwind
+ br label %try.cont
+
+for.cond.try.cont.loopexit_crit_edge: ; preds = %for.inc
+ br label %try.cont.loopexit
+
+try.cont.loopexit: ; preds = %for.cond.try.cont.loopexit_crit_edge, %entry
+ br label %try.cont
+
+try.cont: ; preds = %try.cont.loopexit, %invoke.cont2
+ ret void
+
+lpad1.body: ; preds = %lpad1.i, %lpad.i
+ %exn.slot.0.i = phi i8* [ %10, %lpad1.i ], [ %6, %lpad.i ]
+ %ehselector.slot.0.i = phi i32 [ %11, %lpad1.i ], [ %7, %lpad.i ]
+ tail call void @__cxa_end_catch() nounwind
+ br label %eh.resume
+
+eh.resume: ; preds = %lpad1.body, %lpad
+ %exn.slot.0 = phi i8* [ %exn.slot.0.i, %lpad1.body ], [ %1, %lpad ]
+ %ehselector.slot.0 = phi i32 [ %ehselector.slot.0.i, %lpad1.body ], [ %2, %lpad ]
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn.slot.0, 0
+ %lpad.val5 = insertvalue { i8*, i32 } %lpad.val, i32 %ehselector.slot.0, 1
+ resume { i8*, i32 } %lpad.val5
+}
+
+declare void @_Z3fooi(i32)
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i32 @llvm.eh.typeid.for(i8*) nounwind readnone
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
+
+define void @test4() nounwind uwtable {
+entry:
+ br label %"7"
+
+"3": ; preds = %"7"
+ br i1 undef, label %"31", label %"4"
+
+"4": ; preds = %"3"
+ %. = select i1 undef, float 0x3F50624DE0000000, float undef
+ %0 = add i32 %1, 1
+ br label %"7"
+
+"7": ; preds = %"4", %entry
+ %1 = phi i32 [ %0, %"4" ], [ 0, %entry ]
+ %2 = icmp slt i32 %1, 100
+ br i1 %2, label %"3", label %"8"
+
+"8": ; preds = %"7"
+ br i1 undef, label %"9", label %"31"
+
+"9": ; preds = %"8"
+ br label %"33"
+
+"27": ; preds = %"31"
+ unreachable
+
+"31": ; preds = %"8", %"3"
+ br i1 undef, label %"27", label %"32"
+
+"32": ; preds = %"31"
+ br label %"33"
+
+"33": ; preds = %"32", %"9"
+ ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll b/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll
index a6996a81fb07..af3a53708b49 100644
--- a/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll
+++ b/test/Transforms/LoopStrengthReduce/2011-10-03-CritEdgeMerge.ll
@@ -1,15 +1,15 @@
; RUN: opt -loop-reduce -S < %s | FileCheck %s
;
; Test LSR's use of SplitCriticalEdge during phi rewriting.
-; Verify that identical edges are merged. rdar://problem/6453893
target triple = "x86-apple-darwin"
-; CHECK: @test
+; Verify that identical edges are merged. rdar://problem/6453893
+; CHECK: @test1
; CHECK: bb89:
; CHECK: phi i8* [ %lsr.iv.next1, %bbA.bb89_crit_edge ], [ %lsr.iv.next1, %bbB.bb89_crit_edge ]{{$}}
-define i8* @test() {
+define i8* @test1() {
entry:
br label %loop
@@ -41,3 +41,41 @@ bb89:
exit:
ret i8* %tmp75phi
}
+
+; Handle single-predecessor phis: PR13756
+; CHECK: @test2
+; CHECK: bb89:
+; CHECK: phi i8* [ %lsr.iv.next1, %bbA ], [ %lsr.iv.next1, %bbA ], [ %lsr.iv.next1, %bbA ]{{$}}
+define i8* @test2() {
+entry:
+ br label %loop
+
+loop:
+ %rec = phi i32 [ %next, %loop ], [ 0, %entry ]
+ %next = add i32 %rec, 1
+ %tmp75 = getelementptr i8* null, i32 %next
+ br i1 false, label %loop, label %loopexit
+
+loopexit:
+ br i1 false, label %bbA, label %bbB
+
+bbA:
+ switch i32 0, label %bb89 [
+ i32 47, label %bb89
+ i32 58, label %bb89
+ ]
+
+bbB:
+ switch i8 0, label %exit [
+ i8 47, label %exit
+ i8 58, label %exit
+ ]
+
+bb89:
+ %tmp75phi = phi i8* [ %tmp75, %bbA ], [ %tmp75, %bbA ], [ %tmp75, %bbA ]
+ br label %exit
+
+exit:
+ %result = phi i8* [ %tmp75phi, %bb89 ], [ %tmp75, %bbB ], [ %tmp75, %bbB ], [ %tmp75, %bbB ]
+ ret i8* %result
+}
diff --git a/test/Transforms/LoopUnroll/pr11361.ll b/test/Transforms/LoopUnroll/pr11361.ll
index 7ce7f5fe4600..62de2f728d23 100644
--- a/test/Transforms/LoopUnroll/pr11361.ll
+++ b/test/Transforms/LoopUnroll/pr11361.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-unroll -disable-output
+; RUN: opt -loop-unroll -disable-output < %s
; PR11361
; This tests for an iterator invalidation issue.
diff --git a/test/Transforms/LoopUnroll/pr14167.ll b/test/Transforms/LoopUnroll/pr14167.ll
new file mode 100644
index 000000000000..205ae44b72e4
--- /dev/null
+++ b/test/Transforms/LoopUnroll/pr14167.ll
@@ -0,0 +1,44 @@
+; RUN: opt < %s -S -loop-unroll -unroll-runtime | FileCheck %s
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v128:128:128-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+define void @test1() nounwind {
+; Ensure that we don't crash when the trip count == -1.
+; CHECK: @test1
+entry:
+ br label %for.cond2.preheader
+
+for.cond2.preheader: ; preds = %for.end, %entry
+ br i1 false, label %middle.block, label %vector.ph
+
+vector.ph: ; preds = %for.cond2.preheader
+ br label %vector.body
+
+vector.body: ; preds = %vector.body, %vector.ph
+ br i1 undef, label %middle.block.loopexit, label %vector.body
+
+middle.block.loopexit: ; preds = %vector.body
+ br label %middle.block
+
+middle.block: ; preds = %middle.block.loopexit, %for.cond2.preheader
+ br i1 true, label %for.end, label %scalar.preheader
+
+scalar.preheader: ; preds = %middle.block
+ br label %for.body4
+
+for.body4: ; preds = %for.body4, %scalar.preheader
+ %indvars.iv = phi i64 [ 16000, %scalar.preheader ], [ %indvars.iv.next, %for.body4 ]
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, 16000
+ br i1 %exitcond, label %for.body4, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body4
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %middle.block
+ br i1 undef, label %for.cond2.preheader, label %for.end15
+
+for.end15: ; preds = %for.end
+ ret void
+}
diff --git a/test/Transforms/LoopUnswitch/2011-06-02-CritSwitch.ll b/test/Transforms/LoopUnswitch/2011-06-02-CritSwitch.ll
index 61c54ddb156b..609520064a7a 100644
--- a/test/Transforms/LoopUnswitch/2011-06-02-CritSwitch.ll
+++ b/test/Transforms/LoopUnswitch/2011-06-02-CritSwitch.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-unswitch -disable-output
+; RUN: opt -loop-unswitch -disable-output < %s
; PR10031
define i32 @test(i32 %command) {
diff --git a/test/Transforms/LoopVectorize/2012-10-20-infloop.ll b/test/Transforms/LoopVectorize/2012-10-20-infloop.ll
new file mode 100644
index 000000000000..0176c9a18966
--- /dev/null
+++ b/test/Transforms/LoopVectorize/2012-10-20-infloop.ll
@@ -0,0 +1,27 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce
+
+; Check that we don't fall into an infinite loop.
+define void @test() nounwind {
+entry:
+ br label %for.body
+
+for.body:
+ %0 = phi i32 [ 1, %entry ], [ 0, %for.body ]
+ br label %for.body
+}
+
+
+
+define void @test2() nounwind {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv47 = phi i64 [ 0, %entry ], [ %indvars.iv.next48, %for.body ]
+ %0 = phi i32 [ 1, %entry ], [ 0, %for.body ]
+ %indvars.iv.next48 = add i64 %indvars.iv47, 1
+ br i1 undef, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ unreachable
+}
diff --git a/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll b/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll
new file mode 100644
index 000000000000..2516e248bc96
--- /dev/null
+++ b/test/Transforms/LoopVectorize/2012-10-22-isconsec.ll
@@ -0,0 +1,57 @@
+; RUN: opt < %s -loop-vectorize -dce -force-vector-width=4
+
+; Check that we don't crash.
+
+target datalayout = "e-p:64:64:64-S128-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f16:16:16-f32:32:32-f64:64:64-f128:128:128-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+module asm "\09.ident\09\22GCC: (GNU) 4.6.3 LLVM: 3.2svn\22"
+
+@b = common global [32000 x float] zeroinitializer, align 16
+
+define i32 @set1ds(i32 %_n, float* nocapture %arr, float %value, i32 %stride) nounwind uwtable {
+entry:
+ %0 = icmp sgt i32 %_n, 0
+ br i1 %0, label %"3.lr.ph", label %"5"
+
+"3.lr.ph": ; preds = %entry
+ %1 = bitcast float* %arr to i8*
+ %2 = sext i32 %stride to i64
+ br label %"3"
+
+"3": ; preds = %"3.lr.ph", %"3"
+ %indvars.iv = phi i64 [ 0, %"3.lr.ph" ], [ %indvars.iv.next, %"3" ]
+ %3 = shl nsw i64 %indvars.iv, 2
+ %4 = getelementptr inbounds i8* %1, i64 %3
+ %5 = bitcast i8* %4 to float*
+ store float %value, float* %5, align 4, !tbaa !0
+ %indvars.iv.next = add i64 %indvars.iv, %2
+ %6 = trunc i64 %indvars.iv.next to i32
+ %7 = icmp slt i32 %6, %_n
+ br i1 %7, label %"3", label %"5"
+
+"5": ; preds = %"3", %entry
+ ret i32 0
+}
+
+define i32 @init(i8* nocapture %name) unnamed_addr nounwind uwtable {
+entry:
+ br label %"3"
+
+"3": ; preds = %"3", %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %"3" ]
+ %0 = shl nsw i64 %indvars.iv, 2
+ %1 = getelementptr inbounds i8* bitcast (float* getelementptr inbounds ([32000 x float]* @b, i64 0, i64 16000) to i8*), i64 %0
+ %2 = bitcast i8* %1 to float*
+ store float -1.000000e+00, float* %2, align 4, !tbaa !0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 16000
+ br i1 %exitcond, label %"5", label %"3"
+
+"5": ; preds = %"3"
+ ret i32 0
+}
+
+!0 = metadata !{metadata !"alias set 7: float", metadata !1}
+!1 = metadata !{metadata !1}
diff --git a/test/Transforms/LoopVectorize/X86/avx1.ll b/test/Transforms/LoopVectorize/X86/avx1.ll
new file mode 100644
index 000000000000..a2d176a534c9
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/avx1.ll
@@ -0,0 +1,49 @@
+; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: @read_mod_write_single_ptr
+;CHECK: load <8 x float>
+;CHECK: ret i32
+define i32 @read_mod_write_single_ptr(float* nocapture %a, i32 %n) nounwind uwtable ssp {
+ %1 = icmp sgt i32 %n, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+ %2 = getelementptr inbounds float* %a, i64 %indvars.iv
+ %3 = load float* %2, align 4
+ %4 = fmul float %3, 3.000000e+00
+ store float %4, float* %2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ ret i32 undef
+}
+
+
+;CHECK: @read_mod_i64
+;CHECK: load <8 x i64>
+;CHECK: ret i32
+define i32 @read_mod_i64(i64* nocapture %a, i32 %n) nounwind uwtable ssp {
+ %1 = icmp sgt i32 %n, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+ %2 = getelementptr inbounds i64* %a, i64 %indvars.iv
+ %3 = load i64* %2, align 4
+ %4 = mul i64 %3, 3
+ store i64 %4, i64* %2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ ret i32 undef
+}
diff --git a/test/Transforms/LoopVectorize/X86/conversion-cost.ll b/test/Transforms/LoopVectorize/X86/conversion-cost.ll
new file mode 100644
index 000000000000..8f1bb545fa01
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/conversion-cost.ll
@@ -0,0 +1,48 @@
+; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: @conversion_cost1
+;CHECK: store <2 x i8>
+;CHECK: ret
+define i32 @conversion_cost1(i32 %n, i8* nocapture %A, float* nocapture %B) nounwind uwtable ssp {
+ %1 = icmp sgt i32 %n, 3
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 3, %0 ]
+ %2 = trunc i64 %indvars.iv to i8
+ %3 = getelementptr inbounds i8* %A, i64 %indvars.iv
+ store i8 %2, i8* %3, align 1
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ ret i32 undef
+}
+
+;CHECK: @conversion_cost2
+;CHECK: <2 x float>
+;CHECK: ret
+define i32 @conversion_cost2(i32 %n, i8* nocapture %A, float* nocapture %B) nounwind uwtable ssp {
+ %1 = icmp sgt i32 %n, 9
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 9, %0 ]
+ %2 = add nsw i64 %indvars.iv, 3
+ %3 = trunc i64 %2 to i32
+ %4 = sitofp i32 %3 to float
+ %5 = getelementptr inbounds float* %B, i64 %indvars.iv
+ store float %4, float* %5, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ ret i32 undef
+}
diff --git a/test/Transforms/LoopVectorize/X86/cost-model.ll b/test/Transforms/LoopVectorize/X86/cost-model.ll
new file mode 100644
index 000000000000..628f9912c8c9
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/cost-model.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@c = common global [2048 x i32] zeroinitializer, align 16
+@b = common global [2048 x i32] zeroinitializer, align 16
+@d = common global [2048 x i32] zeroinitializer, align 16
+@a = common global [2048 x i32] zeroinitializer, align 16
+
+;CHECK: cost_model_1
+;CHECK: <4 x i32>
+;CHECK: ret void
+define void @cost_model_1() nounwind uwtable noinline ssp {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %0 = shl nsw i64 %indvars.iv, 1
+ %arrayidx = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %0
+ %1 = load i32* %arrayidx, align 8
+ %idxprom1 = sext i32 %1 to i64
+ %arrayidx2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %idxprom1
+ %2 = load i32* %arrayidx2, align 4
+ %arrayidx4 = getelementptr inbounds [2048 x i32]* @d, i64 0, i64 %indvars.iv
+ %3 = load i32* %arrayidx4, align 4
+ %idxprom5 = sext i32 %3 to i64
+ %arrayidx6 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %idxprom5
+ store i32 %2, i32* %arrayidx6, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 256
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/gcc-examples.ll b/test/Transforms/LoopVectorize/X86/gcc-examples.ll
new file mode 100644
index 000000000000..574c529834ac
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/gcc-examples.ll
@@ -0,0 +1,62 @@
+; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -dce -instcombine -licm -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@b = common global [2048 x i32] zeroinitializer, align 16
+@c = common global [2048 x i32] zeroinitializer, align 16
+@a = common global [2048 x i32] zeroinitializer, align 16
+
+; Select VF = 8;
+;CHECK: @example1
+;CHECK: load <8 x i32>
+;CHECK: add nsw <8 x i32>
+;CHECK: store <8 x i32>
+;CHECK: ret void
+define void @example1() nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
+ %5 = load i32* %4, align 4
+ %6 = add nsw i32 %5, %3
+ %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ store i32 %6, i32* %7, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 256
+ br i1 %exitcond, label %8, label %1
+
+; <label>:8 ; preds = %1
+ ret void
+}
+
+
+; Select VF=4 because sext <8 x i1> to <8 x i32> is expensive.
+;CHECK: @example10b
+;CHECK: load <4 x i16>
+;CHECK: sext <4 x i16>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example10b(i16* noalias nocapture %sa, i16* noalias nocapture %sb, i16* noalias nocapture %sc, i32* noalias nocapture %ia, i32* noalias nocapture %ib, i32* noalias nocapture %ic) nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds i16* %sb, i64 %indvars.iv
+ %3 = load i16* %2, align 2
+ %4 = sext i16 %3 to i32
+ %5 = getelementptr inbounds i32* %ia, i64 %indvars.iv
+ store i32 %4, i32* %5, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %6, label %1
+
+; <label>:6 ; preds = %1
+ ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/X86/lit.local.cfg b/test/Transforms/LoopVectorize/X86/lit.local.cfg
new file mode 100644
index 000000000000..a8ad0f1a28b2
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+ config.unsupported = True
+
diff --git a/test/Transforms/LoopVectorize/cpp-new-array.ll b/test/Transforms/LoopVectorize/cpp-new-array.ll
new file mode 100644
index 000000000000..26902eba9e29
--- /dev/null
+++ b/test/Transforms/LoopVectorize/cpp-new-array.ll
@@ -0,0 +1,46 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: @cpp_new_arrays
+;CHECK: insertelement <4 x i32>
+;CHECK: load <4 x float>
+;CHECK: fadd <4 x float>
+;CHECK: ret i32
+define i32 @cpp_new_arrays() uwtable ssp {
+entry:
+ %call = call noalias i8* @_Znwm(i64 4)
+ %0 = bitcast i8* %call to float*
+ store float 1.000000e+03, float* %0, align 4
+ %call1 = call noalias i8* @_Znwm(i64 4)
+ %1 = bitcast i8* %call1 to float*
+ store float 1.000000e+03, float* %1, align 4
+ %call3 = call noalias i8* @_Znwm(i64 4)
+ %2 = bitcast i8* %call3 to float*
+ store float 1.000000e+03, float* %2, align 4
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %idxprom = sext i32 %i.01 to i64
+ %arrayidx = getelementptr inbounds float* %0, i64 %idxprom
+ %3 = load float* %arrayidx, align 4
+ %idxprom5 = sext i32 %i.01 to i64
+ %arrayidx6 = getelementptr inbounds float* %1, i64 %idxprom5
+ %4 = load float* %arrayidx6, align 4
+ %add = fadd float %3, %4
+ %idxprom7 = sext i32 %i.01 to i64
+ %arrayidx8 = getelementptr inbounds float* %2, i64 %idxprom7
+ store float %add, float* %arrayidx8, align 4
+ %inc = add nsw i32 %i.01, 1
+ %cmp = icmp slt i32 %inc, 1000
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ %5 = load float* %2, align 4
+ %conv10 = fptosi float %5 to i32
+ ret i32 %conv10
+}
+
+declare noalias i8* @_Znwm(i64)
diff --git a/test/Transforms/LoopVectorize/flags.ll b/test/Transforms/LoopVectorize/flags.ll
new file mode 100644
index 000000000000..2f22a764572f
--- /dev/null
+++ b/test/Transforms/LoopVectorize/flags.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: @flags1
+;CHECK: load <4 x i32>
+;CHECK: mul nsw <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret i32
+define i32 @flags1(i32 %n, i32* nocapture %A) nounwind uwtable ssp {
+ %1 = icmp sgt i32 %n, 9
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 9, %0 ]
+ %2 = getelementptr inbounds i32* %A, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = mul nsw i32 %3, 3
+ store i32 %4, i32* %2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ ret i32 undef
+}
+
+
+;CHECK: @flags2
+;CHECK: load <4 x i32>
+;CHECK: mul <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret i32
+define i32 @flags2(i32 %n, i32* nocapture %A) nounwind uwtable ssp {
+ %1 = icmp sgt i32 %n, 9
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 9, %0 ]
+ %2 = getelementptr inbounds i32* %A, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = mul i32 %3, 3
+ store i32 %4, i32* %2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ ret i32 undef
+}
diff --git a/test/Transforms/LoopVectorize/gcc-examples.ll b/test/Transforms/LoopVectorize/gcc-examples.ll
new file mode 100644
index 000000000000..fce29d240487
--- /dev/null
+++ b/test/Transforms/LoopVectorize/gcc-examples.ll
@@ -0,0 +1,650 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@b = common global [2048 x i32] zeroinitializer, align 16
+@c = common global [2048 x i32] zeroinitializer, align 16
+@a = common global [2048 x i32] zeroinitializer, align 16
+@G = common global [32 x [1024 x i32]] zeroinitializer, align 16
+@ub = common global [1024 x i32] zeroinitializer, align 16
+@uc = common global [1024 x i32] zeroinitializer, align 16
+@d = common global [2048 x i32] zeroinitializer, align 16
+@fa = common global [1024 x float] zeroinitializer, align 16
+@fb = common global [1024 x float] zeroinitializer, align 16
+@ic = common global [1024 x i32] zeroinitializer, align 16
+@da = common global [1024 x float] zeroinitializer, align 16
+@db = common global [1024 x float] zeroinitializer, align 16
+@dc = common global [1024 x float] zeroinitializer, align 16
+@dd = common global [1024 x float] zeroinitializer, align 16
+@dj = common global [1024 x i32] zeroinitializer, align 16
+
+;CHECK: @example1
+;CHECK: load <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example1() nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
+ %5 = load i32* %4, align 4
+ %6 = add nsw i32 %5, %3
+ %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ store i32 %6, i32* %7, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 256
+ br i1 %exitcond, label %8, label %1
+
+; <label>:8 ; preds = %1
+ ret void
+}
+
+;CHECK: @example2
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example2(i32 %n, i32 %x) nounwind uwtable ssp {
+ %1 = icmp sgt i32 %n, 0
+ br i1 %1, label %.lr.ph5, label %.preheader
+
+..preheader_crit_edge: ; preds = %.lr.ph5
+ %phitmp = sext i32 %n to i64
+ br label %.preheader
+
+.preheader: ; preds = %..preheader_crit_edge, %0
+ %i.0.lcssa = phi i64 [ %phitmp, %..preheader_crit_edge ], [ 0, %0 ]
+ %2 = icmp eq i32 %n, 0
+ br i1 %2, label %._crit_edge, label %.lr.ph
+
+.lr.ph5: ; preds = %0, %.lr.ph5
+ %indvars.iv6 = phi i64 [ %indvars.iv.next7, %.lr.ph5 ], [ 0, %0 ]
+ %3 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv6
+ store i32 %x, i32* %3, align 4
+ %indvars.iv.next7 = add i64 %indvars.iv6, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next7 to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %..preheader_crit_edge, label %.lr.ph5
+
+.lr.ph: ; preds = %.preheader, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ %i.0.lcssa, %.preheader ]
+ %.02 = phi i32 [ %4, %.lr.ph ], [ %n, %.preheader ]
+ %4 = add nsw i32 %.02, -1
+ %5 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
+ %6 = load i32* %5, align 4
+ %7 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
+ %8 = load i32* %7, align 4
+ %9 = and i32 %8, %6
+ %10 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ store i32 %9, i32* %10, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %11 = icmp eq i32 %4, 0
+ br i1 %11, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %.preheader
+ ret void
+}
+
+; We can't vectorize this loop because it has non constant loop bounds.
+;CHECK: @example3
+;CHECK-NOT: <4 x i32>
+;CHECK: ret void
+define void @example3(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) nounwind uwtable ssp {
+ %1 = icmp eq i32 %n, 0
+ br i1 %1, label %._crit_edge, label %.lr.ph
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %.05 = phi i32 [ %2, %.lr.ph ], [ %n, %0 ]
+ %.014 = phi i32* [ %5, %.lr.ph ], [ %p, %0 ]
+ %.023 = phi i32* [ %3, %.lr.ph ], [ %q, %0 ]
+ %2 = add nsw i32 %.05, -1
+ %3 = getelementptr inbounds i32* %.023, i64 1
+ %4 = load i32* %.023, align 16
+ %5 = getelementptr inbounds i32* %.014, i64 1
+ store i32 %4, i32* %.014, align 16
+ %6 = icmp eq i32 %2, 0
+ br i1 %6, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ ret void
+}
+
+;CHECK: @example4
+;CHECK: load <4 x i32>
+;CHECK: ret void
+define void @example4(i32 %n, i32* noalias nocapture %p, i32* noalias nocapture %q) nounwind uwtable ssp {
+ %1 = add nsw i32 %n, -1
+ %2 = icmp eq i32 %n, 0
+ br i1 %2, label %.preheader4, label %.lr.ph10
+
+.preheader4: ; preds = %0
+ %3 = icmp sgt i32 %1, 0
+ br i1 %3, label %.lr.ph6, label %._crit_edge
+
+.lr.ph10: ; preds = %0, %.lr.ph10
+ %4 = phi i32 [ %9, %.lr.ph10 ], [ %1, %0 ]
+ %.018 = phi i32* [ %8, %.lr.ph10 ], [ %p, %0 ]
+ %.027 = phi i32* [ %5, %.lr.ph10 ], [ %q, %0 ]
+ %5 = getelementptr inbounds i32* %.027, i64 1
+ %6 = load i32* %.027, align 16
+ %7 = add nsw i32 %6, 5
+ %8 = getelementptr inbounds i32* %.018, i64 1
+ store i32 %7, i32* %.018, align 16
+ %9 = add nsw i32 %4, -1
+ %10 = icmp eq i32 %4, 0
+ br i1 %10, label %._crit_edge, label %.lr.ph10
+
+.preheader: ; preds = %.lr.ph6
+ br i1 %3, label %.lr.ph, label %._crit_edge
+
+.lr.ph6: ; preds = %.preheader4, %.lr.ph6
+ %indvars.iv11 = phi i64 [ %indvars.iv.next12, %.lr.ph6 ], [ 0, %.preheader4 ]
+ %indvars.iv.next12 = add i64 %indvars.iv11, 1
+ %11 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv.next12
+ %12 = load i32* %11, align 4
+ %13 = add nsw i64 %indvars.iv11, 3
+ %14 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %13
+ %15 = load i32* %14, align 4
+ %16 = add nsw i32 %15, %12
+ %17 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv11
+ store i32 %16, i32* %17, align 4
+ %lftr.wideiv13 = trunc i64 %indvars.iv.next12 to i32
+ %exitcond14 = icmp eq i32 %lftr.wideiv13, %1
+ br i1 %exitcond14, label %.preheader, label %.lr.ph6
+
+.lr.ph: ; preds = %.preheader, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.preheader ]
+ %18 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ %19 = load i32* %18, align 4
+ %20 = icmp sgt i32 %19, 4
+ %21 = select i1 %20, i32 4, i32 0
+ %22 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
+ store i32 %21, i32* %22, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %1
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph10, %.preheader4, %.lr.ph, %.preheader
+ ret void
+}
+
+;CHECK: @example8
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example8(i32 %x) nounwind uwtable ssp {
+ br label %.preheader
+
+.preheader: ; preds = %3, %0
+ %indvars.iv3 = phi i64 [ 0, %0 ], [ %indvars.iv.next4, %3 ]
+ br label %1
+
+; <label>:1 ; preds = %1, %.preheader
+ %indvars.iv = phi i64 [ 0, %.preheader ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds [32 x [1024 x i32]]* @G, i64 0, i64 %indvars.iv3, i64 %indvars.iv
+ store i32 %x, i32* %2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %3, label %1
+
+; <label>:3 ; preds = %1
+ %indvars.iv.next4 = add i64 %indvars.iv3, 1
+ %lftr.wideiv5 = trunc i64 %indvars.iv.next4 to i32
+ %exitcond6 = icmp eq i32 %lftr.wideiv5, 32
+ br i1 %exitcond6, label %4, label %.preheader
+
+; <label>:4 ; preds = %3
+ ret void
+}
+
+;CHECK: @example9
+;CHECK: phi <4 x i32>
+;CHECK: ret i32
+define i32 @example9() nounwind uwtable readonly ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %diff.01 = phi i32 [ 0, %0 ], [ %7, %1 ]
+ %2 = getelementptr inbounds [1024 x i32]* @ub, i64 0, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = getelementptr inbounds [1024 x i32]* @uc, i64 0, i64 %indvars.iv
+ %5 = load i32* %4, align 4
+ %6 = add i32 %3, %diff.01
+ %7 = sub i32 %6, %5
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %8, label %1
+
+; <label>:8 ; preds = %1
+ ret i32 %7
+}
+
+;CHECK: @example10a
+;CHECK: load <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: load <4 x i16>
+;CHECK: add <4 x i16>
+;CHECK: store <4 x i16>
+;CHECK: ret void
+define void @example10a(i16* noalias nocapture %sa, i16* noalias nocapture %sb, i16* noalias nocapture %sc, i32* noalias nocapture %ia, i32* noalias nocapture %ib, i32* noalias nocapture %ic) nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds i32* %ib, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = getelementptr inbounds i32* %ic, i64 %indvars.iv
+ %5 = load i32* %4, align 4
+ %6 = add nsw i32 %5, %3
+ %7 = getelementptr inbounds i32* %ia, i64 %indvars.iv
+ store i32 %6, i32* %7, align 4
+ %8 = getelementptr inbounds i16* %sb, i64 %indvars.iv
+ %9 = load i16* %8, align 2
+ %10 = getelementptr inbounds i16* %sc, i64 %indvars.iv
+ %11 = load i16* %10, align 2
+ %12 = add i16 %11, %9
+ %13 = getelementptr inbounds i16* %sa, i64 %indvars.iv
+ store i16 %12, i16* %13, align 2
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %14, label %1
+
+; <label>:14 ; preds = %1
+ ret void
+}
+
+;CHECK: @example10b
+;CHECK: load <4 x i16>
+;CHECK: sext <4 x i16>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example10b(i16* noalias nocapture %sa, i16* noalias nocapture %sb, i16* noalias nocapture %sc, i32* noalias nocapture %ia, i32* noalias nocapture %ib, i32* noalias nocapture %ic) nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds i16* %sb, i64 %indvars.iv
+ %3 = load i16* %2, align 2
+ %4 = sext i16 %3 to i32
+ %5 = getelementptr inbounds i32* %ia, i64 %indvars.iv
+ store i32 %4, i32* %5, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %6, label %1
+
+; <label>:6 ; preds = %1
+ ret void
+}
+
+;CHECK: @example11
+;CHECK: load i32
+;CHECK: load i32
+;CHECK: load i32
+;CHECK: load i32
+;CHECK: insertelement
+;CHECK: insertelement
+;CHECK: insertelement
+;CHECK: insertelement
+;CHECK: ret void
+define void @example11() nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = shl nsw i64 %indvars.iv, 1
+ %3 = or i64 %2, 1
+ %4 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %3
+ %5 = load i32* %4, align 4
+ %6 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %3
+ %7 = load i32* %6, align 4
+ %8 = mul nsw i32 %7, %5
+ %9 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %2
+ %10 = load i32* %9, align 8
+ %11 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %2
+ %12 = load i32* %11, align 8
+ %13 = mul nsw i32 %12, %10
+ %14 = sub nsw i32 %8, %13
+ %15 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ store i32 %14, i32* %15, align 4
+ %16 = mul nsw i32 %7, %10
+ %17 = mul nsw i32 %12, %5
+ %18 = add nsw i32 %17, %16
+ %19 = getelementptr inbounds [2048 x i32]* @d, i64 0, i64 %indvars.iv
+ store i32 %18, i32* %19, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 512
+ br i1 %exitcond, label %20, label %1
+
+; <label>:20 ; preds = %1
+ ret void
+}
+
+;CHECK: @example12
+;CHECK: trunc <4 x i64>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example12() nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ %3 = trunc i64 %indvars.iv to i32
+ store i32 %3, i32* %2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %4, label %1
+
+; <label>:4 ; preds = %1
+ ret void
+}
+
+; Can't vectorize because of reductions.
+;CHECK: @example13
+;CHECK-NOT: <4 x i32>
+;CHECK: ret void
+define void @example13(i32** nocapture %A, i32** nocapture %B, i32* nocapture %out) nounwind uwtable ssp {
+ br label %.preheader
+
+.preheader: ; preds = %14, %0
+ %indvars.iv4 = phi i64 [ 0, %0 ], [ %indvars.iv.next5, %14 ]
+ %1 = getelementptr inbounds i32** %A, i64 %indvars.iv4
+ %2 = load i32** %1, align 8
+ %3 = getelementptr inbounds i32** %B, i64 %indvars.iv4
+ %4 = load i32** %3, align 8
+ br label %5
+
+; <label>:5 ; preds = %.preheader, %5
+ %indvars.iv = phi i64 [ 0, %.preheader ], [ %indvars.iv.next, %5 ]
+ %diff.02 = phi i32 [ 0, %.preheader ], [ %11, %5 ]
+ %6 = getelementptr inbounds i32* %2, i64 %indvars.iv
+ %7 = load i32* %6, align 4
+ %8 = getelementptr inbounds i32* %4, i64 %indvars.iv
+ %9 = load i32* %8, align 4
+ %10 = add i32 %7, %diff.02
+ %11 = sub i32 %10, %9
+ %indvars.iv.next = add i64 %indvars.iv, 8
+ %12 = trunc i64 %indvars.iv.next to i32
+ %13 = icmp slt i32 %12, 1024
+ br i1 %13, label %5, label %14
+
+; <label>:14 ; preds = %5
+ %15 = getelementptr inbounds i32* %out, i64 %indvars.iv4
+ store i32 %11, i32* %15, align 4
+ %indvars.iv.next5 = add i64 %indvars.iv4, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next5 to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 32
+ br i1 %exitcond, label %16, label %.preheader
+
+; <label>:16 ; preds = %14
+ ret void
+}
+
+; Can't vectorize because of reductions.
+;CHECK: @example14
+;CHECK-NOT: <4 x i32>
+;CHECK: ret void
+define void @example14(i32** nocapture %in, i32** nocapture %coeff, i32* nocapture %out) nounwind uwtable ssp {
+.preheader3:
+ br label %.preheader
+
+.preheader: ; preds = %11, %.preheader3
+ %indvars.iv7 = phi i64 [ 0, %.preheader3 ], [ %indvars.iv.next8, %11 ]
+ %sum.05 = phi i32 [ 0, %.preheader3 ], [ %10, %11 ]
+ br label %0
+
+; <label>:0 ; preds = %0, %.preheader
+ %indvars.iv = phi i64 [ 0, %.preheader ], [ %indvars.iv.next, %0 ]
+ %sum.12 = phi i32 [ %sum.05, %.preheader ], [ %10, %0 ]
+ %1 = getelementptr inbounds i32** %in, i64 %indvars.iv
+ %2 = load i32** %1, align 8
+ %3 = getelementptr inbounds i32* %2, i64 %indvars.iv7
+ %4 = load i32* %3, align 4
+ %5 = getelementptr inbounds i32** %coeff, i64 %indvars.iv
+ %6 = load i32** %5, align 8
+ %7 = getelementptr inbounds i32* %6, i64 %indvars.iv7
+ %8 = load i32* %7, align 4
+ %9 = mul nsw i32 %8, %4
+ %10 = add nsw i32 %9, %sum.12
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %11, label %0
+
+; <label>:11 ; preds = %0
+ %indvars.iv.next8 = add i64 %indvars.iv7, 1
+ %lftr.wideiv9 = trunc i64 %indvars.iv.next8 to i32
+ %exitcond10 = icmp eq i32 %lftr.wideiv9, 32
+ br i1 %exitcond10, label %.preheader3.1, label %.preheader
+
+.preheader3.1: ; preds = %11
+ store i32 %10, i32* %out, align 4
+ br label %.preheader.1
+
+.preheader.1: ; preds = %24, %.preheader3.1
+ %indvars.iv7.1 = phi i64 [ 0, %.preheader3.1 ], [ %indvars.iv.next8.1, %24 ]
+ %sum.05.1 = phi i32 [ 0, %.preheader3.1 ], [ %23, %24 ]
+ br label %12
+
+; <label>:12 ; preds = %12, %.preheader.1
+ %indvars.iv.1 = phi i64 [ 0, %.preheader.1 ], [ %13, %12 ]
+ %sum.12.1 = phi i32 [ %sum.05.1, %.preheader.1 ], [ %23, %12 ]
+ %13 = add nsw i64 %indvars.iv.1, 1
+ %14 = getelementptr inbounds i32** %in, i64 %13
+ %15 = load i32** %14, align 8
+ %16 = getelementptr inbounds i32* %15, i64 %indvars.iv7.1
+ %17 = load i32* %16, align 4
+ %18 = getelementptr inbounds i32** %coeff, i64 %indvars.iv.1
+ %19 = load i32** %18, align 8
+ %20 = getelementptr inbounds i32* %19, i64 %indvars.iv7.1
+ %21 = load i32* %20, align 4
+ %22 = mul nsw i32 %21, %17
+ %23 = add nsw i32 %22, %sum.12.1
+ %lftr.wideiv.1 = trunc i64 %13 to i32
+ %exitcond.1 = icmp eq i32 %lftr.wideiv.1, 1024
+ br i1 %exitcond.1, label %24, label %12
+
+; <label>:24 ; preds = %12
+ %indvars.iv.next8.1 = add i64 %indvars.iv7.1, 1
+ %lftr.wideiv9.1 = trunc i64 %indvars.iv.next8.1 to i32
+ %exitcond10.1 = icmp eq i32 %lftr.wideiv9.1, 32
+ br i1 %exitcond10.1, label %.preheader3.2, label %.preheader.1
+
+.preheader3.2: ; preds = %24
+ %25 = getelementptr inbounds i32* %out, i64 1
+ store i32 %23, i32* %25, align 4
+ br label %.preheader.2
+
+.preheader.2: ; preds = %38, %.preheader3.2
+ %indvars.iv7.2 = phi i64 [ 0, %.preheader3.2 ], [ %indvars.iv.next8.2, %38 ]
+ %sum.05.2 = phi i32 [ 0, %.preheader3.2 ], [ %37, %38 ]
+ br label %26
+
+; <label>:26 ; preds = %26, %.preheader.2
+ %indvars.iv.2 = phi i64 [ 0, %.preheader.2 ], [ %indvars.iv.next.2, %26 ]
+ %sum.12.2 = phi i32 [ %sum.05.2, %.preheader.2 ], [ %37, %26 ]
+ %27 = add nsw i64 %indvars.iv.2, 2
+ %28 = getelementptr inbounds i32** %in, i64 %27
+ %29 = load i32** %28, align 8
+ %30 = getelementptr inbounds i32* %29, i64 %indvars.iv7.2
+ %31 = load i32* %30, align 4
+ %32 = getelementptr inbounds i32** %coeff, i64 %indvars.iv.2
+ %33 = load i32** %32, align 8
+ %34 = getelementptr inbounds i32* %33, i64 %indvars.iv7.2
+ %35 = load i32* %34, align 4
+ %36 = mul nsw i32 %35, %31
+ %37 = add nsw i32 %36, %sum.12.2
+ %indvars.iv.next.2 = add i64 %indvars.iv.2, 1
+ %lftr.wideiv.2 = trunc i64 %indvars.iv.next.2 to i32
+ %exitcond.2 = icmp eq i32 %lftr.wideiv.2, 1024
+ br i1 %exitcond.2, label %38, label %26
+
+; <label>:38 ; preds = %26
+ %indvars.iv.next8.2 = add i64 %indvars.iv7.2, 1
+ %lftr.wideiv9.2 = trunc i64 %indvars.iv.next8.2 to i32
+ %exitcond10.2 = icmp eq i32 %lftr.wideiv9.2, 32
+ br i1 %exitcond10.2, label %.preheader3.3, label %.preheader.2
+
+.preheader3.3: ; preds = %38
+ %39 = getelementptr inbounds i32* %out, i64 2
+ store i32 %37, i32* %39, align 4
+ br label %.preheader.3
+
+.preheader.3: ; preds = %52, %.preheader3.3
+ %indvars.iv7.3 = phi i64 [ 0, %.preheader3.3 ], [ %indvars.iv.next8.3, %52 ]
+ %sum.05.3 = phi i32 [ 0, %.preheader3.3 ], [ %51, %52 ]
+ br label %40
+
+; <label>:40 ; preds = %40, %.preheader.3
+ %indvars.iv.3 = phi i64 [ 0, %.preheader.3 ], [ %indvars.iv.next.3, %40 ]
+ %sum.12.3 = phi i32 [ %sum.05.3, %.preheader.3 ], [ %51, %40 ]
+ %41 = add nsw i64 %indvars.iv.3, 3
+ %42 = getelementptr inbounds i32** %in, i64 %41
+ %43 = load i32** %42, align 8
+ %44 = getelementptr inbounds i32* %43, i64 %indvars.iv7.3
+ %45 = load i32* %44, align 4
+ %46 = getelementptr inbounds i32** %coeff, i64 %indvars.iv.3
+ %47 = load i32** %46, align 8
+ %48 = getelementptr inbounds i32* %47, i64 %indvars.iv7.3
+ %49 = load i32* %48, align 4
+ %50 = mul nsw i32 %49, %45
+ %51 = add nsw i32 %50, %sum.12.3
+ %indvars.iv.next.3 = add i64 %indvars.iv.3, 1
+ %lftr.wideiv.3 = trunc i64 %indvars.iv.next.3 to i32
+ %exitcond.3 = icmp eq i32 %lftr.wideiv.3, 1024
+ br i1 %exitcond.3, label %52, label %40
+
+; <label>:52 ; preds = %40
+ %indvars.iv.next8.3 = add i64 %indvars.iv7.3, 1
+ %lftr.wideiv9.3 = trunc i64 %indvars.iv.next8.3 to i32
+ %exitcond10.3 = icmp eq i32 %lftr.wideiv9.3, 32
+ br i1 %exitcond10.3, label %53, label %.preheader.3
+
+; <label>:53 ; preds = %52
+ %54 = getelementptr inbounds i32* %out, i64 3
+ store i32 %51, i32* %54, align 4
+ ret void
+}
+
+; Can't vectorize because the src and dst pointers are not disjoint.
+;CHECK: @example21
+;CHECK-NOT: <4 x i32>
+;CHECK: ret i32
+define i32 @example21(i32* nocapture %b, i32 %n) nounwind uwtable readonly ssp {
+ %1 = icmp sgt i32 %n, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0
+ %2 = sext i32 %n to i64
+ br label %3
+
+; <label>:3 ; preds = %.lr.ph, %3
+ %indvars.iv = phi i64 [ %2, %.lr.ph ], [ %indvars.iv.next, %3 ]
+ %a.02 = phi i32 [ 0, %.lr.ph ], [ %6, %3 ]
+ %indvars.iv.next = add i64 %indvars.iv, -1
+ %4 = getelementptr inbounds i32* %b, i64 %indvars.iv.next
+ %5 = load i32* %4, align 4
+ %6 = add nsw i32 %5, %a.02
+ %7 = trunc i64 %indvars.iv.next to i32
+ %8 = icmp sgt i32 %7, 0
+ br i1 %8, label %3, label %._crit_edge
+
+._crit_edge: ; preds = %3, %0
+ %a.0.lcssa = phi i32 [ 0, %0 ], [ %6, %3 ]
+ ret i32 %a.0.lcssa
+}
+
+; Can't vectorize because there are multiple PHIs.
+;CHECK: @example23
+;CHECK-NOT: <4 x i32>
+;CHECK: ret void
+define void @example23(i16* nocapture %src, i32* nocapture %dst) nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %.04 = phi i16* [ %src, %0 ], [ %2, %1 ]
+ %.013 = phi i32* [ %dst, %0 ], [ %6, %1 ]
+ %i.02 = phi i32 [ 0, %0 ], [ %7, %1 ]
+ %2 = getelementptr inbounds i16* %.04, i64 1
+ %3 = load i16* %.04, align 2
+ %4 = zext i16 %3 to i32
+ %5 = shl nuw nsw i32 %4, 7
+ %6 = getelementptr inbounds i32* %.013, i64 1
+ store i32 %5, i32* %.013, align 4
+ %7 = add nsw i32 %i.02, 1
+ %exitcond = icmp eq i32 %7, 256
+ br i1 %exitcond, label %8, label %1
+
+; <label>:8 ; preds = %1
+ ret void
+}
+
+;CHECK: @example24
+;CHECK: shufflevector <4 x i16>
+;CHECK: ret void
+define void @example24(i16 signext %x, i16 signext %y) nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds [1024 x float]* @fa, i64 0, i64 %indvars.iv
+ %3 = load float* %2, align 4
+ %4 = getelementptr inbounds [1024 x float]* @fb, i64 0, i64 %indvars.iv
+ %5 = load float* %4, align 4
+ %6 = fcmp olt float %3, %5
+ %x.y = select i1 %6, i16 %x, i16 %y
+ %7 = sext i16 %x.y to i32
+ %8 = getelementptr inbounds [1024 x i32]* @ic, i64 0, i64 %indvars.iv
+ store i32 %7, i32* %8, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %9, label %1
+
+; <label>:9 ; preds = %1
+ ret void
+}
+
+;CHECK: @example25
+;CHECK: and <4 x i1>
+;CHECK: zext <4 x i1>
+;CHECK: ret void
+define void @example25() nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds [1024 x float]* @da, i64 0, i64 %indvars.iv
+ %3 = load float* %2, align 4
+ %4 = getelementptr inbounds [1024 x float]* @db, i64 0, i64 %indvars.iv
+ %5 = load float* %4, align 4
+ %6 = fcmp olt float %3, %5
+ %7 = getelementptr inbounds [1024 x float]* @dc, i64 0, i64 %indvars.iv
+ %8 = load float* %7, align 4
+ %9 = getelementptr inbounds [1024 x float]* @dd, i64 0, i64 %indvars.iv
+ %10 = load float* %9, align 4
+ %11 = fcmp olt float %8, %10
+ %12 = and i1 %6, %11
+ %13 = zext i1 %12 to i32
+ %14 = getelementptr inbounds [1024 x i32]* @dj, i64 0, i64 %indvars.iv
+ store i32 %13, i32* %14, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 1024
+ br i1 %exitcond, label %15, label %1
+
+; <label>:15 ; preds = %1
+ ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/increment.ll b/test/Transforms/LoopVectorize/increment.ll
new file mode 100644
index 000000000000..71ea7689fc04
--- /dev/null
+++ b/test/Transforms/LoopVectorize/increment.ll
@@ -0,0 +1,66 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@a = common global [2048 x i32] zeroinitializer, align 16
+
+; This is the loop.
+; for (i=0; i<n; i++){
+; a[i] += i;
+; }
+;CHECK: @inc
+;CHECK: load <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @inc(i32 %n) nounwind uwtable noinline ssp {
+ %1 = icmp sgt i32 %n, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+ %2 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = trunc i64 %indvars.iv to i32
+ %5 = add nsw i32 %3, %4
+ store i32 %5, i32* %2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ ret void
+}
+
+; Can't vectorize this loop because the access to A[X] is non linear.
+;
+; for (i = 0; i < n; ++i) {
+; A[B[i]]++;
+;
+;CHECK: @histogram
+;CHECK-NOT: <4 x i32>
+;CHECK: ret i32
+define i32 @histogram(i32* nocapture noalias %A, i32* nocapture noalias %B, i32 %n) nounwind uwtable ssp {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %B, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %idxprom1 = sext i32 %0 to i64
+ %arrayidx2 = getelementptr inbounds i32* %A, i64 %idxprom1
+ %1 = load i32* %arrayidx2, align 4
+ %inc = add nsw i32 %1, 1
+ store i32 %inc, i32* %arrayidx2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret i32 0
+}
diff --git a/test/Transforms/LoopVectorize/induction_plus.ll b/test/Transforms/LoopVectorize/induction_plus.ll
new file mode 100644
index 000000000000..b31bceb50df6
--- /dev/null
+++ b/test/Transforms/LoopVectorize/induction_plus.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@array = common global [1024 x i32] zeroinitializer, align 16
+
+;CHECK: @array_at_plus_one
+;CHECK: add <4 x i64>
+;CHECK: trunc <4 x i64>
+;CHECK: add i64 %index, 12
+;CHECK: ret i32
+define i32 @array_at_plus_one(i32 %n) nounwind uwtable ssp {
+ %1 = icmp sgt i32 %n, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+ %2 = add nsw i64 %indvars.iv, 12
+ %3 = getelementptr inbounds [1024 x i32]* @array, i64 0, i64 %2
+ %4 = trunc i64 %indvars.iv to i32
+ store i32 %4, i32* %3, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ ret i32 undef
+}
diff --git a/test/Transforms/LoopVectorize/lit.local.cfg b/test/Transforms/LoopVectorize/lit.local.cfg
new file mode 100644
index 000000000000..19eebc0ac7ac
--- /dev/null
+++ b/test/Transforms/LoopVectorize/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll', '.c', '.cpp']
diff --git a/test/Transforms/LoopVectorize/non-const-n.ll b/test/Transforms/LoopVectorize/non-const-n.ll
new file mode 100644
index 000000000000..1a6c15ed96c4
--- /dev/null
+++ b/test/Transforms/LoopVectorize/non-const-n.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@b = common global [2048 x i32] zeroinitializer, align 16
+@c = common global [2048 x i32] zeroinitializer, align 16
+@a = common global [2048 x i32] zeroinitializer, align 16
+
+;CHECK: @example1
+;CHECK: shl i32
+;CHECK: zext i32
+;CHECK: load <4 x i32>
+;CHECK: add nsw <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example1(i32 %n) nounwind uwtable ssp {
+ %n4 = shl i32 %n, 2
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
+ %5 = load i32* %4, align 4
+ %6 = add nsw i32 %5, %3
+ %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ store i32 %6, i32* %7, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n4
+ br i1 %exitcond, label %8, label %1
+
+; <label>:8 ; preds = %1
+ ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/read-only.ll b/test/Transforms/LoopVectorize/read-only.ll
new file mode 100644
index 000000000000..b4d1bac132f0
--- /dev/null
+++ b/test/Transforms/LoopVectorize/read-only.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: @read_only_func
+;CHECK: load <4 x i32>
+;CHECK: ret i32
+define i32 @read_only_func(i32* nocapture %A, i32* nocapture %B, i32 %n) nounwind uwtable readonly ssp {
+ %1 = icmp sgt i32 %n, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+ %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ]
+ %2 = getelementptr inbounds i32* %A, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = add nsw i64 %indvars.iv, 13
+ %5 = getelementptr inbounds i32* %B, i64 %4
+ %6 = load i32* %5, align 4
+ %7 = shl i32 %6, 1
+ %8 = add i32 %3, %sum.02
+ %9 = add i32 %8, %7
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ %sum.0.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ]
+ ret i32 %sum.0.lcssa
+}
diff --git a/test/Transforms/LoopVectorize/reduction.ll b/test/Transforms/LoopVectorize/reduction.ll
new file mode 100644
index 000000000000..c1848b35fc6e
--- /dev/null
+++ b/test/Transforms/LoopVectorize/reduction.ll
@@ -0,0 +1,232 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: @reduction_sum
+;CHECK: phi <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: add <4 x i32>
+;CHECK: ret i32
+define i32 @reduction_sum(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {
+ %1 = icmp sgt i32 %n, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+ %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ]
+ %2 = getelementptr inbounds i32* %A, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = getelementptr inbounds i32* %B, i64 %indvars.iv
+ %5 = load i32* %4, align 4
+ %6 = trunc i64 %indvars.iv to i32
+ %7 = add i32 %sum.02, %6
+ %8 = add i32 %7, %3
+ %9 = add i32 %8, %5
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ %sum.0.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ]
+ ret i32 %sum.0.lcssa
+}
+
+;CHECK: @reduction_prod
+;CHECK: phi <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: mul <4 x i32>
+;CHECK: ret i32
+define i32 @reduction_prod(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {
+ %1 = icmp sgt i32 %n, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+ %prod.02 = phi i32 [ %9, %.lr.ph ], [ 1, %0 ]
+ %2 = getelementptr inbounds i32* %A, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = getelementptr inbounds i32* %B, i64 %indvars.iv
+ %5 = load i32* %4, align 4
+ %6 = trunc i64 %indvars.iv to i32
+ %7 = mul i32 %prod.02, %6
+ %8 = mul i32 %7, %3
+ %9 = mul i32 %8, %5
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ %prod.0.lcssa = phi i32 [ 1, %0 ], [ %9, %.lr.ph ]
+ ret i32 %prod.0.lcssa
+}
+
+;CHECK: @reduction_mix
+;CHECK: phi <4 x i32>
+;CHECK: load <4 x i32>
+;CHECK: mul nsw <4 x i32>
+;CHECK: ret i32
+define i32 @reduction_mix(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {
+ %1 = icmp sgt i32 %n, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+ %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ]
+ %2 = getelementptr inbounds i32* %A, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = getelementptr inbounds i32* %B, i64 %indvars.iv
+ %5 = load i32* %4, align 4
+ %6 = mul nsw i32 %5, %3
+ %7 = trunc i64 %indvars.iv to i32
+ %8 = add i32 %sum.02, %7
+ %9 = add i32 %8, %6
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ %sum.0.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ]
+ ret i32 %sum.0.lcssa
+}
+
+;CHECK: @reduction_mul
+;CHECK: mul <4 x i32>
+;CHECK: ret i32
+define i32 @reduction_mul(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {
+ %1 = icmp sgt i32 %n, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+ %sum.02 = phi i32 [ %9, %.lr.ph ], [ 19, %0 ]
+ %2 = getelementptr inbounds i32* %A, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = getelementptr inbounds i32* %B, i64 %indvars.iv
+ %5 = load i32* %4, align 4
+ %6 = trunc i64 %indvars.iv to i32
+ %7 = add i32 %3, %6
+ %8 = add i32 %7, %5
+ %9 = mul i32 %8, %sum.02
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ %sum.0.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ]
+ ret i32 %sum.0.lcssa
+}
+
+;CHECK: @start_at_non_zero
+;CHECK: phi <4 x i32>
+;CHECK: <i32 120, i32 0, i32 0, i32 0>
+;CHECK: ret i32
+define i32 @start_at_non_zero(i32* nocapture %in, i32* nocapture %coeff, i32* nocapture %out, i32 %n) nounwind uwtable readonly ssp {
+entry:
+ %cmp7 = icmp sgt i32 %n, 0
+ br i1 %cmp7, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %sum.09 = phi i32 [ %add, %for.body ], [ 120, %entry ]
+ %arrayidx = getelementptr inbounds i32* %in, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds i32* %coeff, i64 %indvars.iv
+ %1 = load i32* %arrayidx2, align 4
+ %mul = mul nsw i32 %1, %0
+ %add = add nsw i32 %mul, %sum.09
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %sum.0.lcssa = phi i32 [ 120, %entry ], [ %add, %for.body ]
+ ret i32 %sum.0.lcssa
+}
+
+;CHECK: @reduction_and
+;CHECK: and <4 x i32>
+;CHECK: <i32 -1, i32 -1, i32 -1, i32 -1>
+;CHECK: ret i32
+define i32 @reduction_and(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly {
+entry:
+ %cmp7 = icmp sgt i32 %n, 0
+ br i1 %cmp7, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %result.08 = phi i32 [ %and, %for.body ], [ -1, %entry ]
+ %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds i32* %B, i64 %indvars.iv
+ %1 = load i32* %arrayidx2, align 4
+ %add = add nsw i32 %1, %0
+ %and = and i32 %add, %result.08
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %result.0.lcssa = phi i32 [ -1, %entry ], [ %and, %for.body ]
+ ret i32 %result.0.lcssa
+}
+
+;CHECK: @reduction_or
+;CHECK: or <4 x i32>
+;CHECK: ret i32
+define i32 @reduction_or(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly {
+entry:
+ %cmp7 = icmp sgt i32 %n, 0
+ br i1 %cmp7, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds i32* %B, i64 %indvars.iv
+ %1 = load i32* %arrayidx2, align 4
+ %add = add nsw i32 %1, %0
+ %or = or i32 %add, %result.08
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %result.0.lcssa = phi i32 [ 0, %entry ], [ %or, %for.body ]
+ ret i32 %result.0.lcssa
+}
+
+;CHECK: @reduction_xor
+;CHECK: xor <4 x i32>
+;CHECK: ret i32
+define i32 @reduction_xor(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly {
+entry:
+ %cmp7 = icmp sgt i32 %n, 0
+ br i1 %cmp7, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %result.08 = phi i32 [ %xor, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32* %A, i64 %indvars.iv
+ %0 = load i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds i32* %B, i64 %indvars.iv
+ %1 = load i32* %arrayidx2, align 4
+ %add = add nsw i32 %1, %0
+ %xor = xor i32 %add, %result.08
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ %result.0.lcssa = phi i32 [ 0, %entry ], [ %xor, %for.body ]
+ ret i32 %result.0.lcssa
+}
diff --git a/test/Transforms/LoopVectorize/runtime-check.ll b/test/Transforms/LoopVectorize/runtime-check.ll
new file mode 100644
index 000000000000..23933cf7c7db
--- /dev/null
+++ b/test/Transforms/LoopVectorize/runtime-check.ll
@@ -0,0 +1,36 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.9.0"
+
+; Make sure we vectorize this loop:
+; int foo(float *a, float *b, int n) {
+; for (int i=0; i<n; ++i)
+; a[i] = b[i] * 3;
+; }
+
+;CHECK: load <4 x float>
+define i32 @foo(float* nocapture %a, float* nocapture %b, i32 %n) nounwind uwtable ssp {
+entry:
+ %cmp6 = icmp sgt i32 %n, 0
+ br i1 %cmp6, label %for.body, label %for.end
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float* %b, i64 %indvars.iv
+ %0 = load float* %arrayidx, align 4, !tbaa !0
+ %mul = fmul float %0, 3.000000e+00
+ %arrayidx2 = getelementptr inbounds float* %a, i64 %indvars.iv
+ store float %mul, float* %arrayidx2, align 4, !tbaa !0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret i32 undef
+}
+
+!0 = metadata !{metadata !"float", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/LoopVectorize/scalar-select.ll b/test/Transforms/LoopVectorize/scalar-select.ll
new file mode 100644
index 000000000000..e537bde31bb0
--- /dev/null
+++ b/test/Transforms/LoopVectorize/scalar-select.ll
@@ -0,0 +1,37 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@a = common global [2048 x i32] zeroinitializer, align 16
+@b = common global [2048 x i32] zeroinitializer, align 16
+@c = common global [2048 x i32] zeroinitializer, align 16
+
+;CHECK: @example1
+;CHECK: load <4 x i32>
+; make sure that we have a scalar condition and a vector operand.
+;CHECK: select i1 %cond, <4 x i32>
+;CHECK: store <4 x i32>
+;CHECK: ret void
+define void @example1(i1 %cond) nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
+ %5 = load i32* %4, align 4
+ %6 = add nsw i32 %5, %3
+ %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ %sel = select i1 %cond, i32 %6, i32 zeroinitializer
+ store i32 %sel, i32* %7, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 256
+ br i1 %exitcond, label %8, label %1
+
+; <label>:8 ; preds = %1
+ ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/small-loop.ll b/test/Transforms/LoopVectorize/small-loop.ll
new file mode 100644
index 000000000000..4a6e4b231dfe
--- /dev/null
+++ b/test/Transforms/LoopVectorize/small-loop.ll
@@ -0,0 +1,33 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+@a = common global [2048 x i32] zeroinitializer, align 16
+@b = common global [2048 x i32] zeroinitializer, align 16
+@c = common global [2048 x i32] zeroinitializer, align 16
+
+;CHECK: @example1
+;CHECK-NOT: load <4 x i32>
+;CHECK: ret void
+define void @example1() nounwind uwtable ssp {
+ br label %1
+
+; <label>:1 ; preds = %1, %0
+ %indvars.iv = phi i64 [ 0, %0 ], [ %indvars.iv.next, %1 ]
+ %2 = getelementptr inbounds [2048 x i32]* @b, i64 0, i64 %indvars.iv
+ %3 = load i32* %2, align 4
+ %4 = getelementptr inbounds [2048 x i32]* @c, i64 0, i64 %indvars.iv
+ %5 = load i32* %4, align 4
+ %6 = add nsw i32 %5, %3
+ %7 = getelementptr inbounds [2048 x i32]* @a, i64 0, i64 %indvars.iv
+ store i32 %6, i32* %7, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, 8 ; <----- A really small trip count.
+ br i1 %exitcond, label %8, label %1
+
+; <label>:8 ; preds = %1
+ ret void
+}
+
diff --git a/test/Transforms/LoopVectorize/start-non-zero.ll b/test/Transforms/LoopVectorize/start-non-zero.ll
new file mode 100644
index 000000000000..5aa3bc034d0b
--- /dev/null
+++ b/test/Transforms/LoopVectorize/start-non-zero.ll
@@ -0,0 +1,35 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: @start_at_nonzero
+;CHECK: mul nuw <4 x i32>
+;CHECK: ret i32
+define i32 @start_at_nonzero(i32* nocapture %a, i32 %start, i32 %end) nounwind uwtable ssp {
+entry:
+ %cmp3 = icmp slt i32 %start, %end
+ br i1 %cmp3, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ %0 = sext i32 %start to i64
+ br label %for.body
+
+for.body: ; preds = %for.body.lr.ph, %for.body
+ %indvars.iv = phi i64 [ %0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32* %a, i64 %indvars.iv
+ %1 = load i32* %arrayidx, align 4, !tbaa !0
+ %mul = mul nuw i32 %1, 333
+ store i32 %mul, i32* %arrayidx, align 4, !tbaa !0
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %2 = trunc i64 %indvars.iv.next to i32
+ %cmp = icmp slt i32 %2, %end
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body, %entry
+ ret i32 4
+}
+
+!0 = metadata !{metadata !"int", metadata !1}
+!1 = metadata !{metadata !"omnipotent char", metadata !2}
+!2 = metadata !{metadata !"Simple C/C++ TBAA"}
diff --git a/test/Transforms/LoopVectorize/write-only.ll b/test/Transforms/LoopVectorize/write-only.ll
new file mode 100644
index 000000000000..eb027604134f
--- /dev/null
+++ b/test/Transforms/LoopVectorize/write-only.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -dce -instcombine -licm -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+;CHECK: @read_mod_write_single_ptr
+;CHECK: load <4 x float>
+;CHECK: ret i32
+define i32 @read_mod_write_single_ptr(float* nocapture %a, i32 %n) nounwind uwtable ssp {
+ %1 = icmp sgt i32 %n, 0
+ br i1 %1, label %.lr.ph, label %._crit_edge
+
+.lr.ph: ; preds = %0, %.lr.ph
+ %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
+ %2 = getelementptr inbounds float* %a, i64 %indvars.iv
+ %3 = load float* %2, align 4
+ %4 = fmul float %3, 3.000000e+00
+ store float %4, float* %2, align 4
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %._crit_edge, label %.lr.ph
+
+._crit_edge: ; preds = %.lr.ph, %0
+ ret i32 undef
+}
diff --git a/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll b/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
index 61ba3c7e6cc5..597b69dee3d4 100644
--- a/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
+++ b/test/Transforms/MemCpyOpt/2008-03-13-ReturnSlotBitcast.ll
@@ -9,11 +9,11 @@ declare void @g(%a*)
define float @f() {
entry:
%a_var = alloca %a
- %b_var = alloca %b
+ %b_var = alloca %b, align 1
call void @g(%a* %a_var)
%a_i8 = bitcast %a* %a_var to i8*
%b_i8 = bitcast %b* %b_var to i8*
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b_i8, i8* %a_i8, i32 4, i32 4, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b_i8, i8* %a_i8, i32 4, i32 1, i1 false)
%tmp1 = getelementptr %b* %b_var, i32 0, i32 0
%tmp2 = load float* %tmp1
ret float %tmp2
diff --git a/test/Transforms/MemCpyOpt/align.ll b/test/Transforms/MemCpyOpt/align.ll
index b1f900d9da4c..1b98f6ad383f 100644
--- a/test/Transforms/MemCpyOpt/align.ll
+++ b/test/Transforms/MemCpyOpt/align.ll
@@ -1,12 +1,15 @@
-; RUN: opt < %s -S -memcpyopt | FileCheck %s
+; RUN: opt < %s -S -basicaa -memcpyopt | FileCheck %s
target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
; The resulting memset is only 4-byte aligned, despite containing
; a 16-byte aligned store in the middle.
-; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 16, i32 4, i1 false)
-
define void @foo(i32* %p) {
+; CHECK: @foo
+; CHECK: call void @llvm.memset.p0i8.i64(i8* {{.*}}, i8 0, i64 16, i32 4, i1 false)
%a0 = getelementptr i32* %p, i64 0
store i32 0, i32* %a0, align 4
%a1 = getelementptr i32* %p, i64 1
@@ -17,3 +20,18 @@ define void @foo(i32* %p) {
store i32 0, i32* %a3, align 4
ret void
}
+
+; Replacing %a8 with %a4 in the memset requires boosting the alignment of %a4.
+
+define void @bar() {
+; CHECK: @bar
+; CHECK: %a4 = alloca i32, align 8
+; CHECK-NOT: memcpy
+ %a4 = alloca i32, align 4
+ %a8 = alloca i32, align 8
+ %a8.cast = bitcast i32* %a8 to i8*
+ %a4.cast = bitcast i32* %a4 to i8*
+ call void @llvm.memset.p0i8.i64(i8* %a8.cast, i8 0, i64 4, i32 8, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %a4.cast, i8* %a8.cast, i64 4, i32 4, i1 false)
+ ret void
+}
diff --git a/test/Transforms/MemCpyOpt/form-memset.ll b/test/Transforms/MemCpyOpt/form-memset.ll
index 8832f897b089..f63b1dcfdd5f 100644
--- a/test/Transforms/MemCpyOpt/form-memset.ll
+++ b/test/Transforms/MemCpyOpt/form-memset.ll
@@ -248,3 +248,27 @@ entry:
; CHECK: @test8
; CHECK: store <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1>, <4 x i32>* %0, align 16
}
+
+@test9buf = internal unnamed_addr global [16 x i64] zeroinitializer, align 16
+
+define void @test9() nounwind {
+ store i8 -1, i8* bitcast ([16 x i64]* @test9buf to i8*), align 16
+ store i8 -1, i8* getelementptr (i8* bitcast ([16 x i64]* @test9buf to i8*), i64 1), align 1
+ store i8 -1, i8* getelementptr (i8* bitcast ([16 x i64]* @test9buf to i8*), i64 2), align 2
+ store i8 -1, i8* getelementptr (i8* bitcast ([16 x i64]* @test9buf to i8*), i64 3), align 1
+ store i8 -1, i8* getelementptr (i8* bitcast ([16 x i64]* @test9buf to i8*), i64 4), align 4
+ store i8 -1, i8* getelementptr (i8* bitcast ([16 x i64]* @test9buf to i8*), i64 5), align 1
+ store i8 -1, i8* getelementptr (i8* bitcast ([16 x i64]* @test9buf to i8*), i64 6), align 2
+ store i8 -1, i8* getelementptr (i8* bitcast ([16 x i64]* @test9buf to i8*), i64 7), align 1
+ store i8 -1, i8* bitcast (i64* getelementptr inbounds ([16 x i64]* @test9buf, i64 0, i64 1) to i8*), align 8
+ store i8 -1, i8* getelementptr (i8* bitcast ([16 x i64]* @test9buf to i8*), i64 9), align 1
+ store i8 -1, i8* getelementptr (i8* bitcast ([16 x i64]* @test9buf to i8*), i64 10), align 2
+ store i8 -1, i8* getelementptr (i8* bitcast ([16 x i64]* @test9buf to i8*), i64 11), align 1
+ store i8 -1, i8* getelementptr (i8* bitcast ([16 x i64]* @test9buf to i8*), i64 12), align 4
+ store i8 -1, i8* getelementptr (i8* bitcast ([16 x i64]* @test9buf to i8*), i64 13), align 1
+ store i8 -1, i8* getelementptr (i8* bitcast ([16 x i64]* @test9buf to i8*), i64 14), align 2
+ store i8 -1, i8* getelementptr (i8* bitcast ([16 x i64]* @test9buf to i8*), i64 15), align 1
+ ret void
+; CHECK: @test9(
+; CHECK: call void @llvm.memset.p0i8.i64(i8* bitcast ([16 x i64]* @test9buf to i8*), i8 -1, i64 16, i32 16, i1 false)
+}
diff --git a/test/Transforms/MetaRenamer/lit.local.cfg b/test/Transforms/MetaRenamer/lit.local.cfg
new file mode 100644
index 000000000000..c6106e4746f2
--- /dev/null
+++ b/test/Transforms/MetaRenamer/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll']
diff --git a/test/Transforms/MetaRenamer/metarenamer.ll b/test/Transforms/MetaRenamer/metarenamer.ll
new file mode 100644
index 000000000000..ad41bcf50f19
--- /dev/null
+++ b/test/Transforms/MetaRenamer/metarenamer.ll
@@ -0,0 +1,96 @@
+; RUN: opt %s -metarenamer -S | FileCheck %s
+
+; CHECK: target triple {{.*}}
+; CHECK-NOT: {{^x*}}xxx{{^x*}}
+; CHECK: ret i32 6
+
+target triple = "x86_64-pc-linux-gnu"
+
+%struct.bar_xxx = type { i32, double }
+%struct.foo_xxx = type { i32, float, %struct.bar_xxx }
+
+@func_5_xxx.static_local_3_xxx = internal global i32 3, align 4
+@global_3_xxx = common global i32 0, align 4
+
+@func_7_xxx = alias weak i32 (...)* @aliased_func_7_xxx
+
+declare i32 @aliased_func_7_xxx(...)
+
+define i32 @func_3_xxx() nounwind uwtable ssp {
+ ret i32 3
+}
+
+define void @func_4_xxx(%struct.foo_xxx* sret %agg.result) nounwind uwtable ssp {
+ %1 = alloca %struct.foo_xxx, align 8
+ %2 = getelementptr inbounds %struct.foo_xxx* %1, i32 0, i32 0
+ store i32 1, i32* %2, align 4
+ %3 = getelementptr inbounds %struct.foo_xxx* %1, i32 0, i32 1
+ store float 2.000000e+00, float* %3, align 4
+ %4 = getelementptr inbounds %struct.foo_xxx* %1, i32 0, i32 2
+ %5 = getelementptr inbounds %struct.bar_xxx* %4, i32 0, i32 0
+ store i32 3, i32* %5, align 4
+ %6 = getelementptr inbounds %struct.bar_xxx* %4, i32 0, i32 1
+ store double 4.000000e+00, double* %6, align 8
+ %7 = bitcast %struct.foo_xxx* %agg.result to i8*
+ %8 = bitcast %struct.foo_xxx* %1 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %7, i8* %8, i64 24, i32 8, i1 false)
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
+
+define i32 @func_5_xxx(i32 %arg_1_xxx, i32 %arg_2_xxx, i32 %arg_3_xxx, i32 %arg_4_xxx) nounwind uwtable ssp {
+ %1 = alloca i32, align 4
+ %2 = alloca i32, align 4
+ %3 = alloca i32, align 4
+ %4 = alloca i32, align 4
+ %local_1_xxx = alloca i32, align 4
+ %local_2_xxx = alloca i32, align 4
+ %i = alloca i32, align 4
+ store i32 %arg_1_xxx, i32* %1, align 4
+ store i32 %arg_2_xxx, i32* %2, align 4
+ store i32 %arg_3_xxx, i32* %3, align 4
+ store i32 %arg_4_xxx, i32* %4, align 4
+ store i32 1, i32* %local_1_xxx, align 4
+ store i32 2, i32* %local_2_xxx, align 4
+ store i32 0, i32* %i, align 4
+ br label %5
+
+; <label>:5 ; preds = %9, %0
+ %6 = load i32* %i, align 4
+ %7 = icmp slt i32 %6, 10
+ br i1 %7, label %8, label %12
+
+; <label>:8 ; preds = %5
+ br label %9
+
+; <label>:9 ; preds = %8
+ %10 = load i32* %i, align 4
+ %11 = add nsw i32 %10, 1
+ store i32 %11, i32* %i, align 4
+ br label %5
+
+; <label>:12 ; preds = %5
+ %13 = load i32* %local_1_xxx, align 4
+ %14 = load i32* %1, align 4
+ %15 = add nsw i32 %13, %14
+ %16 = load i32* %local_2_xxx, align 4
+ %17 = add nsw i32 %15, %16
+ %18 = load i32* %2, align 4
+ %19 = add nsw i32 %17, %18
+ %20 = load i32* @func_5_xxx.static_local_3_xxx, align 4
+ %21 = add nsw i32 %19, %20
+ %22 = load i32* %3, align 4
+ %23 = add nsw i32 %21, %22
+ %24 = load i32* %4, align 4
+ %25 = add nsw i32 %23, %24
+ ret i32 %25
+}
+
+define i32 @varargs_func_6_xxx(i32 %arg_1_xxx, i32 %arg_2_xxx, ...) nounwind uwtable ssp {
+ %1 = alloca i32, align 4
+ %2 = alloca i32, align 4
+ store i32 %arg_1_xxx, i32* %1, align 4
+ store i32 %arg_2_xxx, i32* %2, align 4
+ ret i32 6
+}
diff --git a/test/Transforms/ObjCARC/basic.ll b/test/Transforms/ObjCARC/basic.ll
index 0a7ba5de71bc..7b64b1be7c62 100644
--- a/test/Transforms/ObjCARC/basic.ll
+++ b/test/Transforms/ObjCARC/basic.ll
@@ -1,4 +1,4 @@
-; RUN: opt -objc-arc -S < %s | FileCheck %s
+; RUN: opt -basicaa -objc-arc -S < %s | FileCheck %s
target datalayout = "e-p:64:64:64"
@@ -1498,7 +1498,7 @@ define i8* @test49(i8* %p) nounwind {
}
; Do delete retain+release with intervening stores of the
-; address value;
+; address value.
; CHECK: define void @test50(
; CHECK-NOT: @objc_
diff --git a/test/Transforms/ObjCARC/nested.ll b/test/Transforms/ObjCARC/nested.ll
index a618a21d8bb3..32be03ec6ae0 100644
--- a/test/Transforms/ObjCARC/nested.ll
+++ b/test/Transforms/ObjCARC/nested.ll
@@ -16,6 +16,10 @@ declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
declare i8* @objc_msgSend(i8*, i8*, ...) nonlazybind
declare void @use(i8*)
declare void @objc_release(i8*)
+declare i8* @def()
+declare void @__crasher_block_invoke(i8* nocapture)
+declare i8* @objc_retainBlock(i8*)
+declare void @__crasher_block_invoke1(i8* nocapture)
!0 = metadata !{}
@@ -279,11 +283,13 @@ forcoll.empty:
ret void
}
-; Delete a nested retain+release pair.
+; TODO: Delete a nested retain+release pair.
+; The optimizer currently can't do this, because it isn't sophisticated enough in
+; reasoning about nesting.
; CHECK: define void @test6(
; CHECK: call i8* @objc_retain
-; CHECK-NOT: @objc_retain
+; CHECK: @objc_retain
; CHECK: }
define void @test6() nounwind {
entry:
@@ -345,11 +351,13 @@ forcoll.empty:
ret void
}
-; Delete a nested retain+release pair.
+; TODO: Delete a nested retain+release pair.
+; The optimizer currently can't do this, because it isn't sophisticated enough in
+; reasoning about nesting.
; CHECK: define void @test7(
; CHECK: call i8* @objc_retain
-; CHECK-NOT: @objc_retain
+; CHECK: @objc_retain
; CHECK: }
define void @test7() nounwind {
entry:
@@ -553,12 +561,12 @@ forcoll.empty:
ret void
}
-; Like test9, but without a split backedge. This we can optimize.
+; Like test9, but without a split backedge. TODO: optimize this.
; CHECK: define void @test9b(
; CHECK: call i8* @objc_retain
; CHECK: call i8* @objc_retain
-; CHECK-NOT: @objc_retain
+; CHECK: @objc_retain
; CHECK: }
define void @test9b() nounwind {
entry:
@@ -687,12 +695,12 @@ forcoll.empty:
ret void
}
-; Like test10, but without a split backedge. This we can optimize.
+; Like test10, but without a split backedge. TODO: optimize this.
; CHECK: define void @test10b(
; CHECK: call i8* @objc_retain
; CHECK: call i8* @objc_retain
-; CHECK-NOT: @objc_retain
+; CHECK: @objc_retain
; CHECK: }
define void @test10b() nounwind {
entry:
@@ -751,3 +759,64 @@ forcoll.empty:
call void @objc_release(i8* %0) nounwind, !clang.imprecise_release !0
ret void
}
+
+; Pointers to strong pointers can obscure provenance relationships. Be conservative
+; in the face of escaping pointers. rdar://12150909.
+
+%struct.__block_d = type { i64, i64 }
+
+@_NSConcreteStackBlock = external global i8*
+@__block_d_tmp = external hidden constant { i64, i64, i8*, i8*, i8*, i8* }
+@__block_d_tmp5 = external hidden constant { i64, i64, i8*, i8*, i8*, i8* }
+
+; CHECK: define void @test11(
+; CHECK: tail call i8* @objc_retain(i8* %call) nounwind
+; CHECK: tail call i8* @objc_retain(i8* %call) nounwind
+; CHECK: call void @objc_release(i8* %call) nounwind, !clang.imprecise_release !0
+; CHECK: }
+define void @test11() {
+entry:
+ %block = alloca <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>, align 8
+ %block9 = alloca <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>, align 8
+ %call = call i8* @def(), !clang.arc.no_objc_arc_exceptions !0
+ %foo = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block, i64 0, i32 5
+ %block.isa = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block, i64 0, i32 0
+ store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa, align 8
+ %block.flags = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block, i64 0, i32 1
+ store i32 1107296256, i32* %block.flags, align 8
+ %block.reserved = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block, i64 0, i32 2
+ store i32 0, i32* %block.reserved, align 4
+ %block.invoke = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block, i64 0, i32 3
+ store i8* bitcast (void (i8*)* @__crasher_block_invoke to i8*), i8** %block.invoke, align 8
+ %block.d = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block, i64 0, i32 4
+ store %struct.__block_d* bitcast ({ i64, i64, i8*, i8*, i8*, i8* }* @__block_d_tmp to %struct.__block_d*), %struct.__block_d** %block.d, align 8
+ %foo2 = tail call i8* @objc_retain(i8* %call) nounwind
+ store i8* %foo2, i8** %foo, align 8
+ %foo4 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block to i8*
+ %foo5 = call i8* @objc_retainBlock(i8* %foo4) nounwind
+ call void @use(i8* %foo5), !clang.arc.no_objc_arc_exceptions !0
+ call void @objc_release(i8* %foo5) nounwind
+ %strongdestroy = load i8** %foo, align 8
+ call void @objc_release(i8* %strongdestroy) nounwind, !clang.imprecise_release !0
+ %foo10 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block9, i64 0, i32 5
+ %block.isa11 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block9, i64 0, i32 0
+ store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** %block.isa11, align 8
+ %block.flags12 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block9, i64 0, i32 1
+ store i32 1107296256, i32* %block.flags12, align 8
+ %block.reserved13 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block9, i64 0, i32 2
+ store i32 0, i32* %block.reserved13, align 4
+ %block.invoke14 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block9, i64 0, i32 3
+ store i8* bitcast (void (i8*)* @__crasher_block_invoke1 to i8*), i8** %block.invoke14, align 8
+ %block.d15 = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block9, i64 0, i32 4
+ store %struct.__block_d* bitcast ({ i64, i64, i8*, i8*, i8*, i8* }* @__block_d_tmp5 to %struct.__block_d*), %struct.__block_d** %block.d15, align 8
+ %foo18 = call i8* @objc_retain(i8* %call) nounwind
+ store i8* %call, i8** %foo10, align 8
+ %foo20 = bitcast <{ i8*, i32, i32, i8*, %struct.__block_d*, i8* }>* %block9 to i8*
+ %foo21 = call i8* @objc_retainBlock(i8* %foo20) nounwind
+ call void @use(i8* %foo21), !clang.arc.no_objc_arc_exceptions !0
+ call void @objc_release(i8* %foo21) nounwind
+ %strongdestroy25 = load i8** %foo10, align 8
+ call void @objc_release(i8* %strongdestroy25) nounwind, !clang.imprecise_release !0
+ call void @objc_release(i8* %call) nounwind, !clang.imprecise_release !0
+ ret void
+}
diff --git a/test/Transforms/ObjCARC/path-overflow.ll b/test/Transforms/ObjCARC/path-overflow.ll
new file mode 100644
index 000000000000..e7866ed1b442
--- /dev/null
+++ b/test/Transforms/ObjCARC/path-overflow.ll
@@ -0,0 +1,329 @@
+; RUN: opt -objc-arc -S < %s
+; rdar://12277446
+
+; The total number of paths grows exponentially with the number of branches, and a
+; computation of this number can overflow any reasonable fixed-sized integer.
+
+target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
+target triple = "thumbv7-apple-ios5.0.0"
+
+%struct.NSConstantString.11.33.55.77.99.121.143.332.1130.1340.2768 = type { i32*, i32, i8*, i32 }
+
+@_unnamed_cfstring_591 = external constant %struct.NSConstantString.11.33.55.77.99.121.143.332.1130.1340.2768, section "__DATA,__cfstring"
+
+declare i8* @objc_retain(i8*) nonlazybind
+
+declare void @objc_release(i8*) nonlazybind
+
+define hidden void @foo() {
+entry:
+ br i1 undef, label %msgSend.nullinit, label %msgSend.call
+
+msgSend.call: ; preds = %entry
+ br label %msgSend.cont
+
+msgSend.nullinit: ; preds = %entry
+ br label %msgSend.cont
+
+msgSend.cont: ; preds = %msgSend.nullinit, %msgSend.call
+ %0 = bitcast %struct.NSConstantString.11.33.55.77.99.121.143.332.1130.1340.2768* @_unnamed_cfstring_591 to i8*
+ %1 = call i8* @objc_retain(i8* %0) nounwind
+ br i1 undef, label %msgSend.nullinit33, label %msgSend.call32
+
+msgSend.call32: ; preds = %if.end10
+ br label %msgSend.cont34
+
+msgSend.nullinit33: ; preds = %if.end10
+ br label %msgSend.cont34
+
+msgSend.cont34: ; preds = %msgSend.nullinit33, %msgSend.call32
+ br i1 undef, label %msgSend.nullinit38, label %msgSend.call37
+
+msgSend.call37: ; preds = %msgSend.cont34
+ br label %msgSend.cont39
+
+msgSend.nullinit38: ; preds = %msgSend.cont34
+ br label %msgSend.cont39
+
+msgSend.cont39: ; preds = %msgSend.nullinit38, %msgSend.call37
+ br i1 undef, label %msgSend.nullinit49, label %msgSend.call48
+
+msgSend.call48: ; preds = %msgSend.cont39
+ br label %msgSend.cont50
+
+msgSend.nullinit49: ; preds = %msgSend.cont39
+ br label %msgSend.cont50
+
+msgSend.cont50: ; preds = %msgSend.nullinit49, %msgSend.call48
+ br i1 undef, label %msgSend.nullinit61, label %msgSend.call60
+
+msgSend.call60: ; preds = %msgSend.cont50
+ br label %msgSend.cont62
+
+msgSend.nullinit61: ; preds = %msgSend.cont50
+ br label %msgSend.cont62
+
+msgSend.cont62: ; preds = %msgSend.nullinit61, %msgSend.call60
+ br i1 undef, label %msgSend.nullinit67, label %msgSend.call66
+
+msgSend.call66: ; preds = %msgSend.cont62
+ br label %msgSend.cont68
+
+msgSend.nullinit67: ; preds = %msgSend.cont62
+ br label %msgSend.cont68
+
+msgSend.cont68: ; preds = %msgSend.nullinit67, %msgSend.call66
+ br i1 undef, label %msgSend.nullinit84, label %msgSend.call83
+
+msgSend.call83: ; preds = %msgSend.cont68
+ br label %msgSend.cont85
+
+msgSend.nullinit84: ; preds = %msgSend.cont68
+ br label %msgSend.cont85
+
+msgSend.cont85: ; preds = %msgSend.nullinit84, %msgSend.call83
+ br i1 undef, label %msgSend.nullinit90, label %msgSend.call89
+
+msgSend.call89: ; preds = %msgSend.cont85
+ br label %msgSend.cont91
+
+msgSend.nullinit90: ; preds = %msgSend.cont85
+ br label %msgSend.cont91
+
+msgSend.cont91: ; preds = %msgSend.nullinit90, %msgSend.call89
+ br i1 undef, label %msgSend.nullinit104, label %msgSend.call103
+
+msgSend.call103: ; preds = %msgSend.cont91
+ br label %msgSend.cont105
+
+msgSend.nullinit104: ; preds = %msgSend.cont91
+ br label %msgSend.cont105
+
+msgSend.cont105: ; preds = %msgSend.nullinit104, %msgSend.call103
+ br i1 undef, label %land.lhs.true, label %if.end123
+
+land.lhs.true: ; preds = %msgSend.cont105
+ br i1 undef, label %if.then117, label %if.end123
+
+if.then117: ; preds = %land.lhs.true
+ br label %if.end123
+
+if.end123: ; preds = %if.then117, %land.lhs.true, %msgSend.cont105
+ br i1 undef, label %msgSend.nullinit132, label %msgSend.call131
+
+msgSend.call131: ; preds = %if.end123
+ br label %msgSend.cont133
+
+msgSend.nullinit132: ; preds = %if.end123
+ br label %msgSend.cont133
+
+msgSend.cont133: ; preds = %msgSend.nullinit132, %msgSend.call131
+ br i1 undef, label %msgSend.nullinit139, label %msgSend.call138
+
+msgSend.call138: ; preds = %msgSend.cont133
+ br label %msgSend.cont140
+
+msgSend.nullinit139: ; preds = %msgSend.cont133
+ br label %msgSend.cont140
+
+msgSend.cont140: ; preds = %msgSend.nullinit139, %msgSend.call138
+ br i1 undef, label %if.then151, label %if.end157
+
+if.then151: ; preds = %msgSend.cont140
+ br label %if.end157
+
+if.end157: ; preds = %if.then151, %msgSend.cont140
+ br i1 undef, label %msgSend.nullinit164, label %msgSend.call163
+
+msgSend.call163: ; preds = %if.end157
+ br label %msgSend.cont165
+
+msgSend.nullinit164: ; preds = %if.end157
+ br label %msgSend.cont165
+
+msgSend.cont165: ; preds = %msgSend.nullinit164, %msgSend.call163
+ br i1 undef, label %msgSend.nullinit176, label %msgSend.call175
+
+msgSend.call175: ; preds = %msgSend.cont165
+ br label %msgSend.cont177
+
+msgSend.nullinit176: ; preds = %msgSend.cont165
+ br label %msgSend.cont177
+
+msgSend.cont177: ; preds = %msgSend.nullinit176, %msgSend.call175
+ br i1 undef, label %land.lhs.true181, label %if.end202
+
+land.lhs.true181: ; preds = %msgSend.cont177
+ br i1 undef, label %if.then187, label %if.end202
+
+if.then187: ; preds = %land.lhs.true181
+ br i1 undef, label %msgSend.nullinit199, label %msgSend.call198
+
+msgSend.call198: ; preds = %if.then187
+ br label %msgSend.cont200
+
+msgSend.nullinit199: ; preds = %if.then187
+ br label %msgSend.cont200
+
+msgSend.cont200: ; preds = %msgSend.nullinit199, %msgSend.call198
+ br label %if.end202
+
+if.end202: ; preds = %msgSend.cont200, %land.lhs.true181, %msgSend.cont177
+ br i1 undef, label %msgSend.nullinit236, label %msgSend.call235
+
+msgSend.call235: ; preds = %if.end202
+ br label %msgSend.cont237
+
+msgSend.nullinit236: ; preds = %if.end202
+ br label %msgSend.cont237
+
+msgSend.cont237: ; preds = %msgSend.nullinit236, %msgSend.call235
+ br i1 undef, label %msgSend.nullinit254, label %msgSend.call253
+
+msgSend.call253: ; preds = %msgSend.cont237
+ br label %msgSend.cont255
+
+msgSend.nullinit254: ; preds = %msgSend.cont237
+ br label %msgSend.cont255
+
+msgSend.cont255: ; preds = %msgSend.nullinit254, %msgSend.call253
+ br i1 undef, label %msgSend.nullinit269, label %msgSend.call268
+
+msgSend.call268: ; preds = %msgSend.cont255
+ br label %msgSend.cont270
+
+msgSend.nullinit269: ; preds = %msgSend.cont255
+ br label %msgSend.cont270
+
+msgSend.cont270: ; preds = %msgSend.nullinit269, %msgSend.call268
+ br i1 undef, label %msgSend.nullinit281, label %msgSend.call280
+
+msgSend.call280: ; preds = %msgSend.cont270
+ br label %msgSend.cont282
+
+msgSend.nullinit281: ; preds = %msgSend.cont270
+ br label %msgSend.cont282
+
+msgSend.cont282: ; preds = %msgSend.nullinit281, %msgSend.call280
+ br i1 undef, label %msgSend.nullinit287, label %msgSend.call286
+
+msgSend.call286: ; preds = %msgSend.cont282
+ br label %msgSend.cont288
+
+msgSend.nullinit287: ; preds = %msgSend.cont282
+ br label %msgSend.cont288
+
+msgSend.cont288: ; preds = %msgSend.nullinit287, %msgSend.call286
+ br i1 undef, label %msgSend.nullinit303, label %msgSend.call302
+
+msgSend.call302: ; preds = %msgSend.cont288
+ br label %msgSend.cont304
+
+msgSend.nullinit303: ; preds = %msgSend.cont288
+ br label %msgSend.cont304
+
+msgSend.cont304: ; preds = %msgSend.nullinit303, %msgSend.call302
+ br i1 undef, label %msgSend.nullinit344, label %msgSend.call343
+
+msgSend.call343: ; preds = %msgSend.cont304
+ br label %msgSend.cont345
+
+msgSend.nullinit344: ; preds = %msgSend.cont304
+ br label %msgSend.cont345
+
+msgSend.cont345: ; preds = %msgSend.nullinit344, %msgSend.call343
+ br i1 undef, label %msgSend.nullinit350, label %msgSend.call349
+
+msgSend.call349: ; preds = %msgSend.cont345
+ br label %msgSend.cont351
+
+msgSend.nullinit350: ; preds = %msgSend.cont345
+ br label %msgSend.cont351
+
+msgSend.cont351: ; preds = %msgSend.nullinit350, %msgSend.call349
+ br i1 undef, label %msgSend.nullinit366, label %msgSend.call365
+
+msgSend.call365: ; preds = %msgSend.cont351
+ br label %msgSend.cont367
+
+msgSend.nullinit366: ; preds = %msgSend.cont351
+ br label %msgSend.cont367
+
+msgSend.cont367: ; preds = %msgSend.nullinit366, %msgSend.call365
+ br i1 undef, label %msgSend.nullinit376, label %msgSend.call375
+
+msgSend.call375: ; preds = %msgSend.cont367
+ br label %msgSend.cont377
+
+msgSend.nullinit376: ; preds = %msgSend.cont367
+ br label %msgSend.cont377
+
+msgSend.cont377: ; preds = %msgSend.nullinit376, %msgSend.call375
+ br i1 undef, label %if.then384, label %if.else401
+
+if.then384: ; preds = %msgSend.cont377
+ br i1 undef, label %msgSend.nullinit392, label %msgSend.call391
+
+msgSend.call391: ; preds = %if.then384
+ br label %msgSend.cont393
+
+msgSend.nullinit392: ; preds = %if.then384
+ br label %msgSend.cont393
+
+msgSend.cont393: ; preds = %msgSend.nullinit392, %msgSend.call391
+ br label %if.end418
+
+if.else401: ; preds = %msgSend.cont377
+ br i1 undef, label %msgSend.nullinit409, label %msgSend.call408
+
+msgSend.call408: ; preds = %if.else401
+ br label %msgSend.cont410
+
+msgSend.nullinit409: ; preds = %if.else401
+ br label %msgSend.cont410
+
+msgSend.cont410: ; preds = %msgSend.nullinit409, %msgSend.call408
+ br label %if.end418
+
+if.end418: ; preds = %msgSend.cont410, %msgSend.cont393
+ br i1 undef, label %msgSend.nullinit470, label %msgSend.call469
+
+msgSend.call469: ; preds = %if.end418
+ br label %msgSend.cont471
+
+msgSend.nullinit470: ; preds = %if.end418
+ br label %msgSend.cont471
+
+msgSend.cont471: ; preds = %msgSend.nullinit470, %msgSend.call469
+ br i1 undef, label %msgSend.nullinit484, label %msgSend.call483
+
+msgSend.call483: ; preds = %msgSend.cont471
+ br label %msgSend.cont485
+
+msgSend.nullinit484: ; preds = %msgSend.cont471
+ br label %msgSend.cont485
+
+msgSend.cont485: ; preds = %msgSend.nullinit484, %msgSend.call483
+ br i1 undef, label %msgSend.nullinit500, label %msgSend.call499
+
+msgSend.call499: ; preds = %msgSend.cont485
+ br label %msgSend.cont501
+
+msgSend.nullinit500: ; preds = %msgSend.cont485
+ br label %msgSend.cont501
+
+msgSend.cont501: ; preds = %msgSend.nullinit500, %msgSend.call499
+ br i1 undef, label %msgSend.nullinit506, label %msgSend.call505
+
+msgSend.call505: ; preds = %msgSend.cont501
+ br label %msgSend.cont507
+
+msgSend.nullinit506: ; preds = %msgSend.cont501
+ br label %msgSend.cont507
+
+msgSend.cont507: ; preds = %msgSend.nullinit506, %msgSend.call505
+ call void @objc_release(i8* %0) nounwind, !clang.imprecise_release !0
+ ret void
+}
+
+!0 = metadata !{}
diff --git a/test/Transforms/PhaseOrdering/gdce.ll b/test/Transforms/PhaseOrdering/gdce.ll
new file mode 100644
index 000000000000..273e47e97cb4
--- /dev/null
+++ b/test/Transforms/PhaseOrdering/gdce.ll
@@ -0,0 +1,106 @@
+; RUN: opt -O2 -S %s | FileCheck %s
+
+; Run global DCE to eliminate unused ctor and dtor.
+; rdar://9142819
+
+; CHECK: main
+; CHECK-NOT: _ZN4BaseC1Ev
+; CHECK-NOT: _ZN4BaseD1Ev
+; CHECK-NOT: _ZN4BaseD2Ev
+; CHECK-NOT: _ZN4BaseC2Ev
+; CHECK-NOT: _ZN4BaseD0Ev
+
+%class.Base = type { i32 (...)** }
+
+@_ZTV4Base = linkonce_odr unnamed_addr constant [4 x i8*] [i8* null, i8* bitcast ({ i8*, i8* }* @_ZTI4Base to i8*), i8* bitcast (void (%class.Base*)* @_ZN4BaseD1Ev to i8*), i8* bitcast (void (%class.Base*)* @_ZN4BaseD0Ev to i8*)]
+@_ZTVN10__cxxabiv117__class_type_infoE = external global i8*
+@_ZTS4Base = linkonce_odr constant [6 x i8] c"4Base\00"
+@_ZTI4Base = linkonce_odr unnamed_addr constant { i8*, i8* } { i8* bitcast (i8** getelementptr inbounds (i8** @_ZTVN10__cxxabiv117__class_type_infoE, i64 2) to i8*), i8* getelementptr inbounds ([6 x i8]* @_ZTS4Base, i32 0, i32 0) }
+
+define i32 @main() uwtable ssp {
+entry:
+ %retval = alloca i32, align 4
+ %b = alloca %class.Base, align 8
+ %cleanup.dest.slot = alloca i32
+ store i32 0, i32* %retval
+ call void @_ZN4BaseC1Ev(%class.Base* %b)
+ store i32 0, i32* %retval
+ store i32 1, i32* %cleanup.dest.slot
+ call void @_ZN4BaseD1Ev(%class.Base* %b)
+ %0 = load i32* %retval
+ ret i32 %0
+}
+
+define linkonce_odr void @_ZN4BaseC1Ev(%class.Base* %this) unnamed_addr uwtable ssp align 2 {
+entry:
+ %this.addr = alloca %class.Base*, align 8
+ store %class.Base* %this, %class.Base** %this.addr, align 8
+ %this1 = load %class.Base** %this.addr
+ call void @_ZN4BaseC2Ev(%class.Base* %this1)
+ ret void
+}
+
+define linkonce_odr void @_ZN4BaseD1Ev(%class.Base* %this) unnamed_addr uwtable ssp align 2 {
+entry:
+ %this.addr = alloca %class.Base*, align 8
+ store %class.Base* %this, %class.Base** %this.addr, align 8
+ %this1 = load %class.Base** %this.addr
+ call void @_ZN4BaseD2Ev(%class.Base* %this1)
+ ret void
+}
+
+define linkonce_odr void @_ZN4BaseD2Ev(%class.Base* %this) unnamed_addr nounwind uwtable ssp align 2 {
+entry:
+ %this.addr = alloca %class.Base*, align 8
+ store %class.Base* %this, %class.Base** %this.addr, align 8
+ %this1 = load %class.Base** %this.addr
+ ret void
+}
+
+define linkonce_odr void @_ZN4BaseC2Ev(%class.Base* %this) unnamed_addr nounwind uwtable ssp align 2 {
+entry:
+ %this.addr = alloca %class.Base*, align 8
+ store %class.Base* %this, %class.Base** %this.addr, align 8
+ %this1 = load %class.Base** %this.addr
+ %0 = bitcast %class.Base* %this1 to i8***
+ store i8** getelementptr inbounds ([4 x i8*]* @_ZTV4Base, i64 0, i64 2), i8*** %0
+ ret void
+}
+
+define linkonce_odr void @_ZN4BaseD0Ev(%class.Base* %this) unnamed_addr uwtable ssp align 2 {
+entry:
+ %this.addr = alloca %class.Base*, align 8
+ %exn.slot = alloca i8*
+ %ehselector.slot = alloca i32
+ store %class.Base* %this, %class.Base** %this.addr, align 8
+ %this1 = load %class.Base** %this.addr
+ invoke void @_ZN4BaseD1Ev(%class.Base* %this1)
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %entry
+ %0 = bitcast %class.Base* %this1 to i8*
+ call void @_ZdlPv(i8* %0) nounwind
+ ret void
+
+lpad: ; preds = %entry
+ %1 = landingpad { i8*, i32 } personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*)
+ cleanup
+ %2 = extractvalue { i8*, i32 } %1, 0
+ store i8* %2, i8** %exn.slot
+ %3 = extractvalue { i8*, i32 } %1, 1
+ store i32 %3, i32* %ehselector.slot
+ %4 = bitcast %class.Base* %this1 to i8*
+ call void @_ZdlPv(i8* %4) nounwind
+ br label %eh.resume
+
+eh.resume: ; preds = %lpad
+ %exn = load i8** %exn.slot
+ %sel = load i32* %ehselector.slot
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn, 0
+ %lpad.val2 = insertvalue { i8*, i32 } %lpad.val, i32 %sel, 1
+ resume { i8*, i32 } %lpad.val2
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+declare void @_ZdlPv(i8*) nounwind
diff --git a/test/Transforms/Reassociate/crash.ll b/test/Transforms/Reassociate/crash.ll
index ce586e15fbcf..e29b5dc9c0ce 100644
--- a/test/Transforms/Reassociate/crash.ll
+++ b/test/Transforms/Reassociate/crash.ll
@@ -144,3 +144,31 @@ define i32 @sozefx_(i32 %x, i32 %y) {
%t6 = add i32 %t4, %t5
ret i32 %t6
}
+
+define i32 @bar(i32 %arg, i32 %arg1, i32 %arg2) {
+ %tmp1 = mul i32 %arg1, 2
+ %tmp2 = mul i32 %tmp1, 3
+ %tmp3 = mul i32 %arg2, 2
+ %tmp4 = add i32 %tmp1, 1 ; dead code
+ %ret = add i32 %tmp2, %tmp3
+ ret i32 %ret
+}
+
+; PR14060
+define i8 @hang(i8 %p, i8 %p0, i8 %p1, i8 %p2, i8 %p3, i8 %p4, i8 %p5, i8 %p6, i8 %p7, i8 %p8, i8 %p9) {
+ %tmp = zext i1 false to i8
+ %tmp16 = or i8 %tmp, 1
+ %tmp22 = or i8 %p7, %p0
+ %tmp23 = or i8 %tmp16, %tmp22
+ %tmp28 = or i8 %p9, %p1
+ %tmp31 = or i8 %tmp23, %p2
+ %tmp32 = or i8 %tmp31, %tmp28
+ %tmp38 = or i8 %p8, %p3
+ %tmp39 = or i8 %tmp16, %tmp38
+ %tmp43 = or i8 %tmp39, %p4
+ %tmp44 = or i8 %tmp43, 1
+ %tmp47 = or i8 %tmp32, %p5
+ %tmp50 = or i8 %tmp47, %p6
+ %tmp51 = or i8 %tmp44, %tmp50
+ ret i8 %tmp51
+}
diff --git a/test/Transforms/SCCP/loadtest.ll b/test/Transforms/SCCP/loadtest.ll
index add2af483f56..dd1dba69143c 100644
--- a/test/Transforms/SCCP/loadtest.ll
+++ b/test/Transforms/SCCP/loadtest.ll
@@ -1,8 +1,9 @@
; This test makes sure that these instructions are properly constant propagated.
-target datalayout = "e-p:32:32"
+; RUN: opt < %s -default-data-layout="e-p:32:32" -sccp -S | FileCheck %s
+; RUN: opt < %s -default-data-layout="E-p:32:32" -sccp -S | FileCheck %s
-; RUN: opt < %s -sccp -S | not grep load
+; CHECK-NOT: load
@X = constant i32 42 ; <i32*> [#uses=1]
diff --git a/test/Transforms/SROA/alignment.ll b/test/Transforms/SROA/alignment.ll
new file mode 100644
index 000000000000..ad5fb6c4a5d8
--- /dev/null
+++ b/test/Transforms/SROA/alignment.ll
@@ -0,0 +1,171 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
+
+define void @test1({ i8, i8 }* %a, { i8, i8 }* %b) {
+; CHECK: @test1
+; CHECK: %[[gep_a0:.*]] = getelementptr inbounds { i8, i8 }* %a, i64 0, i32 0
+; CHECK: %[[a0:.*]] = load i8* %[[gep_a0]], align 16
+; CHECK: %[[gep_a1:.*]] = getelementptr inbounds { i8, i8 }* %a, i64 0, i32 1
+; CHECK: %[[a1:.*]] = load i8* %[[gep_a1]], align 1
+; CHECK: %[[gep_b0:.*]] = getelementptr inbounds { i8, i8 }* %b, i64 0, i32 0
+; CHECK: store i8 %[[a0]], i8* %[[gep_b0]], align 16
+; CHECK: %[[gep_b1:.*]] = getelementptr inbounds { i8, i8 }* %b, i64 0, i32 1
+; CHECK: store i8 %[[a1]], i8* %[[gep_b1]], align 1
+; CHECK: ret void
+
+entry:
+ %alloca = alloca { i8, i8 }, align 16
+ %gep_a = getelementptr { i8, i8 }* %a, i32 0, i32 0
+ %gep_alloca = getelementptr { i8, i8 }* %alloca, i32 0, i32 0
+ %gep_b = getelementptr { i8, i8 }* %b, i32 0, i32 0
+
+ store i8 420, i8* %gep_alloca, align 16
+
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %gep_alloca, i8* %gep_a, i32 2, i32 16, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %gep_b, i8* %gep_alloca, i32 2, i32 16, i1 false)
+ ret void
+}
+
+define void @test2() {
+; CHECK: @test2
+; CHECK: alloca i16
+; CHECK: load i8* %{{.*}}
+; CHECK: store i8 42, i8* %{{.*}}
+; CHECK: ret void
+
+entry:
+ %a = alloca { i8, i8, i8, i8 }, align 2
+ %gep1 = getelementptr { i8, i8, i8, i8 }* %a, i32 0, i32 1
+ %cast1 = bitcast i8* %gep1 to i16*
+ store volatile i16 0, i16* %cast1
+ %gep2 = getelementptr { i8, i8, i8, i8 }* %a, i32 0, i32 2
+ %result = load i8* %gep2
+ store i8 42, i8* %gep2
+ ret void
+}
+
+define void @PR13920(<2 x i64>* %a, i16* %b) {
+; Test that alignments on memcpy intrinsics get propagated to loads and stores.
+; CHECK: @PR13920
+; CHECK: load <2 x i64>* %a, align 2
+; CHECK: store <2 x i64> {{.*}}, <2 x i64>* {{.*}}, align 2
+; CHECK: ret void
+
+entry:
+ %aa = alloca <2 x i64>, align 16
+ %aptr = bitcast <2 x i64>* %a to i8*
+ %aaptr = bitcast <2 x i64>* %aa to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %aaptr, i8* %aptr, i32 16, i32 2, i1 false)
+ %bptr = bitcast i16* %b to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %bptr, i8* %aaptr, i32 16, i32 2, i1 false)
+ ret void
+}
+
+define void @test3(i8* %x) {
+; Test that when we promote an alloca to a type with lower ABI alignment, we
+; provide the needed explicit alignment that code using the alloca may be
+; expecting. However, also check that any offset within an alloca can in turn
+; reduce the alignment.
+; CHECK: @test3
+; CHECK: alloca [22 x i8], align 8
+; CHECK: alloca [18 x i8], align 2
+; CHECK: ret void
+
+entry:
+ %a = alloca { i8*, i8*, i8* }
+ %b = alloca { i8*, i8*, i8* }
+ %a_raw = bitcast { i8*, i8*, i8* }* %a to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a_raw, i8* %x, i32 22, i32 8, i1 false)
+ %b_raw = bitcast { i8*, i8*, i8* }* %b to i8*
+ %b_gep = getelementptr i8* %b_raw, i32 6
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b_gep, i8* %x, i32 18, i32 2, i1 false)
+ ret void
+}
+
+define void @test5() {
+; Test that we preserve underaligned loads and stores when splitting.
+; CHECK: @test5
+; CHECK: alloca [9 x i8]
+; CHECK: alloca [9 x i8]
+; CHECK: store volatile double 0.0{{.*}}, double* %{{.*}}, align 1
+; CHECK: load i16* %{{.*}}, align 1
+; CHECK: load double* %{{.*}}, align 1
+; CHECK: store volatile double %{{.*}}, double* %{{.*}}, align 1
+; CHECK: load i16* %{{.*}}, align 1
+; CHECK: ret void
+
+entry:
+ %a = alloca [18 x i8]
+ %raw1 = getelementptr inbounds [18 x i8]* %a, i32 0, i32 0
+ %ptr1 = bitcast i8* %raw1 to double*
+ store volatile double 0.0, double* %ptr1, align 1
+ %weird_gep1 = getelementptr inbounds [18 x i8]* %a, i32 0, i32 7
+ %weird_cast1 = bitcast i8* %weird_gep1 to i16*
+ %weird_load1 = load i16* %weird_cast1, align 1
+
+ %raw2 = getelementptr inbounds [18 x i8]* %a, i32 0, i32 9
+ %ptr2 = bitcast i8* %raw2 to double*
+ %d1 = load double* %ptr1, align 1
+ store volatile double %d1, double* %ptr2, align 1
+ %weird_gep2 = getelementptr inbounds [18 x i8]* %a, i32 0, i32 16
+ %weird_cast2 = bitcast i8* %weird_gep2 to i16*
+ %weird_load2 = load i16* %weird_cast2, align 1
+
+ ret void
+}
+
+define void @test6() {
+; Test that we promote alignment when the underlying alloca switches to one
+; that innately provides it.
+; CHECK: @test6
+; CHECK: alloca double
+; CHECK: alloca double
+; CHECK-NOT: align
+; CHECK: ret void
+
+entry:
+ %a = alloca [16 x i8]
+ %raw1 = getelementptr inbounds [16 x i8]* %a, i32 0, i32 0
+ %ptr1 = bitcast i8* %raw1 to double*
+ store volatile double 0.0, double* %ptr1, align 1
+
+ %raw2 = getelementptr inbounds [16 x i8]* %a, i32 0, i32 8
+ %ptr2 = bitcast i8* %raw2 to double*
+ %val = load double* %ptr1, align 1
+ store volatile double %val, double* %ptr2, align 1
+
+ ret void
+}
+
+define void @test7(i8* %out) {
+; Test that we properly compute the destination alignment when rewriting
+; memcpys as direct loads or stores.
+; CHECK: @test7
+; CHECK-NOT: alloca
+
+entry:
+ %a = alloca [16 x i8]
+ %raw1 = getelementptr inbounds [16 x i8]* %a, i32 0, i32 0
+ %ptr1 = bitcast i8* %raw1 to double*
+ %raw2 = getelementptr inbounds [16 x i8]* %a, i32 0, i32 8
+ %ptr2 = bitcast i8* %raw2 to double*
+
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %raw1, i8* %out, i32 16, i32 0, i1 false)
+; CHECK: %[[val2:.*]] = load double* %{{.*}}, align 1
+; CHECK: %[[val1:.*]] = load double* %{{.*}}, align 1
+
+ %val1 = load double* %ptr2, align 1
+ %val2 = load double* %ptr1, align 1
+
+ store double %val1, double* %ptr1, align 1
+ store double %val2, double* %ptr2, align 1
+
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %out, i8* %raw1, i32 16, i32 0, i1 false)
+; CHECK: store double %[[val1]], double* %{{.*}}, align 1
+; CHECK: store double %[[val2]], double* %{{.*}}, align 1
+
+ ret void
+; CHECK: ret void
+}
diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll
new file mode 100644
index 000000000000..b363eefb3f9d
--- /dev/null
+++ b/test/Transforms/SROA/basictest.ll
@@ -0,0 +1,1136 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+; RUN: opt < %s -sroa -force-ssa-updater -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+declare void @llvm.lifetime.start(i64, i8* nocapture)
+declare void @llvm.lifetime.end(i64, i8* nocapture)
+
+define i32 @test0() {
+; CHECK: @test0
+; CHECK-NOT: alloca
+; CHECK: ret i32
+
+entry:
+ %a1 = alloca i32
+ %a2 = alloca float
+
+ %a1.i8 = bitcast i32* %a1 to i8*
+ call void @llvm.lifetime.start(i64 4, i8* %a1.i8)
+
+ store i32 0, i32* %a1
+ %v1 = load i32* %a1
+
+ call void @llvm.lifetime.end(i64 4, i8* %a1.i8)
+
+ %a2.i8 = bitcast float* %a2 to i8*
+ call void @llvm.lifetime.start(i64 4, i8* %a2.i8)
+
+ store float 0.0, float* %a2
+ %v2 = load float * %a2
+ %v2.int = bitcast float %v2 to i32
+ %sum1 = add i32 %v1, %v2.int
+
+ call void @llvm.lifetime.end(i64 4, i8* %a2.i8)
+
+ ret i32 %sum1
+}
+
+define i32 @test1() {
+; CHECK: @test1
+; CHECK-NOT: alloca
+; CHECK: ret i32 0
+
+entry:
+ %X = alloca { i32, float }
+ %Y = getelementptr { i32, float }* %X, i64 0, i32 0
+ store i32 0, i32* %Y
+ %Z = load i32* %Y
+ ret i32 %Z
+}
+
+define i64 @test2(i64 %X) {
+; CHECK: @test2
+; CHECK-NOT: alloca
+; CHECK: ret i64 %X
+
+entry:
+ %A = alloca [8 x i8]
+ %B = bitcast [8 x i8]* %A to i64*
+ store i64 %X, i64* %B
+ br label %L2
+
+L2:
+ %Z = load i64* %B
+ ret i64 %Z
+}
+
+define void @test3(i8* %dst, i8* %src) {
+; CHECK: @test3
+
+entry:
+ %a = alloca [300 x i8]
+; CHECK-NOT: alloca
+; CHECK: %[[test3_a1:.*]] = alloca [42 x i8]
+; CHECK-NEXT: %[[test3_a2:.*]] = alloca [99 x i8]
+; CHECK-NEXT: %[[test3_a3:.*]] = alloca [16 x i8]
+; CHECK-NEXT: %[[test3_a4:.*]] = alloca [42 x i8]
+; CHECK-NEXT: %[[test3_a5:.*]] = alloca [7 x i8]
+; CHECK-NEXT: %[[test3_a6:.*]] = alloca [7 x i8]
+; CHECK-NEXT: %[[test3_a7:.*]] = alloca [85 x i8]
+
+ %b = getelementptr [300 x i8]* %a, i64 0, i64 0
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b, i8* %src, i32 300, i32 1, i1 false)
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8]* %[[test3_a1]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %src, i32 42
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 42
+; CHECK-NEXT: %[[test3_r1:.*]] = load i8* %[[gep]]
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 43
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [99 x i8]* %[[test3_a2]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 99
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 142
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 16
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 158
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8]* %[[test3_a4]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 42
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 200
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 207
+; CHECK-NEXT: %[[test3_r2:.*]] = load i8* %[[gep]]
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 208
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 215
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [85 x i8]* %[[test3_a7]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 85
+
+ ; Clobber a single element of the array, this should be promotable.
+ %c = getelementptr [300 x i8]* %a, i64 0, i64 42
+ store i8 0, i8* %c
+
+ ; Make a sequence of overlapping stores to the array. These overlap both in
+ ; forward strides and in shrinking accesses.
+ %overlap.1.i8 = getelementptr [300 x i8]* %a, i64 0, i64 142
+ %overlap.2.i8 = getelementptr [300 x i8]* %a, i64 0, i64 143
+ %overlap.3.i8 = getelementptr [300 x i8]* %a, i64 0, i64 144
+ %overlap.4.i8 = getelementptr [300 x i8]* %a, i64 0, i64 145
+ %overlap.5.i8 = getelementptr [300 x i8]* %a, i64 0, i64 146
+ %overlap.6.i8 = getelementptr [300 x i8]* %a, i64 0, i64 147
+ %overlap.7.i8 = getelementptr [300 x i8]* %a, i64 0, i64 148
+ %overlap.8.i8 = getelementptr [300 x i8]* %a, i64 0, i64 149
+ %overlap.9.i8 = getelementptr [300 x i8]* %a, i64 0, i64 150
+ %overlap.1.i16 = bitcast i8* %overlap.1.i8 to i16*
+ %overlap.1.i32 = bitcast i8* %overlap.1.i8 to i32*
+ %overlap.1.i64 = bitcast i8* %overlap.1.i8 to i64*
+ %overlap.2.i64 = bitcast i8* %overlap.2.i8 to i64*
+ %overlap.3.i64 = bitcast i8* %overlap.3.i8 to i64*
+ %overlap.4.i64 = bitcast i8* %overlap.4.i8 to i64*
+ %overlap.5.i64 = bitcast i8* %overlap.5.i8 to i64*
+ %overlap.6.i64 = bitcast i8* %overlap.6.i8 to i64*
+ %overlap.7.i64 = bitcast i8* %overlap.7.i8 to i64*
+ %overlap.8.i64 = bitcast i8* %overlap.8.i8 to i64*
+ %overlap.9.i64 = bitcast i8* %overlap.9.i8 to i64*
+ store i8 1, i8* %overlap.1.i8
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 0
+; CHECK-NEXT: store i8 1, i8* %[[gep]]
+ store i16 1, i16* %overlap.1.i16
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast [16 x i8]* %[[test3_a3]] to i16*
+; CHECK-NEXT: store i16 1, i16* %[[bitcast]]
+ store i32 1, i32* %overlap.1.i32
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast [16 x i8]* %[[test3_a3]] to i32*
+; CHECK-NEXT: store i32 1, i32* %[[bitcast]]
+ store i64 1, i64* %overlap.1.i64
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast [16 x i8]* %[[test3_a3]] to i64*
+; CHECK-NEXT: store i64 1, i64* %[[bitcast]]
+ store i64 2, i64* %overlap.2.i64
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 1
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
+; CHECK-NEXT: store i64 2, i64* %[[bitcast]]
+ store i64 3, i64* %overlap.3.i64
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 2
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
+; CHECK-NEXT: store i64 3, i64* %[[bitcast]]
+ store i64 4, i64* %overlap.4.i64
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 3
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
+; CHECK-NEXT: store i64 4, i64* %[[bitcast]]
+ store i64 5, i64* %overlap.5.i64
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 4
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
+; CHECK-NEXT: store i64 5, i64* %[[bitcast]]
+ store i64 6, i64* %overlap.6.i64
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 5
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
+; CHECK-NEXT: store i64 6, i64* %[[bitcast]]
+ store i64 7, i64* %overlap.7.i64
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 6
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
+; CHECK-NEXT: store i64 7, i64* %[[bitcast]]
+ store i64 8, i64* %overlap.8.i64
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 7
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
+; CHECK-NEXT: store i64 8, i64* %[[bitcast]]
+ store i64 9, i64* %overlap.9.i64
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 8
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i64*
+; CHECK-NEXT: store i64 9, i64* %[[bitcast]]
+
+ ; Make two sequences of overlapping stores with more gaps and irregularities.
+ %overlap2.1.0.i8 = getelementptr [300 x i8]* %a, i64 0, i64 200
+ %overlap2.1.1.i8 = getelementptr [300 x i8]* %a, i64 0, i64 201
+ %overlap2.1.2.i8 = getelementptr [300 x i8]* %a, i64 0, i64 202
+ %overlap2.1.3.i8 = getelementptr [300 x i8]* %a, i64 0, i64 203
+
+ %overlap2.2.0.i8 = getelementptr [300 x i8]* %a, i64 0, i64 208
+ %overlap2.2.1.i8 = getelementptr [300 x i8]* %a, i64 0, i64 209
+ %overlap2.2.2.i8 = getelementptr [300 x i8]* %a, i64 0, i64 210
+ %overlap2.2.3.i8 = getelementptr [300 x i8]* %a, i64 0, i64 211
+
+ %overlap2.1.0.i16 = bitcast i8* %overlap2.1.0.i8 to i16*
+ %overlap2.1.0.i32 = bitcast i8* %overlap2.1.0.i8 to i32*
+ %overlap2.1.1.i32 = bitcast i8* %overlap2.1.1.i8 to i32*
+ %overlap2.1.2.i32 = bitcast i8* %overlap2.1.2.i8 to i32*
+ %overlap2.1.3.i32 = bitcast i8* %overlap2.1.3.i8 to i32*
+ store i8 1, i8* %overlap2.1.0.i8
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 0
+; CHECK-NEXT: store i8 1, i8* %[[gep]]
+ store i16 1, i16* %overlap2.1.0.i16
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast [7 x i8]* %[[test3_a5]] to i16*
+; CHECK-NEXT: store i16 1, i16* %[[bitcast]]
+ store i32 1, i32* %overlap2.1.0.i32
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast [7 x i8]* %[[test3_a5]] to i32*
+; CHECK-NEXT: store i32 1, i32* %[[bitcast]]
+ store i32 2, i32* %overlap2.1.1.i32
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 1
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
+; CHECK-NEXT: store i32 2, i32* %[[bitcast]]
+ store i32 3, i32* %overlap2.1.2.i32
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 2
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
+; CHECK-NEXT: store i32 3, i32* %[[bitcast]]
+ store i32 4, i32* %overlap2.1.3.i32
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 3
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
+; CHECK-NEXT: store i32 4, i32* %[[bitcast]]
+
+ %overlap2.2.0.i32 = bitcast i8* %overlap2.2.0.i8 to i32*
+ %overlap2.2.1.i16 = bitcast i8* %overlap2.2.1.i8 to i16*
+ %overlap2.2.1.i32 = bitcast i8* %overlap2.2.1.i8 to i32*
+ %overlap2.2.2.i32 = bitcast i8* %overlap2.2.2.i8 to i32*
+ %overlap2.2.3.i32 = bitcast i8* %overlap2.2.3.i8 to i32*
+ store i32 1, i32* %overlap2.2.0.i32
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast [7 x i8]* %[[test3_a6]] to i32*
+; CHECK-NEXT: store i32 1, i32* %[[bitcast]]
+ store i8 1, i8* %overlap2.2.1.i8
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 1
+; CHECK-NEXT: store i8 1, i8* %[[gep]]
+ store i16 1, i16* %overlap2.2.1.i16
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 1
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
+; CHECK-NEXT: store i16 1, i16* %[[bitcast]]
+ store i32 1, i32* %overlap2.2.1.i32
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 1
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
+; CHECK-NEXT: store i32 1, i32* %[[bitcast]]
+ store i32 3, i32* %overlap2.2.2.i32
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 2
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
+; CHECK-NEXT: store i32 3, i32* %[[bitcast]]
+ store i32 4, i32* %overlap2.2.3.i32
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 3
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i32*
+; CHECK-NEXT: store i32 4, i32* %[[bitcast]]
+
+ %overlap2.prefix = getelementptr i8* %overlap2.1.1.i8, i64 -4
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.prefix, i8* %src, i32 8, i32 1, i1 false)
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [42 x i8]* %[[test3_a4]], i64 0, i64 39
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %src, i32 3
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 3
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 5
+
+ ; Bridge between the overlapping areas
+ call void @llvm.memset.p0i8.i32(i8* %overlap2.1.2.i8, i8 42, i32 8, i32 1, i1 false)
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 2
+; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %[[gep]], i8 42, i32 5
+; ...promoted i8 store...
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %[[gep]], i8 42, i32 2
+
+ ; Entirely within the second overlap.
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.2.1.i8, i8* %src, i32 5, i32 1, i1 false)
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 1
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep]], i8* %src, i32 5
+
+ ; Trailing past the second overlap.
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %overlap2.2.2.i8, i8* %src, i32 8, i32 1, i1 false)
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 2
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep]], i8* %src, i32 5
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 5
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [85 x i8]* %[[test3_a7]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 3
+
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %b, i32 300, i32 1, i1 false)
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [42 x i8]* %[[test3_a1]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %[[gep]], i32 42
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 42
+; CHECK-NEXT: store i8 0, i8* %[[gep]]
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 43
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [99 x i8]* %[[test3_a2]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 99
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 142
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [16 x i8]* %[[test3_a3]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 16
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 158
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [42 x i8]* %[[test3_a4]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 42
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 200
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a5]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 207
+; CHECK-NEXT: store i8 42, i8* %[[gep]]
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 208
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test3_a6]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 215
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [85 x i8]* %[[test3_a7]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 85
+
+ ret void
+}
+
+define void @test4(i8* %dst, i8* %src) {
+; CHECK: @test4
+
+entry:
+ %a = alloca [100 x i8]
+; CHECK-NOT: alloca
+; CHECK: %[[test4_a1:.*]] = alloca [20 x i8]
+; CHECK-NEXT: %[[test4_a2:.*]] = alloca [7 x i8]
+; CHECK-NEXT: %[[test4_a3:.*]] = alloca [10 x i8]
+; CHECK-NEXT: %[[test4_a4:.*]] = alloca [7 x i8]
+; CHECK-NEXT: %[[test4_a5:.*]] = alloca [7 x i8]
+; CHECK-NEXT: %[[test4_a6:.*]] = alloca [40 x i8]
+
+ %b = getelementptr [100 x i8]* %a, i64 0, i64 0
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %b, i8* %src, i32 100, i32 1, i1 false)
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [20 x i8]* %[[test4_a1]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep]], i8* %src, i32 20
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 20
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
+; CHECK-NEXT: %[[test4_r1:.*]] = load i16* %[[bitcast]]
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 22
+; CHECK-NEXT: %[[test4_r2:.*]] = load i8* %[[gep]]
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 23
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a2]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 30
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [10 x i8]* %[[test4_a3]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 10
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 40
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
+; CHECK-NEXT: %[[test4_r3:.*]] = load i16* %[[bitcast]]
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 42
+; CHECK-NEXT: %[[test4_r4:.*]] = load i8* %[[gep]]
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 43
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a4]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 50
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
+; CHECK-NEXT: %[[test4_r5:.*]] = load i16* %[[bitcast]]
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %src, i64 52
+; CHECK-NEXT: %[[test4_r6:.*]] = load i8* %[[gep]]
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 53
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a5]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds i8* %src, i64 60
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [40 x i8]* %[[test4_a6]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 40
+
+ %a.src.1 = getelementptr [100 x i8]* %a, i64 0, i64 20
+ %a.dst.1 = getelementptr [100 x i8]* %a, i64 0, i64 40
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.dst.1, i8* %a.src.1, i32 10, i32 1, i1 false)
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a4]], i64 0, i64 0
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a2]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
+
+ ; Clobber a single element of the array, this should be promotable, and be deleted.
+ %c = getelementptr [100 x i8]* %a, i64 0, i64 42
+ store i8 0, i8* %c
+
+ %a.src.2 = getelementptr [100 x i8]* %a, i64 0, i64 50
+ call void @llvm.memmove.p0i8.p0i8.i32(i8* %a.dst.1, i8* %a.src.2, i32 10, i32 1, i1 false)
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a4]], i64 0, i64 0
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a5]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
+
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %b, i32 100, i32 1, i1 false)
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds [20 x i8]* %[[test4_a1]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %[[gep]], i32 20
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 20
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
+; CHECK-NEXT: store i16 %[[test4_r1]], i16* %[[bitcast]]
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 22
+; CHECK-NEXT: store i8 %[[test4_r2]], i8* %[[gep]]
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 23
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a2]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 30
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [10 x i8]* %[[test4_a3]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 10
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 40
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
+; CHECK-NEXT: store i16 %[[test4_r5]], i16* %[[bitcast]]
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 42
+; CHECK-NEXT: store i8 %[[test4_r6]], i8* %[[gep]]
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 43
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a4]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 50
+; CHECK-NEXT: %[[bitcast:.*]] = bitcast i8* %[[gep]] to i16*
+; CHECK-NEXT: store i16 %[[test4_r5]], i16* %[[bitcast]]
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i8* %dst, i64 52
+; CHECK-NEXT: store i8 %[[test4_r6]], i8* %[[gep]]
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 53
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [7 x i8]* %[[test4_a5]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 7
+; CHECK-NEXT: %[[gep_dst:.*]] = getelementptr inbounds i8* %dst, i64 60
+; CHECK-NEXT: %[[gep_src:.*]] = getelementptr inbounds [40 x i8]* %[[test4_a6]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[gep_dst]], i8* %[[gep_src]], i32 40
+
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+
+define i16 @test5() {
+; CHECK: @test5
+; CHECK-NOT: alloca float
+; CHECK: %[[cast:.*]] = bitcast float 0.0{{.*}} to i32
+; CHECK-NEXT: %[[shr:.*]] = lshr i32 %[[cast]], 16
+; CHECK-NEXT: %[[trunc:.*]] = trunc i32 %[[shr]] to i16
+; CHECK-NEXT: ret i16 %[[trunc]]
+
+entry:
+ %a = alloca [4 x i8]
+ %fptr = bitcast [4 x i8]* %a to float*
+ store float 0.0, float* %fptr
+ %ptr = getelementptr [4 x i8]* %a, i32 0, i32 2
+ %iptr = bitcast i8* %ptr to i16*
+ %val = load i16* %iptr
+ ret i16 %val
+}
+
+define i32 @test6() {
+; CHECK: @test6
+; CHECK: alloca i32
+; CHECK-NEXT: store volatile i32
+; CHECK-NEXT: load i32*
+; CHECK-NEXT: ret i32
+
+entry:
+ %a = alloca [4 x i8]
+ %ptr = getelementptr [4 x i8]* %a, i32 0, i32 0
+ call void @llvm.memset.p0i8.i32(i8* %ptr, i8 42, i32 4, i32 1, i1 true)
+ %iptr = bitcast i8* %ptr to i32*
+ %val = load i32* %iptr
+ ret i32 %val
+}
+
+define void @test7(i8* %src, i8* %dst) {
+; CHECK: @test7
+; CHECK: alloca i32
+; CHECK-NEXT: bitcast i8* %src to i32*
+; CHECK-NEXT: load volatile i32*
+; CHECK-NEXT: store volatile i32
+; CHECK-NEXT: bitcast i8* %dst to i32*
+; CHECK-NEXT: load volatile i32*
+; CHECK-NEXT: store volatile i32
+; CHECK-NEXT: ret
+
+entry:
+ %a = alloca [4 x i8]
+ %ptr = getelementptr [4 x i8]* %a, i32 0, i32 0
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 4, i32 1, i1 true)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 4, i32 1, i1 true)
+ ret void
+}
+
+
+%S1 = type { i32, i32, [16 x i8] }
+%S2 = type { %S1*, %S2* }
+
+define %S2 @test8(%S2* %s2) {
+; CHECK: @test8
+entry:
+ %new = alloca %S2
+; CHECK-NOT: alloca
+
+ %s2.next.ptr = getelementptr %S2* %s2, i64 0, i32 1
+ %s2.next = load %S2** %s2.next.ptr
+; CHECK: %[[gep:.*]] = getelementptr %S2* %s2, i64 0, i32 1
+; CHECK-NEXT: %[[next:.*]] = load %S2** %[[gep]]
+
+ %s2.next.s1.ptr = getelementptr %S2* %s2.next, i64 0, i32 0
+ %s2.next.s1 = load %S1** %s2.next.s1.ptr
+ %new.s1.ptr = getelementptr %S2* %new, i64 0, i32 0
+ store %S1* %s2.next.s1, %S1** %new.s1.ptr
+ %s2.next.next.ptr = getelementptr %S2* %s2.next, i64 0, i32 1
+ %s2.next.next = load %S2** %s2.next.next.ptr
+ %new.next.ptr = getelementptr %S2* %new, i64 0, i32 1
+ store %S2* %s2.next.next, %S2** %new.next.ptr
+; CHECK-NEXT: %[[gep:.*]] = getelementptr %S2* %[[next]], i64 0, i32 0
+; CHECK-NEXT: %[[next_s1:.*]] = load %S1** %[[gep]]
+; CHECK-NEXT: %[[gep:.*]] = getelementptr %S2* %[[next]], i64 0, i32 1
+; CHECK-NEXT: %[[next_next:.*]] = load %S2** %[[gep]]
+
+ %new.s1 = load %S1** %new.s1.ptr
+ %result1 = insertvalue %S2 undef, %S1* %new.s1, 0
+; CHECK-NEXT: %[[result1:.*]] = insertvalue %S2 undef, %S1* %[[next_s1]], 0
+ %new.next = load %S2** %new.next.ptr
+ %result2 = insertvalue %S2 %result1, %S2* %new.next, 1
+; CHECK-NEXT: %[[result2:.*]] = insertvalue %S2 %[[result1]], %S2* %[[next_next]], 1
+ ret %S2 %result2
+; CHECK-NEXT: ret %S2 %[[result2]]
+}
+
+define i64 @test9() {
+; Ensure we can handle loads off the end of an alloca even when wrapped in
+; weird bit casts and types. The result is undef, but this shouldn't crash
+; anything.
+; CHECK: @test9
+; CHECK-NOT: alloca
+; CHECK: ret i64 undef
+
+entry:
+ %a = alloca { [3 x i8] }
+ %gep1 = getelementptr inbounds { [3 x i8] }* %a, i32 0, i32 0, i32 0
+ store i8 0, i8* %gep1, align 1
+ %gep2 = getelementptr inbounds { [3 x i8] }* %a, i32 0, i32 0, i32 1
+ store i8 0, i8* %gep2, align 1
+ %gep3 = getelementptr inbounds { [3 x i8] }* %a, i32 0, i32 0, i32 2
+ store i8 26, i8* %gep3, align 1
+ %cast = bitcast { [3 x i8] }* %a to { i64 }*
+ %elt = getelementptr inbounds { i64 }* %cast, i32 0, i32 0
+ %result = load i64* %elt
+ ret i64 %result
+}
+
+define %S2* @test10() {
+; CHECK: @test10
+; CHECK-NOT: alloca %S2*
+; CHECK: ret %S2* null
+
+entry:
+ %a = alloca [8 x i8]
+ %ptr = getelementptr [8 x i8]* %a, i32 0, i32 0
+ call void @llvm.memset.p0i8.i32(i8* %ptr, i8 0, i32 8, i32 1, i1 false)
+ %s2ptrptr = bitcast i8* %ptr to %S2**
+ %s2ptr = load %S2** %s2ptrptr
+ ret %S2* %s2ptr
+}
+
+define i32 @test11() {
+; CHECK: @test11
+; CHECK-NOT: alloca
+; CHECK: ret i32 0
+
+entry:
+ %X = alloca i32
+ br i1 undef, label %good, label %bad
+
+good:
+ %Y = getelementptr i32* %X, i64 0
+ store i32 0, i32* %Y
+ %Z = load i32* %Y
+ ret i32 %Z
+
+bad:
+ %Y2 = getelementptr i32* %X, i64 1
+ store i32 0, i32* %Y2
+ %Z2 = load i32* %Y2
+ ret i32 %Z2
+}
+
+define i8 @test12() {
+; We fully promote these to the i24 load or store size, resulting in just masks
+; and other operations that instcombine will fold, but no alloca.
+;
+; CHECK: @test12
+
+entry:
+ %a = alloca [3 x i8]
+ %b = alloca [3 x i8]
+; CHECK-NOT: alloca
+
+ %a0ptr = getelementptr [3 x i8]* %a, i64 0, i32 0
+ store i8 0, i8* %a0ptr
+ %a1ptr = getelementptr [3 x i8]* %a, i64 0, i32 1
+ store i8 0, i8* %a1ptr
+ %a2ptr = getelementptr [3 x i8]* %a, i64 0, i32 2
+ store i8 0, i8* %a2ptr
+ %aiptr = bitcast [3 x i8]* %a to i24*
+ %ai = load i24* %aiptr
+; CHCEK-NOT: store
+; CHCEK-NOT: load
+; CHECK: %[[ext2:.*]] = zext i8 0 to i24
+; CHECK-NEXT: %[[shift2:.*]] = shl i24 %[[ext2]], 16
+; CHECK-NEXT: %[[mask2:.*]] = and i24 undef, 65535
+; CHECK-NEXT: %[[insert2:.*]] = or i24 %[[mask2]], %[[shift2]]
+; CHECK-NEXT: %[[ext1:.*]] = zext i8 0 to i24
+; CHECK-NEXT: %[[shift1:.*]] = shl i24 %[[ext1]], 8
+; CHECK-NEXT: %[[mask1:.*]] = and i24 %[[insert2]], -65281
+; CHECK-NEXT: %[[insert1:.*]] = or i24 %[[mask1]], %[[shift1]]
+; CHECK-NEXT: %[[ext0:.*]] = zext i8 0 to i24
+; CHECK-NEXT: %[[mask0:.*]] = and i24 %[[insert1]], -256
+; CHECK-NEXT: %[[insert0:.*]] = or i24 %[[mask0]], %[[ext0]]
+
+ %biptr = bitcast [3 x i8]* %b to i24*
+ store i24 %ai, i24* %biptr
+ %b0ptr = getelementptr [3 x i8]* %b, i64 0, i32 0
+ %b0 = load i8* %b0ptr
+ %b1ptr = getelementptr [3 x i8]* %b, i64 0, i32 1
+ %b1 = load i8* %b1ptr
+ %b2ptr = getelementptr [3 x i8]* %b, i64 0, i32 2
+ %b2 = load i8* %b2ptr
+; CHCEK-NOT: store
+; CHCEK-NOT: load
+; CHECK: %[[trunc0:.*]] = trunc i24 %[[insert0]] to i8
+; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[insert0]], 8
+; CHECK-NEXT: %[[trunc1:.*]] = trunc i24 %[[shift1]] to i8
+; CHECK-NEXT: %[[shift2:.*]] = lshr i24 %[[insert0]], 16
+; CHECK-NEXT: %[[trunc2:.*]] = trunc i24 %[[shift2]] to i8
+
+ %bsum0 = add i8 %b0, %b1
+ %bsum1 = add i8 %bsum0, %b2
+ ret i8 %bsum1
+; CHECK: %[[sum0:.*]] = add i8 %[[trunc0]], %[[trunc1]]
+; CHECK-NEXT: %[[sum1:.*]] = add i8 %[[sum0]], %[[trunc2]]
+; CHECK-NEXT: ret i8 %[[sum1]]
+}
+
+define i32 @test13() {
+; Ensure we don't crash and handle undefined loads that straddle the end of the
+; allocation.
+; CHECK: @test13
+; CHECK: %[[ret:.*]] = zext i16 undef to i32
+; CHECK: ret i32 %[[ret]]
+
+entry:
+ %a = alloca [3 x i8]
+ %b0ptr = getelementptr [3 x i8]* %a, i64 0, i32 0
+ store i8 0, i8* %b0ptr
+ %b1ptr = getelementptr [3 x i8]* %a, i64 0, i32 1
+ store i8 0, i8* %b1ptr
+ %b2ptr = getelementptr [3 x i8]* %a, i64 0, i32 2
+ store i8 0, i8* %b2ptr
+ %iptrcast = bitcast [3 x i8]* %a to i16*
+ %iptrgep = getelementptr i16* %iptrcast, i64 1
+ %i = load i16* %iptrgep
+ %ret = zext i16 %i to i32
+ ret i32 %ret
+}
+
+%test14.struct = type { [3 x i32] }
+
+define void @test14(...) nounwind uwtable {
+; This is a strange case where we split allocas into promotable partitions, but
+; also gain enough data to prove they must be dead allocas due to GEPs that walk
+; across two adjacent allocas. Test that we don't try to promote or otherwise
+; do bad things to these dead allocas, they should just be removed.
+; CHECK: @test14
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret void
+
+entry:
+ %a = alloca %test14.struct
+ %p = alloca %test14.struct*
+ %0 = bitcast %test14.struct* %a to i8*
+ %1 = getelementptr i8* %0, i64 12
+ %2 = bitcast i8* %1 to %test14.struct*
+ %3 = getelementptr inbounds %test14.struct* %2, i32 0, i32 0
+ %4 = getelementptr inbounds %test14.struct* %a, i32 0, i32 0
+ %5 = bitcast [3 x i32]* %3 to i32*
+ %6 = bitcast [3 x i32]* %4 to i32*
+ %7 = load i32* %6, align 4
+ store i32 %7, i32* %5, align 4
+ %8 = getelementptr inbounds i32* %5, i32 1
+ %9 = getelementptr inbounds i32* %6, i32 1
+ %10 = load i32* %9, align 4
+ store i32 %10, i32* %8, align 4
+ %11 = getelementptr inbounds i32* %5, i32 2
+ %12 = getelementptr inbounds i32* %6, i32 2
+ %13 = load i32* %12, align 4
+ store i32 %13, i32* %11, align 4
+ ret void
+}
+
+define i32 @test15(i1 %flag) nounwind uwtable {
+; Ensure that when there are dead instructions using an alloca that are not
+; loads or stores we still delete them during partitioning and rewriting.
+; Otherwise we'll go to promote them while thy still have unpromotable uses.
+; CHECK: @test15
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: br label %loop
+
+entry:
+ %l0 = alloca i64
+ %l1 = alloca i64
+ %l2 = alloca i64
+ %l3 = alloca i64
+ br label %loop
+
+loop:
+ %dead3 = phi i8* [ %gep3, %loop ], [ null, %entry ]
+
+ store i64 1879048192, i64* %l0, align 8
+ %bc0 = bitcast i64* %l0 to i8*
+ %gep0 = getelementptr i8* %bc0, i64 3
+ %dead0 = bitcast i8* %gep0 to i64*
+
+ store i64 1879048192, i64* %l1, align 8
+ %bc1 = bitcast i64* %l1 to i8*
+ %gep1 = getelementptr i8* %bc1, i64 3
+ %dead1 = getelementptr i8* %gep1, i64 1
+
+ store i64 1879048192, i64* %l2, align 8
+ %bc2 = bitcast i64* %l2 to i8*
+ %gep2.1 = getelementptr i8* %bc2, i64 1
+ %gep2.2 = getelementptr i8* %bc2, i64 3
+ ; Note that this select should get visited multiple times due to using two
+ ; different GEPs off the same alloca. We should only delete it once.
+ %dead2 = select i1 %flag, i8* %gep2.1, i8* %gep2.2
+
+ store i64 1879048192, i64* %l3, align 8
+ %bc3 = bitcast i64* %l3 to i8*
+ %gep3 = getelementptr i8* %bc3, i64 3
+
+ br label %loop
+}
+
+define void @test16(i8* %src, i8* %dst) {
+; Ensure that we can promote an alloca of [3 x i8] to an i24 SSA value.
+; CHECK: @test16
+; CHECK-NOT: alloca
+; CHECK: %[[srccast:.*]] = bitcast i8* %src to i24*
+; CHECK-NEXT: load i24* %[[srccast]]
+; CHECK-NEXT: %[[dstcast:.*]] = bitcast i8* %dst to i24*
+; CHECK-NEXT: store i24 0, i24* %[[dstcast]]
+; CHECK-NEXT: ret void
+
+entry:
+ %a = alloca [3 x i8]
+ %ptr = getelementptr [3 x i8]* %a, i32 0, i32 0
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 4, i32 1, i1 false)
+ %cast = bitcast i8* %ptr to i24*
+ store i24 0, i24* %cast
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 4, i32 1, i1 false)
+ ret void
+}
+
+define void @test17(i8* %src, i8* %dst) {
+; Ensure that we can rewrite unpromotable memcpys which extend past the end of
+; the alloca.
+; CHECK: @test17
+; CHECK: %[[a:.*]] = alloca [3 x i8]
+; CHECK-NEXT: %[[ptr:.*]] = getelementptr [3 x i8]* %[[a]], i32 0, i32 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[ptr]], i8* %src,
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %[[ptr]],
+; CHECK-NEXT: ret void
+
+entry:
+ %a = alloca [3 x i8]
+ %ptr = getelementptr [3 x i8]* %a, i32 0, i32 0
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 4, i32 1, i1 true)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 4, i32 1, i1 true)
+ ret void
+}
+
+define void @test18(i8* %src, i8* %dst, i32 %size) {
+; Preserve transfer instrinsics with a variable size, even if they overlap with
+; fixed size operations. Further, continue to split and promote allocas preceding
+; the variable sized intrinsic.
+; CHECK: @test18
+; CHECK: %[[a:.*]] = alloca [34 x i8]
+; CHECK: %[[srcgep1:.*]] = getelementptr inbounds i8* %src, i64 4
+; CHECK-NEXT: %[[srccast1:.*]] = bitcast i8* %[[srcgep1]] to i32*
+; CHECK-NEXT: %[[srcload:.*]] = load i32* %[[srccast1]]
+; CHECK-NEXT: %[[agep1:.*]] = getelementptr inbounds [34 x i8]* %[[a]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %[[agep1]], i8* %src, i32 %size,
+; CHECK-NEXT: %[[agep2:.*]] = getelementptr inbounds [34 x i8]* %[[a]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* %[[agep2]], i8 42, i32 %size,
+; CHECK-NEXT: %[[dstcast1:.*]] = bitcast i8* %dst to i32*
+; CHECK-NEXT: store i32 42, i32* %[[dstcast1]]
+; CHECK-NEXT: %[[dstgep1:.*]] = getelementptr inbounds i8* %dst, i64 4
+; CHECK-NEXT: %[[dstcast2:.*]] = bitcast i8* %[[dstgep1]] to i32*
+; CHECK-NEXT: store i32 %[[srcload]], i32* %[[dstcast2]]
+; CHECK-NEXT: %[[agep3:.*]] = getelementptr inbounds [34 x i8]* %[[a]], i64 0, i64 0
+; CHECK-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %[[agep3]], i32 %size,
+; CHECK-NEXT: ret void
+
+entry:
+ %a = alloca [42 x i8]
+ %ptr = getelementptr [42 x i8]* %a, i32 0, i32 0
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr, i8* %src, i32 8, i32 1, i1 false)
+ %ptr2 = getelementptr [42 x i8]* %a, i32 0, i32 8
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %ptr2, i8* %src, i32 %size, i32 1, i1 false)
+ call void @llvm.memset.p0i8.i32(i8* %ptr2, i8 42, i32 %size, i32 1, i1 false)
+ %cast = bitcast i8* %ptr to i32*
+ store i32 42, i32* %cast
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr, i32 8, i32 1, i1 false)
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %ptr2, i32 %size, i32 1, i1 false)
+ ret void
+}
+
+%opaque = type opaque
+
+define i32 @test19(%opaque* %x) {
+; This input will cause us to try to compute a natural GEP when rewriting
+; pointers in such a way that we try to GEP through the opaque type. Previously,
+; a check for an unsized type was missing and this crashed. Ensure it behaves
+; reasonably now.
+; CHECK: @test19
+; CHECK-NOT: alloca
+; CHECK: ret i32 undef
+
+entry:
+ %a = alloca { i64, i8* }
+ %cast1 = bitcast %opaque* %x to i8*
+ %cast2 = bitcast { i64, i8* }* %a to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast2, i8* %cast1, i32 16, i32 1, i1 false)
+ %gep = getelementptr inbounds { i64, i8* }* %a, i32 0, i32 0
+ %val = load i64* %gep
+ ret i32 undef
+}
+
+define i32 @test20() {
+; Ensure we can track negative offsets (before the beginning of the alloca) and
+; negative relative offsets from offsets starting past the end of the alloca.
+; CHECK: @test20
+; CHECK-NOT: alloca
+; CHECK: %[[sum1:.*]] = add i32 1, 2
+; CHECK: %[[sum2:.*]] = add i32 %[[sum1]], 3
+; CHECK: ret i32 %[[sum2]]
+
+entry:
+ %a = alloca [3 x i32]
+ %gep1 = getelementptr [3 x i32]* %a, i32 0, i32 0
+ store i32 1, i32* %gep1
+ %gep2.1 = getelementptr [3 x i32]* %a, i32 0, i32 -2
+ %gep2.2 = getelementptr i32* %gep2.1, i32 3
+ store i32 2, i32* %gep2.2
+ %gep3.1 = getelementptr [3 x i32]* %a, i32 0, i32 14
+ %gep3.2 = getelementptr i32* %gep3.1, i32 -12
+ store i32 3, i32* %gep3.2
+
+ %load1 = load i32* %gep1
+ %load2 = load i32* %gep2.2
+ %load3 = load i32* %gep3.2
+ %sum1 = add i32 %load1, %load2
+ %sum2 = add i32 %sum1, %load3
+ ret i32 %sum2
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
+define i8 @test21() {
+; Test allocations and offsets which border on overflow of the int64_t used
+; internally. This is really awkward to really test as LLVM doesn't really
+; support such extreme constructs cleanly.
+; CHECK: @test21
+; CHECK-NOT: alloca
+; CHECK: or i8 -1, -1
+
+entry:
+ %a = alloca [2305843009213693951 x i8]
+ %gep0 = getelementptr [2305843009213693951 x i8]* %a, i64 0, i64 2305843009213693949
+ store i8 255, i8* %gep0
+ %gep1 = getelementptr [2305843009213693951 x i8]* %a, i64 0, i64 -9223372036854775807
+ %gep2 = getelementptr i8* %gep1, i64 -1
+ call void @llvm.memset.p0i8.i64(i8* %gep2, i8 0, i64 18446744073709551615, i32 1, i1 false)
+ %gep3 = getelementptr i8* %gep1, i64 9223372036854775807
+ %gep4 = getelementptr i8* %gep3, i64 9223372036854775807
+ %gep5 = getelementptr i8* %gep4, i64 -6917529027641081857
+ store i8 255, i8* %gep5
+ %cast1 = bitcast i8* %gep4 to i32*
+ store i32 0, i32* %cast1
+ %load = load i8* %gep0
+ %gep6 = getelementptr i8* %gep0, i32 1
+ %load2 = load i8* %gep6
+ %result = or i8 %load, %load2
+ ret i8 %result
+}
+
+%PR13916.struct = type { i8 }
+
+define void @PR13916.1() {
+; Ensure that we handle overlapping memcpy intrinsics correctly, especially in
+; the case where there is a directly identical value for both source and dest.
+; CHECK: @PR13916.1
+; CHECK-NOT: alloca
+; CHECK: ret void
+
+entry:
+ %a = alloca i8
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a, i8* %a, i32 1, i32 1, i1 false)
+ %tmp2 = load i8* %a
+ ret void
+}
+
+define void @PR13916.2() {
+; Check whether we continue to handle them correctly when they start off with
+; different pointer value chains, but during rewriting we coalesce them into the
+; same value.
+; CHECK: @PR13916.2
+; CHECK-NOT: alloca
+; CHECK: ret void
+
+entry:
+ %a = alloca %PR13916.struct, align 1
+ br i1 undef, label %if.then, label %if.end
+
+if.then:
+ %tmp0 = bitcast %PR13916.struct* %a to i8*
+ %tmp1 = bitcast %PR13916.struct* %a to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp0, i8* %tmp1, i32 1, i32 1, i1 false)
+ br label %if.end
+
+if.end:
+ %gep = getelementptr %PR13916.struct* %a, i32 0, i32 0
+ %tmp2 = load i8* %gep
+ ret void
+}
+
+define void @PR13990() {
+; Ensure we can handle cases where processing one alloca causes the other
+; alloca to become dead and get deleted. This might crash or fail under
+; Valgrind if we regress.
+; CHECK: @PR13990
+; CHECK-NOT: alloca
+; CHECK: unreachable
+; CHECK: unreachable
+
+entry:
+ %tmp1 = alloca i8*
+ %tmp2 = alloca i8*
+ br i1 undef, label %bb1, label %bb2
+
+bb1:
+ store i8* undef, i8** %tmp2
+ br i1 undef, label %bb2, label %bb3
+
+bb2:
+ %tmp50 = select i1 undef, i8** %tmp2, i8** %tmp1
+ br i1 undef, label %bb3, label %bb4
+
+bb3:
+ unreachable
+
+bb4:
+ unreachable
+}
+
+define double @PR13969(double %x) {
+; Check that we detect when promotion will un-escape an alloca and iterate to
+; re-try running SROA over that alloca. Without that, the two allocas that are
+; stored into a dead alloca don't get rewritten and promoted.
+; CHECK: @PR13969
+
+entry:
+ %a = alloca double
+ %b = alloca double*
+ %c = alloca double
+; CHECK-NOT: alloca
+
+ store double %x, double* %a
+ store double* %c, double** %b
+ store double* %a, double** %b
+ store double %x, double* %c
+ %ret = load double* %a
+; CHECK-NOT: store
+; CHECK-NOT: load
+
+ ret double %ret
+; CHECK: ret double %x
+}
+
+%PR14034.struct = type { { {} }, i32, %PR14034.list }
+%PR14034.list = type { %PR14034.list*, %PR14034.list* }
+
+define void @PR14034() {
+; This test case tries to form GEPs into the empty leading struct members, and
+; subsequently crashed (under valgrind) before we fixed the PR. The important
+; thing is to handle empty structs gracefully.
+; CHECK: @PR14034
+
+entry:
+ %a = alloca %PR14034.struct
+ %list = getelementptr %PR14034.struct* %a, i32 0, i32 2
+ %prev = getelementptr %PR14034.list* %list, i32 0, i32 1
+ store %PR14034.list* undef, %PR14034.list** %prev
+ %cast0 = bitcast %PR14034.struct* undef to i8*
+ %cast1 = bitcast %PR14034.struct* %a to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast0, i8* %cast1, i32 12, i32 0, i1 false)
+ ret void
+}
+
+define i32 @test22(i32 %x) {
+; Test that SROA and promotion is not confused by a grab bax mixture of pointer
+; types involving wrapper aggregates and zero-length aggregate members.
+; CHECK: @test22
+
+entry:
+ %a1 = alloca { { [1 x { i32 }] } }
+ %a2 = alloca { {}, { float }, [0 x i8] }
+ %a3 = alloca { [0 x i8], { [0 x double], [1 x [1 x <4 x i8>]], {} }, { { {} } } }
+; CHECK-NOT: alloca
+
+ %wrap1 = insertvalue [1 x { i32 }] undef, i32 %x, 0, 0
+ %gep1 = getelementptr { { [1 x { i32 }] } }* %a1, i32 0, i32 0, i32 0
+ store [1 x { i32 }] %wrap1, [1 x { i32 }]* %gep1
+
+ %gep2 = getelementptr { { [1 x { i32 }] } }* %a1, i32 0, i32 0
+ %ptrcast1 = bitcast { [1 x { i32 }] }* %gep2 to { [1 x { float }] }*
+ %load1 = load { [1 x { float }] }* %ptrcast1
+ %unwrap1 = extractvalue { [1 x { float }] } %load1, 0, 0
+
+ %wrap2 = insertvalue { {}, { float }, [0 x i8] } undef, { float } %unwrap1, 1
+ store { {}, { float }, [0 x i8] } %wrap2, { {}, { float }, [0 x i8] }* %a2
+
+ %gep3 = getelementptr { {}, { float }, [0 x i8] }* %a2, i32 0, i32 1, i32 0
+ %ptrcast2 = bitcast float* %gep3 to <4 x i8>*
+ %load3 = load <4 x i8>* %ptrcast2
+ %valcast1 = bitcast <4 x i8> %load3 to i32
+
+ %wrap3 = insertvalue [1 x [1 x i32]] undef, i32 %valcast1, 0, 0
+ %wrap4 = insertvalue { [1 x [1 x i32]], {} } undef, [1 x [1 x i32]] %wrap3, 0
+ %gep4 = getelementptr { [0 x i8], { [0 x double], [1 x [1 x <4 x i8>]], {} }, { { {} } } }* %a3, i32 0, i32 1
+ %ptrcast3 = bitcast { [0 x double], [1 x [1 x <4 x i8>]], {} }* %gep4 to { [1 x [1 x i32]], {} }*
+ store { [1 x [1 x i32]], {} } %wrap4, { [1 x [1 x i32]], {} }* %ptrcast3
+
+ %gep5 = getelementptr { [0 x i8], { [0 x double], [1 x [1 x <4 x i8>]], {} }, { { {} } } }* %a3, i32 0, i32 1, i32 1, i32 0
+ %ptrcast4 = bitcast [1 x <4 x i8>]* %gep5 to { {}, float, {} }*
+ %load4 = load { {}, float, {} }* %ptrcast4
+ %unwrap2 = extractvalue { {}, float, {} } %load4, 1
+ %valcast2 = bitcast float %unwrap2 to i32
+
+ ret i32 %valcast2
+; CHECK: ret i32
+}
+
+define void @PR14059.1(double* %d) {
+; In PR14059 a peculiar construct was identified as something that is used
+; pervasively in ARM's ABI-calling-convention lowering: the passing of a struct
+; of doubles via an array of i32 in order to place the data into integer
+; registers. This in turn was missed as an optimization by SROA due to the
+; partial loads and stores of integers to the double alloca we were trying to
+; form and promote. The solution is to widen the integer operations to be
+; whole-alloca operations, and perform the appropriate bitcasting on the
+; *values* rather than the pointers. When this works, partial reads and writes
+; via integers can be promoted away.
+; CHECK: @PR14059.1
+; CHECK-NOT: alloca
+; CHECK: ret void
+
+entry:
+ %X.sroa.0.i = alloca double, align 8
+ %0 = bitcast double* %X.sroa.0.i to i8*
+ call void @llvm.lifetime.start(i64 -1, i8* %0)
+
+ ; Store to the low 32-bits...
+ %X.sroa.0.0.cast2.i = bitcast double* %X.sroa.0.i to i32*
+ store i32 0, i32* %X.sroa.0.0.cast2.i, align 8
+
+ ; Also use a memset to the middle 32-bits for fun.
+ %X.sroa.0.2.raw_idx2.i = getelementptr inbounds i8* %0, i32 2
+ call void @llvm.memset.p0i8.i64(i8* %X.sroa.0.2.raw_idx2.i, i8 0, i64 4, i32 1, i1 false)
+
+ ; Or a memset of the whole thing.
+ call void @llvm.memset.p0i8.i64(i8* %0, i8 0, i64 8, i32 1, i1 false)
+
+ ; Write to the high 32-bits with a memcpy.
+ %X.sroa.0.4.raw_idx4.i = getelementptr inbounds i8* %0, i32 4
+ %d.raw = bitcast double* %d to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %X.sroa.0.4.raw_idx4.i, i8* %d.raw, i32 4, i32 1, i1 false)
+
+ ; Store to the high 32-bits...
+ %X.sroa.0.4.cast5.i = bitcast i8* %X.sroa.0.4.raw_idx4.i to i32*
+ store i32 1072693248, i32* %X.sroa.0.4.cast5.i, align 4
+
+ ; Do the actual math...
+ %X.sroa.0.0.load1.i = load double* %X.sroa.0.i, align 8
+ %accum.real.i = load double* %d, align 8
+ %add.r.i = fadd double %accum.real.i, %X.sroa.0.0.load1.i
+ store double %add.r.i, double* %d, align 8
+ call void @llvm.lifetime.end(i64 -1, i8* %0)
+ ret void
+}
+
+define i64 @PR14059.2({ float, float }* %phi) {
+; Check that SROA can split up alloca-wide integer loads and stores where the
+; underlying alloca has smaller components that are accessed independently. This
+; shows up particularly with ABI lowering patterns coming out of Clang that rely
+; on the particular register placement of a single large integer return value.
+; CHECK: @PR14059.2
+
+entry:
+ %retval = alloca { float, float }, align 4
+ ; CHECK-NOT: alloca
+
+ %0 = bitcast { float, float }* %retval to i64*
+ store i64 0, i64* %0
+ ; CHECK-NOT: store
+
+ %phi.realp = getelementptr inbounds { float, float }* %phi, i32 0, i32 0
+ %phi.real = load float* %phi.realp
+ %phi.imagp = getelementptr inbounds { float, float }* %phi, i32 0, i32 1
+ %phi.imag = load float* %phi.imagp
+ ; CHECK: %[[realp:.*]] = getelementptr inbounds { float, float }* %phi, i32 0, i32 0
+ ; CHECK-NEXT: %[[real:.*]] = load float* %[[realp]]
+ ; CHECK-NEXT: %[[imagp:.*]] = getelementptr inbounds { float, float }* %phi, i32 0, i32 1
+ ; CHECK-NEXT: %[[imag:.*]] = load float* %[[imagp]]
+
+ %real = getelementptr inbounds { float, float }* %retval, i32 0, i32 0
+ %imag = getelementptr inbounds { float, float }* %retval, i32 0, i32 1
+ store float %phi.real, float* %real
+ store float %phi.imag, float* %imag
+ ; CHECK-NEXT: %[[real_convert:.*]] = bitcast float %[[real]] to i32
+ ; CHECK-NEXT: %[[imag_convert:.*]] = bitcast float %[[imag]] to i32
+ ; CHECK-NEXT: %[[imag_ext:.*]] = zext i32 %[[imag_convert]] to i64
+ ; CHECK-NEXT: %[[imag_shift:.*]] = shl i64 %[[imag_ext]], 32
+ ; CHECK-NEXT: %[[imag_mask:.*]] = and i64 undef, 4294967295
+ ; CHECK-NEXT: %[[imag_insert:.*]] = or i64 %[[imag_mask]], %[[imag_shift]]
+ ; CHECK-NEXT: %[[real_ext:.*]] = zext i32 %[[real_convert]] to i64
+ ; CHECK-NEXT: %[[real_mask:.*]] = and i64 %[[imag_insert]], -4294967296
+ ; CHECK-NEXT: %[[real_insert:.*]] = or i64 %[[real_mask]], %[[real_ext]]
+
+ %1 = load i64* %0, align 1
+ ret i64 %1
+ ; CHECK-NEXT: ret i64 %[[real_insert]]
+}
+
+define void @PR14105({ [16 x i8] }* %ptr) {
+; Ensure that when rewriting the GEP index '-1' for this alloca we preserve is
+; sign as negative. We use a volatile memcpy to ensure promotion never actually
+; occurs.
+; CHECK: @PR14105
+
+entry:
+ %a = alloca { [16 x i8] }, align 8
+; CHECK: alloca [16 x i8], align 8
+
+ %gep = getelementptr inbounds { [16 x i8] }* %ptr, i64 -1
+; CHECK-NEXT: getelementptr inbounds { [16 x i8] }* %ptr, i64 -1, i32 0, i64 0
+
+ %cast1 = bitcast { [16 x i8 ] }* %gep to i8*
+ %cast2 = bitcast { [16 x i8 ] }* %a to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %cast1, i8* %cast2, i32 16, i32 8, i1 true)
+ ret void
+; CHECK: ret
+}
diff --git a/test/Transforms/SROA/big-endian.ll b/test/Transforms/SROA/big-endian.ll
new file mode 100644
index 000000000000..ce82d1f30b57
--- /dev/null
+++ b/test/Transforms/SROA/big-endian.ll
@@ -0,0 +1,119 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+; RUN: opt < %s -sroa -force-ssa-updater -S | FileCheck %s
+
+target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+define i8 @test1() {
+; We fully promote these to the i24 load or store size, resulting in just masks
+; and other operations that instcombine will fold, but no alloca. Note this is
+; the same as test12 in basictest.ll, but here we assert big-endian byte
+; ordering.
+;
+; CHECK: @test1
+
+entry:
+ %a = alloca [3 x i8]
+ %b = alloca [3 x i8]
+; CHECK-NOT: alloca
+
+ %a0ptr = getelementptr [3 x i8]* %a, i64 0, i32 0
+ store i8 0, i8* %a0ptr
+ %a1ptr = getelementptr [3 x i8]* %a, i64 0, i32 1
+ store i8 0, i8* %a1ptr
+ %a2ptr = getelementptr [3 x i8]* %a, i64 0, i32 2
+ store i8 0, i8* %a2ptr
+ %aiptr = bitcast [3 x i8]* %a to i24*
+ %ai = load i24* %aiptr
+; CHECK-NOT: store
+; CHECK-NOT: load
+; CHECK: %[[ext2:.*]] = zext i8 0 to i24
+; CHECK-NEXT: %[[mask2:.*]] = and i24 undef, -256
+; CHECK-NEXT: %[[insert2:.*]] = or i24 %[[mask2]], %[[ext2]]
+; CHECK-NEXT: %[[ext1:.*]] = zext i8 0 to i24
+; CHECK-NEXT: %[[shift1:.*]] = shl i24 %[[ext1]], 8
+; CHECK-NEXT: %[[mask1:.*]] = and i24 %[[insert2]], -65281
+; CHECK-NEXT: %[[insert1:.*]] = or i24 %[[mask1]], %[[shift1]]
+; CHECK-NEXT: %[[ext0:.*]] = zext i8 0 to i24
+; CHECK-NEXT: %[[shift0:.*]] = shl i24 %[[ext0]], 16
+; CHECK-NEXT: %[[mask0:.*]] = and i24 %[[insert1]], 65535
+; CHECK-NEXT: %[[insert0:.*]] = or i24 %[[mask0]], %[[shift0]]
+
+ %biptr = bitcast [3 x i8]* %b to i24*
+ store i24 %ai, i24* %biptr
+ %b0ptr = getelementptr [3 x i8]* %b, i64 0, i32 0
+ %b0 = load i8* %b0ptr
+ %b1ptr = getelementptr [3 x i8]* %b, i64 0, i32 1
+ %b1 = load i8* %b1ptr
+ %b2ptr = getelementptr [3 x i8]* %b, i64 0, i32 2
+ %b2 = load i8* %b2ptr
+; CHECK-NOT: store
+; CHECK-NOT: load
+; CHECK: %[[shift0:.*]] = lshr i24 %[[insert0]], 16
+; CHECK-NEXT: %[[trunc0:.*]] = trunc i24 %[[shift0]] to i8
+; CHECK-NEXT: %[[shift1:.*]] = lshr i24 %[[insert0]], 8
+; CHECK-NEXT: %[[trunc1:.*]] = trunc i24 %[[shift1]] to i8
+; CHECK-NEXT: %[[trunc2:.*]] = trunc i24 %[[insert0]] to i8
+
+ %bsum0 = add i8 %b0, %b1
+ %bsum1 = add i8 %bsum0, %b2
+ ret i8 %bsum1
+; CHECK: %[[sum0:.*]] = add i8 %[[trunc0]], %[[trunc1]]
+; CHECK-NEXT: %[[sum1:.*]] = add i8 %[[sum0]], %[[trunc2]]
+; CHECK-NEXT: ret i8 %[[sum1]]
+}
+
+define i64 @test2() {
+; Test for various mixed sizes of integer loads and stores all getting
+; promoted.
+;
+; CHECK: @test2
+
+entry:
+ %a = alloca [7 x i8]
+; CHECK-NOT: alloca
+
+ %a0ptr = getelementptr [7 x i8]* %a, i64 0, i32 0
+ %a1ptr = getelementptr [7 x i8]* %a, i64 0, i32 1
+ %a2ptr = getelementptr [7 x i8]* %a, i64 0, i32 2
+ %a3ptr = getelementptr [7 x i8]* %a, i64 0, i32 3
+
+; CHECK-NOT: store
+; CHECK-NOT: load
+
+ %a0i16ptr = bitcast i8* %a0ptr to i16*
+ store i16 1, i16* %a0i16ptr
+; CHECK: %[[mask0:.*]] = and i16 1, -16
+
+ %a1i4ptr = bitcast i8* %a1ptr to i4*
+ store i4 1, i4* %a1i4ptr
+; CHECK-NEXT: %[[insert0:.*]] = or i16 %[[mask0]], 1
+
+ store i8 1, i8* %a2ptr
+; CHECK-NEXT: %[[mask1:.*]] = and i40 undef, 4294967295
+; CHECK-NEXT: %[[insert1:.*]] = or i40 %[[mask1]], 4294967296
+
+ %a3i24ptr = bitcast i8* %a3ptr to i24*
+ store i24 1, i24* %a3i24ptr
+; CHECK-NEXT: %[[mask2:.*]] = and i40 %[[insert1]], -4294967041
+; CHECK-NEXT: %[[insert2:.*]] = or i40 %[[mask2]], 256
+
+ %a2i40ptr = bitcast i8* %a2ptr to i40*
+ store i40 1, i40* %a2i40ptr
+; CHECK-NEXT: %[[ext3:.*]] = zext i40 1 to i56
+; CHECK-NEXT: %[[mask3:.*]] = and i56 undef, -1099511627776
+; CHECK-NEXT: %[[insert3:.*]] = or i56 %[[mask3]], %[[ext3]]
+
+; CHECK-NOT: store
+; CHECK-NOT: load
+
+ %aiptr = bitcast [7 x i8]* %a to i56*
+ %ai = load i56* %aiptr
+ %ret = zext i56 %ai to i64
+ ret i64 %ret
+; CHECK-NEXT: %[[ext4:.*]] = zext i16 %[[insert0]] to i56
+; CHECK-NEXT: %[[shift4:.*]] = shl i56 %[[ext4]], 40
+; CHECK-NEXT: %[[mask4:.*]] = and i56 %[[insert3]], 1099511627775
+; CHECK-NEXT: %[[insert4:.*]] = or i56 %[[mask4]], %[[shift4]]
+; CHECK-NEXT: %[[ret:.*]] = zext i56 %[[insert4]] to i64
+; CHECK-NEXT: ret i64 %[[ret]]
+}
diff --git a/test/Transforms/SROA/fca.ll b/test/Transforms/SROA/fca.ll
new file mode 100644
index 000000000000..c30a5cc974fc
--- /dev/null
+++ b/test/Transforms/SROA/fca.ll
@@ -0,0 +1,49 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+; RUN: opt < %s -sroa -force-ssa-updater -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+define { i32, i32 } @test0(i32 %x, i32 %y) {
+; CHECK: @test0
+; CHECK-NOT: alloca
+; CHECK: insertvalue { i32, i32 }
+; CHECK: insertvalue { i32, i32 }
+; CHECK: ret { i32, i32 }
+
+entry:
+ %a = alloca { i32, i32 }
+
+ store { i32, i32 } undef, { i32, i32 }* %a
+
+ %gep1 = getelementptr inbounds { i32, i32 }* %a, i32 0, i32 0
+ store i32 %x, i32* %gep1
+ %gep2 = getelementptr inbounds { i32, i32 }* %a, i32 0, i32 1
+ store i32 %y, i32* %gep2
+
+ %result = load { i32, i32 }* %a
+ ret { i32, i32 } %result
+}
+
+define { i32, i32 } @test1(i32 %x, i32 %y) {
+; FIXME: This may be too conservative. Duncan argues that we are allowed to
+; split the volatile load and store here but must produce volatile scalar loads
+; and stores from them.
+; CHECK: @test1
+; CHECK: alloca
+; CHECK: alloca
+; CHECK: load volatile { i32, i32 }*
+; CHECK: store volatile { i32, i32 }
+; CHECK: ret { i32, i32 }
+
+entry:
+ %a = alloca { i32, i32 }
+ %b = alloca { i32, i32 }
+
+ %gep1 = getelementptr inbounds { i32, i32 }* %a, i32 0, i32 0
+ store i32 %x, i32* %gep1
+ %gep2 = getelementptr inbounds { i32, i32 }* %a, i32 0, i32 1
+ store i32 %y, i32* %gep2
+
+ %result = load volatile { i32, i32 }* %a
+ store volatile { i32, i32 } %result, { i32, i32 }* %b
+ ret { i32, i32 } %result
+}
diff --git a/test/Transforms/SROA/lit.local.cfg b/test/Transforms/SROA/lit.local.cfg
new file mode 100644
index 000000000000..c6106e4746f2
--- /dev/null
+++ b/test/Transforms/SROA/lit.local.cfg
@@ -0,0 +1 @@
+config.suffixes = ['.ll']
diff --git a/test/Transforms/SROA/phi-and-select.ll b/test/Transforms/SROA/phi-and-select.ll
new file mode 100644
index 000000000000..921016a9c24b
--- /dev/null
+++ b/test/Transforms/SROA/phi-and-select.ll
@@ -0,0 +1,427 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+define i32 @test1() {
+; CHECK: @test1
+entry:
+ %a = alloca [2 x i32]
+; CHECK-NOT: alloca
+
+ %a0 = getelementptr [2 x i32]* %a, i64 0, i32 0
+ %a1 = getelementptr [2 x i32]* %a, i64 0, i32 1
+ store i32 0, i32* %a0
+ store i32 1, i32* %a1
+ %v0 = load i32* %a0
+ %v1 = load i32* %a1
+; CHECK-NOT: store
+; CHECK-NOT: load
+
+ %cond = icmp sle i32 %v0, %v1
+ br i1 %cond, label %then, label %exit
+
+then:
+ br label %exit
+
+exit:
+ %phi = phi i32* [ %a1, %then ], [ %a0, %entry ]
+; CHECK: phi i32 [ 1, %{{.*}} ], [ 0, %{{.*}} ]
+
+ %result = load i32* %phi
+ ret i32 %result
+}
+
+define i32 @test2() {
+; CHECK: @test2
+entry:
+ %a = alloca [2 x i32]
+; CHECK-NOT: alloca
+
+ %a0 = getelementptr [2 x i32]* %a, i64 0, i32 0
+ %a1 = getelementptr [2 x i32]* %a, i64 0, i32 1
+ store i32 0, i32* %a0
+ store i32 1, i32* %a1
+ %v0 = load i32* %a0
+ %v1 = load i32* %a1
+; CHECK-NOT: store
+; CHECK-NOT: load
+
+ %cond = icmp sle i32 %v0, %v1
+ %select = select i1 %cond, i32* %a1, i32* %a0
+; CHECK: select i1 %{{.*}}, i32 1, i32 0
+
+ %result = load i32* %select
+ ret i32 %result
+}
+
+define i32 @test3(i32 %x) {
+; CHECK: @test3
+entry:
+ %a = alloca [2 x i32]
+; CHECK-NOT: alloca
+
+ ; Note that we build redundant GEPs here to ensure that having different GEPs
+  ; into the same alloca partition continues to work with PHI speculation. This
+ ; was the underlying cause of PR13926.
+ %a0 = getelementptr [2 x i32]* %a, i64 0, i32 0
+ %a0b = getelementptr [2 x i32]* %a, i64 0, i32 0
+ %a1 = getelementptr [2 x i32]* %a, i64 0, i32 1
+ %a1b = getelementptr [2 x i32]* %a, i64 0, i32 1
+ store i32 0, i32* %a0
+ store i32 1, i32* %a1
+; CHECK-NOT: store
+
+ switch i32 %x, label %bb0 [ i32 1, label %bb1
+ i32 2, label %bb2
+ i32 3, label %bb3
+ i32 4, label %bb4
+ i32 5, label %bb5
+ i32 6, label %bb6
+ i32 7, label %bb7 ]
+
+bb0:
+ br label %exit
+bb1:
+ br label %exit
+bb2:
+ br label %exit
+bb3:
+ br label %exit
+bb4:
+ br label %exit
+bb5:
+ br label %exit
+bb6:
+ br label %exit
+bb7:
+ br label %exit
+
+exit:
+ %phi = phi i32* [ %a1, %bb0 ], [ %a0, %bb1 ], [ %a0, %bb2 ], [ %a1, %bb3 ],
+ [ %a1b, %bb4 ], [ %a0b, %bb5 ], [ %a0b, %bb6 ], [ %a1b, %bb7 ]
+; CHECK: phi i32 [ 1, %{{.*}} ], [ 0, %{{.*}} ], [ 0, %{{.*}} ], [ 1, %{{.*}} ], [ 1, %{{.*}} ], [ 0, %{{.*}} ], [ 0, %{{.*}} ], [ 1, %{{.*}} ]
+
+ %result = load i32* %phi
+ ret i32 %result
+}
+
+define i32 @test4() {
+; CHECK: @test4
+entry:
+ %a = alloca [2 x i32]
+; CHECK-NOT: alloca
+
+ %a0 = getelementptr [2 x i32]* %a, i64 0, i32 0
+ %a1 = getelementptr [2 x i32]* %a, i64 0, i32 1
+ store i32 0, i32* %a0
+ store i32 1, i32* %a1
+ %v0 = load i32* %a0
+ %v1 = load i32* %a1
+; CHECK-NOT: store
+; CHECK-NOT: load
+
+ %cond = icmp sle i32 %v0, %v1
+ %select = select i1 %cond, i32* %a0, i32* %a0
+; CHECK-NOT: select
+
+ %result = load i32* %select
+ ret i32 %result
+; CHECK: ret i32 0
+}
+
+define i32 @test5(i32* %b) {
+; CHECK: @test5
+entry:
+ %a = alloca [2 x i32]
+; CHECK-NOT: alloca
+
+ %a1 = getelementptr [2 x i32]* %a, i64 0, i32 1
+ store i32 1, i32* %a1
+; CHECK-NOT: store
+
+ %select = select i1 true, i32* %a1, i32* %b
+; CHECK-NOT: select
+
+ %result = load i32* %select
+; CHECK-NOT: load
+
+ ret i32 %result
+; CHECK: ret i32 1
+}
+
+declare void @f(i32*, i32*)
+
+define i32 @test6(i32* %b) {
+; CHECK: @test6
+entry:
+ %a = alloca [2 x i32]
+ %c = alloca i32
+; CHECK-NOT: alloca
+
+ %a1 = getelementptr [2 x i32]* %a, i64 0, i32 1
+ store i32 1, i32* %a1
+
+ %select = select i1 true, i32* %a1, i32* %b
+ %select2 = select i1 false, i32* %a1, i32* %b
+ %select3 = select i1 false, i32* %c, i32* %b
+; CHECK: %[[select2:.*]] = select i1 false, i32* undef, i32* %b
+; CHECK: %[[select3:.*]] = select i1 false, i32* undef, i32* %b
+
+ ; Note, this would potentially escape the alloca pointer except for the
+ ; constant folding of the select.
+ call void @f(i32* %select2, i32* %select3)
+; CHECK: call void @f(i32* %[[select2]], i32* %[[select3]])
+
+
+ %result = load i32* %select
+; CHECK-NOT: load
+
+ %dead = load i32* %c
+
+ ret i32 %result
+; CHECK: ret i32 1
+}
+
+define i32 @test7() {
+; CHECK: @test7
+; CHECK-NOT: alloca
+
+entry:
+ %X = alloca i32
+ br i1 undef, label %good, label %bad
+
+good:
+ %Y1 = getelementptr i32* %X, i64 0
+ store i32 0, i32* %Y1
+ br label %exit
+
+bad:
+ %Y2 = getelementptr i32* %X, i64 1
+ store i32 0, i32* %Y2
+ br label %exit
+
+exit:
+ %P = phi i32* [ %Y1, %good ], [ %Y2, %bad ]
+; CHECK: %[[phi:.*]] = phi i32 [ 0, %good ],
+ %Z2 = load i32* %P
+ ret i32 %Z2
+; CHECK: ret i32 %[[phi]]
+}
+
+define i32 @test8(i32 %b, i32* %ptr) {
+; Ensure that we rewrite allocas to the used type when that use is hidden by
+; a PHI that can be speculated.
+; CHECK: @test8
+; CHECK-NOT: alloca
+; CHECK-NOT: load
+; CHECK: %[[value:.*]] = load i32* %ptr
+; CHECK-NOT: load
+; CHECK: %[[result:.*]] = phi i32 [ undef, %else ], [ %[[value]], %then ]
+; CHECK-NEXT: ret i32 %[[result]]
+
+entry:
+ %f = alloca float
+ %test = icmp ne i32 %b, 0
+ br i1 %test, label %then, label %else
+
+then:
+ br label %exit
+
+else:
+ %bitcast = bitcast float* %f to i32*
+ br label %exit
+
+exit:
+ %phi = phi i32* [ %bitcast, %else ], [ %ptr, %then ]
+ %loaded = load i32* %phi, align 4
+ ret i32 %loaded
+}
+
+define i32 @test9(i32 %b, i32* %ptr) {
+; Same as @test8 but for a select rather than a PHI node.
+; CHECK: @test9
+; CHECK-NOT: alloca
+; CHECK-NOT: load
+; CHECK: %[[value:.*]] = load i32* %ptr
+; CHECK-NOT: load
+; CHECK: %[[result:.*]] = select i1 %{{.*}}, i32 undef, i32 %[[value]]
+; CHECK-NEXT: ret i32 %[[result]]
+
+entry:
+ %f = alloca float
+ store i32 0, i32* %ptr
+ %test = icmp ne i32 %b, 0
+ %bitcast = bitcast float* %f to i32*
+ %select = select i1 %test, i32* %bitcast, i32* %ptr
+ %loaded = load i32* %select, align 4
+ ret i32 %loaded
+}
+
+define float @test10(i32 %b, float* %ptr) {
+; Don't try to promote allocas which are not eligible for it even after
+; rewriting due to the necessity of inserting bitcasts when speculating a PHI
+; node.
+; CHECK: @test10
+; CHECK: %[[alloca:.*]] = alloca
+; CHECK: %[[argvalue:.*]] = load float* %ptr
+; CHECK: %[[cast:.*]] = bitcast double* %[[alloca]] to float*
+; CHECK: %[[allocavalue:.*]] = load float* %[[cast]]
+; CHECK: %[[result:.*]] = phi float [ %[[allocavalue]], %else ], [ %[[argvalue]], %then ]
+; CHECK-NEXT: ret float %[[result]]
+
+entry:
+ %f = alloca double
+ store double 0.0, double* %f
+ %test = icmp ne i32 %b, 0
+ br i1 %test, label %then, label %else
+
+then:
+ br label %exit
+
+else:
+ %bitcast = bitcast double* %f to float*
+ br label %exit
+
+exit:
+ %phi = phi float* [ %bitcast, %else ], [ %ptr, %then ]
+ %loaded = load float* %phi, align 4
+ ret float %loaded
+}
+
+define float @test11(i32 %b, float* %ptr) {
+; Same as @test10 but for a select rather than a PHI node.
+; CHECK: @test11
+; CHECK: %[[alloca:.*]] = alloca
+; CHECK: %[[cast:.*]] = bitcast double* %[[alloca]] to float*
+; CHECK: %[[allocavalue:.*]] = load float* %[[cast]]
+; CHECK: %[[argvalue:.*]] = load float* %ptr
+; CHECK: %[[result:.*]] = select i1 %{{.*}}, float %[[allocavalue]], float %[[argvalue]]
+; CHECK-NEXT: ret float %[[result]]
+
+entry:
+ %f = alloca double
+ store double 0.0, double* %f
+ store float 0.0, float* %ptr
+ %test = icmp ne i32 %b, 0
+ %bitcast = bitcast double* %f to float*
+ %select = select i1 %test, float* %bitcast, float* %ptr
+ %loaded = load float* %select, align 4
+ ret float %loaded
+}
+
+define i32 @test12(i32 %x, i32* %p) {
+; Ensure we don't crash or fail to nuke dead selects of allocas if no load is
+; ever found.
+; CHECK: @test12
+; CHECK-NOT: alloca
+; CHECK-NOT: select
+; CHECK: ret i32 %x
+
+entry:
+ %a = alloca i32
+ store i32 %x, i32* %a
+ %dead = select i1 undef, i32* %a, i32* %p
+ %load = load i32* %a
+ ret i32 %load
+}
+
+define i32 @test13(i32 %x, i32* %p) {
+; Ensure we don't crash or fail to nuke dead phis of allocas if no load is ever
+; found.
+; CHECK: @test13
+; CHECK-NOT: alloca
+; CHECK-NOT: phi
+; CHECK: ret i32 %x
+
+entry:
+ %a = alloca i32
+ store i32 %x, i32* %a
+ br label %loop
+
+loop:
+ %phi = phi i32* [ %p, %entry ], [ %a, %loop ]
+ br i1 undef, label %loop, label %exit
+
+exit:
+ %load = load i32* %a
+ ret i32 %load
+}
+
+define i32 @PR13905() {
+; Check a pattern where we have a chain of dead phi nodes to ensure they are
+; deleted and promotion can proceed.
+; CHECK: @PR13905
+; CHECK-NOT: alloca i32
+; CHECK: ret i32 undef
+
+entry:
+ %h = alloca i32
+ store i32 0, i32* %h
+ br i1 undef, label %loop1, label %exit
+
+loop1:
+ %phi1 = phi i32* [ null, %entry ], [ %h, %loop1 ], [ %h, %loop2 ]
+ br i1 undef, label %loop1, label %loop2
+
+loop2:
+ br i1 undef, label %loop1, label %exit
+
+exit:
+ %phi2 = phi i32* [ %phi1, %loop2 ], [ null, %entry ]
+ ret i32 undef
+}
+
+define i32 @PR13906() {
+; Another pattern which can lead to crashes due to failing to clear out dead
+; PHI nodes or select nodes. This triggers subtly differently from the above
+; cases because the PHI node is (recursively) alive, but the select is dead.
+; CHECK: @PR13906
+; CHECK-NOT: alloca
+
+entry:
+ %c = alloca i32
+ store i32 0, i32* %c
+ br label %for.cond
+
+for.cond:
+ %d.0 = phi i32* [ undef, %entry ], [ %c, %if.then ], [ %d.0, %for.cond ]
+ br i1 undef, label %if.then, label %for.cond
+
+if.then:
+ %tmpcast.d.0 = select i1 undef, i32* %c, i32* %d.0
+ br label %for.cond
+}
+
+define i64 @PR14132(i1 %flag) {
+; CHECK: @PR14132
+; Here we form a PHI-node by promoting the pointer alloca first, and then in
+; order to promote the other two allocas, we speculate the load of the
+; now-phi-node-pointer. In doing so we end up loading a 64-bit value from an i8
+; alloca, which is completely bogus. However, we were asserting on trying to
+; rewrite it. Now it is replaced with undef. Eventually we may replace it with
+; unreachable and even the CFG will go away here.
+entry:
+ %a = alloca i64
+ %b = alloca i8
+ %ptr = alloca i64*
+; CHECK-NOT: alloca
+
+ %ptr.cast = bitcast i64** %ptr to i8**
+ store i64 0, i64* %a
+ store i8 1, i8* %b
+ store i64* %a, i64** %ptr
+ br i1 %flag, label %if.then, label %if.end
+
+if.then:
+ store i8* %b, i8** %ptr.cast
+ br label %if.end
+
+if.end:
+ %tmp = load i64** %ptr
+ %result = load i64* %tmp
+; CHECK-NOT: store
+; CHECK-NOT: load
+; CHECK: %[[result:.*]] = phi i64 [ undef, %if.then ], [ 0, %entry ]
+
+ ret i64 %result
+; CHECK-NEXT: ret i64 %[[result]]
+}
diff --git a/test/Transforms/SROA/vector-promotion.ll b/test/Transforms/SROA/vector-promotion.ll
new file mode 100644
index 000000000000..ea28f5d1a647
--- /dev/null
+++ b/test/Transforms/SROA/vector-promotion.ll
@@ -0,0 +1,267 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n8:16:32:64"
+
+%S1 = type { i64, [42 x float] }
+
+define i32 @test1(<4 x i32> %x, <4 x i32> %y) {
+; CHECK: @test1
+entry:
+ %a = alloca [2 x <4 x i32>]
+; CHECK-NOT: alloca
+
+ %a.x = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0
+ store <4 x i32> %x, <4 x i32>* %a.x
+ %a.y = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1
+ store <4 x i32> %y, <4 x i32>* %a.y
+; CHECK-NOT: store
+
+ %a.tmp1 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
+ %tmp1 = load i32* %a.tmp1
+ %a.tmp2 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
+ %tmp2 = load i32* %a.tmp2
+ %a.tmp3 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
+ %tmp3 = load i32* %a.tmp3
+; CHECK-NOT: load
+; CHECK: extractelement <4 x i32> %x, i32 2
+; CHECK-NEXT: extractelement <4 x i32> %y, i32 3
+; CHECK-NEXT: extractelement <4 x i32> %y, i32 0
+
+ %tmp4 = add i32 %tmp1, %tmp2
+ %tmp5 = add i32 %tmp3, %tmp4
+ ret i32 %tmp5
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: ret
+}
+
+define i32 @test2(<4 x i32> %x, <4 x i32> %y) {
+; CHECK: @test2
+; FIXME: This should be handled!
+entry:
+ %a = alloca [2 x <4 x i32>]
+; CHECK: alloca <4 x i32>
+
+ %a.x = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0
+ store <4 x i32> %x, <4 x i32>* %a.x
+ %a.y = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1
+ store <4 x i32> %y, <4 x i32>* %a.y
+
+ %a.tmp1 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
+ %tmp1 = load i32* %a.tmp1
+ %a.tmp2 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
+ %tmp2 = load i32* %a.tmp2
+ %a.tmp3 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
+ %a.tmp3.cast = bitcast i32* %a.tmp3 to <2 x i32>*
+ %tmp3.vec = load <2 x i32>* %a.tmp3.cast
+ %tmp3 = extractelement <2 x i32> %tmp3.vec, i32 0
+
+ %tmp4 = add i32 %tmp1, %tmp2
+ %tmp5 = add i32 %tmp3, %tmp4
+ ret i32 %tmp5
+}
+
+define i32 @test3(<4 x i32> %x, <4 x i32> %y) {
+; CHECK: @test3
+entry:
+ %a = alloca [2 x <4 x i32>]
+; CHECK-NOT: alloca
+
+ %a.x = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0
+ store <4 x i32> %x, <4 x i32>* %a.x
+ %a.y = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1
+ store <4 x i32> %y, <4 x i32>* %a.y
+; CHECK-NOT: store
+
+ %a.y.cast = bitcast <4 x i32>* %a.y to i8*
+ call void @llvm.memset.p0i8.i32(i8* %a.y.cast, i8 0, i32 16, i32 1, i1 false)
+; CHECK-NOT: memset
+
+ %a.tmp1 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
+ %a.tmp1.cast = bitcast i32* %a.tmp1 to i8*
+ call void @llvm.memset.p0i8.i32(i8* %a.tmp1.cast, i8 -1, i32 4, i32 1, i1 false)
+ %tmp1 = load i32* %a.tmp1
+ %a.tmp2 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
+ %tmp2 = load i32* %a.tmp2
+ %a.tmp3 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
+ %tmp3 = load i32* %a.tmp3
+; CHECK-NOT: load
+; CHECK: %[[insert:.*]] = insertelement <4 x i32> %x, i32 -1, i32 2
+; CHECK-NEXT: extractelement <4 x i32> %[[insert]], i32 2
+; CHECK-NEXT: extractelement <4 x i32> zeroinitializer, i32 3
+; CHECK-NEXT: extractelement <4 x i32> zeroinitializer, i32 0
+
+ %tmp4 = add i32 %tmp1, %tmp2
+ %tmp5 = add i32 %tmp3, %tmp4
+ ret i32 %tmp5
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: ret
+}
+
+define i32 @test4(<4 x i32> %x, <4 x i32> %y, <4 x i32>* %z) {
+; CHECK: @test4
+entry:
+ %a = alloca [2 x <4 x i32>]
+; CHECK-NOT: alloca
+
+ %a.x = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0
+ store <4 x i32> %x, <4 x i32>* %a.x
+ %a.y = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1
+ store <4 x i32> %y, <4 x i32>* %a.y
+; CHECK-NOT: store
+
+ %a.y.cast = bitcast <4 x i32>* %a.y to i8*
+ %z.cast = bitcast <4 x i32>* %z to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.y.cast, i8* %z.cast, i32 16, i32 1, i1 false)
+; CHECK-NOT: memcpy
+
+ %a.tmp1 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
+ %a.tmp1.cast = bitcast i32* %a.tmp1 to i8*
+ %z.tmp1 = getelementptr inbounds <4 x i32>* %z, i64 0, i64 2
+ %z.tmp1.cast = bitcast i32* %z.tmp1 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.tmp1.cast, i8* %z.tmp1.cast, i32 4, i32 1, i1 false)
+ %tmp1 = load i32* %a.tmp1
+ %a.tmp2 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
+ %tmp2 = load i32* %a.tmp2
+ %a.tmp3 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
+ %tmp3 = load i32* %a.tmp3
+; CHECK-NOT: memcpy
+; CHECK: %[[load:.*]] = load <4 x i32>* %z
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds <4 x i32>* %z, i64 0, i64 2
+; CHECK-NEXT: %[[element_load:.*]] = load i32* %[[gep]]
+; CHECK-NEXT: %[[insert:.*]] = insertelement <4 x i32> %x, i32 %[[element_load]], i32 2
+; CHECK-NEXT: extractelement <4 x i32> %[[insert]], i32 2
+; CHECK-NEXT: extractelement <4 x i32> %[[load]], i32 3
+; CHECK-NEXT: extractelement <4 x i32> %[[load]], i32 0
+
+ %tmp4 = add i32 %tmp1, %tmp2
+ %tmp5 = add i32 %tmp3, %tmp4
+ ret i32 %tmp5
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: ret
+}
+
+define i32 @test5(<4 x i32> %x, <4 x i32> %y, <4 x i32>* %z) {
+; CHECK: @test5
+; The same as the above, but with reversed source and destination for the
+; element memcpy, and a self copy.
+entry:
+ %a = alloca [2 x <4 x i32>]
+; CHECK-NOT: alloca
+
+ %a.x = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0
+ store <4 x i32> %x, <4 x i32>* %a.x
+ %a.y = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1
+ store <4 x i32> %y, <4 x i32>* %a.y
+; CHECK-NOT: store
+
+ %a.y.cast = bitcast <4 x i32>* %a.y to i8*
+ %a.x.cast = bitcast <4 x i32>* %a.x to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %a.x.cast, i8* %a.y.cast, i32 16, i32 1, i1 false)
+; CHECK-NOT: memcpy
+
+ %a.tmp1 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 0, i64 2
+ %a.tmp1.cast = bitcast i32* %a.tmp1 to i8*
+ %z.tmp1 = getelementptr inbounds <4 x i32>* %z, i64 0, i64 2
+ %z.tmp1.cast = bitcast i32* %z.tmp1 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %z.tmp1.cast, i8* %a.tmp1.cast, i32 4, i32 1, i1 false)
+ %tmp1 = load i32* %a.tmp1
+ %a.tmp2 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 3
+ %tmp2 = load i32* %a.tmp2
+ %a.tmp3 = getelementptr inbounds [2 x <4 x i32>]* %a, i64 0, i64 1, i64 0
+ %tmp3 = load i32* %a.tmp3
+; CHECK-NOT: memcpy
+; CHECK: %[[gep:.*]] = getelementptr inbounds <4 x i32>* %z, i64 0, i64 2
+; CHECK-NEXT: %[[extract:.*]] = extractelement <4 x i32> %y, i32 2
+; CHECK-NEXT: store i32 %[[extract]], i32* %[[gep]]
+; CHECK-NEXT: extractelement <4 x i32> %y, i32 2
+; CHECK-NEXT: extractelement <4 x i32> %y, i32 3
+; CHECK-NEXT: extractelement <4 x i32> %y, i32 0
+
+ %tmp4 = add i32 %tmp1, %tmp2
+ %tmp5 = add i32 %tmp3, %tmp4
+ ret i32 %tmp5
+; CHECK-NEXT: add
+; CHECK-NEXT: add
+; CHECK-NEXT: ret
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
+
+define i64 @test6(<4 x i64> %x, <4 x i64> %y, i64 %n) {
+; CHECK: @test6
+; The old scalarrepl pass would wrongly drop the store to the second alloca.
+; PR13254
+ %tmp = alloca { <4 x i64>, <4 x i64> }
+ %p0 = getelementptr inbounds { <4 x i64>, <4 x i64> }* %tmp, i32 0, i32 0
+ store <4 x i64> %x, <4 x i64>* %p0
+; CHECK: store <4 x i64> %x,
+ %p1 = getelementptr inbounds { <4 x i64>, <4 x i64> }* %tmp, i32 0, i32 1
+ store <4 x i64> %y, <4 x i64>* %p1
+; CHECK: store <4 x i64> %y,
+ %addr = getelementptr inbounds { <4 x i64>, <4 x i64> }* %tmp, i32 0, i32 0, i64 %n
+ %res = load i64* %addr, align 4
+ ret i64 %res
+}
+
+define i32 @PR14212() {
+; CHECK: @PR14212
+; This caused a crash when "splitting" the load of the i32 in order to promote
+; the store of <3 x i8> properly. Heavily reduced from an OpenCL test case.
+entry:
+ %retval = alloca <3 x i8>, align 4
+; CHECK-NOT: alloca
+
+ store <3 x i8> undef, <3 x i8>* %retval, align 4
+ %cast = bitcast <3 x i8>* %retval to i32*
+ %load = load i32* %cast, align 4
+ ret i32 %load
+; CHECK: ret i32
+}
+
+define <2 x i8> @PR14349.1(i32 %x) {
+; CHECK: @PR14349.1
+; The first testcase for broken SROA rewriting of split integer loads and
+; stores due to smaller vector loads and stores. This particular test ensures
+; that we can rewrite a split store of an integer to a store of a vector.
+entry:
+ %a = alloca i32
+; CHECK-NOT: alloca
+
+ store i32 %x, i32* %a
+; CHECK-NOT: store
+
+ %cast = bitcast i32* %a to <2 x i8>*
+ %vec = load <2 x i8>* %cast
+; CHECK-NOT: load
+
+ ret <2 x i8> %vec
+; CHECK: %[[trunc:.*]] = trunc i32 %x to i16
+; CHECK: %[[cast:.*]] = bitcast i16 %[[trunc]] to <2 x i8>
+; CHECK: ret <2 x i8> %[[cast]]
+}
+
+define i32 @PR14349.2(<2 x i8> %x) {
+; CHECK: @PR14349.2
+; The first testcase for broken SROA rewriting of split integer loads and
+; stores due to smaller vector loads and stores. This particular test ensures
+; that we can rewrite a split load of an integer to a load of a vector.
+entry:
+ %a = alloca i32
+; CHECK-NOT: alloca
+
+ %cast = bitcast i32* %a to <2 x i8>*
+ store <2 x i8> %x, <2 x i8>* %cast
+; CHECK-NOT: store
+
+ %int = load i32* %a
+; CHECK-NOT: load
+
+ ret i32 %int
+; CHECK: %[[cast:.*]] = bitcast <2 x i8> %x to i16
+; CHECK: %[[trunc:.*]] = zext i16 %[[cast]] to i32
+; CHECK: %[[insert:.*]] = or i32 %{{.*}}, %[[trunc]]
+; CHECK: ret i32 %[[insert]]
+}
diff --git a/test/Transforms/SimplifyCFG/SPARC/lit.local.cfg b/test/Transforms/SimplifyCFG/SPARC/lit.local.cfg
new file mode 100644
index 000000000000..786fee9e6610
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/SPARC/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'Sparc' in targets:
+ config.unsupported = True
+
diff --git a/test/Transforms/SimplifyCFG/SPARC/switch_to_lookup_table.ll b/test/Transforms/SimplifyCFG/SPARC/switch_to_lookup_table.ll
new file mode 100644
index 000000000000..9d1568557f30
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/SPARC/switch_to_lookup_table.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -simplifycfg -S -mtriple=sparc-unknown-unknown | FileCheck %s
+
+; Check that switches are not turned into lookup tables, as this is not
+; considered profitable on the target.
+
+define i32 @f(i32 %c) nounwind uwtable readnone {
+entry:
+ switch i32 %c, label %sw.default [
+ i32 42, label %return
+ i32 43, label %sw.bb1
+ i32 44, label %sw.bb2
+ i32 45, label %sw.bb3
+ i32 46, label %sw.bb4
+ i32 47, label %sw.bb5
+ i32 48, label %sw.bb6
+ ]
+
+sw.bb1: br label %return
+sw.bb2: br label %return
+sw.bb3: br label %return
+sw.bb4: br label %return
+sw.bb5: br label %return
+sw.bb6: br label %return
+sw.default: br label %return
+return:
+ %retval.0 = phi i32 [ 15, %sw.default ], [ 1, %sw.bb6 ], [ 62, %sw.bb5 ], [ 27, %sw.bb4 ], [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ]
+ ret i32 %retval.0
+
+; CHECK: @f
+; CHECK-NOT: getelementptr
+; CHECK: switch i32 %c
+}
diff --git a/test/Transforms/SimplifyCFG/X86/lit.local.cfg b/test/Transforms/SimplifyCFG/X86/lit.local.cfg
new file mode 100644
index 000000000000..a8ad0f1a28b2
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/X86/lit.local.cfg
@@ -0,0 +1,6 @@
+config.suffixes = ['.ll', '.c', '.cpp']
+
+targets = set(config.root.targets_to_build.split())
+if not 'X86' in targets:
+ config.unsupported = True
+
diff --git a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
new file mode 100644
index 000000000000..8a59992f5e64
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
@@ -0,0 +1,779 @@
+; RUN: opt < %s -simplifycfg -S -mtriple=x86_64-unknown-linux-gnu | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; The table for @f
+; CHECK: @switch.table = private unnamed_addr constant [7 x i32] [i32 55, i32 123, i32 0, i32 -1, i32 27, i32 62, i32 1]
+
+; The float table for @h
+; CHECK: @switch.table1 = private unnamed_addr constant [4 x float] [float 0x40091EB860000000, float 0x3FF3BE76C0000000, float 0x4012449BA0000000, float 0x4001AE1480000000]
+
+; The table for @foostring
+; CHECK: @switch.table2 = private unnamed_addr constant [4 x i8*] [i8* getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str2, i64 0, i64 0), i8* getelementptr inbounds ([4 x i8]* @.str3, i64 0, i64 0)]
+
+; The table for @earlyreturncrash
+; CHECK: @switch.table3 = private unnamed_addr constant [4 x i32] [i32 42, i32 9, i32 88, i32 5]
+
+; The table for @large.
+; CHECK: @switch.table4 = private unnamed_addr constant [199 x i32] [i32 1, i32 4, i32 9,
+
+; The table for @cprop
+; CHECK: @switch.table5 = private unnamed_addr constant [7 x i32] [i32 5, i32 42, i32 126, i32 -452, i32 128, i32 6, i32 7]
+
+; The table for @unreachable
+; CHECK: @switch.table6 = private unnamed_addr constant [5 x i32] [i32 0, i32 0, i32 0, i32 1, i32 -1]
+
+; A simple int-to-int selection switch.
+; It is dense enough to be replaced by table lookup.
+; The result is directly by a ret from an otherwise empty bb,
+; so we return early, directly from the lookup bb.
+
+define i32 @f(i32 %c) {
+entry:
+ switch i32 %c, label %sw.default [
+ i32 42, label %return
+ i32 43, label %sw.bb1
+ i32 44, label %sw.bb2
+ i32 45, label %sw.bb3
+ i32 46, label %sw.bb4
+ i32 47, label %sw.bb5
+ i32 48, label %sw.bb6
+ ]
+
+sw.bb1: br label %return
+sw.bb2: br label %return
+sw.bb3: br label %return
+sw.bb4: br label %return
+sw.bb5: br label %return
+sw.bb6: br label %return
+sw.default: br label %return
+return:
+ %retval.0 = phi i32 [ 15, %sw.default ], [ 1, %sw.bb6 ], [ 62, %sw.bb5 ], [ 27, %sw.bb4 ], [ -1, %sw.bb3 ], [ 0, %sw.bb2 ], [ 123, %sw.bb1 ], [ 55, %entry ]
+ ret i32 %retval.0
+
+; CHECK: @f
+; CHECK: entry:
+; CHECK-NEXT: %switch.tableidx = sub i32 %c, 42
+; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 7
+; CHECK-NEXT: br i1 %0, label %switch.lookup, label %return
+; CHECK: switch.lookup:
+; CHECK-NEXT: %switch.gep = getelementptr inbounds [7 x i32]* @switch.table, i32 0, i32 %switch.tableidx
+; CHECK-NEXT: %switch.load = load i32* %switch.gep
+; CHECK-NEXT: ret i32 %switch.load
+; CHECK: return:
+; CHECK-NEXT: ret i32 15
+}
+
+; A switch used to initialize two variables, an i8 and a float.
+
+declare void @dummy(i8 signext, float)
+define void @h(i32 %x) {
+entry:
+ switch i32 %x, label %sw.default [
+ i32 0, label %sw.epilog
+ i32 1, label %sw.bb1
+ i32 2, label %sw.bb2
+ i32 3, label %sw.bb3
+ ]
+
+sw.bb1: br label %sw.epilog
+sw.bb2: br label %sw.epilog
+sw.bb3: br label %sw.epilog
+sw.default: br label %sw.epilog
+
+sw.epilog:
+ %a.0 = phi i8 [ 7, %sw.default ], [ 5, %sw.bb3 ], [ 88, %sw.bb2 ], [ 9, %sw.bb1 ], [ 42, %entry ]
+ %b.0 = phi float [ 0x4023FAE140000000, %sw.default ], [ 0x4001AE1480000000, %sw.bb3 ], [ 0x4012449BA0000000, %sw.bb2 ], [ 0x3FF3BE76C0000000, %sw.bb1 ], [ 0x40091EB860000000, %entry ]
+ call void @dummy(i8 signext %a.0, float %b.0)
+ ret void
+
+; CHECK: @h
+; CHECK: entry:
+; CHECK-NEXT: %switch.tableidx = sub i32 %x, 0
+; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 4
+; CHECK-NEXT: br i1 %0, label %switch.lookup, label %sw.epilog
+; CHECK: switch.lookup:
+; CHECK-NEXT: %switch.shiftamt = mul i32 %switch.tableidx, 8
+; CHECK-NEXT: %switch.downshift = lshr i32 89655594, %switch.shiftamt
+; CHECK-NEXT: %switch.masked = trunc i32 %switch.downshift to i8
+; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x float]* @switch.table1, i32 0, i32 %switch.tableidx
+; CHECK-NEXT: %switch.load = load float* %switch.gep
+; CHECK-NEXT: br label %sw.epilog
+; CHECK: sw.epilog:
+; CHECK-NEXT: %a.0 = phi i8 [ %switch.masked, %switch.lookup ], [ 7, %entry ]
+; CHECK-NEXT: %b.0 = phi float [ %switch.load, %switch.lookup ], [ 0x4023FAE140000000, %entry ]
+; CHECK-NEXT: call void @dummy(i8 signext %a.0, float %b.0)
+; CHECK-NEXT: ret void
+}
+
+
+; Switch used to return a string.
+
+@.str = private unnamed_addr constant [4 x i8] c"foo\00", align 1
+@.str1 = private unnamed_addr constant [4 x i8] c"bar\00", align 1
+@.str2 = private unnamed_addr constant [4 x i8] c"baz\00", align 1
+@.str3 = private unnamed_addr constant [4 x i8] c"qux\00", align 1
+@.str4 = private unnamed_addr constant [6 x i8] c"error\00", align 1
+
+define i8* @foostring(i32 %x) {
+entry:
+ switch i32 %x, label %sw.default [
+ i32 0, label %return
+ i32 1, label %sw.bb1
+ i32 2, label %sw.bb2
+ i32 3, label %sw.bb3
+ ]
+
+sw.bb1: br label %return
+sw.bb2: br label %return
+sw.bb3: br label %return
+sw.default: br label %return
+
+return:
+ %retval.0 = phi i8* [ getelementptr inbounds ([6 x i8]* @.str4, i64 0, i64 0), %sw.default ],
+ [ getelementptr inbounds ([4 x i8]* @.str3, i64 0, i64 0), %sw.bb3 ],
+ [ getelementptr inbounds ([4 x i8]* @.str2, i64 0, i64 0), %sw.bb2 ],
+ [ getelementptr inbounds ([4 x i8]* @.str1, i64 0, i64 0), %sw.bb1 ],
+ [ getelementptr inbounds ([4 x i8]* @.str, i64 0, i64 0), %entry ]
+ ret i8* %retval.0
+
+; CHECK: @foostring
+; CHECK: entry:
+; CHECK-NEXT: %switch.tableidx = sub i32 %x, 0
+; CHECK-NEXT: %0 = icmp ult i32 %switch.tableidx, 4
+; CHECK-NEXT: br i1 %0, label %switch.lookup, label %return
+; CHECK: switch.lookup:
+; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i8*]* @switch.table2, i32 0, i32 %switch.tableidx
+; CHECK-NEXT: %switch.load = load i8** %switch.gep
+; CHECK-NEXT: ret i8* %switch.load
+}
+
+; Switch used to initialize two values. The first value is returned, the second
+; value is not used. This used to make the transformation generate illegal code.
+
+define i32 @earlyreturncrash(i32 %x) {
+entry:
+ switch i32 %x, label %sw.default [
+ i32 0, label %sw.epilog
+ i32 1, label %sw.bb1
+ i32 2, label %sw.bb2
+ i32 3, label %sw.bb3
+ ]
+
+sw.bb1: br label %sw.epilog
+sw.bb2: br label %sw.epilog
+sw.bb3: br label %sw.epilog
+sw.default: br label %sw.epilog
+
+sw.epilog:
+ %a.0 = phi i32 [ 7, %sw.default ], [ 5, %sw.bb3 ], [ 88, %sw.bb2 ], [ 9, %sw.bb1 ], [ 42, %entry ]
+ %b.0 = phi i32 [ 10, %sw.default ], [ 5, %sw.bb3 ], [ 1, %sw.bb2 ], [ 4, %sw.bb1 ], [ 3, %entry ]
+ ret i32 %a.0
+
+; CHECK: @earlyreturncrash
+; CHECK: switch.lookup:
+; CHECK-NEXT: %switch.gep = getelementptr inbounds [4 x i32]* @switch.table3, i32 0, i32 %switch.tableidx
+; CHECK-NEXT: %switch.load = load i32* %switch.gep
+; CHECK-NEXT: ret i32 %switch.load
+; CHECK: sw.epilog:
+; CHECK-NEXT: ret i32 7
+}
+
+
+; Example 7 from http://blog.regehr.org/archives/320
+; It is not dense enough for a regular table, but the results
+; can be packed into a bitmap.
+
+define i32 @crud(i8 zeroext %c) {
+entry:
+ %cmp = icmp ult i8 %c, 33
+ br i1 %cmp, label %lor.end, label %switch.early.test
+
+switch.early.test:
+ switch i8 %c, label %lor.rhs [
+ i8 92, label %lor.end
+ i8 62, label %lor.end
+ i8 60, label %lor.end
+ i8 59, label %lor.end
+ i8 58, label %lor.end
+ i8 46, label %lor.end
+ i8 44, label %lor.end
+ i8 34, label %lor.end
+ i8 39, label %switch.edge
+ ]
+
+switch.edge: br label %lor.end
+lor.rhs: br label %lor.end
+
+lor.end:
+ %0 = phi i1 [ true, %switch.early.test ],
+ [ false, %lor.rhs ],
+ [ true, %entry ],
+ [ true, %switch.early.test ],
+ [ true, %switch.early.test ],
+ [ true, %switch.early.test ],
+ [ true, %switch.early.test ],
+ [ true, %switch.early.test ],
+ [ true, %switch.early.test ],
+ [ true, %switch.early.test ],
+ [ true, %switch.edge ]
+ %lor.ext = zext i1 %0 to i32
+ ret i32 %lor.ext
+
+; CHECK: @crud
+; CHECK: entry:
+; CHECK-NEXT: %cmp = icmp ult i8 %c, 33
+; CHECK-NEXT: br i1 %cmp, label %lor.end, label %switch.early.test
+; CHECK: switch.early.test:
+; CHECK-NEXT: %switch.tableidx = sub i8 %c, 34
+; CHECK-NEXT: %0 = icmp ult i8 %switch.tableidx, 59
+; CHECK-NEXT: br i1 %0, label %switch.lookup, label %lor.end
+; CHECK: switch.lookup:
+; CHECK-NEXT: %switch.cast = zext i8 %switch.tableidx to i59
+; CHECK-NEXT: %switch.shiftamt = mul i59 %switch.cast, 1
+; CHECK-NEXT: %switch.downshift = lshr i59 -288230375765830623, %switch.shiftamt
+; CHECK-NEXT: %switch.masked = trunc i59 %switch.downshift to i1
+; CHECK-NEXT: br label %lor.end
+; CHECK: lor.end:
+; CHECK-NEXT: %1 = phi i1 [ true, %entry ], [ %switch.masked, %switch.lookup ], [ false, %switch.early.test ]
+; CHECK-NEXT: %lor.ext = zext i1 %1 to i32
+; CHECK-NEXT: ret i32 %lor.ext
+}
+
+; PR13946
+define i32 @overflow(i32 %type) {
+entry:
+ switch i32 %type, label %sw.default [
+ i32 -2147483648, label %sw.bb
+ i32 0, label %sw.bb
+ i32 1, label %sw.bb1
+ i32 2, label %sw.bb2
+ i32 -2147483645, label %sw.bb3
+ i32 3, label %sw.bb3
+ ]
+
+sw.bb: br label %if.end
+sw.bb1: br label %if.end
+sw.bb2: br label %if.end
+sw.bb3: br label %if.end
+sw.default: br label %if.end
+if.else: br label %if.end
+
+if.end:
+ %dirent_type.0 = phi i32 [ 3, %sw.default ], [ 6, %sw.bb3 ], [ 5, %sw.bb2 ], [ 0, %sw.bb1 ], [ 3, %sw.bb ], [ 0, %if.else ]
+ ret i32 %dirent_type.0
+; CHECK: define i32 @overflow
+; CHECK: switch
+; CHECK: phi
+}
+
+; PR13985
+define i1 @undef(i32 %tmp) {
+bb:
+ switch i32 %tmp, label %bb3 [
+ i32 0, label %bb1
+ i32 1, label %bb1
+ i32 7, label %bb2
+ i32 8, label %bb2
+ ]
+
+bb1: br label %bb3
+bb2: br label %bb3
+
+bb3:
+ %tmp4 = phi i1 [ undef, %bb ], [ false, %bb2 ], [ true, %bb1 ]
+ ret i1 %tmp4
+; CHECK: define i1 @undef
+; CHECK: %switch.cast = trunc i32 %switch.tableidx to i9
+; CHECK: %switch.downshift = lshr i9 3, %switch.shiftamt
+}
+
+; Also handle large switches that would be rejected by
+; isValueEqualityComparison()
+; CHECK: large
+; CHECK-NOT: switch i32
+define i32 @large(i32 %x) {
+entry:
+ %cmp = icmp slt i32 %x, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ %mul = mul i32 %x, -10
+ br label %if.end
+
+if.end:
+ %x.addr.0 = phi i32 [ %mul, %if.then ], [ %x, %entry ]
+ switch i32 %x.addr.0, label %return [
+ i32 199, label %sw.bb203
+ i32 1, label %sw.bb1
+ i32 2, label %sw.bb2
+ i32 3, label %sw.bb3
+ i32 4, label %sw.bb4
+ i32 5, label %sw.bb5
+ i32 6, label %sw.bb6
+ i32 7, label %sw.bb7
+ i32 8, label %sw.bb8
+ i32 9, label %sw.bb9
+ i32 10, label %sw.bb10
+ i32 11, label %sw.bb11
+ i32 12, label %sw.bb12
+ i32 13, label %sw.bb13
+ i32 14, label %sw.bb14
+ i32 15, label %sw.bb15
+ i32 16, label %sw.bb16
+ i32 17, label %sw.bb17
+ i32 18, label %sw.bb18
+ i32 19, label %sw.bb19
+ i32 20, label %sw.bb20
+ i32 21, label %sw.bb21
+ i32 22, label %sw.bb22
+ i32 23, label %sw.bb23
+ i32 24, label %sw.bb24
+ i32 25, label %sw.bb25
+ i32 26, label %sw.bb26
+ i32 27, label %sw.bb27
+ i32 28, label %sw.bb28
+ i32 29, label %sw.bb29
+ i32 30, label %sw.bb30
+ i32 31, label %sw.bb31
+ i32 32, label %sw.bb32
+ i32 33, label %sw.bb33
+ i32 34, label %sw.bb34
+ i32 35, label %sw.bb35
+ i32 36, label %sw.bb37
+ i32 37, label %sw.bb38
+ i32 38, label %sw.bb39
+ i32 39, label %sw.bb40
+ i32 40, label %sw.bb41
+ i32 41, label %sw.bb42
+ i32 42, label %sw.bb43
+ i32 43, label %sw.bb44
+ i32 44, label %sw.bb45
+ i32 45, label %sw.bb47
+ i32 46, label %sw.bb48
+ i32 47, label %sw.bb49
+ i32 48, label %sw.bb50
+ i32 49, label %sw.bb51
+ i32 50, label %sw.bb52
+ i32 51, label %sw.bb53
+ i32 52, label %sw.bb54
+ i32 53, label %sw.bb55
+ i32 54, label %sw.bb56
+ i32 55, label %sw.bb58
+ i32 56, label %sw.bb59
+ i32 57, label %sw.bb60
+ i32 58, label %sw.bb61
+ i32 59, label %sw.bb62
+ i32 60, label %sw.bb63
+ i32 61, label %sw.bb64
+ i32 62, label %sw.bb65
+ i32 63, label %sw.bb66
+ i32 64, label %sw.bb67
+ i32 65, label %sw.bb68
+ i32 66, label %sw.bb69
+ i32 67, label %sw.bb70
+ i32 68, label %sw.bb71
+ i32 69, label %sw.bb72
+ i32 70, label %sw.bb73
+ i32 71, label %sw.bb74
+ i32 72, label %sw.bb76
+ i32 73, label %sw.bb77
+ i32 74, label %sw.bb78
+ i32 75, label %sw.bb79
+ i32 76, label %sw.bb80
+ i32 77, label %sw.bb81
+ i32 78, label %sw.bb82
+ i32 79, label %sw.bb83
+ i32 80, label %sw.bb84
+ i32 81, label %sw.bb85
+ i32 82, label %sw.bb86
+ i32 83, label %sw.bb87
+ i32 84, label %sw.bb88
+ i32 85, label %sw.bb89
+ i32 86, label %sw.bb90
+ i32 87, label %sw.bb91
+ i32 88, label %sw.bb92
+ i32 89, label %sw.bb93
+ i32 90, label %sw.bb94
+ i32 91, label %sw.bb95
+ i32 92, label %sw.bb96
+ i32 93, label %sw.bb97
+ i32 94, label %sw.bb98
+ i32 95, label %sw.bb99
+ i32 96, label %sw.bb100
+ i32 97, label %sw.bb101
+ i32 98, label %sw.bb102
+ i32 99, label %sw.bb103
+ i32 100, label %sw.bb104
+ i32 101, label %sw.bb105
+ i32 102, label %sw.bb106
+ i32 103, label %sw.bb107
+ i32 104, label %sw.bb108
+ i32 105, label %sw.bb109
+ i32 106, label %sw.bb110
+ i32 107, label %sw.bb111
+ i32 108, label %sw.bb112
+ i32 109, label %sw.bb113
+ i32 110, label %sw.bb114
+ i32 111, label %sw.bb115
+ i32 112, label %sw.bb116
+ i32 113, label %sw.bb117
+ i32 114, label %sw.bb118
+ i32 115, label %sw.bb119
+ i32 116, label %sw.bb120
+ i32 117, label %sw.bb121
+ i32 118, label %sw.bb122
+ i32 119, label %sw.bb123
+ i32 120, label %sw.bb124
+ i32 121, label %sw.bb125
+ i32 122, label %sw.bb126
+ i32 123, label %sw.bb127
+ i32 124, label %sw.bb128
+ i32 125, label %sw.bb129
+ i32 126, label %sw.bb130
+ i32 127, label %sw.bb131
+ i32 128, label %sw.bb132
+ i32 129, label %sw.bb133
+ i32 130, label %sw.bb134
+ i32 131, label %sw.bb135
+ i32 132, label %sw.bb136
+ i32 133, label %sw.bb137
+ i32 134, label %sw.bb138
+ i32 135, label %sw.bb139
+ i32 136, label %sw.bb140
+ i32 137, label %sw.bb141
+ i32 138, label %sw.bb142
+ i32 139, label %sw.bb143
+ i32 140, label %sw.bb144
+ i32 141, label %sw.bb145
+ i32 142, label %sw.bb146
+ i32 143, label %sw.bb147
+ i32 144, label %sw.bb148
+ i32 145, label %sw.bb149
+ i32 146, label %sw.bb150
+ i32 147, label %sw.bb151
+ i32 148, label %sw.bb152
+ i32 149, label %sw.bb153
+ i32 150, label %sw.bb154
+ i32 151, label %sw.bb155
+ i32 152, label %sw.bb156
+ i32 153, label %sw.bb157
+ i32 154, label %sw.bb158
+ i32 155, label %sw.bb159
+ i32 156, label %sw.bb160
+ i32 157, label %sw.bb161
+ i32 158, label %sw.bb162
+ i32 159, label %sw.bb163
+ i32 160, label %sw.bb164
+ i32 161, label %sw.bb165
+ i32 162, label %sw.bb166
+ i32 163, label %sw.bb167
+ i32 164, label %sw.bb168
+ i32 165, label %sw.bb169
+ i32 166, label %sw.bb170
+ i32 167, label %sw.bb171
+ i32 168, label %sw.bb172
+ i32 169, label %sw.bb173
+ i32 170, label %sw.bb174
+ i32 171, label %sw.bb175
+ i32 172, label %sw.bb176
+ i32 173, label %sw.bb177
+ i32 174, label %sw.bb178
+ i32 175, label %sw.bb179
+ i32 176, label %sw.bb180
+ i32 177, label %sw.bb181
+ i32 178, label %sw.bb182
+ i32 179, label %sw.bb183
+ i32 180, label %sw.bb184
+ i32 181, label %sw.bb185
+ i32 182, label %sw.bb186
+ i32 183, label %sw.bb187
+ i32 184, label %sw.bb188
+ i32 185, label %sw.bb189
+ i32 186, label %sw.bb190
+ i32 187, label %sw.bb191
+ i32 188, label %sw.bb192
+ i32 189, label %sw.bb193
+ i32 190, label %sw.bb194
+ i32 191, label %sw.bb195
+ i32 192, label %sw.bb196
+ i32 193, label %sw.bb197
+ i32 194, label %sw.bb198
+ i32 195, label %sw.bb199
+ i32 196, label %sw.bb200
+ i32 197, label %sw.bb201
+ i32 198, label %sw.bb202
+ ]
+
+sw.bb1: br label %return
+sw.bb2: br label %return
+sw.bb3: br label %return
+sw.bb4: br label %return
+sw.bb5: br label %return
+sw.bb6: br label %return
+sw.bb7: br label %return
+sw.bb8: br label %return
+sw.bb9: br label %return
+sw.bb10: br label %return
+sw.bb11: br label %return
+sw.bb12: br label %return
+sw.bb13: br label %return
+sw.bb14: br label %return
+sw.bb15: br label %return
+sw.bb16: br label %return
+sw.bb17: br label %return
+sw.bb18: br label %return
+sw.bb19: br label %return
+sw.bb20: br label %return
+sw.bb21: br label %return
+sw.bb22: br label %return
+sw.bb23: br label %return
+sw.bb24: br label %return
+sw.bb25: br label %return
+sw.bb26: br label %return
+sw.bb27: br label %return
+sw.bb28: br label %return
+sw.bb29: br label %return
+sw.bb30: br label %return
+sw.bb31: br label %return
+sw.bb32: br label %return
+sw.bb33: br label %return
+sw.bb34: br label %return
+sw.bb35: br label %return
+sw.bb37: br label %return
+sw.bb38: br label %return
+sw.bb39: br label %return
+sw.bb40: br label %return
+sw.bb41: br label %return
+sw.bb42: br label %return
+sw.bb43: br label %return
+sw.bb44: br label %return
+sw.bb45: br label %return
+sw.bb47: br label %return
+sw.bb48: br label %return
+sw.bb49: br label %return
+sw.bb50: br label %return
+sw.bb51: br label %return
+sw.bb52: br label %return
+sw.bb53: br label %return
+sw.bb54: br label %return
+sw.bb55: br label %return
+sw.bb56: br label %return
+sw.bb58: br label %return
+sw.bb59: br label %return
+sw.bb60: br label %return
+sw.bb61: br label %return
+sw.bb62: br label %return
+sw.bb63: br label %return
+sw.bb64: br label %return
+sw.bb65: br label %return
+sw.bb66: br label %return
+sw.bb67: br label %return
+sw.bb68: br label %return
+sw.bb69: br label %return
+sw.bb70: br label %return
+sw.bb71: br label %return
+sw.bb72: br label %return
+sw.bb73: br label %return
+sw.bb74: br label %return
+sw.bb76: br label %return
+sw.bb77: br label %return
+sw.bb78: br label %return
+sw.bb79: br label %return
+sw.bb80: br label %return
+sw.bb81: br label %return
+sw.bb82: br label %return
+sw.bb83: br label %return
+sw.bb84: br label %return
+sw.bb85: br label %return
+sw.bb86: br label %return
+sw.bb87: br label %return
+sw.bb88: br label %return
+sw.bb89: br label %return
+sw.bb90: br label %return
+sw.bb91: br label %return
+sw.bb92: br label %return
+sw.bb93: br label %return
+sw.bb94: br label %return
+sw.bb95: br label %return
+sw.bb96: br label %return
+sw.bb97: br label %return
+sw.bb98: br label %return
+sw.bb99: br label %return
+sw.bb100: br label %return
+sw.bb101: br label %return
+sw.bb102: br label %return
+sw.bb103: br label %return
+sw.bb104: br label %return
+sw.bb105: br label %return
+sw.bb106: br label %return
+sw.bb107: br label %return
+sw.bb108: br label %return
+sw.bb109: br label %return
+sw.bb110: br label %return
+sw.bb111: br label %return
+sw.bb112: br label %return
+sw.bb113: br label %return
+sw.bb114: br label %return
+sw.bb115: br label %return
+sw.bb116: br label %return
+sw.bb117: br label %return
+sw.bb118: br label %return
+sw.bb119: br label %return
+sw.bb120: br label %return
+sw.bb121: br label %return
+sw.bb122: br label %return
+sw.bb123: br label %return
+sw.bb124: br label %return
+sw.bb125: br label %return
+sw.bb126: br label %return
+sw.bb127: br label %return
+sw.bb128: br label %return
+sw.bb129: br label %return
+sw.bb130: br label %return
+sw.bb131: br label %return
+sw.bb132: br label %return
+sw.bb133: br label %return
+sw.bb134: br label %return
+sw.bb135: br label %return
+sw.bb136: br label %return
+sw.bb137: br label %return
+sw.bb138: br label %return
+sw.bb139: br label %return
+sw.bb140: br label %return
+sw.bb141: br label %return
+sw.bb142: br label %return
+sw.bb143: br label %return
+sw.bb144: br label %return
+sw.bb145: br label %return
+sw.bb146: br label %return
+sw.bb147: br label %return
+sw.bb148: br label %return
+sw.bb149: br label %return
+sw.bb150: br label %return
+sw.bb151: br label %return
+sw.bb152: br label %return
+sw.bb153: br label %return
+sw.bb154: br label %return
+sw.bb155: br label %return
+sw.bb156: br label %return
+sw.bb157: br label %return
+sw.bb158: br label %return
+sw.bb159: br label %return
+sw.bb160: br label %return
+sw.bb161: br label %return
+sw.bb162: br label %return
+sw.bb163: br label %return
+sw.bb164: br label %return
+sw.bb165: br label %return
+sw.bb166: br label %return
+sw.bb167: br label %return
+sw.bb168: br label %return
+sw.bb169: br label %return
+sw.bb170: br label %return
+sw.bb171: br label %return
+sw.bb172: br label %return
+sw.bb173: br label %return
+sw.bb174: br label %return
+sw.bb175: br label %return
+sw.bb176: br label %return
+sw.bb177: br label %return
+sw.bb178: br label %return
+sw.bb179: br label %return
+sw.bb180: br label %return
+sw.bb181: br label %return
+sw.bb182: br label %return
+sw.bb183: br label %return
+sw.bb184: br label %return
+sw.bb185: br label %return
+sw.bb186: br label %return
+sw.bb187: br label %return
+sw.bb188: br label %return
+sw.bb189: br label %return
+sw.bb190: br label %return
+sw.bb191: br label %return
+sw.bb192: br label %return
+sw.bb193: br label %return
+sw.bb194: br label %return
+sw.bb195: br label %return
+sw.bb196: br label %return
+sw.bb197: br label %return
+sw.bb198: br label %return
+sw.bb199: br label %return
+sw.bb200: br label %return
+sw.bb201: br label %return
+sw.bb202: br label %return
+sw.bb203: br label %return
+
+return:
+ %retval.0 = phi i32 [ 39204, %sw.bb202 ], [ 38809, %sw.bb201 ], [ 38416, %sw.bb200 ], [ 38025, %sw.bb199 ], [ 37636, %sw.bb198 ], [ 37249, %sw.bb197 ], [ 36864, %sw.bb196 ], [ 36481, %sw.bb195 ], [ 36100, %sw.bb194 ], [ 35721, %sw.bb193 ], [ 35344, %sw.bb192 ], [ 34969, %sw.bb191 ], [ 34596, %sw.bb190 ], [ 34225, %sw.bb189 ], [ 33856, %sw.bb188 ], [ 33489, %sw.bb187 ], [ 33124, %sw.bb186 ], [ 32761, %sw.bb185 ], [ 32400, %sw.bb184 ], [ 32041, %sw.bb183 ], [ 31684, %sw.bb182 ], [ 31329, %sw.bb181 ], [ 30976, %sw.bb180 ], [ 30625, %sw.bb179 ], [ 30276, %sw.bb178 ], [ 29929, %sw.bb177 ], [ 29584, %sw.bb176 ], [ 29241, %sw.bb175 ], [ 28900, %sw.bb174 ], [ 28561, %sw.bb173 ], [ 28224, %sw.bb172 ], [ 27889, %sw.bb171 ], [ 27556, %sw.bb170 ], [ 27225, %sw.bb169 ], [ 26896, %sw.bb168 ], [ 26569, %sw.bb167 ], [ 26244, %sw.bb166 ], [ 25921, %sw.bb165 ], [ 25600, %sw.bb164 ], [ 25281, %sw.bb163 ], [ 24964, %sw.bb162 ], [ 24649, %sw.bb161 ], [ 24336, %sw.bb160 ], [ 24025, %sw.bb159 ], [ 23716, %sw.bb158 ], [ 23409, %sw.bb157 ], [ 23104, %sw.bb156 ], [ 22801, %sw.bb155 ], [ 22500, %sw.bb154 ], [ 22201, %sw.bb153 ], [ 21904, %sw.bb152 ], [ 21609, %sw.bb151 ], [ 21316, %sw.bb150 ], [ 21025, %sw.bb149 ], [ 20736, %sw.bb148 ], [ 20449, %sw.bb147 ], [ 20164, %sw.bb146 ], [ 19881, %sw.bb145 ], [ 19600, %sw.bb144 ], [ 19321, %sw.bb143 ], [ 19044, %sw.bb142 ], [ 18769, %sw.bb141 ], [ 18496, %sw.bb140 ], [ 18225, %sw.bb139 ], [ 17956, %sw.bb138 ], [ 17689, %sw.bb137 ], [ 17424, %sw.bb136 ], [ 17161, %sw.bb135 ], [ 16900, %sw.bb134 ], [ 16641, %sw.bb133 ], [ 16384, %sw.bb132 ], [ 16129, %sw.bb131 ], [ 15876, %sw.bb130 ], [ 15625, %sw.bb129 ], [ 15376, %sw.bb128 ], [ 15129, %sw.bb127 ], [ 14884, %sw.bb126 ], [ 14641, %sw.bb125 ], [ 14400, %sw.bb124 ], [ 14161, %sw.bb123 ], [ 13924, %sw.bb122 ], [ 13689, %sw.bb121 ], [ 13456, %sw.bb120 ], [ 13225, %sw.bb119 ], [ 12996, %sw.bb118 ], [ 12769, %sw.bb117 ], [ 12544, %sw.bb116 ], [ 12321, %sw.bb115 ], [ 12100, %sw.bb114 ], [ 11881, %sw.bb113 
], [ 11664, %sw.bb112 ], [ 11449, %sw.bb111 ], [ 11236, %sw.bb110 ], [ 11025, %sw.bb109 ], [ 10816, %sw.bb108 ], [ 10609, %sw.bb107 ], [ 10404, %sw.bb106 ], [ 10201, %sw.bb105 ], [ 10000, %sw.bb104 ], [ 9801, %sw.bb103 ], [ 9604, %sw.bb102 ], [ 9409, %sw.bb101 ], [ 9216, %sw.bb100 ], [ 9025, %sw.bb99 ], [ 8836, %sw.bb98 ], [ 8649, %sw.bb97 ], [ 8464, %sw.bb96 ], [ 8281, %sw.bb95 ], [ 8100, %sw.bb94 ], [ 7921, %sw.bb93 ], [ 7744, %sw.bb92 ], [ 7569, %sw.bb91 ], [ 7396, %sw.bb90 ], [ 7225, %sw.bb89 ], [ 7056, %sw.bb88 ], [ 6889, %sw.bb87 ], [ 6724, %sw.bb86 ], [ 6561, %sw.bb85 ], [ 6400, %sw.bb84 ], [ 6241, %sw.bb83 ], [ 6084, %sw.bb82 ], [ 5929, %sw.bb81 ], [ 5776, %sw.bb80 ], [ 5625, %sw.bb79 ], [ 5476, %sw.bb78 ], [ 5329, %sw.bb77 ], [ 5184, %sw.bb76 ], [ 5112, %sw.bb74 ], [ 4900, %sw.bb73 ], [ 4761, %sw.bb72 ], [ 4624, %sw.bb71 ], [ 4489, %sw.bb70 ], [ 4356, %sw.bb69 ], [ 4225, %sw.bb68 ], [ 4096, %sw.bb67 ], [ 3969, %sw.bb66 ], [ 3844, %sw.bb65 ], [ 3721, %sw.bb64 ], [ 3600, %sw.bb63 ], [ 3481, %sw.bb62 ], [ 3364, %sw.bb61 ], [ 3249, %sw.bb60 ], [ 3136, %sw.bb59 ], [ 3025, %sw.bb58 ], [ 2970, %sw.bb56 ], [ 2809, %sw.bb55 ], [ 2704, %sw.bb54 ], [ 2601, %sw.bb53 ], [ 2500, %sw.bb52 ], [ 2401, %sw.bb51 ], [ 2304, %sw.bb50 ], [ 2209, %sw.bb49 ], [ 2116, %sw.bb48 ], [ 2025, %sw.bb47 ], [ 1980, %sw.bb45 ], [ 1849, %sw.bb44 ], [ 1764, %sw.bb43 ], [ 1681, %sw.bb42 ], [ 1600, %sw.bb41 ], [ 1521, %sw.bb40 ], [ 1444, %sw.bb39 ], [ 1369, %sw.bb38 ], [ 1296, %sw.bb37 ], [ 1260, %sw.bb35 ], [ 1156, %sw.bb34 ], [ 1089, %sw.bb33 ], [ 1024, %sw.bb32 ], [ 961, %sw.bb31 ], [ 900, %sw.bb30 ], [ 841, %sw.bb29 ], [ 784, %sw.bb28 ], [ 729, %sw.bb27 ], [ 676, %sw.bb26 ], [ 625, %sw.bb25 ], [ 576, %sw.bb24 ], [ 529, %sw.bb23 ], [ 484, %sw.bb22 ], [ 441, %sw.bb21 ], [ 400, %sw.bb20 ], [ 361, %sw.bb19 ], [ 342, %sw.bb18 ], [ 289, %sw.bb17 ], [ 256, %sw.bb16 ], [ 225, %sw.bb15 ], [ 196, %sw.bb14 ], [ 169, %sw.bb13 ], [ 144, %sw.bb12 ], [ 121, %sw.bb11 ], [ 100, %sw.bb10 ], [ 81, %sw.bb9 ], 
[ 64, %sw.bb8 ], [ 49, %sw.bb7 ], [ 36, %sw.bb6 ], [ 25, %sw.bb5 ], [ 16, %sw.bb4 ], [ 9, %sw.bb3 ], [ 4, %sw.bb2 ], [ 1, %sw.bb1 ], [ 39601, %sw.bb203 ], [ 0, %if.end ]
+ ret i32 %retval.0
+}
+
+define i32 @cprop(i32 %x) {
+entry:
+ switch i32 %x, label %sw.default [
+ i32 1, label %return
+ i32 2, label %sw.bb1
+ i32 3, label %sw.bb2
+ i32 4, label %sw.bb2
+ i32 5, label %sw.bb2
+ i32 6, label %sw.bb3
+ i32 7, label %sw.bb3
+ ]
+
+sw.bb1: br label %return
+
+sw.bb2:
+ %and = and i32 %x, 1
+ %tobool = icmp ne i32 %and, 0
+ %cond = select i1 %tobool, i32 -123, i32 456
+ %sub = sub nsw i32 %x, %cond
+ br label %return
+
+sw.bb3:
+ %trunc = trunc i32 %x to i8
+ %sext = sext i8 %trunc to i32
+ br label %return
+
+sw.default:
+ br label %return
+
+return:
+ %retval.0 = phi i32 [ 123, %sw.default ], [ %sext, %sw.bb3 ], [ %sub, %sw.bb2 ], [ 42, %sw.bb1 ], [ 5, %entry ]
+ ret i32 %retval.0
+
+; CHECK: @cprop
+; CHECK: switch.lookup:
+; CHECK: %switch.gep = getelementptr inbounds [7 x i32]* @switch.table5, i32 0, i32 %switch.tableidx
+}
+
+define i32 @unreachable(i32 %x) {
+entry:
+ switch i32 %x, label %sw.default [
+ i32 0, label %sw.bb
+ i32 1, label %sw.bb
+ i32 2, label %sw.bb
+ i32 3, label %sw.bb1
+ i32 4, label %sw.bb2
+ i32 5, label %sw.bb3
+ i32 6, label %sw.bb3
+ i32 7, label %sw.bb3
+ i32 8, label %sw.bb3
+ ]
+
+sw.bb: br label %return
+sw.bb1: unreachable
+sw.bb2: br label %return
+sw.bb3: br label %return
+sw.default: unreachable
+
+return:
+ %retval.0 = phi i32 [ 1, %sw.bb3 ], [ -1, %sw.bb2 ], [ 0, %sw.bb ]
+ ret i32 %retval.0
+
+; CHECK: @unreachable
+; CHECK: switch.lookup:
+; CHECK: getelementptr inbounds [5 x i32]* @switch.table6, i32 0, i32 %switch.tableidx
+}
diff --git a/test/Transforms/SimplifyCFG/phi-undef-loadstore.ll b/test/Transforms/SimplifyCFG/phi-undef-loadstore.ll
index 65d888ea01e1..028fb0745631 100644
--- a/test/Transforms/SimplifyCFG/phi-undef-loadstore.ll
+++ b/test/Transforms/SimplifyCFG/phi-undef-loadstore.ll
@@ -85,3 +85,31 @@ if.end7: ; preds = %if.else, %if.then4,
; CHECK: if.end7:
; CHECK: phi i32* [ %a, %if.then ], [ null, %if.then4 ], [ null, %if.else ]
}
+
+define i32 @test4(i32* %a, i32 %b, i32* %c, i32 %d) nounwind {
+entry:
+ %tobool = icmp eq i32 %b, 0
+ br i1 %tobool, label %if.else, label %if.then
+
+if.then: ; preds = %entry
+ tail call void @bar() nounwind
+ br label %if.end7
+
+if.else: ; preds = %entry
+ %tobool3 = icmp eq i32 %d, 0
+ br i1 %tobool3, label %if.end7, label %if.then4
+
+if.then4: ; preds = %if.else
+ tail call void @bar() nounwind
+ br label %if.end7
+
+if.end7: ; preds = %if.else, %if.then4, %if.then
+ %x.0 = phi i32* [ %a, %if.then ], [ null, %if.then4 ], [ null, %if.else ]
+ %gep = getelementptr i32* %x.0, i32 10
+ %tmp9 = load i32* %gep
+ %tmp10 = or i32 %tmp9, 1
+ store i32 %tmp10, i32* %gep
+ ret i32 %tmp9
+; CHECK: @test4
+; CHECK-NOT: phi
+}
diff --git a/test/Transforms/SimplifyCFG/preserve-branchweights-partial.ll b/test/Transforms/SimplifyCFG/preserve-branchweights-partial.ll
new file mode 100644
index 000000000000..53d5448372da
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/preserve-branchweights-partial.ll
@@ -0,0 +1,37 @@
+; RUN: opt -simplifycfg -S -o - < %s | FileCheck %s
+
+; This test case was written to trigger an incorrect assert statement in
+; -simplifycfg. Thus we don't actually want to check the output, just that
+; -simplifycfg ran successfully. Thus we only check that the function still
+; exists, and that it still calls foo().
+;
+; NOTE: There are some obviously dead blocks and missing branch weight
+; metadata. Both of these features were key to triggering the assert.
+; Additionally, the not-taken weight of the branch with a weight had to
+; be 0 to trigger the assert.
+
+declare void @foo() nounwind uwtable
+
+define void @func(i32 %A) nounwind uwtable {
+; CHECK: define void @func
+entry:
+ %cmp11 = icmp eq i32 %A, 1
+ br i1 %cmp11, label %if.then, label %if.else, !prof !0
+
+if.then:
+ call void @foo()
+; CHECK: call void @foo()
+ br label %if.else
+
+if.else:
+ %cmp17 = icmp eq i32 %A, 2
+ br i1 %cmp17, label %if.then2, label %if.end
+
+if.then2:
+ br label %if.end
+
+if.end:
+ ret void
+}
+
+!0 = metadata !{metadata !"branch_weights", i32 1, i32 0}
diff --git a/test/Transforms/SimplifyCFG/preserve-branchweights-switch-create.ll b/test/Transforms/SimplifyCFG/preserve-branchweights-switch-create.ll
new file mode 100644
index 000000000000..941f5ad9d5b6
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/preserve-branchweights-switch-create.ll
@@ -0,0 +1,140 @@
+; RUN: opt -simplifycfg -S -o - < %s | FileCheck %s
+
+declare void @func2(i32)
+declare void @func4(i32)
+declare void @func6(i32)
+declare void @func8(i32)
+
+;; test1 - create a switch with case 2 and case 4 from two branches: N == 2
+;; and N == 4.
+define void @test1(i32 %N) nounwind uwtable {
+entry:
+ %cmp = icmp eq i32 %N, 2
+ br i1 %cmp, label %if.then, label %if.else, !prof !0
+; CHECK: test1
+; CHECK: switch i32 %N
+; CHECK: ], !prof !0
+
+if.then:
+ call void @func2(i32 %N) nounwind
+ br label %if.end9
+
+if.else:
+ %cmp2 = icmp eq i32 %N, 4
+ br i1 %cmp2, label %if.then7, label %if.else8, !prof !1
+
+if.then7:
+ call void @func4(i32 %N) nounwind
+ br label %if.end
+
+if.else8:
+ call void @func8(i32 %N) nounwind
+ br label %if.end
+
+if.end:
+ br label %if.end9
+
+if.end9:
+ ret void
+}
+
+;; test2 - Merge two switches where PredDefault == BB.
+define void @test2(i32 %M, i32 %N) nounwind uwtable {
+entry:
+ %cmp = icmp sgt i32 %M, 2
+ br i1 %cmp, label %sw1, label %sw2
+
+sw1:
+ switch i32 %N, label %sw2 [
+ i32 2, label %sw.bb
+ i32 3, label %sw.bb1
+ ], !prof !2
+; CHECK: test2
+; CHECK: switch i32 %N, label %sw.epilog
+; CHECK: i32 2, label %sw.bb
+; CHECK: i32 3, label %sw.bb1
+; CHECK: i32 4, label %sw.bb5
+; CHECK: ], !prof !1
+
+sw.bb:
+ call void @func2(i32 %N) nounwind
+ br label %sw.epilog
+
+sw.bb1:
+ call void @func4(i32 %N) nounwind
+ br label %sw.epilog
+
+sw2:
+;; Here "case 2" is invalidated if control is transferred through default case
+;; of the first switch.
+ switch i32 %N, label %sw.epilog [
+ i32 2, label %sw.bb4
+ i32 4, label %sw.bb5
+ ], !prof !3
+
+sw.bb4:
+ call void @func6(i32 %N) nounwind
+ br label %sw.epilog
+
+sw.bb5:
+ call void @func8(i32 %N) nounwind
+ br label %sw.epilog
+
+sw.epilog:
+ ret void
+}
+
+;; test3 - Merge two switches where PredDefault != BB.
+define void @test3(i32 %M, i32 %N) nounwind uwtable {
+entry:
+ %cmp = icmp sgt i32 %M, 2
+ br i1 %cmp, label %sw1, label %sw2
+
+sw1:
+ switch i32 %N, label %sw.bb [
+ i32 2, label %sw2
+ i32 3, label %sw2
+ i32 1, label %sw.bb1
+ ], !prof !4
+; CHECK: test3
+; CHECK: switch i32 %N, label %sw.bb
+; CHECK: i32 1, label %sw.bb1
+; CHECK: i32 3, label %sw.bb4
+; CHECK: i32 2, label %sw.epilog
+; CHECK: ], !prof !3
+
+sw.bb:
+ call void @func2(i32 %N) nounwind
+ br label %sw.epilog
+
+sw.bb1:
+ call void @func4(i32 %N) nounwind
+ br label %sw.epilog
+
+sw2:
+ switch i32 %N, label %sw.epilog [
+ i32 3, label %sw.bb4
+ i32 4, label %sw.bb5
+ ], !prof !5
+
+sw.bb4:
+ call void @func6(i32 %N) nounwind
+ br label %sw.epilog
+
+sw.bb5:
+ call void @func8(i32 %N) nounwind
+ br label %sw.epilog
+
+sw.epilog:
+ ret void
+}
+
+!0 = metadata !{metadata !"branch_weights", i32 64, i32 4}
+!1 = metadata !{metadata !"branch_weights", i32 4, i32 64}
+; CHECK: !0 = metadata !{metadata !"branch_weights", i32 256, i32 4352, i32 16}
+!2 = metadata !{metadata !"branch_weights", i32 4, i32 4, i32 8}
+!3 = metadata !{metadata !"branch_weights", i32 8, i32 8, i32 4}
+; CHECK: !1 = metadata !{metadata !"branch_weights", i32 32, i32 48, i32 96, i32 16}
+!4 = metadata !{metadata !"branch_weights", i32 7, i32 6, i32 4, i32 3}
+!5 = metadata !{metadata !"branch_weights", i32 17, i32 13, i32 9}
+; CHECK: !3 = metadata !{metadata !"branch_weights", i32 7, i32 3, i32 4, i32 6}
diff --git a/test/Transforms/SimplifyCFG/preserve-branchweights.ll b/test/Transforms/SimplifyCFG/preserve-branchweights.ll
index c7917857ee60..beef52700820 100644
--- a/test/Transforms/SimplifyCFG/preserve-branchweights.ll
+++ b/test/Transforms/SimplifyCFG/preserve-branchweights.ll
@@ -79,10 +79,238 @@ Z:
ret void
}
+;; test5 - The case where it jumps to the default target will be removed.
+define void @test5(i32 %M, i32 %N) nounwind uwtable {
+entry:
+ switch i32 %N, label %sw2 [
+ i32 1, label %sw2
+ i32 2, label %sw.bb
+ i32 3, label %sw.bb1
+ ], !prof !3
+; CHECK: test5
+; CHECK: switch i32 %N, label %sw2 [
+; CHECK: i32 3, label %sw.bb1
+; CHECK: i32 2, label %sw.bb
+; CHECK: ], !prof !2
+
+sw.bb:
+ call void @helper(i32 0)
+ br label %sw.epilog
+
+sw.bb1:
+ call void @helper(i32 1)
+ br label %sw.epilog
+
+sw2:
+ call void @helper(i32 2)
+ br label %sw.epilog
+
+sw.epilog:
+ ret void
+}
+
+;; test6 - Some cases of the second switch are pruned during optimization.
+;; Then the second switch will be converted to a branch, finally, the first
+;; switch and the branch will be merged into a single switch.
+define void @test6(i32 %M, i32 %N) nounwind uwtable {
+entry:
+ switch i32 %N, label %sw2 [
+ i32 1, label %sw2
+ i32 2, label %sw.bb
+ i32 3, label %sw.bb1
+ ], !prof !4
+; CHECK: test6
+; CHECK: switch i32 %N, label %sw.epilog
+; CHECK: i32 3, label %sw.bb1
+; CHECK: i32 2, label %sw.bb
+; CHECK: i32 4, label %sw.bb5
+; CHECK: ], !prof !3
+
+sw.bb:
+ call void @helper(i32 0)
+ br label %sw.epilog
+
+sw.bb1:
+ call void @helper(i32 1)
+ br label %sw.epilog
+
+sw2:
+;; Here "case 2" is invalidated since the default case of the first switch
+;; does not include "case 2".
+ switch i32 %N, label %sw.epilog [
+ i32 2, label %sw.bb4
+ i32 4, label %sw.bb5
+ ], !prof !5
+
+sw.bb4:
+ call void @helper(i32 2)
+ br label %sw.epilog
+
+sw.bb5:
+ call void @helper(i32 3)
+ br label %sw.epilog
+
+sw.epilog:
+ ret void
+}
+
+;; This test is based on test1 but swapped the targets of the second branch.
+define void @test1_swap(i1 %a, i1 %b) {
+; CHECK: @test1_swap
+entry:
+ br i1 %a, label %Y, label %X, !prof !0
+; CHECK: br i1 %or.cond, label %Y, label %Z, !prof !4
+
+X:
+ %c = or i1 %b, false
+ br i1 %c, label %Y, label %Z, !prof !1
+
+Y:
+ call void @helper(i32 0)
+ ret void
+
+Z:
+ call void @helper(i32 1)
+ ret void
+}
+
+define void @test7(i1 %a, i1 %b) {
+; CHECK: @test7
+entry:
+ %c = or i1 %b, false
+ br i1 %a, label %Y, label %X, !prof !0
+; CHECK: br i1 %brmerge, label %Y, label %Z, !prof !5
+
+X:
+ br i1 %c, label %Y, label %Z, !prof !6
+
+Y:
+ call void @helper(i32 0)
+ ret void
+
+Z:
+ call void @helper(i32 1)
+ ret void
+}
+
+; Test basic folding to a conditional branch.
+define void @test8(i64 %x, i64 %y) nounwind {
+; CHECK: @test8
+entry:
+ %lt = icmp slt i64 %x, %y
+; CHECK: br i1 %lt, label %a, label %b, !prof !6
+ %qux = select i1 %lt, i32 0, i32 2
+ switch i32 %qux, label %bees [
+ i32 0, label %a
+ i32 1, label %b
+ i32 2, label %b
+ ], !prof !7
+a:
+ call void @helper(i32 0) nounwind
+ ret void
+b:
+ call void @helper(i32 1) nounwind
+ ret void
+bees:
+ call void @helper(i32 2) nounwind
+ ret void
+}
+
+; Test edge splitting when the default target has icmp and unconditinal
+; branch
+define i1 @test9(i32 %x, i32 %y) nounwind {
+; CHECK: @test9
+entry:
+ switch i32 %x, label %bees [
+ i32 0, label %a
+ i32 1, label %end
+ i32 2, label %end
+ ], !prof !7
+; CHECK: switch i32 %x, label %bees [
+; CHECK: i32 0, label %a
+; CHECK: i32 1, label %end
+; CHECK: i32 2, label %end
+; CHECK: i32 92, label %end
+; CHECK: ], !prof !7
+
+a:
+ call void @helper(i32 0) nounwind
+ %reta = icmp slt i32 %x, %y
+ ret i1 %reta
+
+bees:
+ %tmp = icmp eq i32 %x, 92
+ br label %end
+
+end:
+; CHECK: end:
+; CHECK: %ret = phi i1 [ true, %entry ], [ false, %bees ], [ true, %entry ], [ true, %entry ]
+ %ret = phi i1 [ true, %entry ], [%tmp, %bees], [true, %entry]
+ call void @helper(i32 2) nounwind
+ ret i1 %ret
+}
+
+define void @test10(i32 %x) nounwind readnone ssp noredzone {
+entry:
+ switch i32 %x, label %lor.rhs [
+ i32 2, label %lor.end
+ i32 1, label %lor.end
+ i32 3, label %lor.end
+ ], !prof !7
+
+lor.rhs:
+ call void @helper(i32 1) nounwind
+ ret void
+
+lor.end:
+ call void @helper(i32 0) nounwind
+ ret void
+
+; CHECK: test10
+; CHECK: %x.off = add i32 %x, -1
+; CHECK: %switch = icmp ult i32 %x.off, 3
+; CHECK: br i1 %switch, label %lor.end, label %lor.rhs, !prof !8
+}
+
+; Remove dead cases from the switch.
+define void @test11(i32 %x) nounwind {
+ %i = shl i32 %x, 1
+ switch i32 %i, label %a [
+ i32 21, label %b
+ i32 24, label %c
+ ], !prof !8
+; CHECK: %cond = icmp eq i32 %i, 24
+; CHECK: br i1 %cond, label %c, label %a, !prof !9
+
+a:
+ call void @helper(i32 0) nounwind
+ ret void
+b:
+ call void @helper(i32 1) nounwind
+ ret void
+c:
+ call void @helper(i32 2) nounwind
+ ret void
+}
+
!0 = metadata !{metadata !"branch_weights", i32 3, i32 5}
!1 = metadata !{metadata !"branch_weights", i32 1, i32 1}
!2 = metadata !{metadata !"branch_weights", i32 1, i32 2}
+!3 = metadata !{metadata !"branch_weights", i32 4, i32 3, i32 2, i32 1}
+!4 = metadata !{metadata !"branch_weights", i32 4, i32 3, i32 2, i32 1}
+!5 = metadata !{metadata !"branch_weights", i32 7, i32 6, i32 5}
+!6 = metadata !{metadata !"branch_weights", i32 1, i32 3}
+!7 = metadata !{metadata !"branch_weights", i32 33, i32 9, i32 8, i32 7}
+!8 = metadata !{metadata !"branch_weights", i32 33, i32 9, i32 8}
; CHECK: !0 = metadata !{metadata !"branch_weights", i32 5, i32 11}
; CHECK: !1 = metadata !{metadata !"branch_weights", i32 1, i32 5}
-; CHECK-NOT: !2
+; CHECK: !2 = metadata !{metadata !"branch_weights", i32 7, i32 1, i32 2}
+; CHECK: !3 = metadata !{metadata !"branch_weights", i32 49, i32 12, i32 24, i32 35}
+; CHECK: !4 = metadata !{metadata !"branch_weights", i32 11, i32 5}
+; CHECK: !5 = metadata !{metadata !"branch_weights", i32 17, i32 15}
+; CHECK: !6 = metadata !{metadata !"branch_weights", i32 9, i32 7}
+; CHECK: !7 = metadata !{metadata !"branch_weights", i32 17, i32 9, i32 8, i32 7, i32 17}
+; CHECK: !8 = metadata !{metadata !"branch_weights", i32 24, i32 33}
+; CHECK: !9 = metadata !{metadata !"branch_weights", i32 8, i32 33}
+; CHECK-NOT: !9
diff --git a/test/Transforms/SimplifyCFG/sink-common-code.ll b/test/Transforms/SimplifyCFG/sink-common-code.ll
new file mode 100644
index 000000000000..28d727938288
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/sink-common-code.ll
@@ -0,0 +1,53 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+define zeroext i1 @test1(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+; CHECK: test1
+; CHECK: add
+; CHECK: select
+; CHECK: icmp
+; CHECK-NOT: br
+if.then:
+ %cmp = icmp uge i32 %blksA, %nblks
+ %frombool1 = zext i1 %cmp to i8
+ br label %if.end
+
+if.else:
+ %add = add i32 %nblks, %blksB
+ %cmp2 = icmp ule i32 %add, %blksA
+ %frombool3 = zext i1 %cmp2 to i8
+ br label %if.end
+
+if.end:
+ %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.else ]
+ %tobool4 = icmp ne i8 %obeys.0, 0
+ ret i1 %tobool4
+}
+
+define zeroext i1 @test2(i1 zeroext %flag, i32 %blksA, i32 %blksB, i32 %nblks) {
+entry:
+ br i1 %flag, label %if.then, label %if.else
+
+; CHECK: test2
+; CHECK: add
+; CHECK: select
+; CHECK: icmp
+; CHECK-NOT: br
+if.then:
+ %cmp = icmp uge i32 %blksA, %nblks
+ %frombool1 = zext i1 %cmp to i8
+ br label %if.end
+
+if.else:
+ %add = add i32 %nblks, %blksB
+ %cmp2 = icmp uge i32 %blksA, %add
+ %frombool3 = zext i1 %cmp2 to i8
+ br label %if.end
+
+if.end:
+ %obeys.0 = phi i8 [ %frombool1, %if.then ], [ %frombool3, %if.else ]
+ %tobool4 = icmp ne i8 %obeys.0, 0
+ ret i1 %tobool4
+}
diff --git a/test/Transforms/SimplifyLibCalls/2009-02-12-StrTo.ll b/test/Transforms/SimplifyLibCalls/2009-02-12-StrTo.ll
deleted file mode 100644
index 2717228f7ee1..000000000000
--- a/test/Transforms/SimplifyLibCalls/2009-02-12-StrTo.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-; Test that we add nocapture to the declaration, and to the second call only.
-
-; CHECK: declare float @strtol(i8*, i8** nocapture, i32) nounwind
-declare float @strtol(i8* %s, i8** %endptr, i32 %base)
-
-define void @foo(i8* %x, i8** %endptr) {
-; CHECK: call float @strtol(i8* %x, i8** %endptr, i32 10)
- call float @strtol(i8* %x, i8** %endptr, i32 10)
-; CHECK: %2 = call float @strtol(i8* nocapture %x, i8** null, i32 10)
- call float @strtol(i8* %x, i8** null, i32 10)
- ret void
-}
diff --git a/test/Transforms/SimplifyLibCalls/FFS.ll b/test/Transforms/SimplifyLibCalls/FFS.ll
index e38d78349d43..6aecbeacd7e6 100644
--- a/test/Transforms/SimplifyLibCalls/FFS.ll
+++ b/test/Transforms/SimplifyLibCalls/FFS.ll
@@ -1,6 +1,7 @@
-; Test that the ToAsciiOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN: not grep "call.*@ffs"
+; Test that FFSOpt works correctly
+; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
+
+; CHECK-NOT: call{{.*}}@ffs
@non_const = external global i32 ; <i32*> [#uses=1]
@@ -34,3 +35,11 @@ define i32 @a(i64) nounwind {
%2 = call i32 @ffsll(i64 %0) ; <i32> [#uses=1]
ret i32 %2
}
+
+; PR13028
+define i32 @b() nounwind {
+ %ffs = call i32 @ffsll(i64 0)
+ ret i32 %ffs
+; CHECK: @b
+; CHECK-NEXT: ret i32 0
+}
diff --git a/test/Transforms/SimplifyLibCalls/StpCpy.ll b/test/Transforms/SimplifyLibCalls/StpCpy.ll
deleted file mode 100644
index 914b0955bc90..000000000000
--- a/test/Transforms/SimplifyLibCalls/StpCpy.ll
+++ /dev/null
@@ -1,43 +0,0 @@
-; Test that the StpCpyOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-; This transformation requires the pointer size, as it assumes that size_t is
-; the size of a pointer.
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
-
-@hello = constant [6 x i8] c"hello\00"
-
-declare i8* @stpcpy(i8*, i8*)
-
-declare i8* @__stpcpy_chk(i8*, i8*, i32) nounwind
-
-declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly
-
-define i32 @t1() {
-; CHECK: @t1
- %target = alloca [1024 x i8]
- %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0
- %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0
- %rslt1 = call i8* @stpcpy( i8* %arg1, i8* %arg2 )
-; CHECK: @llvm.memcpy.p0i8.p0i8.i32
- ret i32 0
-}
-
-define i32 @t2() {
-; CHECK: @t2
- %target = alloca [1024 x i8]
- %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0
- %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0
- %tmp1 = call i32 @llvm.objectsize.i32(i8* %arg1, i1 false)
- %rslt1 = call i8* @__stpcpy_chk(i8* %arg1, i8* %arg2, i32 %tmp1)
-; CHECK: @__memcpy_chk
- ret i32 0
-}
-
-define i8* @t3(i8* %arg) {
-; CHECK: @t3
- %stpcpy = tail call i8* @stpcpy(i8* %arg, i8* %arg)
-; CHECK: [[LEN:%[a-z]+]] = call i32 @strlen(i8* %arg)
-; CHECK-NEXT: getelementptr inbounds i8* %arg, i32 [[LEN]]
- ret i8* %stpcpy
-}
diff --git a/test/Transforms/SimplifyLibCalls/StrCat.ll b/test/Transforms/SimplifyLibCalls/StrCat.ll
deleted file mode 100644
index 3ea691a3cfbe..000000000000
--- a/test/Transforms/SimplifyLibCalls/StrCat.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; Test that the StrCatOptimizer works correctly
-; PR3661
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN: not grep "call.*strcat"
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN: grep "puts.*%arg1"
-
-; This transformation requires the pointer size, as it assumes that size_t is
-; the size of a pointer.
-target datalayout = "-p:64:64:64"
-
-@hello = constant [6 x i8] c"hello\00" ; <[6 x i8]*> [#uses=1]
-@null = constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1]
-@null_hello = constant [7 x i8] c"\00hello\00" ; <[7 x i8]*> [#uses=1]
-
-declare i8* @strcat(i8*, i8*)
-
-declare i32 @puts(i8*)
-
-define i32 @main() {
- %target = alloca [1024 x i8] ; <[1024 x i8]*> [#uses=1]
- %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0 ; <i8*> [#uses=2]
- store i8 0, i8* %arg1
- %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0 ; <i8*> [#uses=1]
- %rslt1 = call i8* @strcat( i8* %arg1, i8* %arg2 ) ; <i8*> [#uses=1]
- %arg3 = getelementptr [1 x i8]* @null, i32 0, i32 0 ; <i8*> [#uses=1]
- %rslt2 = call i8* @strcat( i8* %rslt1, i8* %arg3 ) ; <i8*> [#uses=1]
- %arg4 = getelementptr [7 x i8]* @null_hello, i32 0, i32 0 ; <i8*> [#uses=1]
- %rslt3 = call i8* @strcat( i8* %rslt2, i8* %arg4 ) ; <i8*> [#uses=1]
- call i32 @puts( i8* %rslt3 ) ; <i32>:1 [#uses=0]
- ret i32 0
-}
-
diff --git a/test/Transforms/SimplifyLibCalls/StrChr.ll b/test/Transforms/SimplifyLibCalls/StrChr.ll
deleted file mode 100644
index eaabeb2feb8f..000000000000
--- a/test/Transforms/SimplifyLibCalls/StrChr.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; Test that the StrChrOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-; This transformation requires the pointer size, as it assumes that size_t is
-; the size of a pointer.
-target datalayout = "-p:64:64:64"
-
-@hello = constant [14 x i8] c"hello world\5Cn\00"
-@null = constant [1 x i8] zeroinitializer
-
-declare i8* @strchr(i8*, i32)
-
-define i32 @foo(i32 %index) {
- %hello_p = getelementptr [14 x i8]* @hello, i32 0, i32 0
- %null_p = getelementptr [1 x i8]* @null, i32 0, i32 0
- %world = call i8* @strchr(i8* %hello_p, i32 119)
-; CHECK: getelementptr i8* %hello_p, i64 6
- %ignore = call i8* @strchr(i8* %null_p, i32 119)
-; CHECK-NOT: call i8* strchr
- %null = call i8* @strchr(i8* %hello_p, i32 0)
-; CHECK: getelementptr i8* %hello_p, i64 13
- %result = call i8* @strchr(i8* %hello_p, i32 %index)
-; CHECK: call i8* @memchr(i8* %hello_p, i32 %index, i64 14)
- ret i32 %index
-}
-
diff --git a/test/Transforms/SimplifyLibCalls/StrCmp.ll b/test/Transforms/SimplifyLibCalls/StrCmp.ll
deleted file mode 100644
index 60854d76c97a..000000000000
--- a/test/Transforms/SimplifyLibCalls/StrCmp.ll
+++ /dev/null
@@ -1,65 +0,0 @@
-; Test that the StrCmpOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-
-@hello = constant [6 x i8] c"hello\00" ; <[6 x i8]*> [#uses=1]
-@hell = constant [5 x i8] c"hell\00" ; <[5 x i8]*> [#uses=1]
-@bell = constant [5 x i8] c"bell\00" ; <[5 x i8]*> [#uses=1]
-@null = constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1]
-
-declare i32 @strcmp(i8*, i8*)
-
-; strcmp("", x) -> -*x
-define i32 @test1(i8* %str) {
- %temp1 = call i32 @strcmp(i8* getelementptr inbounds ([1 x i8]* @null, i32 0, i32 0), i8* %str)
- ret i32 %temp1
- ; CHECK: @test1
- ; CHECK: %strcmpload = load i8* %str
- ; CHECK: %1 = zext i8 %strcmpload to i32
- ; CHECK: %temp1 = sub i32 0, %1
- ; CHECK: ret i32 %temp1
-}
-
-; strcmp(x, "") -> *x
-define i32 @test2(i8* %str) {
- %temp1 = call i32 @strcmp(i8* %str, i8* getelementptr inbounds ([1 x i8]* @null, i32 0, i32 0))
- ret i32 %temp1
- ; CHECK: @test2
- ; CHECK: %strcmpload = load i8* %str
- ; CHECK: %temp1 = zext i8 %strcmpload to i32
- ; CHECK: ret i32 %temp1
-}
-
-; strcmp(x, y) -> cnst
-define i32 @test3() {
- %temp1 = call i32 @strcmp(i8* getelementptr inbounds ([5 x i8]* @hell, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8]* @hello, i32 0, i32 0))
- ret i32 %temp1
- ; CHECK: @test3
- ; CHECK: ret i32 -1
-}
-define i32 @test4() {
- %temp1 = call i32 @strcmp(i8* getelementptr inbounds ([5 x i8]* @hell, i32 0, i32 0), i8* getelementptr inbounds ([1 x i8]* @null, i32 0, i32 0))
- ret i32 %temp1
- ; CHECK: @test4
- ; CHECK: ret i32 1
-}
-
-; strcmp(x, y) -> memcmp(x, y, <known length>)
-; (This transform is rather difficult to trigger in a useful manner)
-define i32 @test5(i1 %b) {
- %sel = select i1 %b, i8* getelementptr inbounds ([5 x i8]* @hell, i32 0, i32 0), i8* getelementptr inbounds ([5 x i8]* @bell, i32 0, i32 0)
- %temp1 = call i32 @strcmp(i8* getelementptr inbounds ([6 x i8]* @hello, i32 0, i32 0), i8* %sel)
- ret i32 %temp1
- ; CHECK: @test5
- ; CHECK: %memcmp = call i32 @memcmp(i8* getelementptr inbounds ([6 x i8]* @hello, i32 0, i32 0), i8* %sel, i32 5)
- ; CHECK: ret i32 %memcmp
-}
-
-; strcmp(x,x) -> 0
-define i32 @test6(i8* %str) {
- %temp1 = call i32 @strcmp(i8* %str, i8* %str)
- ret i32 %temp1
- ; CHECK: @test6
- ; CHECK: ret i32 0
-}
diff --git a/test/Transforms/SimplifyLibCalls/StrCpy.ll b/test/Transforms/SimplifyLibCalls/StrCpy.ll
deleted file mode 100644
index 83406ff8f868..000000000000
--- a/test/Transforms/SimplifyLibCalls/StrCpy.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; Test that the StrCpyOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-; This transformation requires the pointer size, as it assumes that size_t is
-; the size of a pointer.
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
-
-@hello = constant [6 x i8] c"hello\00"
-
-declare i8* @strcpy(i8*, i8*)
-
-declare i8* @__strcpy_chk(i8*, i8*, i32) nounwind
-
-declare i32 @llvm.objectsize.i32(i8*, i1) nounwind readonly
-
-; rdar://6839935
-
-define i32 @t1() {
-; CHECK: @t1
- %target = alloca [1024 x i8]
- %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0
- %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0
- %rslt1 = call i8* @strcpy( i8* %arg1, i8* %arg2 )
-; CHECK: @llvm.memcpy.p0i8.p0i8.i32
- ret i32 0
-}
-
-define i32 @t2() {
-; CHECK: @t2
- %target = alloca [1024 x i8]
- %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0
- %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0
- %tmp1 = call i32 @llvm.objectsize.i32(i8* %arg1, i1 false)
- %rslt1 = call i8* @__strcpy_chk(i8* %arg1, i8* %arg2, i32 %tmp1)
-; CHECK: @__memcpy_chk
- ret i32 0
-}
diff --git a/test/Transforms/SimplifyLibCalls/StrLen.ll b/test/Transforms/SimplifyLibCalls/StrLen.ll
deleted file mode 100644
index 4a20bbd2ce81..000000000000
--- a/test/Transforms/SimplifyLibCalls/StrLen.ll
+++ /dev/null
@@ -1,62 +0,0 @@
-; Test that the StrCatOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN: not grep "call.*strlen"
-
-target datalayout = "e-p:32:32"
-@hello = constant [6 x i8] c"hello\00" ; <[6 x i8]*> [#uses=3]
-@null = constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=3]
-@null_hello = constant [7 x i8] c"\00hello\00" ; <[7 x i8]*> [#uses=1]
-@nullstring = constant i8 0
-
-declare i32 @strlen(i8*)
-
-define i32 @test1() {
- %hello_p = getelementptr [6 x i8]* @hello, i32 0, i32 0 ; <i8*> [#uses=1]
- %hello_l = call i32 @strlen( i8* %hello_p ) ; <i32> [#uses=1]
- ret i32 %hello_l
-}
-
-define i32 @test2() {
- %null_p = getelementptr [1 x i8]* @null, i32 0, i32 0 ; <i8*> [#uses=1]
- %null_l = call i32 @strlen( i8* %null_p ) ; <i32> [#uses=1]
- ret i32 %null_l
-}
-
-define i32 @test3() {
- %null_hello_p = getelementptr [7 x i8]* @null_hello, i32 0, i32 0 ; <i8*> [#uses=1]
- %null_hello_l = call i32 @strlen( i8* %null_hello_p ) ; <i32> [#uses=1]
- ret i32 %null_hello_l
-}
-
-define i1 @test4() {
- %hello_p = getelementptr [6 x i8]* @hello, i32 0, i32 0 ; <i8*> [#uses=1]
- %hello_l = call i32 @strlen( i8* %hello_p ) ; <i32> [#uses=1]
- %eq_hello = icmp eq i32 %hello_l, 0 ; <i1> [#uses=1]
- ret i1 %eq_hello
-}
-
-define i1 @test5() {
- %null_p = getelementptr [1 x i8]* @null, i32 0, i32 0 ; <i8*> [#uses=1]
- %null_l = call i32 @strlen( i8* %null_p ) ; <i32> [#uses=1]
- %eq_null = icmp eq i32 %null_l, 0 ; <i1> [#uses=1]
- ret i1 %eq_null
-}
-
-define i1 @test6() {
- %hello_p = getelementptr [6 x i8]* @hello, i32 0, i32 0 ; <i8*> [#uses=1]
- %hello_l = call i32 @strlen( i8* %hello_p ) ; <i32> [#uses=1]
- %ne_hello = icmp ne i32 %hello_l, 0 ; <i1> [#uses=1]
- ret i1 %ne_hello
-}
-
-define i1 @test7() {
- %null_p = getelementptr [1 x i8]* @null, i32 0, i32 0 ; <i8*> [#uses=1]
- %null_l = call i32 @strlen( i8* %null_p ) ; <i32> [#uses=1]
- %ne_null = icmp ne i32 %null_l, 0 ; <i1> [#uses=1]
- ret i1 %ne_null
-}
-
-define i32 @test8() {
- %len = tail call i32 @strlen(i8* @nullstring) nounwind
- ret i32 %len
-}
diff --git a/test/Transforms/SimplifyLibCalls/StrNCat.ll b/test/Transforms/SimplifyLibCalls/StrNCat.ll
deleted file mode 100644
index 073792b96a1b..000000000000
--- a/test/Transforms/SimplifyLibCalls/StrNCat.ll
+++ /dev/null
@@ -1,31 +0,0 @@
-; Test that the StrNCatOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN: not grep "call.*strncat"
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN: grep "puts.*%arg1"
-
-; This transformation requires the pointer size, as it assumes that size_t is
-; the size of a pointer.
-target datalayout = "-p:64:64:64"
-
-@hello = constant [6 x i8] c"hello\00" ; <[6 x i8]*> [#uses=1]
-@null = constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1]
-@null_hello = constant [7 x i8] c"\00hello\00" ; <[7 x i8]*> [#uses=1]
-
-declare i8* @strncat(i8*, i8*, i32)
-
-declare i32 @puts(i8*)
-
-define i32 @main() {
- %target = alloca [1024 x i8] ; <[1024 x i8]*> [#uses=1]
- %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0 ; <i8*> [#uses=2]
- store i8 0, i8* %arg1
- %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0 ; <i8*> [#uses=1]
- %rslt1 = call i8* @strncat( i8* %arg1, i8* %arg2, i32 6 ) ; <i8*> [#uses=1]
- %arg3 = getelementptr [1 x i8]* @null, i32 0, i32 0 ; <i8*> [#uses=1]
- %rslt2 = call i8* @strncat( i8* %rslt1, i8* %arg3, i32 42 ) ; <i8*> [#uses=1]
- %arg4 = getelementptr [7 x i8]* @null_hello, i32 0, i32 0 ; <i8*> [#uses=1]
- %rslt3 = call i8* @strncat( i8* %rslt2, i8* %arg4, i32 42 ) ; <i8*> [#uses=1]
- call i32 @puts( i8* %rslt3 ) ; <i32>:1 [#uses=0]
- ret i32 0
-}
diff --git a/test/Transforms/SimplifyLibCalls/StrNCmp.ll b/test/Transforms/SimplifyLibCalls/StrNCmp.ll
deleted file mode 100644
index 0b2a501a3c8a..000000000000
--- a/test/Transforms/SimplifyLibCalls/StrNCmp.ll
+++ /dev/null
@@ -1,78 +0,0 @@
-; Test that the StrCmpOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-
-@hello = constant [6 x i8] c"hello\00" ; <[6 x i8]*> [#uses=1]
-@hell = constant [5 x i8] c"hell\00" ; <[5 x i8]*> [#uses=1]
-@bell = constant [5 x i8] c"bell\00" ; <[5 x i8]*> [#uses=1]
-@null = constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1]
-
-declare i32 @strncmp(i8*, i8*, i32)
-
-; strcmp("", x) -> -*x
-define i32 @test1(i8* %str) {
- %temp1 = call i32 @strncmp(i8* getelementptr inbounds ([1 x i8]* @null, i32 0, i32 0), i8* %str, i32 10)
- ret i32 %temp1
- ; CHECK: @test1
- ; CHECK: %strcmpload = load i8* %str
- ; CHECK: %1 = zext i8 %strcmpload to i32
- ; CHECK: %temp1 = sub i32 0, %1
- ; CHECK: ret i32 %temp1
-}
-
-; strcmp(x, "") -> *x
-define i32 @test2(i8* %str) {
- %temp1 = call i32 @strncmp(i8* %str, i8* getelementptr inbounds ([1 x i8]* @null, i32 0, i32 0), i32 10)
- ret i32 %temp1
- ; CHECK: @test2
- ; CHECK: %strcmpload = load i8* %str
- ; CHECK: %temp1 = zext i8 %strcmpload to i32
- ; CHECK: ret i32 %temp1
-}
-
-; strncmp(x, y, n) -> cnst
-define i32 @test3() {
- %temp1 = call i32 @strncmp(i8* getelementptr inbounds ([5 x i8]* @hell, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8]* @hello, i32 0, i32 0), i32 10)
- ret i32 %temp1
- ; CHECK: @test3
- ; CHECK: ret i32 -1
-}
-define i32 @test4() {
- %temp1 = call i32 @strncmp(i8* getelementptr inbounds ([5 x i8]* @hell, i32 0, i32 0), i8* getelementptr inbounds ([1 x i8]* @null, i32 0, i32 0), i32 10)
- ret i32 %temp1
- ; CHECK: @test4
- ; CHECK: ret i32 1
-}
-define i32 @test5() {
- %temp1 = call i32 @strncmp(i8* getelementptr inbounds ([5 x i8]* @hell, i32 0, i32 0), i8* getelementptr inbounds ([6 x i8]* @hello, i32 0, i32 0), i32 4)
- ret i32 %temp1
- ; CHECK: @test5
- ; CHECK: ret i32 0
-}
-
-; strncmp(x,y,1) -> memcmp(x,y,1)
-define i32 @test6(i8* %str1, i8* %str2) {
- %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 1)
- ret i32 %temp1
- ; CHECK: @test6
- ; CHECK: load i8*
- ; CHECK: load i8*
- ; CHECK: sub i32
-}
-
-; strncmp(x,y,0) -> 0
-define i32 @test7(i8* %str1, i8* %str2) {
- %temp1 = call i32 @strncmp(i8* %str1, i8* %str2, i32 0)
- ret i32 %temp1
- ; CHECK: @test7
- ; CHECK: ret i32 0
-}
-
-; strncmp(x,x,n) -> 0
-define i32 @test8(i8* %str, i32 %n) {
- %temp1 = call i32 @strncmp(i8* %str, i8* %str, i32 %n)
- ret i32 %temp1
- ; CHECK: @test8
- ; CHECK: ret i32 0
-}
diff --git a/test/Transforms/SimplifyLibCalls/StrNCpy.ll b/test/Transforms/SimplifyLibCalls/StrNCpy.ll
deleted file mode 100644
index 4e47b31a6afa..000000000000
--- a/test/Transforms/SimplifyLibCalls/StrNCpy.ll
+++ /dev/null
@@ -1,29 +0,0 @@
-; Test that the StrNCpyOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | \
-; RUN: not grep "call.*strncpy"
-
-; This transformation requires the pointer size, as it assumes that size_t is
-; the size of a pointer.
-target datalayout = "-p:64:64:64"
-
-@hello = constant [6 x i8] c"hello\00" ; <[6 x i8]*> [#uses=1]
-@null = constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1]
-@null_hello = constant [7 x i8] c"\00hello\00" ; <[7 x i8]*> [#uses=1]
-
-declare i8* @strncpy(i8*, i8*, i32)
-
-declare i32 @puts(i8*)
-
-define i32 @main() {
- %target = alloca [1024 x i8] ; <[1024 x i8]*> [#uses=1]
- %arg1 = getelementptr [1024 x i8]* %target, i32 0, i32 0 ; <i8*> [#uses=2]
- store i8 0, i8* %arg1
- %arg2 = getelementptr [6 x i8]* @hello, i32 0, i32 0 ; <i8*> [#uses=1]
- %rslt1 = call i8* @strncpy( i8* %arg1, i8* %arg2, i32 6 ) ; <i8*> [#uses=1]
- %arg3 = getelementptr [1 x i8]* @null, i32 0, i32 0 ; <i8*> [#uses=1]
- %rslt2 = call i8* @strncpy( i8* %rslt1, i8* %arg3, i32 42 ) ; <i8*> [#uses=1]
- %arg4 = getelementptr [7 x i8]* @null_hello, i32 0, i32 0 ; <i8*> [#uses=1]
- %rslt3 = call i8* @strncpy( i8* %rslt2, i8* %arg4, i32 42 ) ; <i8*> [#uses=1]
- call i32 @puts( i8* %rslt3 ) ; <i32>:1 [#uses=0]
- ret i32 0
-}
diff --git a/test/Transforms/SimplifyLibCalls/StrPBrk.ll b/test/Transforms/SimplifyLibCalls/StrPBrk.ll
deleted file mode 100644
index 29c3b7477b47..000000000000
--- a/test/Transforms/SimplifyLibCalls/StrPBrk.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-target datalayout = "-p:64:64:64"
-
-@hello = constant [12 x i8] c"hello world\00"
-@w = constant [2 x i8] c"w\00"
-@null = constant [1 x i8] zeroinitializer
-
-declare i8* @strpbrk(i8*, i8*)
-
-define void @test(i8* %s1, i8* %s2) {
- %hello_p = getelementptr [12 x i8]* @hello, i32 0, i32 0
- %w_p = getelementptr [2 x i8]* @w, i32 0, i32 0
- %null_p = getelementptr [1 x i8]* @null, i32 0, i32 0
- %test1 = call i8* @strpbrk(i8* %null_p, i8* %s2)
- %test2 = call i8* @strpbrk(i8* %s1, i8* %null_p)
-; CHECK-NOT: call i8* @strpbrk
- %test3 = call i8* @strpbrk(i8* %s1, i8* %w_p)
-; CHECK: call i8* @strchr(i8* %s1, i32 119)
- %test4 = call i8* @strpbrk(i8* %hello_p, i8* %w_p)
-; CHECK: getelementptr i8* %hello_p, i64 6
- %test5 = call i8* @strpbrk(i8* %s1, i8* %s2)
-; CHECK: call i8* @strpbrk(i8* %s1, i8* %s2)
- ret void
-}
diff --git a/test/Transforms/SimplifyLibCalls/StrRChr.ll b/test/Transforms/SimplifyLibCalls/StrRChr.ll
deleted file mode 100644
index 2259fc0289fb..000000000000
--- a/test/Transforms/SimplifyLibCalls/StrRChr.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; Test that the StrRChrOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-target datalayout = "-p:64:64:64"
-
-@hello = constant [14 x i8] c"hello world\5Cn\00"
-@null = constant [1 x i8] zeroinitializer
-
-declare i8* @strrchr(i8*, i32)
-
-define void @foo(i8* %bar) {
- %hello_p = getelementptr [14 x i8]* @hello, i32 0, i32 0
- %null_p = getelementptr [1 x i8]* @null, i32 0, i32 0
- %world = call i8* @strrchr(i8* %hello_p, i32 119)
-; CHECK: getelementptr i8* %hello_p, i64 6
- %ignore = call i8* @strrchr(i8* %null_p, i32 119)
-; CHECK-NOT: call i8* strrchr
- %null = call i8* @strrchr(i8* %hello_p, i32 0)
-; CHECK: getelementptr i8* %hello_p, i64 13
- %strchr = call i8* @strrchr(i8* %bar, i32 0)
-; CHECK: call i8* @strchr(i8* %bar, i32 0)
- ret void
-}
diff --git a/test/Transforms/SimplifyLibCalls/StrSpn.ll b/test/Transforms/SimplifyLibCalls/StrSpn.ll
deleted file mode 100644
index 800c19088337..000000000000
--- a/test/Transforms/SimplifyLibCalls/StrSpn.ll
+++ /dev/null
@@ -1,41 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-target datalayout = "-p:64:64:64"
-
-@abcba = constant [6 x i8] c"abcba\00"
-@abc = constant [4 x i8] c"abc\00"
-@null = constant [1 x i8] zeroinitializer
-
-declare i64 @strspn(i8*, i8*)
-
-define i64 @testspn(i8* %s1, i8* %s2) {
- %abcba_p = getelementptr [6 x i8]* @abcba, i32 0, i32 0
- %abc_p = getelementptr [4 x i8]* @abc, i32 0, i32 0
- %null_p = getelementptr [1 x i8]* @null, i32 0, i32 0
- %test1 = call i64 @strspn(i8* %s1, i8* %null_p)
- %test2 = call i64 @strspn(i8* %null_p, i8* %s2)
- %test3 = call i64 @strspn(i8* %abcba_p, i8* %abc_p)
-; CHECK-NOT: call i64 @strspn
- %test4 = call i64 @strspn(i8* %s1, i8* %s2)
-; CHECK: call i64 @strspn(i8* %s1, i8* %s2)
- ret i64 %test3
-; CHECK: ret i64 5
-}
-
-declare i64 @strcspn(i8*, i8*)
-
-define i64 @testcspn(i8* %s1, i8* %s2) {
- %abcba_p = getelementptr [6 x i8]* @abcba, i32 0, i32 0
- %abc_p = getelementptr [4 x i8]* @abc, i32 0, i32 0
- %null_p = getelementptr [1 x i8]* @null, i32 0, i32 0
- %test1 = call i64 @strcspn(i8* %s1, i8* %null_p)
-; CHECK: call i64 @strlen(i8* %s1)
- %test2 = call i64 @strcspn(i8* %null_p, i8* %s2)
- %test3 = call i64 @strcspn(i8* %abcba_p, i8* %abc_p)
-; CHECK-NOT: call i64 @strcspn
- %test4 = call i64 @strcspn(i8* %s1, i8* %s2)
-; CHECK: call i64 @strcspn(i8* %s1, i8* %s2)
- %add0 = add i64 %test1, %test3
-; CHECK: add i64 %{{.+}}, 0
- ret i64 %add0
-}
diff --git a/test/Transforms/SimplifyLibCalls/StrStr.ll b/test/Transforms/SimplifyLibCalls/StrStr.ll
deleted file mode 100644
index eefd2e8006ab..000000000000
--- a/test/Transforms/SimplifyLibCalls/StrStr.ll
+++ /dev/null
@@ -1,60 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-; PR5783
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
-target triple = "i386-apple-darwin9.0"
-
-@.str = private constant [1 x i8] zeroinitializer ; <[1 x i8]*> [#uses=1]
-@.str1 = private constant [2 x i8] c"a\00" ; <[2 x i8]*> [#uses=1]
-@.str2 = private constant [6 x i8] c"abcde\00" ; <[6 x i8]*> [#uses=1]
-@.str3 = private constant [4 x i8] c"bcd\00" ; <[4 x i8]*> [#uses=1]
-
-define i8* @test1(i8* %P) nounwind readonly {
-entry:
- %call = tail call i8* @strstr(i8* %P, i8* getelementptr inbounds ([1 x i8]* @.str, i32 0, i32 0)) nounwind ; <i8*> [#uses=1]
- ret i8* %call
-; strstr(P, "") -> P
-; CHECK: @test1
-; CHECK: ret i8* %P
-}
-
-declare i8* @strstr(i8*, i8* nocapture) nounwind readonly
-
-define i8* @test2(i8* %P) nounwind readonly {
-entry:
- %call = tail call i8* @strstr(i8* %P, i8* getelementptr inbounds ([2 x i8]* @.str1, i32 0, i32 0)) nounwind ; <i8*> [#uses=1]
- ret i8* %call
-; strstr(P, "a") -> strchr(P, 'a')
-; CHECK: @test2
-; CHECK: @strchr(i8* %P, i32 97)
-}
-
-define i8* @test3(i8* nocapture %P) nounwind readonly {
-entry:
- %call = tail call i8* @strstr(i8* getelementptr inbounds ([6 x i8]* @.str2, i32 0, i32 0), i8* getelementptr inbounds ([4 x i8]* @.str3, i32 0, i32 0)) nounwind ; <i8*> [#uses=1]
- ret i8* %call
-; strstr("abcde", "bcd") -> "abcde"+1
-; CHECK: @test3
-; CHECK: getelementptr inbounds ([6 x i8]* @.str2, i32 0, i64 1)
-}
-
-define i8* @test4(i8* %P) nounwind readonly {
-entry:
- %call = tail call i8* @strstr(i8* %P, i8* %P) nounwind ; <i8*> [#uses=1]
- ret i8* %call
-; strstr(P, P) -> P
-; CHECK: @test4
-; CHECK: ret i8* %P
-}
-
-define i1 @test5(i8* %P, i8* %Q) nounwind readonly {
-entry:
- %call = tail call i8* @strstr(i8* %P, i8* %Q) nounwind ; <i8*> [#uses=1]
- %cmp = icmp eq i8* %call, %P
- ret i1 %cmp
-; CHECK: @test5
-; CHECK: [[LEN:%[a-z]+]] = call {{i[0-9]+}} @strlen(i8* %Q)
-; CHECK: [[NCMP:%[a-z]+]] = call {{i[0-9]+}} @strncmp(i8* %P, i8* %Q, {{i[0-9]+}} [[LEN]])
-; CHECK: icmp eq {{i[0-9]+}} [[NCMP]], 0
-; CHECK: ret i1
-}
diff --git a/test/Transforms/SimplifyLibCalls/double-float-shrink.ll b/test/Transforms/SimplifyLibCalls/double-float-shrink.ll
new file mode 100644
index 000000000000..b4ab8b4ceb9d
--- /dev/null
+++ b/test/Transforms/SimplifyLibCalls/double-float-shrink.ll
@@ -0,0 +1,333 @@
+; RUN: opt < %s -simplify-libcalls -enable-double-float-shrink -S | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define float @acos_test(float %f) nounwind readnone {
+; CHECK: acos_test
+ %conv = fpext float %f to double
+ %call = call double @acos(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @acosf(float %f)
+}
+
+define double @acos_test2(float %f) nounwind readnone {
+; CHECK: acos_test2
+ %conv = fpext float %f to double
+ %call = call double @acos(double %conv)
+ ret double %call
+; CHECK: call double @acos(double %conv)
+}
+
+define float @acosh_test(float %f) nounwind readnone {
+; CHECK: acosh_test
+ %conv = fpext float %f to double
+ %call = call double @acosh(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @acoshf(float %f)
+}
+
+define double @acosh_test2(float %f) nounwind readnone {
+; CHECK: acosh_test2
+ %conv = fpext float %f to double
+ %call = call double @acosh(double %conv)
+ ret double %call
+; CHECK: call double @acosh(double %conv)
+}
+
+define float @asin_test(float %f) nounwind readnone {
+; CHECK: asin_test
+ %conv = fpext float %f to double
+ %call = call double @asin(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @asinf(float %f)
+}
+
+define double @asin_test2(float %f) nounwind readnone {
+; CHECK: asin_test2
+ %conv = fpext float %f to double
+ %call = call double @asin(double %conv)
+ ret double %call
+; CHECK: call double @asin(double %conv)
+}
+
+define float @asinh_test(float %f) nounwind readnone {
+; CHECK: asinh_test
+ %conv = fpext float %f to double
+ %call = call double @asinh(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @asinhf(float %f)
+}
+
+define double @asinh_test2(float %f) nounwind readnone {
+; CHECK: asinh_test2
+ %conv = fpext float %f to double
+ %call = call double @asinh(double %conv)
+ ret double %call
+; CHECK: call double @asinh(double %conv)
+}
+
+define float @atan_test(float %f) nounwind readnone {
+; CHECK: atan_test
+ %conv = fpext float %f to double
+ %call = call double @atan(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @atanf(float %f)
+}
+
+define double @atan_test2(float %f) nounwind readnone {
+; CHECK: atan_test2
+ %conv = fpext float %f to double
+ %call = call double @atan(double %conv)
+ ret double %call
+; CHECK: call double @atan(double %conv)
+}
+define float @atanh_test(float %f) nounwind readnone {
+; CHECK: atanh_test
+ %conv = fpext float %f to double
+ %call = call double @atanh(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @atanhf(float %f)
+}
+
+define double @atanh_test2(float %f) nounwind readnone {
+; CHECK: atanh_test2
+ %conv = fpext float %f to double
+ %call = call double @atanh(double %conv)
+ ret double %call
+; CHECK: call double @atanh(double %conv)
+}
+define float @cbrt_test(float %f) nounwind readnone {
+; CHECK: cbrt_test
+ %conv = fpext float %f to double
+ %call = call double @cbrt(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @cbrtf(float %f)
+}
+
+define double @cbrt_test2(float %f) nounwind readnone {
+; CHECK: cbrt_test2
+ %conv = fpext float %f to double
+ %call = call double @cbrt(double %conv)
+ ret double %call
+; CHECK: call double @cbrt(double %conv)
+}
+define float @exp_test(float %f) nounwind readnone {
+; CHECK: exp_test
+ %conv = fpext float %f to double
+ %call = call double @exp(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @expf(float %f)
+}
+
+define double @exp_test2(float %f) nounwind readnone {
+; CHECK: exp_test2
+ %conv = fpext float %f to double
+ %call = call double @exp(double %conv)
+ ret double %call
+; CHECK: call double @exp(double %conv)
+}
+define float @expm1_test(float %f) nounwind readnone {
+; CHECK: expm1_test
+ %conv = fpext float %f to double
+ %call = call double @expm1(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @expm1f(float %f)
+}
+
+define double @expm1_test2(float %f) nounwind readnone {
+; CHECK: expm1_test2
+ %conv = fpext float %f to double
+ %call = call double @expm1(double %conv)
+ ret double %call
+; CHECK: call double @expm1(double %conv)
+}
+define float @exp10_test(float %f) nounwind readnone {
+; CHECK: exp10_test
+ %conv = fpext float %f to double
+ %call = call double @exp10(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @exp10f(float %f)
+}
+
+define double @exp10_test2(float %f) nounwind readnone {
+; CHECK: exp10_test2
+ %conv = fpext float %f to double
+ %call = call double @exp10(double %conv)
+ ret double %call
+; CHECK: call double @exp10(double %conv)
+}
+define float @log_test(float %f) nounwind readnone {
+; CHECK: log_test
+ %conv = fpext float %f to double
+ %call = call double @log(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @logf(float %f)
+}
+
+define double @log_test2(float %f) nounwind readnone {
+; CHECK: log_test2
+ %conv = fpext float %f to double
+ %call = call double @log(double %conv)
+ ret double %call
+; CHECK: call double @log(double %conv)
+}
+define float @log10_test(float %f) nounwind readnone {
+; CHECK: log10_test
+ %conv = fpext float %f to double
+ %call = call double @log10(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @log10f(float %f)
+}
+
+define double @log10_test2(float %f) nounwind readnone {
+; CHECK: log10_test2
+ %conv = fpext float %f to double
+ %call = call double @log10(double %conv)
+ ret double %call
+; CHECK: call double @log10(double %conv)
+}
+define float @log1p_test(float %f) nounwind readnone {
+; CHECK: log1p_test
+ %conv = fpext float %f to double
+ %call = call double @log1p(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @log1pf(float %f)
+}
+
+define double @log1p_test2(float %f) nounwind readnone {
+; CHECK: log1p_test2
+ %conv = fpext float %f to double
+ %call = call double @log1p(double %conv)
+ ret double %call
+; CHECK: call double @log1p(double %conv)
+}
+define float @log2_test(float %f) nounwind readnone {
+; CHECK: log2_test
+ %conv = fpext float %f to double
+ %call = call double @log2(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @log2f(float %f)
+}
+
+define double @log2_test2(float %f) nounwind readnone {
+; CHECK: log2_test2
+ %conv = fpext float %f to double
+ %call = call double @log2(double %conv)
+ ret double %call
+; CHECK: call double @log2(double %conv)
+}
+define float @logb_test(float %f) nounwind readnone {
+; CHECK: logb_test
+ %conv = fpext float %f to double
+ %call = call double @logb(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @logbf(float %f)
+}
+
+define double @logb_test2(float %f) nounwind readnone {
+; CHECK: logb_test2
+ %conv = fpext float %f to double
+ %call = call double @logb(double %conv)
+ ret double %call
+; CHECK: call double @logb(double %conv)
+}
+define float @sin_test(float %f) nounwind readnone {
+; CHECK: sin_test
+ %conv = fpext float %f to double
+ %call = call double @sin(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @sinf(float %f)
+}
+
+define double @sin_test2(float %f) nounwind readnone {
+; CHECK: sin_test2
+ %conv = fpext float %f to double
+ %call = call double @sin(double %conv)
+ ret double %call
+; CHECK: call double @sin(double %conv)
+}
+define float @sqrt_test(float %f) nounwind readnone {
+; CHECK: sqrt_test
+ %conv = fpext float %f to double
+ %call = call double @sqrt(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @sqrtf(float %f)
+}
+
+define double @sqrt_test2(float %f) nounwind readnone {
+; CHECK: sqrt_test2
+ %conv = fpext float %f to double
+ %call = call double @sqrt(double %conv)
+ ret double %call
+; CHECK: call double @sqrt(double %conv)
+}
+define float @tan_test(float %f) nounwind readnone {
+; CHECK: tan_test
+ %conv = fpext float %f to double
+ %call = call double @tan(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @tanf(float %f)
+}
+
+define double @tan_test2(float %f) nounwind readnone {
+; CHECK: tan_test2
+ %conv = fpext float %f to double
+ %call = call double @tan(double %conv)
+ ret double %call
+; CHECK: call double @tan(double %conv)
+}
+define float @tanh_test(float %f) nounwind readnone {
+; CHECK: tanh_test
+ %conv = fpext float %f to double
+ %call = call double @tanh(double %conv)
+ %conv1 = fptrunc double %call to float
+ ret float %conv1
+; CHECK: call float @tanhf(float %f)
+}
+
+define double @tanh_test2(float %f) nounwind readnone {
+; CHECK: tanh_test2
+ %conv = fpext float %f to double
+ %call = call double @tanh(double %conv)
+ ret double %call
+; CHECK: call double @tanh(double %conv)
+}
+
+declare double @tanh(double) nounwind readnone
+declare double @tan(double) nounwind readnone
+declare double @sqrt(double) nounwind readnone
+declare double @sin(double) nounwind readnone
+declare double @log2(double) nounwind readnone
+declare double @log1p(double) nounwind readnone
+declare double @log10(double) nounwind readnone
+declare double @log(double) nounwind readnone
+declare double @logb(double) nounwind readnone
+declare double @exp10(double) nounwind readnone
+declare double @expm1(double) nounwind readnone
+declare double @exp(double) nounwind readnone
+declare double @cbrt(double) nounwind readnone
+declare double @atanh(double) nounwind readnone
+declare double @atan(double) nounwind readnone
+declare double @acos(double) nounwind readnone
+declare double @acosh(double) nounwind readnone
+declare double @asin(double) nounwind readnone
+declare double @asinh(double) nounwind readnone
diff --git a/test/Transforms/SimplifyLibCalls/float-shrink-compare.ll b/test/Transforms/SimplifyLibCalls/float-shrink-compare.ll
new file mode 100644
index 000000000000..aecb887beb3a
--- /dev/null
+++ b/test/Transforms/SimplifyLibCalls/float-shrink-compare.ll
@@ -0,0 +1,179 @@
+; RUN: opt -S -simplify-libcalls -instcombine %s | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.8.0"
+
+define i32 @test1(float %x, float %y) nounwind uwtable {
+ %1 = fpext float %x to double
+ %2 = call double @ceil(double %1) nounwind readnone
+ %3 = fpext float %y to double
+ %4 = fcmp oeq double %2, %3
+ %5 = zext i1 %4 to i32
+ ret i32 %5
+; CHECK: @test1
+; CHECK-NEXT: %ceilf = call float @ceilf(float %x)
+; CHECK-NEXT: fcmp oeq float %ceilf, %y
+}
+
+define i32 @test2(float %x, float %y) nounwind uwtable {
+ %1 = fpext float %x to double
+ %2 = call double @fabs(double %1) nounwind readnone
+ %3 = fpext float %y to double
+ %4 = fcmp oeq double %2, %3
+ %5 = zext i1 %4 to i32
+ ret i32 %5
+; CHECK: @test2
+; CHECK-NEXT: %fabsf = call float @fabsf(float %x)
+; CHECK-NEXT: fcmp oeq float %fabsf, %y
+}
+
+define i32 @test3(float %x, float %y) nounwind uwtable {
+ %1 = fpext float %x to double
+ %2 = call double @floor(double %1) nounwind readnone
+ %3 = fpext float %y to double
+ %4 = fcmp oeq double %2, %3
+ %5 = zext i1 %4 to i32
+ ret i32 %5
+; CHECK: @test3
+; CHECK-NEXT: %floorf = call float @floorf(float %x)
+; CHECK-NEXT: fcmp oeq float %floorf, %y
+}
+
+define i32 @test4(float %x, float %y) nounwind uwtable {
+ %1 = fpext float %x to double
+ %2 = call double @nearbyint(double %1) nounwind
+ %3 = fpext float %y to double
+ %4 = fcmp oeq double %2, %3
+ %5 = zext i1 %4 to i32
+ ret i32 %5
+; CHECK: @test4
+; CHECK-NEXT: %nearbyintf = call float @nearbyintf(float %x)
+; CHECK-NEXT: fcmp oeq float %nearbyintf, %y
+}
+
+define i32 @test5(float %x, float %y) nounwind uwtable {
+ %1 = fpext float %x to double
+ %2 = call double @rint(double %1) nounwind
+ %3 = fpext float %y to double
+ %4 = fcmp oeq double %2, %3
+ %5 = zext i1 %4 to i32
+ ret i32 %5
+; CHECK: @test5
+; CHECK-NEXT: %rintf = call float @rintf(float %x)
+; CHECK-NEXT: fcmp oeq float %rintf, %y
+}
+
+define i32 @test6(float %x, float %y) nounwind uwtable {
+ %1 = fpext float %x to double
+ %2 = call double @round(double %1) nounwind readnone
+ %3 = fpext float %y to double
+ %4 = fcmp oeq double %2, %3
+ %5 = zext i1 %4 to i32
+ ret i32 %5
+; CHECK: @test6
+; CHECK-NEXT: %roundf = call float @roundf(float %x)
+; CHECK-NEXT: fcmp oeq float %roundf, %y
+}
+
+define i32 @test7(float %x, float %y) nounwind uwtable {
+ %1 = fpext float %x to double
+ %2 = call double @trunc(double %1) nounwind
+ %3 = fpext float %y to double
+ %4 = fcmp oeq double %2, %3
+ %5 = zext i1 %4 to i32
+ ret i32 %5
+; CHECK: @test7
+; CHECK-NEXT: %truncf = call float @truncf(float %x)
+; CHECK-NEXT: fcmp oeq float %truncf, %y
+}
+
+define i32 @test8(float %x, float %y) nounwind uwtable {
+ %1 = fpext float %y to double
+ %2 = fpext float %x to double
+ %3 = call double @ceil(double %2) nounwind readnone
+ %4 = fcmp oeq double %1, %3
+ %5 = zext i1 %4 to i32
+ ret i32 %5
+; CHECK: @test8
+; CHECK-NEXT: %ceilf = call float @ceilf(float %x)
+; CHECK-NEXT: fcmp oeq float %ceilf, %y
+}
+
+define i32 @test9(float %x, float %y) nounwind uwtable {
+ %1 = fpext float %y to double
+ %2 = fpext float %x to double
+ %3 = call double @fabs(double %2) nounwind readnone
+ %4 = fcmp oeq double %1, %3
+ %5 = zext i1 %4 to i32
+ ret i32 %5
+; CHECK: @test9
+; CHECK-NEXT: %fabsf = call float @fabsf(float %x)
+; CHECK-NEXT: fcmp oeq float %fabsf, %y
+}
+
+define i32 @test10(float %x, float %y) nounwind uwtable {
+ %1 = fpext float %y to double
+ %2 = fpext float %x to double
+ %3 = call double @floor(double %2) nounwind readnone
+ %4 = fcmp oeq double %1, %3
+ %5 = zext i1 %4 to i32
+ ret i32 %5
+; CHECK: @test10
+; CHECK-NEXT: %floorf = call float @floorf(float %x)
+; CHECK-NEXT: fcmp oeq float %floorf, %y
+}
+
+define i32 @test11(float %x, float %y) nounwind uwtable {
+ %1 = fpext float %y to double
+ %2 = fpext float %x to double
+ %3 = call double @nearbyint(double %2) nounwind
+ %4 = fcmp oeq double %1, %3
+ %5 = zext i1 %4 to i32
+ ret i32 %5
+; CHECK: @test11
+; CHECK-NEXT: %nearbyintf = call float @nearbyintf(float %x)
+; CHECK-NEXT: fcmp oeq float %nearbyintf, %y
+}
+
+define i32 @test12(float %x, float %y) nounwind uwtable {
+ %1 = fpext float %y to double
+ %2 = fpext float %x to double
+ %3 = call double @rint(double %2) nounwind
+ %4 = fcmp oeq double %1, %3
+ %5 = zext i1 %4 to i32
+ ret i32 %5
+; CHECK: @test12
+; CHECK-NEXT: %rintf = call float @rintf(float %x)
+; CHECK-NEXT: fcmp oeq float %rintf, %y
+}
+
+define i32 @test13(float %x, float %y) nounwind uwtable {
+ %1 = fpext float %y to double
+ %2 = fpext float %x to double
+ %3 = call double @round(double %2) nounwind readnone
+ %4 = fcmp oeq double %1, %3
+ %5 = zext i1 %4 to i32
+ ret i32 %5
+; CHECK: @test13
+; CHECK-NEXT: %roundf = call float @roundf(float %x)
+; CHECK-NEXT: fcmp oeq float %roundf, %y
+}
+
+define i32 @test14(float %x, float %y) nounwind uwtable {
+ %1 = fpext float %y to double
+ %2 = fpext float %x to double
+ %3 = call double @trunc(double %2) nounwind
+ %4 = fcmp oeq double %1, %3
+ %5 = zext i1 %4 to i32
+ ret i32 %5
+; CHECK: @test14
+; CHECK-NEXT: %truncf = call float @truncf(float %x)
+; CHECK-NEXT: fcmp oeq float %truncf, %y
+}
+
+declare double @fabs(double) nounwind readnone
+declare double @ceil(double) nounwind readnone
+declare double @floor(double) nounwind readnone
+declare double @nearbyint(double) nounwind readnone
+declare double @rint(double) nounwind readnone
+declare double @round(double) nounwind readnone
+declare double @trunc(double) nounwind readnone
diff --git a/test/Transforms/SimplifyLibCalls/floor.ll b/test/Transforms/SimplifyLibCalls/floor.ll
index 03dcdf585f9a..93c62c20023d 100644
--- a/test/Transforms/SimplifyLibCalls/floor.ll
+++ b/test/Transforms/SimplifyLibCalls/floor.ll
@@ -9,6 +9,8 @@
; DO-SIMPLIFY: call float @ceilf(
; DO-SIMPLIFY: call float @roundf(
; DO-SIMPLIFY: call float @nearbyintf(
+; DO-SIMPLIFY: call float @truncf(
+; DO-SIMPLIFY: call float @fabsf(
; C89-SIMPLIFY: call float @floorf(
; C89-SIMPLIFY: call float @ceilf(
@@ -19,6 +21,8 @@
; DONT-SIMPLIFY: call double @ceil(
; DONT-SIMPLIFY: call double @round(
; DONT-SIMPLIFY: call double @nearbyint(
+; DONT-SIMPLIFY: call double @trunc(
+; DONT-SIMPLIFY: call double @fabs(
declare double @floor(double)
@@ -28,6 +32,10 @@ declare double @round(double)
declare double @nearbyint(double)
+declare double @trunc(double)
+
+declare double @fabs(double)
+
define float @test_floor(float %C) {
%D = fpext float %C to double ; <double> [#uses=1]
; --> floorf
@@ -60,3 +68,18 @@ define float @test_nearbyint(float %C) {
ret float %F
}
+define float @test_trunc(float %C) {
+ %D = fpext float %C to double
+ ; --> truncf
+ %E = call double @trunc(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
+
+define float @test_fabs(float %C) {
+ %D = fpext float %C to double
+ ; --> fabsf
+ %E = call double @fabs(double %D)
+ %F = fptrunc double %E to float
+ ret float %F
+}
diff --git a/test/Transforms/SimplifyLibCalls/memcmp.ll b/test/Transforms/SimplifyLibCalls/memcmp.ll
deleted file mode 100644
index 6ca4dc97a194..000000000000
--- a/test/Transforms/SimplifyLibCalls/memcmp.ll
+++ /dev/null
@@ -1,35 +0,0 @@
-; Test that the memcmpOptimizer works correctly
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-
-@h = constant [2 x i8] c"h\00" ; <[2 x i8]*> [#uses=0]
-@hel = constant [4 x i8] c"hel\00" ; <[4 x i8]*> [#uses=0]
-@hello_u = constant [8 x i8] c"hello_u\00" ; <[8 x i8]*> [#uses=0]
-
-declare i32 @memcmp(i8*, i8*, i32)
-
-define void @test(i8* %P, i8* %Q, i32 %N, i32* %IP, i1* %BP) {
- %A = call i32 @memcmp( i8* %P, i8* %P, i32 %N ) ; <i32> [#uses=1]
-; CHECK-NOT: call {{.*}} memcmp
-; CHECK: store volatile
- store volatile i32 %A, i32* %IP
- %B = call i32 @memcmp( i8* %P, i8* %Q, i32 0 ) ; <i32> [#uses=1]
-; CHECK-NOT: call {{.*}} memcmp
-; CHECK: store volatile
- store volatile i32 %B, i32* %IP
- %C = call i32 @memcmp( i8* %P, i8* %Q, i32 1 ) ; <i32> [#uses=1]
-; CHECK: load
-; CHECK: zext
-; CHECK: load
-; CHECK: zext
-; CHECK: sub
-; CHECK: store volatile
- store volatile i32 %C, i32* %IP
- %F = call i32 @memcmp(i8* getelementptr ([4 x i8]* @hel, i32 0, i32 0),
- i8* getelementptr ([8 x i8]* @hello_u, i32 0, i32 0),
- i32 3)
-; CHECK-NOT: call {{.*}} memcmp
-; CHECK: store volatile
- store volatile i32 %F, i32* %IP
- ret void
-}
-
diff --git a/test/Transforms/SimplifyLibCalls/memmove.ll b/test/Transforms/SimplifyLibCalls/memmove.ll
deleted file mode 100644
index 5aaeeeb024f7..000000000000
--- a/test/Transforms/SimplifyLibCalls/memmove.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S | grep "llvm.memmove"
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i686-pc-linux-gnu"
-
-define i8* @test(i8* %a, i8* %b, i32 %x) {
-entry:
- %call = call i8* @memmove(i8* %a, i8* %b, i32 %x )
- ret i8* %call
-}
-
-declare i8* @memmove(i8*,i8*,i32)
-
diff --git a/test/Transforms/SimplifyLibCalls/memset-64.ll b/test/Transforms/SimplifyLibCalls/memset-64.ll
deleted file mode 100644
index 92412dee71ad..000000000000
--- a/test/Transforms/SimplifyLibCalls/memset-64.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S | grep "llvm.memset"
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-target triple = "x86_64-pc-linux-gnu"
-
-define void @a(i8* %x) nounwind {
-entry:
- %call = call i8* @memset(i8* %x, i32 1, i64 100) ; <i8*> [#uses=0]
- ret void
-}
-
-declare i8* @memset(i8*, i32, i64)
-
diff --git a/test/Transforms/SimplifyLibCalls/memset.ll b/test/Transforms/SimplifyLibCalls/memset.ll
deleted file mode 100644
index 853215a4d24c..000000000000
--- a/test/Transforms/SimplifyLibCalls/memset.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S | grep "llvm.memset"
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i686-pc-linux-gnu"
-
-define i8* @test(i8* %a, i32 %b, i32 %x) {
-entry:
- %call = call i8* @memset(i8* %a, i32 %b, i32 %x )
- ret i8* %call
-}
-
-declare i8* @memset(i8*,i32,i32)
-
diff --git a/test/Transforms/SimplifyLibCalls/weak-symbols.ll b/test/Transforms/SimplifyLibCalls/weak-symbols.ll
deleted file mode 100644
index 5875b211f776..000000000000
--- a/test/Transforms/SimplifyLibCalls/weak-symbols.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; RUN: opt < %s -simplify-libcalls -S | FileCheck %s
-; PR4738
-
-; SimplifyLibcalls shouldn't assume anything about weak symbols.
-
-@real_init = weak_odr constant [2 x i8] c"y\00"
-@fake_init = weak constant [2 x i8] c"y\00"
-@.str = private constant [2 x i8] c"y\00"
-
-; CHECK: define i32 @foo
-; CHECK: call i32 @strcmp
-define i32 @foo() nounwind {
-entry:
- %t0 = call i32 @strcmp(i8* getelementptr inbounds ([2 x i8]* @fake_init, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8]* @.str, i64 0, i64 0)) nounwind readonly
- ret i32 %t0
-}
-
-; CHECK: define i32 @bar
-; CHECK: ret i32 0
-define i32 @bar() nounwind {
-entry:
- %t0 = call i32 @strcmp(i8* getelementptr inbounds ([2 x i8]* @real_init, i64 0, i64 0), i8* getelementptr inbounds ([2 x i8]* @.str, i64 0, i64 0)) nounwind readonly
- ret i32 %t0
-}
-
-declare i32 @strcmp(i8*, i8*) nounwind readonly