aboutsummaryrefslogtreecommitdiff
path: root/test/Transforms
diff options
context:
space:
mode:
authorDimitry Andric <dim@FreeBSD.org>2016-07-23 20:41:05 +0000
committerDimitry Andric <dim@FreeBSD.org>2016-07-23 20:41:05 +0000
commit01095a5d43bbfde13731688ddcf6048ebb8b7721 (patch)
tree4def12e759965de927d963ac65840d663ef9d1ea /test/Transforms
parentf0f4822ed4b66e3579e92a89f368f8fb860e218e (diff)
Vendor import of llvm release_39 branch r276489:vendor/llvm/llvm-release_39-r276489
Diffstat (limited to 'test/Transforms')
-rw-r--r--test/Transforms/ADCE/debug-info-intrinsic.ll100
-rw-r--r--test/Transforms/ADCE/delete-profiling-calls-to-constant.ll19
-rw-r--r--test/Transforms/AddDiscriminators/basic.ll12
-rw-r--r--test/Transforms/AddDiscriminators/call-nested.ll50
-rw-r--r--test/Transforms/AddDiscriminators/call.ll14
-rw-r--r--test/Transforms/AddDiscriminators/dbg-declare-discriminator.ll9
-rw-r--r--test/Transforms/AddDiscriminators/diamond.ll6
-rw-r--r--test/Transforms/AddDiscriminators/first-only.ll12
-rw-r--r--test/Transforms/AddDiscriminators/multiple.ll12
-rw-r--r--test/Transforms/AddDiscriminators/no-discriminators.ll8
-rw-r--r--test/Transforms/AddDiscriminators/oneline.ll12
-rw-r--r--test/Transforms/AlignmentFromAssumptions/simple.ll1
-rw-r--r--test/Transforms/AlignmentFromAssumptions/simple32.ll1
-rw-r--r--test/Transforms/AlignmentFromAssumptions/start-unk.ll1
-rw-r--r--test/Transforms/ArgumentPromotion/dbg.ll5
-rw-r--r--test/Transforms/ArgumentPromotion/inalloca.ll4
-rw-r--r--test/Transforms/ArgumentPromotion/pr27568.ll31
-rw-r--r--test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll88
-rw-r--r--test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll2
-rw-r--r--test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll55
-rw-r--r--test/Transforms/AtomicExpand/SPARC/libcalls.ll257
-rw-r--r--test/Transforms/AtomicExpand/SPARC/lit.local.cfg2
-rw-r--r--test/Transforms/AtomicExpand/SPARC/partword.ll166
-rw-r--r--test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll85
-rw-r--r--test/Transforms/BBVectorize/vector-sel.ll33
-rw-r--r--test/Transforms/BDCE/dce-pure.ll1
-rw-r--r--test/Transforms/CodeGenPrepare/AMDGPU/no-sink-addrspacecast.ll4
-rw-r--r--test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll7
-rw-r--r--test/Transforms/CodeGenPrepare/X86/fcmp-sinking.ll29
-rw-r--r--test/Transforms/CodeGenPrepare/X86/pr27536.ll32
-rw-r--r--test/Transforms/CodeGenPrepare/X86/select.ll38
-rw-r--r--test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll196
-rw-r--r--test/Transforms/CodeGenPrepare/builtin-condition.ll90
-rw-r--r--test/Transforms/CodeGenPrepare/dom-tree.ll41
-rw-r--r--test/Transforms/ConstProp/calls.ll205
-rw-r--r--test/Transforms/ConstantHoisting/ARM/bad-cases.ll109
-rw-r--r--test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll42
-rw-r--r--test/Transforms/ConstantHoisting/ARM/lit.local.cfg2
-rw-r--r--test/Transforms/ConstantHoisting/X86/cast-inst.ll1
-rw-r--r--test/Transforms/ConstantMerge/merge-both.ll2
-rw-r--r--test/Transforms/CorrelatedValuePropagation/basic.ll286
-rw-r--r--test/Transforms/CorrelatedValuePropagation/conflict.ll50
-rw-r--r--test/Transforms/CorrelatedValuePropagation/icmp.ll1
-rw-r--r--test/Transforms/CorrelatedValuePropagation/range.ll14
-rw-r--r--test/Transforms/CorrelatedValuePropagation/sdiv.ll54
-rw-r--r--test/Transforms/CorrelatedValuePropagation/select.ll143
-rw-r--r--test/Transforms/CorrelatedValuePropagation/srem.ll21
-rw-r--r--test/Transforms/CrossDSOCFI/basic.ll83
-rw-r--r--test/Transforms/DCE/basic.ll11
-rw-r--r--test/Transforms/DCE/guards.ll11
-rw-r--r--test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll6
-rw-r--r--test/Transforms/DeadArgElim/comdat.ll14
-rw-r--r--test/Transforms/DeadArgElim/dbginfo.ll7
-rw-r--r--test/Transforms/DeadArgElim/funclet.ll29
-rw-r--r--test/Transforms/DeadArgElim/returned.ll2
-rw-r--r--test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll90
-rw-r--r--test/Transforms/DeadStoreElimination/combined-partial-overwrites.ll239
-rw-r--r--test/Transforms/DeadStoreElimination/fence.ll96
-rw-r--r--test/Transforms/DeadStoreElimination/inst-limits.ll7
-rw-r--r--test/Transforms/DeadStoreElimination/operand-bundles.ll43
-rw-r--r--test/Transforms/DeadStoreElimination/simple.ll1
-rw-r--r--test/Transforms/EarlyCSE/basic.ll5
-rw-r--r--test/Transforms/EarlyCSE/flags.ll18
-rw-r--r--test/Transforms/EarlyCSE/guards.ll181
-rw-r--r--test/Transforms/EarlyCSE/invariant-loads.ll99
-rw-r--r--test/Transforms/EliminateAvailableExternally/visibility.ll2
-rw-r--r--test/Transforms/Float2Int/basic.ll1
-rw-r--r--test/Transforms/FunctionAttrs/assume.ll4
-rw-r--r--test/Transforms/FunctionAttrs/comdat-ipo.ll16
-rw-r--r--test/Transforms/FunctionAttrs/convergent.ll106
-rw-r--r--test/Transforms/FunctionAttrs/nocapture.ll25
-rw-r--r--test/Transforms/FunctionAttrs/norecurse.ll8
-rw-r--r--test/Transforms/FunctionAttrs/noreturn.ll18
-rw-r--r--test/Transforms/FunctionAttrs/operand-bundles-scc.ll13
-rw-r--r--test/Transforms/FunctionAttrs/optnone.ll4
-rw-r--r--test/Transforms/FunctionAttrs/readattrs.ll9
-rw-r--r--test/Transforms/FunctionImport/Inputs/adjustable_threshold.ll37
-rw-r--r--test/Transforms/FunctionImport/Inputs/funcimport.ll47
-rw-r--r--test/Transforms/FunctionImport/Inputs/funcimport_debug.ll5
-rw-r--r--test/Transforms/FunctionImport/Inputs/inlineasm.ll11
-rw-r--r--test/Transforms/FunctionImport/adjustable_threshold.ll31
-rw-r--r--test/Transforms/FunctionImport/funcimport.ll107
-rw-r--r--test/Transforms/FunctionImport/funcimport_alias.ll6
-rw-r--r--test/Transforms/FunctionImport/funcimport_debug.ll26
-rw-r--r--test/Transforms/FunctionImport/inlineasm.ll19
-rw-r--r--test/Transforms/GCOVProfiling/function-numbering.ll13
-rw-r--r--test/Transforms/GCOVProfiling/global-ctor.ll11
-rw-r--r--test/Transforms/GCOVProfiling/linezero.ll18
-rw-r--r--test/Transforms/GCOVProfiling/linkagename.ll9
-rw-r--r--test/Transforms/GCOVProfiling/modules.ll3
-rw-r--r--test/Transforms/GCOVProfiling/return-block.ll14
-rw-r--r--test/Transforms/GCOVProfiling/version.ll13
-rw-r--r--test/Transforms/GVN/2007-07-25-InfiniteLoop.ll3
-rw-r--r--test/Transforms/GVN/2007-07-26-PhiErasure.ll18
-rw-r--r--test/Transforms/GVN/2007-07-31-NoDomInherit.ll3
-rw-r--r--test/Transforms/GVN/2007-07-31-RedundantPhi.ll3
-rw-r--r--test/Transforms/GVN/2008-02-12-UndefLoad.ll3
-rw-r--r--test/Transforms/GVN/2008-07-02-Unreachable.ll3
-rw-r--r--test/Transforms/GVN/2008-12-09-SelfRemove.ll14
-rw-r--r--test/Transforms/GVN/PRE/2009-02-17-LoadPRECrash.ll (renamed from test/Transforms/GVN/2009-02-17-LoadPRECrash.ll)0
-rw-r--r--test/Transforms/GVN/PRE/2009-06-17-InvalidPRE.ll (renamed from test/Transforms/GVN/2009-06-17-InvalidPRE.ll)3
-rw-r--r--test/Transforms/GVN/PRE/2011-06-01-NonLocalMemdepMiscompile.ll (renamed from test/Transforms/GVN/2011-06-01-NonLocalMemdepMiscompile.ll)0
-rw-r--r--test/Transforms/GVN/PRE/atomic.ll503
-rw-r--r--test/Transforms/GVN/PRE/invariant-load.ll (renamed from test/Transforms/GVN/invariant-load.ll)0
-rw-r--r--test/Transforms/GVN/PRE/load-metadata.ll24
-rw-r--r--test/Transforms/GVN/PRE/load-pre-align.ll (renamed from test/Transforms/GVN/load-pre-align.ll)0
-rw-r--r--test/Transforms/GVN/PRE/load-pre-licm.ll (renamed from test/Transforms/GVN/load-pre-licm.ll)0
-rw-r--r--test/Transforms/GVN/PRE/load-pre-nonlocal.ll (renamed from test/Transforms/GVN/load-pre-nonlocal.ll)8
-rw-r--r--test/Transforms/GVN/PRE/local-pre.ll (renamed from test/Transforms/GVN/local-pre.ll)6
-rw-r--r--test/Transforms/GVN/PRE/lpre-call-wrap-2.ll (renamed from test/Transforms/GVN/lpre-call-wrap-2.ll)0
-rw-r--r--test/Transforms/GVN/PRE/lpre-call-wrap.ll (renamed from test/Transforms/GVN/lpre-call-wrap.ll)0
-rw-r--r--test/Transforms/GVN/PRE/phi-translate.ll (renamed from test/Transforms/GVN/phi-translate.ll)7
-rw-r--r--test/Transforms/GVN/PRE/pre-basic-add.ll (renamed from test/Transforms/GVN/pre-basic-add.ll)7
-rw-r--r--test/Transforms/GVN/PRE/pre-gep-load.ll (renamed from test/Transforms/GVN/pre-gep-load.ll)4
-rw-r--r--test/Transforms/GVN/PRE/pre-load.ll (renamed from test/Transforms/GVN/pre-load.ll)0
-rw-r--r--test/Transforms/GVN/PRE/pre-no-cost-phi.ll (renamed from test/Transforms/GVN/pre-no-cost-phi.ll)0
-rw-r--r--test/Transforms/GVN/PRE/pre-single-pred.ll (renamed from test/Transforms/GVN/pre-single-pred.ll)0
-rw-r--r--test/Transforms/GVN/PRE/preserve-tbaa.ll (renamed from test/Transforms/GVN/preserve-tbaa.ll)0
-rw-r--r--test/Transforms/GVN/PRE/rle-phi-translate.ll (renamed from test/Transforms/GVN/rle-phi-translate.ll)0
-rw-r--r--test/Transforms/GVN/PRE/rle-semidominated.ll36
-rw-r--r--test/Transforms/GVN/PRE/rle.ll (renamed from test/Transforms/GVN/rle.ll)0
-rw-r--r--test/Transforms/GVN/PRE/volatile.ll (renamed from test/Transforms/GVN/volatile.ll)10
-rw-r--r--test/Transforms/GVN/assume-equal.ll40
-rw-r--r--test/Transforms/GVN/atomic.ll109
-rw-r--r--test/Transforms/GVN/basic.ll8
-rw-r--r--test/Transforms/GVN/big-endian.ll40
-rw-r--r--test/Transforms/GVN/bitcast-of-call.ll3
-rw-r--r--test/Transforms/GVN/calls-nonlocal.ll28
-rw-r--r--test/Transforms/GVN/calls-readonly.ll18
-rw-r--r--test/Transforms/GVN/fence.ll69
-rw-r--r--test/Transforms/GVN/flags.ll18
-rw-r--r--test/Transforms/GVN/fold-const-expr.ll99
-rw-r--r--test/Transforms/GVN/hoist-pr20242.ll74
-rw-r--r--test/Transforms/GVN/hoist-pr22005.ll30
-rw-r--r--test/Transforms/GVN/hoist-pr28606.ll50
-rw-r--r--test/Transforms/GVN/hoist.ll691
-rw-r--r--test/Transforms/GVN/load-constant-mem.ll8
-rw-r--r--test/Transforms/GVN/nonescaping-malloc.ll4
-rw-r--r--test/Transforms/GVN/pr28562.ll9
-rw-r--r--test/Transforms/GVN/pr28626.ll42
-rw-r--r--test/Transforms/GVN/rle-must-alias.ll3
-rw-r--r--test/Transforms/GVN/rle-semidominated.ll20
-rw-r--r--test/Transforms/GVN/volatile-nonvolatile.ll61
-rw-r--r--test/Transforms/GlobalDCE/2002-08-17-FunctionDGE.ll3
-rw-r--r--test/Transforms/GlobalDCE/2002-08-17-WorkListTest.ll4
-rw-r--r--test/Transforms/GlobalDCE/2003-07-01-SelfReference.ll4
-rw-r--r--test/Transforms/GlobalDCE/2003-10-09-PreserveWeakGlobals.ll4
-rw-r--r--test/Transforms/GlobalDCE/basicvariabletest.ll3
-rw-r--r--test/Transforms/GlobalDCE/externally_available.ll4
-rw-r--r--test/Transforms/GlobalDCE/global-ifunc.ll13
-rw-r--r--test/Transforms/GlobalMerge/basic.ll20
-rw-r--r--test/Transforms/GlobalOpt/2007-05-13-Crash.ll2
-rw-r--r--test/Transforms/GlobalOpt/2008-01-13-OutOfRangeSROA.ll6
-rw-r--r--test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll3
-rw-r--r--test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll8
-rw-r--r--test/Transforms/GlobalOpt/2008-07-17-addrspace.ll8
-rw-r--r--test/Transforms/GlobalOpt/2009-01-13-phi-user.ll3
-rw-r--r--test/Transforms/GlobalOpt/2009-03-05-dbg.ll12
-rw-r--r--test/Transforms/GlobalOpt/2009-03-06-Anonymous.ll4
-rw-r--r--test/Transforms/GlobalOpt/2009-03-07-PromotePtrToBool.ll4
-rw-r--r--test/Transforms/GlobalOpt/2010-10-19-WeakOdr.ll2
-rw-r--r--test/Transforms/GlobalOpt/GSROA-section.ll30
-rw-r--r--test/Transforms/GlobalOpt/MallocSROA-section.ll28
-rw-r--r--test/Transforms/GlobalOpt/SROA-section.ll27
-rw-r--r--test/Transforms/GlobalOpt/alias-used-address-space.ll2
-rw-r--r--test/Transforms/GlobalOpt/alias-used.ll25
-rw-r--r--test/Transforms/GlobalOpt/assume.ll2
-rw-r--r--test/Transforms/GlobalOpt/basictest.ll5
-rw-r--r--test/Transforms/GlobalOpt/constantexpr-dangle.ll5
-rw-r--r--test/Transforms/GlobalOpt/constantfold-initializers.ll12
-rw-r--r--test/Transforms/GlobalOpt/ctor-list-opt-inbounds.ll4
-rw-r--r--test/Transforms/GlobalOpt/deadglobal-2.ll4
-rw-r--r--test/Transforms/GlobalOpt/globalsra-partial.ll4
-rw-r--r--test/Transforms/GlobalOpt/globalsra-unknown-index.ll21
-rw-r--r--test/Transforms/GlobalOpt/globalsra.ll23
-rw-r--r--test/Transforms/GlobalOpt/heap-sra-phi.ll5
-rw-r--r--test/Transforms/GlobalOpt/invariant.group.barrier.ll4
-rw-r--r--test/Transforms/GlobalOpt/invoke.ll2
-rw-r--r--test/Transforms/GlobalOpt/iterate.ll4
-rw-r--r--test/Transforms/GlobalOpt/memcpy.ll4
-rw-r--r--test/Transforms/GlobalOpt/phi-select.ll8
-rw-r--r--test/Transforms/GlobalOpt/pr21191.ll4
-rw-r--r--test/Transforms/GlobalOpt/storepointer-compare.ll5
-rw-r--r--test/Transforms/GlobalOpt/storepointer.ll4
-rw-r--r--test/Transforms/GlobalOpt/trivialstore.ll4
-rw-r--r--test/Transforms/GlobalOpt/undef-init.ll4
-rw-r--r--test/Transforms/GlobalOpt/unnamed-addr.ll2
-rw-r--r--test/Transforms/GuardWidening/basic.ll381
-rw-r--r--test/Transforms/GuardWidening/range-check-merging.ll235
-rw-r--r--test/Transforms/IPConstantProp/comdat-ipo.ll28
-rw-r--r--test/Transforms/IPConstantProp/fp-bc-icmp-const-fold.ll52
-rw-r--r--test/Transforms/IPConstantProp/global.ll1
-rw-r--r--test/Transforms/IRCE/conjunctive-checks.ll99
-rw-r--r--test/Transforms/IRCE/decrementing-loop.ll1
-rw-r--r--test/Transforms/IRCE/only-lower-check.ll2
-rw-r--r--test/Transforms/IRCE/only-upper-check.ll2
-rw-r--r--test/Transforms/IndVarSimplify/AMDGPU/lit.local.cfg2
-rw-r--r--test/Transforms/IndVarSimplify/AMDGPU/no-widen-to-i64.ll98
-rw-r--r--test/Transforms/IndVarSimplify/backedge-on-min-max.ll1
-rw-r--r--test/Transforms/IndVarSimplify/elim-extend.ll2
-rw-r--r--test/Transforms/IndVarSimplify/iv-widen.ll1
-rw-r--r--test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll4
-rw-r--r--test/Transforms/IndVarSimplify/loop_evaluate10.ll2
-rw-r--r--test/Transforms/IndVarSimplify/overflow-intrinsics.ll137
-rw-r--r--test/Transforms/IndVarSimplify/overflowcheck.ll56
-rw-r--r--test/Transforms/IndVarSimplify/pr24783.ll5
-rw-r--r--test/Transforms/IndVarSimplify/pr25576.ll31
-rw-r--r--test/Transforms/IndVarSimplify/pr26973.ll33
-rw-r--r--test/Transforms/IndVarSimplify/pr26974.ll60
-rw-r--r--test/Transforms/IndVarSimplify/pr27133.ll38
-rw-r--r--test/Transforms/IndVarSimplify/rewrite-loop-exit-value.ll63
-rw-r--r--test/Transforms/IndVarSimplify/sharpen-range.ll1
-rw-r--r--test/Transforms/IndVarSimplify/sink-trapping.ll2
-rw-r--r--test/Transforms/IndVarSimplify/udiv.ll6
-rw-r--r--test/Transforms/IndVarSimplify/ult-sub-to-eq.ll10
-rw-r--r--test/Transforms/InferFunctionAttrs/annotate.ll887
-rw-r--r--test/Transforms/InferFunctionAttrs/no-proto.ll850
-rw-r--r--test/Transforms/Inline/PR4909.ll1
-rw-r--r--test/Transforms/Inline/alloca-dbgdeclare-merge.ll9
-rw-r--r--test/Transforms/Inline/alloca-dbgdeclare.ll25
-rw-r--r--test/Transforms/Inline/alloca_test.ll50
-rw-r--r--test/Transforms/Inline/array-alloca.ll36
-rw-r--r--test/Transforms/Inline/attributes.ll62
-rw-r--r--test/Transforms/Inline/basictest.ll2
-rw-r--r--test/Transforms/Inline/blockaddress.ll22
-rw-r--r--test/Transforms/Inline/comdat-ipo.ll19
-rw-r--r--test/Transforms/Inline/crash2.ll2
-rw-r--r--test/Transforms/Inline/debug-info-duplicate-calls.ll37
-rw-r--r--test/Transforms/Inline/debug-invoke.ll9
-rw-r--r--test/Transforms/Inline/deoptimize-intrinsic-cconv.ll19
-rw-r--r--test/Transforms/Inline/deoptimize-intrinsic.ll123
-rw-r--r--test/Transforms/Inline/devirtualize-3.ll2
-rw-r--r--test/Transforms/Inline/guard-intrinsic.ll39
-rw-r--r--test/Transforms/Inline/ignore-debug-info.ll4
-rw-r--r--test/Transforms/Inline/inline-cold-callee.ll29
-rw-r--r--test/Transforms/Inline/inline-hot-callee.ll29
-rw-r--r--test/Transforms/Inline/inline-hot-callsite.ll52
-rw-r--r--test/Transforms/Inline/inline-invoke-tail.ll4
-rw-r--r--test/Transforms/Inline/inline-optsize.ll13
-rw-r--r--test/Transforms/Inline/inline-threshold.ll89
-rw-r--r--test/Transforms/Inline/inline_dbg_declare.ll9
-rw-r--r--test/Transforms/Inline/inline_unreachable-2.ll19
-rw-r--r--test/Transforms/Inline/inline_unreachable.ll130
-rw-r--r--test/Transforms/Inline/local-as-metadata-undominated-use.ll48
-rw-r--r--test/Transforms/Inline/parallel-loop-md.ll57
-rw-r--r--test/Transforms/Inline/pr26698.ll65
-rw-r--r--test/Transforms/Inline/pr28298.ll19
-rw-r--r--test/Transforms/Inline/profile-meta.ll44
-rw-r--r--test/Transforms/InstCombine/2002-03-11-InstCombineHang.ll9
-rw-r--r--test/Transforms/InstCombine/2002-05-14-SubFailure.ll11
-rw-r--r--test/Transforms/InstCombine/2002-08-02-CastTest.ll11
-rw-r--r--test/Transforms/InstCombine/2002-12-05-MissedConstProp.ll13
-rw-r--r--test/Transforms/InstCombine/2004-11-27-SetCCForCastLargerAndConstant.ll297
-rw-r--r--test/Transforms/InstCombine/2006-12-01-BadFPVectorXform.ll15
-rw-r--r--test/Transforms/InstCombine/2007-01-27-AndICmp.ll8
-rw-r--r--test/Transforms/InstCombine/2007-11-22-IcmpCrash.ll16
-rw-r--r--test/Transforms/InstCombine/2008-07-08-AndICmp.ll10
-rw-r--r--test/Transforms/InstCombine/2008-07-08-ShiftOneAndOne.ll20
-rw-r--r--test/Transforms/InstCombine/2008-07-10-CastSextBool.ll27
-rw-r--r--test/Transforms/InstCombine/2008-07-16-sse2_storel_dq.ll13
-rw-r--r--test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll2
-rw-r--r--test/Transforms/InstCombine/2012-05-27-Negative-Shift-Crash.ll57
-rw-r--r--test/Transforms/InstCombine/addrspacecast.ll39
-rw-r--r--test/Transforms/InstCombine/align-attr.ll13
-rw-r--r--test/Transforms/InstCombine/all-bits-shift.ll3
-rw-r--r--test/Transforms/InstCombine/allocsize-32.ll29
-rw-r--r--test/Transforms/InstCombine/allocsize.ll141
-rw-r--r--test/Transforms/InstCombine/amdgcn-intrinsics.ll364
-rw-r--r--test/Transforms/InstCombine/and-compare.ll3
-rw-r--r--test/Transforms/InstCombine/and-fcmp.ll1551
-rw-r--r--test/Transforms/InstCombine/and-or-icmps.ll53
-rw-r--r--test/Transforms/InstCombine/and-or-not.ll24
-rw-r--r--test/Transforms/InstCombine/and-or.ll78
-rw-r--r--test/Transforms/InstCombine/and.ll407
-rw-r--r--test/Transforms/InstCombine/and2.ll121
-rw-r--r--test/Transforms/InstCombine/apint-add.ll116
-rw-r--r--test/Transforms/InstCombine/apint-add1.ll34
-rw-r--r--test/Transforms/InstCombine/apint-add2.ll46
-rw-r--r--test/Transforms/InstCombine/apint-and-xor-merge.ll29
-rw-r--r--test/Transforms/InstCombine/apint-select.ll153
-rw-r--r--test/Transforms/InstCombine/assoc-cast-assoc.ll77
-rw-r--r--test/Transforms/InstCombine/atomic.ll253
-rw-r--r--test/Transforms/InstCombine/bitcast-bigendian.ll40
-rw-r--r--test/Transforms/InstCombine/bitcast.ll60
-rw-r--r--test/Transforms/InstCombine/bitreverse-fold.ll89
-rw-r--r--test/Transforms/InstCombine/bswap.ll173
-rw-r--r--test/Transforms/InstCombine/builtin-object-size-offset.ll58
-rw-r--r--test/Transforms/InstCombine/builtin-object-size-ptr.ll34
-rw-r--r--test/Transforms/InstCombine/cast-set.ll89
-rw-r--r--test/Transforms/InstCombine/cast.ll1203
-rw-r--r--test/Transforms/InstCombine/compare-signs.ll83
-rw-r--r--test/Transforms/InstCombine/compare-udiv.ll132
-rw-r--r--test/Transforms/InstCombine/compare-unescaped.ll164
-rw-r--r--test/Transforms/InstCombine/convergent.ll44
-rw-r--r--test/Transforms/InstCombine/cos-intrinsic.ll26
-rw-r--r--test/Transforms/InstCombine/debug-line.ll6
-rw-r--r--test/Transforms/InstCombine/debuginfo.ll5
-rw-r--r--test/Transforms/InstCombine/demorgan-zext.ll78
-rw-r--r--test/Transforms/InstCombine/div-shift.ll76
-rw-r--r--test/Transforms/InstCombine/div.ll417
-rw-r--r--test/Transforms/InstCombine/dom-conditions.ll152
-rw-r--r--test/Transforms/InstCombine/double-float-shrink-1.ll38
-rw-r--r--test/Transforms/InstCombine/exact.ll156
-rw-r--r--test/Transforms/InstCombine/fabs.ll25
-rw-r--r--test/Transforms/InstCombine/fast-math-scalarization.ll39
-rw-r--r--test/Transforms/InstCombine/fcmp-special.ll135
-rw-r--r--test/Transforms/InstCombine/fmul.ll29
-rw-r--r--test/Transforms/InstCombine/fputs-opt-size.ll28
-rw-r--r--test/Transforms/InstCombine/gc.relocate.ll29
-rw-r--r--test/Transforms/InstCombine/getelementptr-folding.ll13
-rw-r--r--test/Transforms/InstCombine/getelementptr.ll8
-rw-r--r--test/Transforms/InstCombine/icmp-vec.ll193
-rw-r--r--test/Transforms/InstCombine/icmp.ll1546
-rw-r--r--test/Transforms/InstCombine/indexed-gep-compares.ll170
-rw-r--r--test/Transforms/InstCombine/insert-val-extract-elem.ll74
-rw-r--r--test/Transforms/InstCombine/lifetime.ll5
-rw-r--r--test/Transforms/InstCombine/load-cmp.ll172
-rw-r--r--test/Transforms/InstCombine/logical-select.ll470
-rw-r--r--test/Transforms/InstCombine/masked_intrinsics.ll66
-rw-r--r--test/Transforms/InstCombine/mem-gep-zidx.ll12
-rw-r--r--test/Transforms/InstCombine/memchr.ll154
-rw-r--r--test/Transforms/InstCombine/memset-1.ll48
-rw-r--r--test/Transforms/InstCombine/memset_chk-1.ll38
-rw-r--r--test/Transforms/InstCombine/min-positive.ll34
-rw-r--r--test/Transforms/InstCombine/minmax-fp.ll26
-rw-r--r--test/Transforms/InstCombine/misc-2002.ll50
-rw-r--r--test/Transforms/InstCombine/mul-masked-bits.ll11
-rw-r--r--test/Transforms/InstCombine/narrow-switch.ll80
-rw-r--r--test/Transforms/InstCombine/opaque.ll32
-rw-r--r--test/Transforms/InstCombine/or-fcmp.ll1508
-rw-r--r--test/Transforms/InstCombine/or-to-xor.ll81
-rw-r--r--test/Transforms/InstCombine/or.ll619
-rw-r--r--test/Transforms/InstCombine/phi-preserve-ir-flags.ll89
-rw-r--r--test/Transforms/InstCombine/phi.ll119
-rw-r--r--test/Transforms/InstCombine/pow-4.ll41
-rw-r--r--test/Transforms/InstCombine/pr20678.ll8
-rw-r--r--test/Transforms/InstCombine/pr21210.ll7
-rw-r--r--test/Transforms/InstCombine/pr21651.ll22
-rw-r--r--test/Transforms/InstCombine/pr26992.ll37
-rw-r--r--test/Transforms/InstCombine/pr26993.ll24
-rw-r--r--test/Transforms/InstCombine/pr27236.ll19
-rw-r--r--test/Transforms/InstCombine/pr27332.ll23
-rw-r--r--test/Transforms/InstCombine/pr28143.ll12
-rw-r--r--test/Transforms/InstCombine/pr8547.ll26
-rw-r--r--test/Transforms/InstCombine/prevent-cmp-merge.ll41
-rw-r--r--test/Transforms/InstCombine/printf-1.ll12
-rw-r--r--test/Transforms/InstCombine/printf-2.ll12
-rw-r--r--test/Transforms/InstCombine/r600-intrinsics.ll47
-rw-r--r--test/Transforms/InstCombine/rem.ll159
-rw-r--r--test/Transforms/InstCombine/select-implied.ll123
-rw-r--r--test/Transforms/InstCombine/select.ll421
-rw-r--r--test/Transforms/InstCombine/set.ll288
-rw-r--r--test/Transforms/InstCombine/shift-shift.ll75
-rw-r--r--test/Transforms/InstCombine/shift.ll846
-rw-r--r--test/Transforms/InstCombine/shufflevec-constant.ll19
-rw-r--r--test/Transforms/InstCombine/signed-comparison.ll45
-rw-r--r--test/Transforms/InstCombine/signext.ll101
-rw-r--r--test/Transforms/InstCombine/sink-into-catchswitch.ll45
-rw-r--r--test/Transforms/InstCombine/stacksaverestore.ll58
-rw-r--r--test/Transforms/InstCombine/strlen-1.ll94
-rw-r--r--test/Transforms/InstCombine/tbaa-store-to-load.ll17
-rw-r--r--test/Transforms/InstCombine/trunc.ll21
-rw-r--r--test/Transforms/InstCombine/unpack-fca.ll69
-rw-r--r--test/Transforms/InstCombine/urem.ll15
-rw-r--r--test/Transforms/InstCombine/vararg.ll30
-rw-r--r--test/Transforms/InstCombine/vec_demanded_elts.ll60
-rw-r--r--test/Transforms/InstCombine/vec_phi_extract.ll65
-rw-r--r--test/Transforms/InstCombine/volatile_store.ll24
-rw-r--r--test/Transforms/InstCombine/x86-avx.ll158
-rw-r--r--test/Transforms/InstCombine/x86-avx2.ll85
-rw-r--r--test/Transforms/InstCombine/x86-f16c.ll37
-rw-r--r--test/Transforms/InstCombine/x86-masked-memops.ll302
-rw-r--r--test/Transforms/InstCombine/x86-movmsk.ll324
-rw-r--r--test/Transforms/InstCombine/x86-pmovsx.ll136
-rw-r--r--test/Transforms/InstCombine/x86-pmovzx.ll136
-rw-r--r--test/Transforms/InstCombine/x86-pshufb.ll239
-rw-r--r--test/Transforms/InstCombine/x86-sse.ll661
-rw-r--r--test/Transforms/InstCombine/x86-sse2.ll500
-rw-r--r--test/Transforms/InstCombine/x86-sse41.ll98
-rw-r--r--test/Transforms/InstCombine/x86-sse4a.ll228
-rw-r--r--test/Transforms/InstCombine/x86-vector-shifts.ll1310
-rw-r--r--test/Transforms/InstCombine/x86-xop.ll186
-rw-r--r--test/Transforms/InstCombine/xor.ll375
-rw-r--r--test/Transforms/InstCombine/zero-point-zero-add.ll15
-rw-r--r--test/Transforms/InstCombine/zeroext-and-reduce.ll15
-rw-r--r--test/Transforms/InstCombine/zext-fold.ll20
-rw-r--r--test/Transforms/InstCombine/zext-or-icmp.ll50
-rw-r--r--test/Transforms/InstCombine/zext.ll68
-rw-r--r--test/Transforms/InstMerge/exceptions.ll61
-rw-r--r--test/Transforms/InstMerge/st_sink_bugfix_22613.ll2
-rw-r--r--test/Transforms/InstMerge/st_sink_no_barrier_call.ll2
-rw-r--r--test/Transforms/InstSimplify/2010-12-20-Boolean.ll13
-rw-r--r--test/Transforms/InstSimplify/AndOrXor.ll198
-rw-r--r--test/Transforms/InstSimplify/add-mask.ll38
-rw-r--r--test/Transforms/InstSimplify/apint-or.ll41
-rw-r--r--test/Transforms/InstSimplify/assume.ll7
-rw-r--r--test/Transforms/InstSimplify/bswap.ll21
-rw-r--r--test/Transforms/InstSimplify/call.ll35
-rw-r--r--test/Transforms/InstSimplify/compare.ll168
-rw-r--r--test/Transforms/InstSimplify/fast-math.ll118
-rw-r--r--test/Transforms/InstSimplify/fdiv.ll7
-rw-r--r--test/Transforms/InstSimplify/floating-point-arithmetic.ll63
-rw-r--r--test/Transforms/InstSimplify/floating-point-compare.ll55
-rw-r--r--test/Transforms/InstSimplify/implies.ll120
-rw-r--r--test/Transforms/InstSimplify/load-relative-32.ll19
-rw-r--r--test/Transforms/InstSimplify/load-relative.ll75
-rw-r--r--test/Transforms/InstSimplify/load.ll19
-rw-r--r--test/Transforms/InstSimplify/maxmin.ll89
-rw-r--r--test/Transforms/InstSimplify/past-the-end.ll40
-rw-r--r--test/Transforms/InstSimplify/phi.ll4
-rw-r--r--test/Transforms/InstSimplify/ptr_diff.ll28
-rw-r--r--test/Transforms/InstSimplify/reassociate.ll70
-rw-r--r--test/Transforms/InstSimplify/rem.ll24
-rw-r--r--test/Transforms/InstSimplify/returned.ll30
-rw-r--r--test/Transforms/InstSimplify/shift-128-kb.ll16
-rw-r--r--test/Transforms/InstSimplify/shift-knownbits.ll147
-rw-r--r--test/Transforms/InstSimplify/shr-nop.ll509
-rw-r--r--test/Transforms/InstSimplify/undef.ll226
-rw-r--r--test/Transforms/InstSimplify/vec-cmp.ll65
-rw-r--r--test/Transforms/InstSimplify/vector_gep.ll9
-rw-r--r--test/Transforms/Internalize/stackguard.ll9
-rw-r--r--test/Transforms/Internalize/used.ll1
-rw-r--r--test/Transforms/JumpThreading/basic.ll34
-rw-r--r--test/Transforms/JumpThreading/crash-assertingvh.ll19
-rw-r--r--test/Transforms/JumpThreading/implied-cond.ll79
-rw-r--r--test/Transforms/JumpThreading/induction.ll25
-rw-r--r--test/Transforms/JumpThreading/pr26096.ll11
-rw-r--r--test/Transforms/JumpThreading/thread-loads.ll140
-rw-r--r--test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll5
-rw-r--r--test/Transforms/LCSSA/2006-06-12-MultipleExitsSameBlock.ll9
-rw-r--r--test/Transforms/LCSSA/2006-07-09-NoDominator.ll1
-rw-r--r--test/Transforms/LCSSA/2006-10-31-UnreachableBlock.ll1
-rw-r--r--test/Transforms/LCSSA/basictest.ll9
-rw-r--r--test/Transforms/LCSSA/invoke-dest.ll1
-rw-r--r--test/Transforms/LCSSA/mixed-catch.ll1
-rw-r--r--test/Transforms/LCSSA/unused-phis.ll1
-rw-r--r--test/Transforms/LICM/AliasSetMemSet.ll51
-rw-r--r--test/Transforms/LICM/alias-set-tracker-loss.ll39
-rw-r--r--test/Transforms/LICM/argmemonly-call.ll1
-rw-r--r--test/Transforms/LICM/assume.ll52
-rw-r--r--test/Transforms/LICM/atomics.ll1
-rw-r--r--test/Transforms/LICM/basictest.ll1
-rw-r--r--test/Transforms/LICM/constexpr.ll1
-rw-r--r--test/Transforms/LICM/crash.ll1
-rw-r--r--test/Transforms/LICM/debug-value.ll16
-rw-r--r--test/Transforms/LICM/extra-copies.ll1
-rw-r--r--test/Transforms/LICM/funclet.ll1
-rw-r--r--test/Transforms/LICM/hoist-bitcast-load.ll1
-rw-r--r--test/Transforms/LICM/hoist-deref-load.ll123
-rw-r--r--test/Transforms/LICM/hoist-nounwind.ll72
-rw-r--r--test/Transforms/LICM/hoist-round.ll62
-rw-r--r--test/Transforms/LICM/hoisting.ll24
-rw-r--r--test/Transforms/LICM/lcssa-ssa-promoter.ll1
-rw-r--r--test/Transforms/LICM/no-preheader-test.ll1
-rw-r--r--test/Transforms/LICM/pr26843.ll32
-rw-r--r--test/Transforms/LICM/pr27262.ll33
-rw-r--r--test/Transforms/LICM/preheader-safe.ll4
-rw-r--r--test/Transforms/LICM/promote-order.ll1
-rw-r--r--test/Transforms/LICM/promote-tls.ll134
-rw-r--r--test/Transforms/LICM/scalar-promote-memmodel.ll1
-rw-r--r--test/Transforms/LICM/scalar_promote-unwind.ll72
-rw-r--r--test/Transforms/LICM/scalar_promote.ll3
-rw-r--r--test/Transforms/LICM/speculate.ll1
-rw-r--r--test/Transforms/LICM/update-scev.ll31
-rw-r--r--test/Transforms/LICM/volatile-alias.ll3
-rw-r--r--test/Transforms/LoadCombine/load-combine-negativegep.ll19
-rw-r--r--test/Transforms/LoadStoreVectorizer/AMDGPU/aa-metadata.ll32
-rw-r--r--test/Transforms/LoadStoreVectorizer/AMDGPU/extended-index.ll150
-rw-r--r--test/Transforms/LoadStoreVectorizer/AMDGPU/insertion-point.ll62
-rw-r--r--test/Transforms/LoadStoreVectorizer/AMDGPU/interleaved-mayalias-store.ll28
-rw-r--r--test/Transforms/LoadStoreVectorizer/AMDGPU/lit.local.cfg3
-rw-r--r--test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll53
-rw-r--r--test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll638
-rw-r--r--test/Transforms/LoadStoreVectorizer/AMDGPU/merge-vectors.ll91
-rw-r--r--test/Transforms/LoadStoreVectorizer/AMDGPU/missing-alignment.ll30
-rw-r--r--test/Transforms/LoadStoreVectorizer/AMDGPU/no-implicit-float.ll20
-rw-r--r--test/Transforms/LoadStoreVectorizer/AMDGPU/optnone.ll22
-rw-r--r--test/Transforms/LoadStoreVectorizer/AMDGPU/pointer-elements.ll311
-rw-r--r--test/Transforms/LoadStoreVectorizer/AMDGPU/weird-type-accesses.ll199
-rw-r--r--test/Transforms/LoadStoreVectorizer/X86/correct-order.ll26
-rw-r--r--test/Transforms/LoadStoreVectorizer/X86/lit.local.cfg (renamed from test/Transforms/TailDup/X86/lit.local.cfg)0
-rw-r--r--test/Transforms/LoadStoreVectorizer/X86/preserve-order32.ll28
-rw-r--r--test/Transforms/LoadStoreVectorizer/X86/preserve-order64.ll77
-rw-r--r--test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll87
-rw-r--r--test/Transforms/LoopDataPrefetch/AArch64/kryo-large-stride.ll51
-rw-r--r--test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll52
-rw-r--r--test/Transforms/LoopDataPrefetch/AArch64/lit.local.cfg4
-rw-r--r--test/Transforms/LoopDataPrefetch/AArch64/opt-remark.ll78
-rw-r--r--test/Transforms/LoopDataPrefetch/PowerPC/basic.ll25
-rw-r--r--test/Transforms/LoopDataPrefetch/PowerPC/lit.local.cfg2
-rw-r--r--test/Transforms/LoopDeletion/multiple-exit-conditions.ll1
-rw-r--r--test/Transforms/LoopDeletion/multiple-exits.ll58
-rw-r--r--test/Transforms/LoopDeletion/update-scev.ll56
-rw-r--r--test/Transforms/LoopDistribute/diagnostics-with-hotness-lazy-BFI.ll82
-rw-r--r--test/Transforms/LoopDistribute/diagnostics-with-hotness.ll77
-rw-r--r--test/Transforms/LoopDistribute/diagnostics.ll176
-rw-r--r--test/Transforms/LoopDistribute/metadata.ll149
-rw-r--r--test/Transforms/LoopDistribute/pr28443.ll36
-rw-r--r--test/Transforms/LoopDistribute/symbolic-stride.ll65
-rw-r--r--test/Transforms/LoopIdiom/AMDGPU/popcnt.ll25
-rw-r--r--test/Transforms/LoopIdiom/basic.ll37
-rw-r--r--test/Transforms/LoopIdiom/debug-line.ll6
-rw-r--r--test/Transforms/LoopIdiom/nontemporal_store.ll32
-rw-r--r--test/Transforms/LoopIdiom/pr28196.ll26
-rw-r--r--test/Transforms/LoopIdiom/struct.ll221
-rw-r--r--test/Transforms/LoopIdiom/struct_pattern.ll186
-rw-r--r--test/Transforms/LoopIdiom/unroll.ll80
-rw-r--r--test/Transforms/LoopIdiom/unwind.ll33
-rw-r--r--test/Transforms/LoopLoadElim/cond-load.ll42
-rw-r--r--test/Transforms/LoopLoadElim/forward.ll2
-rw-r--r--test/Transforms/LoopLoadElim/loop-simplify-dep.ll33
-rw-r--r--test/Transforms/LoopLoadElim/non-consecutive.ll43
-rw-r--r--test/Transforms/LoopLoadElim/opt-size.ll76
-rw-r--r--test/Transforms/LoopLoadElim/symbolic-stride.ll92
-rw-r--r--test/Transforms/LoopLoadElim/type-mismatch.ll89
-rw-r--r--test/Transforms/LoopReroll/basic.ll8
-rw-r--r--test/Transforms/LoopReroll/basic32iters.ll328
-rw-r--r--test/Transforms/LoopReroll/complex_reroll.ll134
-rw-r--r--test/Transforms/LoopReroll/indvar_with_ext.ll186
-rw-r--r--test/Transforms/LoopReroll/nonconst_lb.ll8
-rw-r--r--test/Transforms/LoopReroll/ptrindvar.ll81
-rw-r--r--test/Transforms/LoopReroll/reroll_with_dbg.ll5
-rw-r--r--test/Transforms/LoopRotate/basic.ll2
-rw-r--r--test/Transforms/LoopRotate/convergent.ll31
-rw-r--r--test/Transforms/LoopRotate/dbgvalue.ll46
-rw-r--r--test/Transforms/LoopSimplify/2004-04-13-LoopSimplifyUpdateDomFrontier.ll2
-rw-r--r--test/Transforms/LoopSimplify/basictest.ll1
-rw-r--r--test/Transforms/LoopSimplify/dbg-loc.ll7
-rw-r--r--test/Transforms/LoopSimplify/pr26682.ll32
-rw-r--r--test/Transforms/LoopSimplify/single-backedge.ll7
-rw-r--r--test/Transforms/LoopSimplifyCFG/merge-header.ll35
-rw-r--r--test/Transforms/LoopStrengthReduce/AArch64/lsr-reuse.ll34
-rw-r--r--test/Transforms/LoopStrengthReduce/AMDGPU/lsr-postinc-pos-addrspace.ll62
-rw-r--r--test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll2
-rw-r--r--test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll5
-rw-r--r--test/Transforms/LoopStrengthReduce/pr27056.ll50
-rw-r--r--test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll1
-rw-r--r--test/Transforms/LoopStrengthReduce/scev-insertpt-bug.ll47
-rw-r--r--test/Transforms/LoopUnroll/AArch64/runtime-loop.ll20
-rw-r--r--test/Transforms/LoopUnroll/AMDGPU/unroll-barrier.ll12
-rw-r--r--test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll35
-rw-r--r--test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll49
-rw-r--r--test/Transforms/LoopUnroll/X86/mmx.ll6
-rw-r--r--test/Transforms/LoopUnroll/convergent.ll83
-rw-r--r--test/Transforms/LoopUnroll/full-unroll-crashers.ll124
-rw-r--r--test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll34
-rw-r--r--test/Transforms/LoopUnroll/full-unroll-heuristics-cast.ll97
-rw-r--r--test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll131
-rw-r--r--test/Transforms/LoopUnroll/full-unroll-heuristics-dce.ll38
-rw-r--r--test/Transforms/LoopUnroll/full-unroll-heuristics-geps.ll28
-rw-r--r--test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll34
-rw-r--r--test/Transforms/LoopUnroll/nsw-tripcount.ll32
-rw-r--r--test/Transforms/LoopUnroll/partial-unroll-const-bounds.ll29
-rw-r--r--test/Transforms/LoopUnroll/partial-unroll-optsize.ll53
-rw-r--r--test/Transforms/LoopUnroll/pr27157.ll53
-rw-r--r--test/Transforms/LoopUnroll/pr28132.ll77
-rw-r--r--test/Transforms/LoopUnroll/rebuild_lcssa.ll34
-rw-r--r--test/Transforms/LoopUnroll/runtime-loop.ll60
-rw-r--r--test/Transforms/LoopUnroll/runtime-loop1.ll47
-rw-r--r--test/Transforms/LoopUnroll/runtime-loop2.ll16
-rw-r--r--test/Transforms/LoopUnroll/runtime-loop4.ll22
-rw-r--r--test/Transforms/LoopUnroll/runtime-loop5.ll7
-rw-r--r--test/Transforms/LoopUnroll/tripcount-overflow.ll30
-rw-r--r--test/Transforms/LoopUnroll/unroll-cleanup.ll8
-rw-r--r--test/Transforms/LoopUnroll/unroll-cleanuppad.ll40
-rw-r--r--test/Transforms/LoopUnroll/unroll-count.ll25
-rw-r--r--test/Transforms/LoopUnroll/unroll-opt-attribute.ll130
-rw-r--r--test/Transforms/LoopUnroll/unroll-pragmas.ll76
-rw-r--r--test/Transforms/LoopUnswitch/2011-09-26-EHCrash.ll2
-rw-r--r--test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll10
-rw-r--r--test/Transforms/LoopUnswitch/exponential-behavior.ll51
-rw-r--r--test/Transforms/LoopUnswitch/guards.ll97
-rw-r--r--test/Transforms/LoopUnswitch/infinite-loop.ll6
-rw-r--r--test/Transforms/LoopUnswitch/msan.ll153
-rw-r--r--test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll166
-rw-r--r--test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll299
-rw-r--r--test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll42
-rw-r--r--test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll58
-rw-r--r--test/Transforms/LoopVectorize/AArch64/max-vf-for-interleaved.ll56
-rw-r--r--test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll47
-rw-r--r--test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll330
-rw-r--r--test/Transforms/LoopVectorize/PowerPC/large-loop-rdx.ll4
-rw-r--r--test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll4
-rw-r--r--test/Transforms/LoopVectorize/PowerPC/vectorize-only-for-real.ll62
-rw-r--r--test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll2
-rw-r--r--test/Transforms/LoopVectorize/X86/avx1.ll17
-rw-r--r--test/Transforms/LoopVectorize/X86/cost-model.ll41
-rw-r--r--test/Transforms/LoopVectorize/X86/force-ifcvt.ll41
-rw-r--r--test/Transforms/LoopVectorize/X86/funclet.ll45
-rw-r--r--test/Transforms/LoopVectorize/X86/gather_scatter.ll236
-rw-r--r--test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll75
-rw-r--r--test/Transforms/LoopVectorize/X86/masked_load_store.ll164
-rw-r--r--test/Transforms/LoopVectorize/X86/max-mstore.ll46
-rw-r--r--test/Transforms/LoopVectorize/X86/no_fpmath.ll9
-rw-r--r--test/Transforms/LoopVectorize/X86/propagate-metadata.ll25
-rw-r--r--test/Transforms/LoopVectorize/X86/reg-usage.ll71
-rw-r--r--test/Transforms/LoopVectorize/X86/register-assumption.ll32
-rwxr-xr-xtest/Transforms/LoopVectorize/X86/scatter_crash.ll218
-rw-r--r--test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll4
-rw-r--r--test/Transforms/LoopVectorize/X86/uniform-phi.ll50
-rw-r--r--test/Transforms/LoopVectorize/X86/uniform_load.ll47
-rw-r--r--test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll2
-rw-r--r--test/Transforms/LoopVectorize/X86/vectorization-remarks-loopid-dbg.ll74
-rw-r--r--test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll12
-rw-r--r--test/Transforms/LoopVectorize/X86/vectorization-remarks-profitable.ll7
-rw-r--r--test/Transforms/LoopVectorize/X86/vectorization-remarks.ll7
-rw-r--r--test/Transforms/LoopVectorize/X86/vectorize-only-for-real.ll39
-rw-r--r--test/Transforms/LoopVectorize/cast-induction.ll2
-rw-r--r--test/Transforms/LoopVectorize/conditional-assignment.ll6
-rw-r--r--test/Transforms/LoopVectorize/control-flow.ll5
-rw-r--r--test/Transforms/LoopVectorize/dbg.value.ll5
-rw-r--r--test/Transforms/LoopVectorize/debugloc.ll5
-rw-r--r--test/Transforms/LoopVectorize/gcc-examples.ll2
-rw-r--r--test/Transforms/LoopVectorize/gep_with_bitcast.ll11
-rw-r--r--test/Transforms/LoopVectorize/global_alias.ll8
-rw-r--r--test/Transforms/LoopVectorize/hints-trans.ll30
-rw-r--r--test/Transforms/LoopVectorize/if-conversion.ll2
-rw-r--r--test/Transforms/LoopVectorize/if-pred-stores.ll8
-rw-r--r--test/Transforms/LoopVectorize/induction-step.ll124
-rw-r--r--test/Transforms/LoopVectorize/induction.ll374
-rw-r--r--test/Transforms/LoopVectorize/induction_plus.ll9
-rw-r--r--test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll164
-rw-r--r--test/Transforms/LoopVectorize/interleaved-accesses.ll410
-rw-r--r--test/Transforms/LoopVectorize/iv_outside_user.ll135
-rw-r--r--test/Transforms/LoopVectorize/multiple-strides-vectorization.ll65
-rw-r--r--test/Transforms/LoopVectorize/no_array_bounds.ll5
-rw-r--r--test/Transforms/LoopVectorize/no_outside_user.ll32
-rw-r--r--test/Transforms/LoopVectorize/no_switch.ll5
-rw-r--r--test/Transforms/LoopVectorize/noalias-md-licm.ll59
-rw-r--r--test/Transforms/LoopVectorize/noalias-md.ll78
-rw-r--r--test/Transforms/LoopVectorize/phi-hang.ll2
-rw-r--r--test/Transforms/LoopVectorize/pr25281.ll59
-rw-r--r--test/Transforms/LoopVectorize/reverse_induction.ll44
-rw-r--r--test/Transforms/LoopVectorize/runtime-check.ll9
-rw-r--r--test/Transforms/LoopVectorize/same-base-access.ll10
-rw-r--r--test/Transforms/LoopVectorize/unsafe-dep-remark.ll74
-rw-r--r--test/Transforms/LoopVersioning/basic.ll47
-rw-r--r--test/Transforms/LoopVersioning/incorrect-phi.ll62
-rw-r--r--test/Transforms/LoopVersioning/lcssa.ll35
-rw-r--r--test/Transforms/LoopVersioning/noalias-version-twice.ll106
-rw-r--r--test/Transforms/LoopVersioning/noalias.ll54
-rw-r--r--test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll67
-rw-r--r--test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll52
-rw-r--r--test/Transforms/LoopVersioningLICM/loopversioningLICM3.ll45
-rw-r--r--test/Transforms/LoopVersioningLICM/metadata.ll104
-rw-r--r--test/Transforms/LowerAtomic/atomic-load.ll1
-rw-r--r--test/Transforms/LowerBitSets/constant.ll34
-rw-r--r--test/Transforms/LowerBitSets/layout.ll35
-rw-r--r--test/Transforms/LowerBitSets/nonglobal.ll19
-rw-r--r--test/Transforms/LowerBitSets/pr25902.ll21
-rw-r--r--test/Transforms/LowerBitSets/unnamed.ll20
-rw-r--r--test/Transforms/LowerExpectIntrinsic/basic.ll8
-rw-r--r--test/Transforms/LowerGuardIntrinsic/basic.ll76
-rw-r--r--test/Transforms/LowerGuardIntrinsic/with-calling-conv.ll15
-rw-r--r--test/Transforms/LowerTypeTests/constant.ll33
-rw-r--r--test/Transforms/LowerTypeTests/function-ext.ll (renamed from test/Transforms/LowerBitSets/function-ext.ll)12
-rw-r--r--test/Transforms/LowerTypeTests/function.ll (renamed from test/Transforms/LowerBitSets/function.ll)19
-rw-r--r--test/Transforms/LowerTypeTests/layout.ll27
-rw-r--r--test/Transforms/LowerTypeTests/nonstring.ll (renamed from test/Transforms/LowerBitSets/nonstring.ll)18
-rw-r--r--test/Transforms/LowerTypeTests/pr25902.ll19
-rw-r--r--test/Transforms/LowerTypeTests/section.ll25
-rw-r--r--test/Transforms/LowerTypeTests/simple.ll (renamed from test/Transforms/LowerBitSets/simple.ll)62
-rw-r--r--test/Transforms/LowerTypeTests/single-offset.ll (renamed from test/Transforms/LowerBitSets/single-offset.ll)23
-rw-r--r--test/Transforms/LowerTypeTests/unnamed.ll18
-rw-r--r--test/Transforms/Mem2Reg/ConvertDebugInfo.ll4
-rw-r--r--test/Transforms/Mem2Reg/ConvertDebugInfo2.ll6
-rw-r--r--test/Transforms/Mem2Reg/pr24179.ll1
-rw-r--r--test/Transforms/MemCpyOpt/callslot_throw.ll34
-rw-r--r--test/Transforms/MemCpyOpt/fca2memcpy.ll36
-rw-r--r--test/Transforms/MemCpyOpt/lifetime.ll25
-rw-r--r--test/Transforms/MemCpyOpt/loadstore-sret.ll2
-rw-r--r--test/Transforms/MemCpyOpt/profitable-memset.ll20
-rw-r--r--test/Transforms/MemCpyOpt/smaller.ll1
-rw-r--r--test/Transforms/MergeFunc/alloca.ll42
-rw-r--r--test/Transforms/MergeFunc/merge-weak-crash.ll47
-rw-r--r--test/Transforms/MergeFunc/phi-check-blocks.ll50
-rw-r--r--test/Transforms/NameAnonFunctions/rename.ll27
-rw-r--r--test/Transforms/ObjCARC/basic.ll31
-rw-r--r--test/Transforms/ObjCARC/comdat-ipo.ll53
-rw-r--r--test/Transforms/ObjCARC/contract-marker.ll24
-rw-r--r--test/Transforms/ObjCARC/contract-storestrong.ll26
-rw-r--r--test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll7
-rw-r--r--test/Transforms/ObjCARC/tail-call-invariant-enforcement.ll15
-rw-r--r--test/Transforms/ObjCARC/unsafe-claim-rv.ll47
-rw-r--r--test/Transforms/PGOProfile/Inputs/PR28219.proftext10
-rw-r--r--test/Transforms/PGOProfile/Inputs/branch1.proftext2
-rw-r--r--test/Transforms/PGOProfile/Inputs/branch2.proftext2
-rw-r--r--test/Transforms/PGOProfile/Inputs/criticaledge.proftext2
-rw-r--r--test/Transforms/PGOProfile/Inputs/diag.proftext2
-rw-r--r--test/Transforms/PGOProfile/Inputs/diag_FE.proftext5
-rw-r--r--test/Transforms/PGOProfile/Inputs/indirect_call.proftext43
-rw-r--r--test/Transforms/PGOProfile/Inputs/landingpad.proftext2
-rw-r--r--test/Transforms/PGOProfile/Inputs/loop1.proftext2
-rw-r--r--test/Transforms/PGOProfile/Inputs/loop2.proftext2
-rw-r--r--test/Transforms/PGOProfile/Inputs/switch.proftext2
-rw-r--r--test/Transforms/PGOProfile/Inputs/thinlto_indirect_call_promotion.ll7
-rw-r--r--test/Transforms/PGOProfile/PR28219.ll12
-rw-r--r--test/Transforms/PGOProfile/X86/lit.local.cfg (renamed from test/Transforms/TailDup/lit.local.cfg)1
-rw-r--r--test/Transforms/PGOProfile/X86/macho.ll10
-rw-r--r--test/Transforms/PGOProfile/branch1.ll23
-rw-r--r--test/Transforms/PGOProfile/branch2.ll4
-rw-r--r--test/Transforms/PGOProfile/comdat_internal.ll26
-rw-r--r--test/Transforms/PGOProfile/criticaledge.ll4
-rw-r--r--test/Transforms/PGOProfile/diag_FE_profile.ll13
-rw-r--r--test/Transforms/PGOProfile/diag_mismatch.ll1
-rw-r--r--test/Transforms/PGOProfile/diag_no_funcprofdata.ll1
-rw-r--r--test/Transforms/PGOProfile/diag_no_profile.ll1
-rw-r--r--test/Transforms/PGOProfile/do-not-instrument.ll28
-rw-r--r--test/Transforms/PGOProfile/icp_covariant_call_return.ll45
-rw-r--r--test/Transforms/PGOProfile/icp_covariant_invoke_return.ll111
-rw-r--r--test/Transforms/PGOProfile/icp_invoke.ll105
-rw-r--r--test/Transforms/PGOProfile/icp_mismatch_msg.ll40
-rw-r--r--test/Transforms/PGOProfile/icp_vararg.ll34
-rw-r--r--test/Transforms/PGOProfile/indirect_call_annotation.ll36
-rw-r--r--test/Transforms/PGOProfile/indirect_call_profile.ll71
-rw-r--r--test/Transforms/PGOProfile/indirect_call_promotion.ll56
-rw-r--r--test/Transforms/PGOProfile/landingpad.ll4
-rw-r--r--test/Transforms/PGOProfile/loop1.ll4
-rw-r--r--test/Transforms/PGOProfile/loop2.ll4
-rw-r--r--test/Transforms/PGOProfile/preinline.ll22
-rw-r--r--test/Transforms/PGOProfile/single_bb.ll3
-rw-r--r--test/Transforms/PGOProfile/statics_counter_naming.ll11
-rw-r--r--test/Transforms/PGOProfile/switch.ll4
-rw-r--r--test/Transforms/PGOProfile/thinlto_indirect_call_promotion.ll32
-rw-r--r--test/Transforms/PartiallyInlineLibCalls/bad-prototype.ll1
-rw-r--r--test/Transforms/PhaseOrdering/globalaa-retained.ll26
-rw-r--r--test/Transforms/PlaceSafepoints/basic.ll37
-rw-r--r--test/Transforms/PlaceSafepoints/call-in-loop.ll7
-rw-r--r--test/Transforms/PlaceSafepoints/finite-loops.ll32
-rw-r--r--test/Transforms/PlaceSafepoints/memset.ll2
-rw-r--r--test/Transforms/PlaceSafepoints/no-statepoints.ll23
-rw-r--r--test/Transforms/PlaceSafepoints/split-backedge.ll8
-rw-r--r--test/Transforms/PlaceSafepoints/statepoint-coreclr.ll6
-rw-r--r--test/Transforms/PlaceSafepoints/statepoint-frameescape.ll4
-rw-r--r--test/Transforms/PreISelIntrinsicLowering/load-relative.ll27
-rw-r--r--test/Transforms/Reassociate/basictest.ll1
-rw-r--r--test/Transforms/Reassociate/prev_insts_canonicalized.ll57
-rw-r--r--test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll6
-rw-r--r--test/Transforms/Reassociate/xor_reassoc.ll4
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-1.ll13
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-10.ll33
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-11.ll6
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-12.ll6
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-13.ll6
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-2.ll10
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-3.ll7
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-4.ll40
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-5.ll14
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-6.ll24
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-7.ll29
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-8.ll15
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers-9.ll9
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-pointers.ll109
-rw-r--r--test/Transforms/RewriteStatepointsForGC/base-vector.ll123
-rw-r--r--test/Transforms/RewriteStatepointsForGC/basic.ll (renamed from test/Transforms/RewriteStatepointsForGC/deopt-bundles/basic.ll)10
-rw-r--r--test/Transforms/RewriteStatepointsForGC/basics.ll54
-rw-r--r--test/Transforms/RewriteStatepointsForGC/call-gc-result.ll (renamed from test/Transforms/PlaceSafepoints/call_gc_result.ll)6
-rw-r--r--test/Transforms/RewriteStatepointsForGC/codegen-cond.ll87
-rw-r--r--test/Transforms/RewriteStatepointsForGC/constants.ll205
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-1.ll25
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-10.ll35
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-11.ll24
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-2.ll19
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-3.ll19
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-4.ll44
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-5.ll28
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-6.ll37
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-7.ll45
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-8.ll37
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-9.ll20
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers.ll151
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-vector.ll167
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/basics.ll88
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/codegen-cond.ll81
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/constants.ll51
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/deref-pointers.ll104
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/live-vector.ll149
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/liveness-basics.ll165
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/patchable-statepoints.ll44
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/preprocess.ll62
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/relocation.ll279
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-bundles/rematerialize-derived-pointers.ll150
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-intrinsic-cconv.ll16
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deopt-intrinsic.ll35
-rw-r--r--test/Transforms/RewriteStatepointsForGC/deref-pointers.ll102
-rw-r--r--test/Transforms/RewriteStatepointsForGC/gc-relocate-creation.ll (renamed from test/Transforms/RewriteStatepointsForGC/deopt-bundles/gc-relocate-creation.ll)2
-rw-r--r--test/Transforms/RewriteStatepointsForGC/gc_relocate_creation.ll20
-rw-r--r--test/Transforms/RewriteStatepointsForGC/invokes.ll (renamed from test/Transforms/PlaceSafepoints/invokes.ll)28
-rw-r--r--test/Transforms/RewriteStatepointsForGC/leaf-function.ll (renamed from test/Transforms/PlaceSafepoints/leaf-function.ll)8
-rw-r--r--test/Transforms/RewriteStatepointsForGC/live-vector-nosplit.ll (renamed from test/Transforms/RewriteStatepointsForGC/deopt-bundles/live-vector-nosplit.ll)8
-rw-r--r--test/Transforms/RewriteStatepointsForGC/live-vector.ll152
-rw-r--r--test/Transforms/RewriteStatepointsForGC/liveness-basics.ll81
-rw-r--r--test/Transforms/RewriteStatepointsForGC/patchable-statepoints.ll (renamed from test/Transforms/PlaceSafepoints/patchable-statepoints.ll)2
-rw-r--r--test/Transforms/RewriteStatepointsForGC/preprocess.ll37
-rw-r--r--test/Transforms/RewriteStatepointsForGC/relocate-invoke-result.ll (renamed from test/Transforms/RewriteStatepointsForGC/deopt-bundles/relocate-invoke-result.ll)2
-rw-r--r--test/Transforms/RewriteStatepointsForGC/relocate_invoke_result.ll33
-rw-r--r--test/Transforms/RewriteStatepointsForGC/relocation.ll140
-rw-r--r--test/Transforms/RewriteStatepointsForGC/rematerialize-derived-pointers.ll261
-rw-r--r--test/Transforms/RewriteStatepointsForGC/rewrite-invoke.ll (renamed from test/Transforms/RewriteStatepointsForGC/deopt-bundles/rewrite-invoke.ll)2
-rw-r--r--test/Transforms/RewriteStatepointsForGC/statepoint-calling-conventions.ll (renamed from test/Transforms/PlaceSafepoints/statepoint-calling-conventions.ll)8
-rw-r--r--test/Transforms/RewriteStatepointsForGC/statepoint-coreclr.ll31
-rw-r--r--test/Transforms/RewriteStatepointsForGC/statepoint-format.ll (renamed from test/Transforms/PlaceSafepoints/statepoint-format.ll)8
-rw-r--r--test/Transforms/RewriteStatepointsForGC/two-invokes-one-landingpad.ll6
-rw-r--r--test/Transforms/SCCP/bitcast.ll9
-rw-r--r--test/Transforms/SCCP/comdat-ipo.ll28
-rw-r--r--test/Transforms/SCCP/constant-struct.ll72
-rw-r--r--test/Transforms/SCCP/global-alias-constprop.ll1
-rw-r--r--test/Transforms/SCCP/ipsccp-basic.ll15
-rw-r--r--test/Transforms/SCCP/pr27712.ll30
-rw-r--r--test/Transforms/SCCP/ub-shift.ll69
-rw-r--r--test/Transforms/SCCP/undef-resolve.ll10
-rw-r--r--test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll258
-rw-r--r--test/Transforms/SLPVectorizer/AArch64/getelementptr.ll111
-rw-r--r--test/Transforms/SLPVectorizer/AArch64/minimum-sizes.ll55
-rw-r--r--test/Transforms/SLPVectorizer/PowerPC/lit.local.cfg2
-rw-r--r--test/Transforms/SLPVectorizer/PowerPC/pr27897.ll29
-rw-r--r--test/Transforms/SLPVectorizer/X86/bitreverse.ll741
-rw-r--r--test/Transforms/SLPVectorizer/X86/bswap.ll247
-rw-r--r--test/Transforms/SLPVectorizer/X86/call.ll23
-rw-r--r--test/Transforms/SLPVectorizer/X86/cast.ll102
-rw-r--r--test/Transforms/SLPVectorizer/X86/ctlz.ll1222
-rw-r--r--test/Transforms/SLPVectorizer/X86/ctpop.ll403
-rw-r--r--test/Transforms/SLPVectorizer/X86/cttz.ll1222
-rw-r--r--test/Transforms/SLPVectorizer/X86/debug_info.ll9
-rw-r--r--test/Transforms/SLPVectorizer/X86/fma.ll562
-rw-r--r--test/Transforms/SLPVectorizer/X86/fround.ll2158
-rw-r--r--test/Transforms/SLPVectorizer/X86/funclet.ll48
-rw-r--r--test/Transforms/SLPVectorizer/X86/gep.ll1
-rw-r--r--test/Transforms/SLPVectorizer/X86/gep_mismatch.ll22
-rw-r--r--test/Transforms/SLPVectorizer/X86/insertvalue.ll189
-rw-r--r--test/Transforms/SLPVectorizer/X86/ordering.ll2
-rw-r--r--test/Transforms/SLPVectorizer/X86/pr27163.ll50
-rw-r--r--test/Transforms/SLPVectorizer/X86/sqrt.ll274
-rw-r--r--test/Transforms/SLPVectorizer/X86/vector_gep.ll24
-rw-r--r--test/Transforms/SROA/address-spaces.ll17
-rw-r--r--test/Transforms/SROA/basictest.ll36
-rw-r--r--test/Transforms/SROA/dbg-single-piece.ll38
-rw-r--r--test/Transforms/SROA/pr26972.ll17
-rw-r--r--test/Transforms/SafeStack/AArch64/abi.ll2
-rw-r--r--test/Transforms/SafeStack/AArch64/abi_ssp.ll22
-rw-r--r--test/Transforms/SafeStack/ARM/setjmp.ll4
-rw-r--r--test/Transforms/SafeStack/X86/abi_ssp.ll19
-rw-r--r--test/Transforms/SafeStack/X86/ssp.ll30
-rw-r--r--test/Transforms/SafeStack/array-aligned.ll7
-rw-r--r--test/Transforms/SafeStack/array.ll7
-rw-r--r--test/Transforms/SafeStack/coloring.ll44
-rw-r--r--test/Transforms/SafeStack/coloring2.ll482
-rw-r--r--test/Transforms/SafeStack/debug-loc-dynamic.ll57
-rw-r--r--test/Transforms/SafeStack/debug-loc.ll13
-rw-r--r--test/Transforms/SafeStack/debug-loc2.ll98
-rw-r--r--test/Transforms/SafeStack/dynamic-alloca.ll3
-rw-r--r--test/Transforms/SafeStack/phi.ll35
-rw-r--r--test/Transforms/SafeStack/setjmp2.ll5
-rw-r--r--test/Transforms/SafeStack/sink-to-use.ll22
-rw-r--r--test/Transforms/SafeStack/struct.ll7
-rw-r--r--test/Transforms/SampleProfile/Inputs/fnptr.binprofbin105 -> 221 bytes
-rw-r--r--test/Transforms/SampleProfile/Inputs/inline-act.prof3
-rw-r--r--test/Transforms/SampleProfile/Inputs/inline-combine.prof2
-rw-r--r--test/Transforms/SampleProfile/Inputs/propagate.prof29
-rw-r--r--test/Transforms/SampleProfile/Inputs/summary.prof4
-rw-r--r--test/Transforms/SampleProfile/branch.ll6
-rw-r--r--test/Transforms/SampleProfile/calls.ll18
-rw-r--r--test/Transforms/SampleProfile/cov-zero-samples.ll12
-rw-r--r--test/Transforms/SampleProfile/coverage-warning.ll6
-rw-r--r--test/Transforms/SampleProfile/discriminator.ll6
-rw-r--r--test/Transforms/SampleProfile/entry_counts.ll13
-rw-r--r--test/Transforms/SampleProfile/fnptr.ll19
-rw-r--r--test/Transforms/SampleProfile/gcc-simple.ll8
-rw-r--r--test/Transforms/SampleProfile/inline-act.ll72
-rw-r--r--test/Transforms/SampleProfile/inline-combine.ll47
-rw-r--r--test/Transforms/SampleProfile/inline-coverage.ll12
-rw-r--r--test/Transforms/SampleProfile/inline-hint.ll38
-rw-r--r--test/Transforms/SampleProfile/inline.ll9
-rw-r--r--test/Transforms/SampleProfile/nolocinfo.ll6
-rw-r--r--test/Transforms/SampleProfile/offset.ll6
-rw-r--r--test/Transforms/SampleProfile/propagate.ll396
-rw-r--r--test/Transforms/SampleProfile/remarks.ll8
-rw-r--r--test/Transforms/SampleProfile/summary.ll14
-rw-r--r--test/Transforms/SampleProfile/syntax.ll9
-rw-r--r--test/Transforms/ScalarRepl/2003-05-29-ArrayFail.ll13
-rw-r--r--test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll12
-rw-r--r--test/Transforms/ScalarRepl/2003-10-29-ArrayProblem.ll16
-rw-r--r--test/Transforms/ScalarRepl/2006-11-07-InvalidArrayPromote.ll20
-rw-r--r--test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll24
-rw-r--r--test/Transforms/ScalarRepl/2007-11-03-bigendian_apint.ll36
-rw-r--r--test/Transforms/ScalarRepl/2008-01-29-PromoteBug.ll21
-rw-r--r--test/Transforms/ScalarRepl/2008-02-28-SubElementExtractCrash.ll16
-rw-r--r--test/Transforms/ScalarRepl/2008-06-05-loadstore-agg.ll33
-rw-r--r--test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll17
-rw-r--r--test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll23
-rw-r--r--test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll25
-rw-r--r--test/Transforms/ScalarRepl/2009-02-02-ScalarPromoteOutOfRange.ll16
-rw-r--r--test/Transforms/ScalarRepl/2009-02-05-LoadFCA.ll20
-rw-r--r--test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll19
-rw-r--r--test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll90
-rw-r--r--test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll18
-rw-r--r--test/Transforms/ScalarRepl/2011-05-06-CapturedAlloca.ll26
-rw-r--r--test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll75
-rw-r--r--test/Transforms/ScalarRepl/2011-06-17-VectorPartialMemset.ll37
-rw-r--r--test/Transforms/ScalarRepl/2011-09-22-PHISpeculateInvoke.ll40
-rw-r--r--test/Transforms/ScalarRepl/2011-10-11-VectorMemset.ll22
-rw-r--r--test/Transforms/ScalarRepl/2011-10-22-VectorCrash.ll19
-rw-r--r--test/Transforms/ScalarRepl/2011-11-11-EmptyStruct.ll26
-rw-r--r--test/Transforms/ScalarRepl/AggregatePromote.ll51
-rw-r--r--test/Transforms/ScalarRepl/DifferingTypes.ll16
-rw-r--r--test/Transforms/ScalarRepl/address-space.ll35
-rw-r--r--test/Transforms/ScalarRepl/arraytest.ll11
-rw-r--r--test/Transforms/ScalarRepl/badarray.ll57
-rw-r--r--test/Transforms/ScalarRepl/basictest.ll30
-rw-r--r--test/Transforms/ScalarRepl/bitfield-sroa.ll17
-rw-r--r--test/Transforms/ScalarRepl/copy-aggregate.ll107
-rw-r--r--test/Transforms/ScalarRepl/crash.ll286
-rw-r--r--test/Transforms/ScalarRepl/debuginfo-preserved.ll65
-rw-r--r--test/Transforms/ScalarRepl/inline-vector.ll53
-rw-r--r--test/Transforms/ScalarRepl/lifetime.ll139
-rw-r--r--test/Transforms/ScalarRepl/load-store-aggregate.ll31
-rw-r--r--test/Transforms/ScalarRepl/memcpy-align.ll32
-rw-r--r--test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll23
-rw-r--r--test/Transforms/ScalarRepl/memset-aggregate.ll67
-rw-r--r--test/Transforms/ScalarRepl/negative-memset.ll20
-rw-r--r--test/Transforms/ScalarRepl/nonzero-first-index.ll53
-rw-r--r--test/Transforms/ScalarRepl/not-a-vector.ll20
-rw-r--r--test/Transforms/ScalarRepl/only-memcpy-uses.ll27
-rw-r--r--test/Transforms/ScalarRepl/phi-cycle.ll80
-rw-r--r--test/Transforms/ScalarRepl/phi-select.ll153
-rw-r--r--test/Transforms/ScalarRepl/phinodepromote.ll34
-rw-r--r--test/Transforms/ScalarRepl/select_promote.ll18
-rw-r--r--test/Transforms/ScalarRepl/sroa-fca.ll21
-rw-r--r--test/Transforms/ScalarRepl/sroa_two.ll13
-rw-r--r--test/Transforms/ScalarRepl/union-fp-int.ll14
-rw-r--r--test/Transforms/ScalarRepl/union-packed.ll14
-rw-r--r--test/Transforms/ScalarRepl/union-pointer.ll74
-rw-r--r--test/Transforms/ScalarRepl/vector_memcpy.ll28
-rw-r--r--test/Transforms/ScalarRepl/vector_promote.ll137
-rw-r--r--test/Transforms/ScalarRepl/vectors-with-mismatched-elements.ll27
-rw-r--r--test/Transforms/ScalarRepl/volatile.ll13
-rw-r--r--test/Transforms/Scalarizer/crash-bug.ll24
-rw-r--r--test/Transforms/Scalarizer/dbginfo.ll5
-rw-r--r--test/Transforms/Scalarizer/dbgloc-bug.ll43
-rw-r--r--test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll6
-rw-r--r--test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll10
-rw-r--r--test/Transforms/SeparateConstOffsetFromGEP/NVPTX/value-tracking-domtree.ll33
-rw-r--r--test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch.ll139
-rw-r--r--test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll32
-rw-r--r--test/Transforms/SimplifyCFG/2008-07-13-InfLoopMiscompile.ll47
-rw-r--r--test/Transforms/SimplifyCFG/EqualPHIEdgeBlockMerge.ll32
-rw-r--r--test/Transforms/SimplifyCFG/InfLoop.ll101
-rw-r--r--test/Transforms/SimplifyCFG/PR16069.ll13
-rw-r--r--test/Transforms/SimplifyCFG/PR27615-simplify-cond-br.ll68
-rw-r--r--test/Transforms/SimplifyCFG/PhiBlockMerge.ll29
-rw-r--r--test/Transforms/SimplifyCFG/PhiEliminate2.ll37
-rw-r--r--test/Transforms/SimplifyCFG/UnreachableEliminate.ll31
-rw-r--r--test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll4
-rw-r--r--test/Transforms/SimplifyCFG/attr-convergent.ll28
-rw-r--r--test/Transforms/SimplifyCFG/basictest.ll52
-rw-r--r--test/Transforms/SimplifyCFG/branch-fold-dbg.ll6
-rw-r--r--test/Transforms/SimplifyCFG/combine-parallel-mem-md.ll55
-rw-r--r--test/Transforms/SimplifyCFG/empty-cleanuppad.ll55
-rw-r--r--test/Transforms/SimplifyCFG/guards.ll86
-rw-r--r--test/Transforms/SimplifyCFG/hoist-dbgvalue.ll7
-rw-r--r--test/Transforms/SimplifyCFG/implied-cond-matching-false-dest.ll339
-rw-r--r--test/Transforms/SimplifyCFG/implied-cond-matching-imm.ll123
-rw-r--r--test/Transforms/SimplifyCFG/implied-cond-matching.ll1029
-rw-r--r--test/Transforms/SimplifyCFG/merge-cleanuppads.ll39
-rw-r--r--test/Transforms/SimplifyCFG/merge-cond-stores-2.ll4
-rw-r--r--test/Transforms/SimplifyCFG/preserve-branchweights.ll179
-rw-r--r--test/Transforms/SimplifyCFG/speculate-store.ll100
-rw-r--r--test/Transforms/SimplifyCFG/switch-masked-bits.ll59
-rw-r--r--test/Transforms/SimplifyCFG/switch_create.ll106
-rw-r--r--test/Transforms/SimplifyCFG/switch_switch_fold.ll26
-rw-r--r--test/Transforms/SimplifyCFG/switch_thread.ll38
-rw-r--r--test/Transforms/SimplifyCFG/trap-debugloc.ll6
-rw-r--r--test/Transforms/SimplifyCFG/two-entry-phi-return.ll25
-rw-r--r--test/Transforms/SimplifyCFG/unreachable-cleanuppad.ll40
-rw-r--r--test/Transforms/Sink/basic.ll1
-rw-r--r--test/Transforms/Sink/call.ll112
-rw-r--r--test/Transforms/Sink/convergent.ll1
-rw-r--r--test/Transforms/StraightLineStrengthReduce/slsr-add.ll15
-rw-r--r--test/Transforms/StraightLineStrengthReduce/slsr-gep.ll89
-rw-r--r--test/Transforms/StripSymbols/2010-06-30-StripDebug.ll11
-rw-r--r--test/Transforms/StripSymbols/2010-08-25-crash.ll5
-rw-r--r--test/Transforms/StripSymbols/strip-dead-debug-info.ll13
-rw-r--r--test/Transforms/StructurizeCFG/invert-constantexpr.ll30
-rw-r--r--test/Transforms/StructurizeCFG/nested-loop-order.ll13
-rw-r--r--test/Transforms/TailCallElim/accum_recursion.ll1
-rw-r--r--test/Transforms/TailCallElim/dont_reorder_load.ll20
-rw-r--r--test/Transforms/TailCallElim/reorder_load.ll23
-rw-r--r--test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll28
-rw-r--r--test/Transforms/Util/MemorySSA/assume.ll19
-rw-r--r--test/Transforms/Util/MemorySSA/atomic-clobber.ll18
-rw-r--r--test/Transforms/Util/MemorySSA/cyclicphi.ll124
-rw-r--r--test/Transforms/Util/MemorySSA/forward-unreachable.ll23
-rw-r--r--test/Transforms/Util/MemorySSA/function-clobber.ll54
-rw-r--r--test/Transforms/Util/MemorySSA/function-mem-attrs.ll59
-rw-r--r--test/Transforms/Util/MemorySSA/livein.ll53
-rw-r--r--test/Transforms/Util/MemorySSA/load-invariant.ll25
-rw-r--r--test/Transforms/Util/MemorySSA/many-dom-backedge.ll77
-rw-r--r--test/Transforms/Util/MemorySSA/many-doms.ll67
-rw-r--r--test/Transforms/Util/MemorySSA/multi-edges.ll32
-rw-r--r--test/Transforms/Util/MemorySSA/multiple-backedges-hal.ll73
-rw-r--r--test/Transforms/Util/MemorySSA/multiple-locations.ll25
-rw-r--r--test/Transforms/Util/MemorySSA/no-disconnected.ll43
-rw-r--r--test/Transforms/Util/MemorySSA/optimize-use.ll37
-rw-r--r--test/Transforms/Util/MemorySSA/phi-translation.ll182
-rw-r--r--test/Transforms/Util/MemorySSA/volatile-clobber.ll22
-rw-r--r--test/Transforms/Util/simplify-dbg-declare-load.ll9
-rw-r--r--test/Transforms/Util/split-bit-piece.ll4
-rw-r--r--test/Transforms/Util/store-first-op.ll36
-rw-r--r--test/Transforms/WholeProgramDevirt/bad-read-from-vtable.ll63
-rw-r--r--test/Transforms/WholeProgramDevirt/constant-arg.ll77
-rw-r--r--test/Transforms/WholeProgramDevirt/devirt-single-impl-check.ll41
-rw-r--r--test/Transforms/WholeProgramDevirt/devirt-single-impl.ll31
-rw-r--r--test/Transforms/WholeProgramDevirt/expand-check.ll63
-rw-r--r--test/Transforms/WholeProgramDevirt/non-array-vtable.ll30
-rw-r--r--test/Transforms/WholeProgramDevirt/non-constant-vtable.ll32
-rw-r--r--test/Transforms/WholeProgramDevirt/uniform-retval-invoke.ll43
-rw-r--r--test/Transforms/WholeProgramDevirt/uniform-retval.ll36
-rw-r--r--test/Transforms/WholeProgramDevirt/unique-retval.ll56
-rw-r--r--test/Transforms/WholeProgramDevirt/vcp-accesses-memory.ll35
-rw-r--r--test/Transforms/WholeProgramDevirt/vcp-no-this.ll35
-rw-r--r--test/Transforms/WholeProgramDevirt/vcp-non-constant-arg.ll35
-rw-r--r--test/Transforms/WholeProgramDevirt/vcp-too-wide-ints.ll35
-rw-r--r--test/Transforms/WholeProgramDevirt/vcp-type-mismatch.ll65
-rw-r--r--test/Transforms/WholeProgramDevirt/vcp-uses-this.ll37
-rw-r--r--test/Transforms/WholeProgramDevirt/virtual-const-prop-begin.ll136
-rw-r--r--test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll141
-rw-r--r--test/Transforms/WholeProgramDevirt/virtual-const-prop-end.ll131
1028 files changed, 56243 insertions, 13154 deletions
diff --git a/test/Transforms/ADCE/debug-info-intrinsic.ll b/test/Transforms/ADCE/debug-info-intrinsic.ll
new file mode 100644
index 000000000000..da158b3a50e2
--- /dev/null
+++ b/test/Transforms/ADCE/debug-info-intrinsic.ll
@@ -0,0 +1,100 @@
+; RUN: opt -adce -S < %s | FileCheck %s
+; Test that debug info intrinsics in dead scopes get eliminated by -adce.
+
+; Generated with 'clang -g -S -emit-llvm | opt -mem2reg -inline' at r262899
+; (before -adce was augmented) and then hand-reduced. This was the input:
+;
+;;void sink(void);
+;;
+;;void variable_in_unused_subscope(void) {
+;; { int i = 0; }
+;; sink();
+;;}
+;;
+;;void variable_in_parent_scope(void) {
+;; int i = 0;
+;; { sink(); }
+;;}
+;;
+;;static int empty_function_with_unused_variable(void) {
+;; { int i = 0; }
+;; return 0;
+;;}
+;;
+;;void calls_empty_function_with_unused_variable_in_unused_subscope(void) {
+;; { empty_function_with_unused_variable(); }
+;; sink();
+;;}
+
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
+
+declare void @sink()
+
+; CHECK-LABEL: define void @variable_in_unused_subscope(
+define void @variable_in_unused_subscope() !dbg !4 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: call void @sink
+; CHECK-NEXT: ret void
+entry:
+ call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !15, metadata !17), !dbg !18
+ call void @sink(), !dbg !19
+ ret void, !dbg !20
+}
+
+; CHECK-LABEL: define void @variable_in_parent_scope(
+define void @variable_in_parent_scope() !dbg !7 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: call void @llvm.dbg.value
+; CHECK-NEXT: call void @sink
+; CHECK-NEXT: ret void
+entry:
+ call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !21, metadata !17), !dbg !22
+ call void @sink(), !dbg !23
+ ret void, !dbg !25
+}
+
+; CHECK-LABEL: define void @calls_empty_function_with_unused_variable_in_unused_subscope(
+define void @calls_empty_function_with_unused_variable_in_unused_subscope() !dbg !8 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: call void @sink
+; CHECK-NEXT: ret void
+entry:
+ call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !26, metadata !17), !dbg !28
+ call void @sink(), !dbg !31
+ ret void, !dbg !32
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!14}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "t.c", directory: "/path/to/test/Transforms/ADCE")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "variable_in_unused_subscope", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{null}
+!7 = distinct !DISubprogram(name: "variable_in_parent_scope", scope: !1, file: !1, line: 8, type: !5, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
+!8 = distinct !DISubprogram(name: "calls_empty_function_with_unused_variable_in_unused_subscope", scope: !1, file: !1, line: 18, type: !5, isLocal: false, isDefinition: true, scopeLine: 18, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
+!10 = distinct !DISubprogram(name: "empty_function_with_unused_variable", scope: !1, file: !1, line: 13, type: !11, isLocal: true, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
+!11 = !DISubroutineType(types: !12)
+!12 = !{!13}
+!13 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!14 = !{i32 2, !"Debug Info Version", i32 3}
+!15 = !DILocalVariable(name: "i", scope: !16, file: !1, line: 4, type: !13)
+!16 = distinct !DILexicalBlock(scope: !4, file: !1, line: 4, column: 3)
+!17 = !DIExpression()
+!18 = !DILocation(line: 4, column: 9, scope: !16)
+!19 = !DILocation(line: 5, column: 3, scope: !4)
+!20 = !DILocation(line: 6, column: 1, scope: !4)
+!21 = !DILocalVariable(name: "i", scope: !7, file: !1, line: 9, type: !13)
+!22 = !DILocation(line: 9, column: 7, scope: !7)
+!23 = !DILocation(line: 10, column: 5, scope: !24)
+!24 = distinct !DILexicalBlock(scope: !7, file: !1, line: 10, column: 3)
+!25 = !DILocation(line: 11, column: 1, scope: !7)
+!26 = !DILocalVariable(name: "i", scope: !27, file: !1, line: 14, type: !13)
+!27 = distinct !DILexicalBlock(scope: !10, file: !1, line: 14, column: 3)
+!28 = !DILocation(line: 14, column: 9, scope: !27, inlinedAt: !29)
+!29 = distinct !DILocation(line: 19, column: 5, scope: !30)
+!30 = distinct !DILexicalBlock(scope: !8, file: !1, line: 19, column: 3)
+!31 = !DILocation(line: 20, column: 3, scope: !8)
+!32 = !DILocation(line: 21, column: 1, scope: !8)
diff --git a/test/Transforms/ADCE/delete-profiling-calls-to-constant.ll b/test/Transforms/ADCE/delete-profiling-calls-to-constant.ll
new file mode 100644
index 000000000000..a61e8f8caccb
--- /dev/null
+++ b/test/Transforms/ADCE/delete-profiling-calls-to-constant.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -adce | FileCheck %s
+; RUN: opt < %s -passes=adce | FileCheck %s
+
+; Verify that a call to instrument a constant is deleted.
+
+@__profc_foo = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", align 8
+@__profd_foo = private global { i64, i64, i64*, i8*, i8*, i32, [1 x i16] } { i64 6699318081062747564, i64 0, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_foo, i32 0, i32 0), i8* bitcast (i32 ()* @foo to i8*), i8* null, i32 1, [1 x i16] [i16 1] }, section "__llvm_prf_data", align 8
+
+define i32 @foo() {
+; CHECK-NOT: __llvm_profile_instrument_target
+entry:
+ tail call void @__llvm_profile_instrument_target(i64 ptrtoint (i32 (i32)* @bar to i64), i8* bitcast ({ i64, i64, i64*, i8*, i8*, i32, [1 x i16] }* @__profd_foo to i8*), i32 0)
+ %call = tail call i32 @bar(i32 21)
+ ret i32 %call
+}
+
+declare i32 @bar(i32)
+
+declare void @__llvm_profile_instrument_target(i64, i8*, i32)
diff --git a/test/Transforms/AddDiscriminators/basic.ll b/test/Transforms/AddDiscriminators/basic.ll
index 0588562c7377..801eda2b0665 100644
--- a/test/Transforms/AddDiscriminators/basic.ll
+++ b/test/Transforms/AddDiscriminators/basic.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -add-discriminators -S | FileCheck %s
+; RUN: opt < %s -passes=add-discriminators -S | FileCheck %s
; Basic DWARF discriminator test. All the instructions in block
; 'if.then' should have a different discriminator value than
@@ -37,19 +38,14 @@ if.end: ; preds = %if.then, %entry
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-; We should be able to add discriminators even in the absence of llvm.dbg.cu.
-; When using sample profiles, the front end will generate line tables but it
-; does not generate llvm.dbg.cu to prevent codegen from emitting debug info
-; to the final binary.
-; !llvm.dbg.cu = !{!0}
+!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "basic.c", directory: ".")
!2 = !{}
-!3 = !{!4}
-!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "basic.c", directory: ".")
!6 = !DISubroutineType(types: !2)
!7 = !{i32 2, !"Dwarf Version", i32 4}
diff --git a/test/Transforms/AddDiscriminators/call-nested.ll b/test/Transforms/AddDiscriminators/call-nested.ll
new file mode 100644
index 000000000000..481d6f260047
--- /dev/null
+++ b/test/Transforms/AddDiscriminators/call-nested.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s -add-discriminators -S | FileCheck %s
+; RUN: opt < %s -passes=add-discriminators -S | FileCheck %s
+
+; Discriminator support for calls that are defined in one line:
+; #1 int foo(int, int);
+; #2 int bar();
+; #3 int baz() {
+; #4 return foo(bar(),
+; #5 bar());
+; #6 }
+
+; Function Attrs: uwtable
+define i32 @_Z3bazv() #0 !dbg !4 {
+ %1 = call i32 @_Z3barv(), !dbg !11
+; CHECK: %1 = call i32 @_Z3barv(), !dbg ![[CALL0:[0-9]+]]
+ %2 = call i32 @_Z3barv(), !dbg !12
+; CHECK: %2 = call i32 @_Z3barv(), !dbg ![[CALL1:[0-9]+]]
+ %3 = call i32 @_Z3fooii(i32 %1, i32 %2), !dbg !13
+; CHECK: %3 = call i32 @_Z3fooii(i32 %1, i32 %2), !dbg ![[CALL2:[0-9]+]]
+ ret i32 %3, !dbg !14
+}
+
+declare i32 @_Z3fooii(i32, i32) #1
+
+declare i32 @_Z3barv() #1
+
+attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!8, !9}
+!llvm.ident = !{!10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.9.0 (trunk 266269)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "test.cc", directory: "")
+!2 = !{}
+!4 = distinct !DISubprogram(name: "baz", linkageName: "_Z3bazv", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !2)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7}
+!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !{i32 2, !"Dwarf Version", i32 4}
+!9 = !{i32 2, !"Debug Info Version", i32 3}
+!10 = !{!"clang version 3.9.0 (trunk 266269)"}
+!11 = !DILocation(line: 4, column: 14, scope: !4)
+!12 = !DILocation(line: 5, column: 14, scope: !4)
+!13 = !DILocation(line: 4, column: 10, scope: !4)
+!14 = !DILocation(line: 4, column: 3, scope: !4)
+
+; CHECK: ![[CALL2]] = !DILocation(line: 4, column: 10, scope: ![[CALL2BLOCK:[0-9]+]])
+; CHECK: ![[CALL2BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 1)
diff --git a/test/Transforms/AddDiscriminators/call.ll b/test/Transforms/AddDiscriminators/call.ll
index b123b25f2af2..6844a9851ec5 100644
--- a/test/Transforms/AddDiscriminators/call.ll
+++ b/test/Transforms/AddDiscriminators/call.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -add-discriminators -S | FileCheck %s
+; RUN: opt < %s -passes=add-discriminators -S | FileCheck %s
; Discriminator support for calls that are defined in one line:
; #1 void bar();
@@ -8,7 +9,7 @@
; #5 }
; Function Attrs: uwtable
-define void @_Z3foov() #0 {
+define void @_Z3foov() #0 !dbg !4 {
call void @_Z3barv(), !dbg !10
; CHECK: call void @_Z3barv(), !dbg ![[CALL0:[0-9]+]]
call void @_Z3barv(), !dbg !11
@@ -23,19 +24,14 @@ declare void @_Z3barv() #1
attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
-; We should be able to add discriminators even in the absence of llvm.dbg.cu.
-; When using sample profiles, the front end will generate line tables but it
-; does not generate llvm.dbg.cu to prevent codegen from emitting debug info
-; to the final binary.
-; !llvm.dbg.cu = !{!0}
+!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 250915) (llvm/trunk 251830)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 250915) (llvm/trunk 251830)", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2)
!1 = !DIFile(filename: "c.cc", directory: "/tmp")
!2 = !{}
-!3 = !{!4}
-!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
!5 = !DISubroutineType(types: !6)
!6 = !{null}
!7 = !{i32 2, !"Dwarf Version", i32 4}
diff --git a/test/Transforms/AddDiscriminators/dbg-declare-discriminator.ll b/test/Transforms/AddDiscriminators/dbg-declare-discriminator.ll
index 5e90d32a62eb..bc2328f4a42a 100644
--- a/test/Transforms/AddDiscriminators/dbg-declare-discriminator.ll
+++ b/test/Transforms/AddDiscriminators/dbg-declare-discriminator.ll
@@ -1,4 +1,5 @@
; RUN: opt -S -add-discriminators < %s | FileCheck %s
+; RUN: opt -S -passes=add-discriminators < %s | FileCheck %s
declare void @llvm.dbg.declare(metadata, metadata, metadata)
@@ -15,16 +16,18 @@ define void @test_valid_metadata() {
}
!llvm.module.flags = !{!0, !1}
+!llvm.dbg.cu = !{!12}
!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = !DILocalVariable(scope: !3)
-!3 = distinct !DISubprogram(scope: null, file: !4, isLocal: false, isDefinition: true, isOptimized: false)
+!3 = distinct !DISubprogram(scope: null, file: !4, isLocal: false, isDefinition: true, isOptimized: false, unit: !12)
!4 = !DIFile(filename: "a.cpp", directory: "/tmp")
!5 = !DIExpression()
!6 = !DILocation(line: 0, scope: !3, inlinedAt: !7)
!7 = distinct !DILocation(line: 0, scope: !8)
-!8 = distinct !DISubprogram(linkageName: "test_valid_metadata", scope: null, isLocal: false, isDefinition: true, isOptimized: false)
+!8 = distinct !DISubprogram(linkageName: "test_valid_metadata", scope: null, isLocal: false, isDefinition: true, isOptimized: false, unit: !12)
!9 = !DILocalVariable(scope: !10)
-!10 = distinct !DISubprogram(scope: null, file: !4, isLocal: false, isDefinition: true, isOptimized: false)
+!10 = distinct !DISubprogram(scope: null, file: !4, isLocal: false, isDefinition: true, isOptimized: false, unit: !12)
!11 = !DILocation(line: 0, scope: !10)
+!12 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: FullDebug, file: !4)
diff --git a/test/Transforms/AddDiscriminators/diamond.ll b/test/Transforms/AddDiscriminators/diamond.ll
index 2ca638a83ec3..b3afe7285472 100644
--- a/test/Transforms/AddDiscriminators/diamond.ll
+++ b/test/Transforms/AddDiscriminators/diamond.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -add-discriminators -S | FileCheck %s
+; RUN: opt < %s -passes=add-discriminators -S | FileCheck %s
; Discriminator support for diamond-shaped CFG.:
; #1 void bar(int);
@@ -46,11 +47,10 @@ attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-
!llvm.module.flags = !{!8, !9}
!llvm.ident = !{!10}
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 253273)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 253273)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
!1 = !DIFile(filename: "a.cc", directory: "/tmp")
!2 = !{}
-!3 = !{!4}
-!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
!5 = !DISubroutineType(types: !6)
!6 = !{null, !7}
!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
diff --git a/test/Transforms/AddDiscriminators/first-only.ll b/test/Transforms/AddDiscriminators/first-only.ll
index 20d88b55e96e..1bd8dae5d05c 100644
--- a/test/Transforms/AddDiscriminators/first-only.ll
+++ b/test/Transforms/AddDiscriminators/first-only.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -add-discriminators -S | FileCheck %s
+; RUN: opt < %s -passes=add-discriminators -S | FileCheck %s
; Test that the only instructions that receive a new discriminator in
; the block 'if.then' are those that share the same line number as
@@ -46,19 +47,14 @@ if.end: ; preds = %if.then, %entry
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-; We should be able to add discriminators even in the absence of llvm.dbg.cu.
-; When using sample profiles, the front end will generate line tables but it
-; does not generate llvm.dbg.cu to prevent codegen from emitting debug info
-; to the final binary.
-; !llvm.dbg.cu = !{!0}
+!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 (trunk 199750) (llvm/trunk 199751)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 (trunk 199750) (llvm/trunk 199751)", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "first-only.c", directory: ".")
!2 = !{}
-!3 = !{!4}
-!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "first-only.c", directory: ".")
!6 = !DISubroutineType(types: !{null})
!7 = !{i32 2, !"Dwarf Version", i32 4}
diff --git a/test/Transforms/AddDiscriminators/multiple.ll b/test/Transforms/AddDiscriminators/multiple.ll
index 9a05fcd86864..387689caddff 100644
--- a/test/Transforms/AddDiscriminators/multiple.ll
+++ b/test/Transforms/AddDiscriminators/multiple.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -add-discriminators -S | FileCheck %s
+; RUN: opt < %s -passes=add-discriminators -S | FileCheck %s
; Discriminator support for multiple CFG paths on the same line.
;
@@ -47,19 +48,14 @@ if.end: ; preds = %if.else, %if.then
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-; We should be able to add discriminators even in the absence of llvm.dbg.cu.
-; When using sample profiles, the front end will generate line tables but it
-; does not generate llvm.dbg.cu to prevent codegen from emitting debug info
-; to the final binary.
-; !llvm.dbg.cu = !{!0}
+!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 (trunk 199750) (llvm/trunk 199751)", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 (trunk 199750) (llvm/trunk 199751)", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "multiple.c", directory: ".")
!2 = !{}
-!3 = !{!4}
-!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "multiple.c", directory: ".")
!6 = !DISubroutineType(types: !{null, !13})
!13 = !DIBasicType(encoding: DW_ATE_signed, name: "int", size: 32, align: 32)
diff --git a/test/Transforms/AddDiscriminators/no-discriminators.ll b/test/Transforms/AddDiscriminators/no-discriminators.ll
index bbba9dc62c4e..5fd8b0fa5a23 100644
--- a/test/Transforms/AddDiscriminators/no-discriminators.ll
+++ b/test/Transforms/AddDiscriminators/no-discriminators.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -add-discriminators -S | FileCheck %s
+; RUN: opt < %s -passes=add-discriminators -S | FileCheck %s
; We should not generate discriminators for DWARF versions prior to 4.
;
@@ -52,11 +53,11 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!10, !11}
!llvm.ident = !{!12}
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+; CHECK: !{i32 2, !"Dwarf Version", i32 2}
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: FullDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "no-discriminators", directory: ".")
!2 = !{}
-!3 = !{!4}
-!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
; CHECK: ![[FOO:[0-9]+]] = distinct !DISubprogram(name: "foo"
!5 = !DIFile(filename: "no-discriminators", directory: ".")
!6 = !DISubroutineType(types: !7)
@@ -64,7 +65,6 @@ attributes #1 = { nounwind readnone }
!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!9 = !DIBasicType(tag: DW_TAG_base_type, name: "long int", size: 64, align: 64, encoding: DW_ATE_signed)
!10 = !{i32 2, !"Dwarf Version", i32 2}
-; CHECK: !{i32 2, !"Dwarf Version", i32 2}
!11 = !{i32 1, !"Debug Info Version", i32 3}
!12 = !{!"clang version 3.5.0 "}
!13 = !DILocalVariable(name: "i", line: 1, arg: 1, scope: !4, file: !5, type: !9)
diff --git a/test/Transforms/AddDiscriminators/oneline.ll b/test/Transforms/AddDiscriminators/oneline.ll
index ebee3935dd66..72d7a6eadbf9 100644
--- a/test/Transforms/AddDiscriminators/oneline.ll
+++ b/test/Transforms/AddDiscriminators/oneline.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -add-discriminators -S | FileCheck %s
+; RUN: opt < %s -passes=add-discriminators -S | FileCheck %s
; Discriminator support for code that is written in one line:
; #1 int foo(int i) {
@@ -54,19 +55,14 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind readnone }
-; We should be able to add discriminators even in the absence of llvm.dbg.cu.
-; When using sample profiles, the front end will generate line tables but it
-; does not generate llvm.dbg.cu to prevent codegen from emitting debug info
-; to the final binary.
-; !llvm.dbg.cu = !{!0}
+!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!10, !11}
!llvm.ident = !{!12}
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 250915)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 250915)", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2)
!1 = !DIFile(filename: "a.cc", directory: "/usr/local/google/home/dehao/discr")
!2 = !{}
-!3 = !{!4}
-!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, variables: !8)
+!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !8)
!5 = !DISubroutineType(types: !6)
!6 = !{!7, !7}
!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
diff --git a/test/Transforms/AlignmentFromAssumptions/simple.ll b/test/Transforms/AlignmentFromAssumptions/simple.ll
index 851e6dc3ccc6..b91722839c60 100644
--- a/test/Transforms/AlignmentFromAssumptions/simple.ll
+++ b/test/Transforms/AlignmentFromAssumptions/simple.ll
@@ -1,5 +1,6 @@
target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
; RUN: opt < %s -alignment-from-assumptions -S | FileCheck %s
+; RUN: opt < %s -passes=alignment-from-assumptions -S | FileCheck %s
define i32 @foo(i32* nocapture %a) nounwind uwtable readonly {
entry:
diff --git a/test/Transforms/AlignmentFromAssumptions/simple32.ll b/test/Transforms/AlignmentFromAssumptions/simple32.ll
index 2edc2e95f41a..e474bd33c317 100644
--- a/test/Transforms/AlignmentFromAssumptions/simple32.ll
+++ b/test/Transforms/AlignmentFromAssumptions/simple32.ll
@@ -1,5 +1,6 @@
target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-n32-S64"
; RUN: opt < %s -alignment-from-assumptions -S | FileCheck %s
+; RUN: opt < %s -passes=alignment-from-assumptions -S | FileCheck %s
define i32 @foo(i32* nocapture %a) nounwind uwtable readonly {
entry:
diff --git a/test/Transforms/AlignmentFromAssumptions/start-unk.ll b/test/Transforms/AlignmentFromAssumptions/start-unk.ll
index 99533cf6ccb9..9357734a350a 100644
--- a/test/Transforms/AlignmentFromAssumptions/start-unk.ll
+++ b/test/Transforms/AlignmentFromAssumptions/start-unk.ll
@@ -1,4 +1,5 @@
; RUN: opt -alignment-from-assumptions -S < %s | FileCheck %s
+; RUN: opt -passes=alignment-from-assumptions -S < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/ArgumentPromotion/dbg.ll b/test/Transforms/ArgumentPromotion/dbg.ll
index dbdccacf42ba..3d353db105fd 100644
--- a/test/Transforms/ArgumentPromotion/dbg.ll
+++ b/test/Transforms/ArgumentPromotion/dbg.ll
@@ -23,7 +23,6 @@ define void @caller(i32** %Y) {
!0 = !{i32 2, !"Debug Info Version", i32 3}
!1 = !DILocation(line: 8, scope: !2)
-!2 = distinct !DISubprogram(name: "test", line: 3, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, scope: null)
-!3 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: 2, file: !5, subprograms: !4)
-!4 = !{!2}
+!2 = distinct !DISubprogram(name: "test", line: 3, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !3, scopeLine: 3, scope: null)
+!3 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 ", isOptimized: false, emissionKind: LineTablesOnly, file: !5)
!5 = !DIFile(filename: "test.c", directory: "")
diff --git a/test/Transforms/ArgumentPromotion/inalloca.ll b/test/Transforms/ArgumentPromotion/inalloca.ll
index 80bd6fdbc406..5bf57c8ff465 100644
--- a/test/Transforms/ArgumentPromotion/inalloca.ll
+++ b/test/Transforms/ArgumentPromotion/inalloca.ll
@@ -1,10 +1,10 @@
-; RUN: opt %s -argpromotion -scalarrepl -S | FileCheck %s
+; RUN: opt %s -argpromotion -sroa -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
%struct.ss = type { i32, i32 }
-; Argpromote + scalarrepl should change this to passing the two integers by value.
+; Argpromote + sroa should change this to passing the two integers by value.
define internal i32 @f(%struct.ss* inalloca %s) {
entry:
%f0 = getelementptr %struct.ss, %struct.ss* %s, i32 0, i32 0
diff --git a/test/Transforms/ArgumentPromotion/pr27568.ll b/test/Transforms/ArgumentPromotion/pr27568.ll
new file mode 100644
index 000000000000..648317aee0da
--- /dev/null
+++ b/test/Transforms/ArgumentPromotion/pr27568.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -argpromotion < %s | FileCheck %s
+target triple = "x86_64-pc-windows-msvc"
+
+define internal void @callee(i8*) {
+entry:
+ call void @thunk()
+ ret void
+}
+
+define void @test1() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ invoke void @thunk()
+ to label %out unwind label %cpad
+
+out:
+ ret void
+
+cpad:
+ %pad = cleanuppad within none []
+ call void @callee(i8* null) [ "funclet"(token %pad) ]
+ cleanupret from %pad unwind to caller
+}
+
+; CHECK-LABEL: define void @test1(
+; CHECK: %[[pad:.*]] = cleanuppad within none []
+; CHECK-NEXT: call void @callee() [ "funclet"(token %[[pad]]) ]
+; CHECK-NEXT: cleanupret from %[[pad]] unwind to caller
+
+declare void @thunk()
+
+declare i32 @__CxxFrameHandler3(...)
diff --git a/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll b/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll
index 4647e8fd6d9e..5e84460b9c0c 100644
--- a/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll
+++ b/test/Transforms/AtomicExpand/ARM/atomic-expansion-v7.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -o - -mtriple=armv7-apple-ios7.0 -atomic-expand %s | FileCheck %s
+; RUN: opt -S -o - -mtriple=armv7-apple-ios7.0 -atomic-expand -codegen-opt-level=1 %s | FileCheck %s
define i8 @test_atomic_xchg_i8(i8* %ptr, i8 %xchgend) {
; CHECK-LABEL: @test_atomic_xchg_i8
@@ -222,26 +222,37 @@ define i8 @test_atomic_umin_i8(i8* %ptr, i8 %uminend) {
define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
; CHECK-LABEL: @test_cmpxchg_i8_seqcst_seqcst
-; CHECK: call void @llvm.arm.dmb(i32 11)
-; CHECK: br label %[[LOOP:.*]]
+; CHECK: br label %[[START:.*]]
-; CHECK: [[LOOP]]:
+; CHECK: [[START]]:
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
-; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i8
+; CHECK: [[OLDVAL:%.*]] = trunc i32 [[OLDVAL32]] to i8
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i8 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
-; CHECK: [[TRY_STORE]]:
+; CHECK: [[FENCED_STORE]]:
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: br label %[[LOOP:.*]]
+
+; CHECK: [[LOOP]]:
+; CHECK: [[LOADED_LOOP:%.*]] = phi i8 [ [[OLDVAL]], %[[FENCED_STORE]] ], [ [[OLDVAL_LOOP:%.*]], %[[RELEASED_LOAD:.*]] ]
; CHECK: [[NEWVAL32:%.*]] = zext i8 %newval to i32
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i8(i32 [[NEWVAL32]], i8* %ptr)
; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[RELEASED_LOAD]]
+
+; CHECK: [[RELEASED_LOAD]]:
+; CHECK: [[OLDVAL32_LOOP:%.*]] = call i32 @llvm.arm.ldrex.p0i8(i8* %ptr)
+; CHECK: [[OLDVAL_LOOP]] = trunc i32 [[OLDVAL32_LOOP]] to i8
+; CHECK: [[SHOULD_STORE_LOOP:%.*]] = icmp eq i8 [[OLDVAL_LOOP]], %desired
+; CHECK: br i1 [[SHOULD_STORE_LOOP]], label %[[LOOP]], label %[[NO_STORE_BB]]
; CHECK: [[SUCCESS_BB]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[DONE:.*]]
; CHECK: [[NO_STORE_BB]]:
+; CHECK-NEXT: [[LOADED_NO_STORE:%.*]] = phi i8 [ [[OLDVAL]], %[[START]] ], [ [[OLDVAL_LOOP]], %[[RELEASED_LOAD]] ]
; CHECK-NEXT: call void @llvm.arm.clrex()
; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
@@ -251,7 +262,8 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
; CHECK: [[DONE]]:
; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
-; CHECK: ret i8 [[OLDVAL]]
+; CHECK: [[LOADED:%.*]] = phi i8 [ [[LOADED_LOOP]], %[[SUCCESS_BB]] ], [ [[LOADED_NO_STORE]], %[[FAILURE_BB]] ]
+; CHECK: ret i8 [[LOADED]]
%pairold = cmpxchg i8* %ptr, i8 %desired, i8 %newval seq_cst seq_cst
%old = extractvalue { i8, i1 } %pairold, 0
@@ -260,26 +272,37 @@ define i8 @test_cmpxchg_i8_seqcst_seqcst(i8* %ptr, i8 %desired, i8 %newval) {
define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newval) {
; CHECK-LABEL: @test_cmpxchg_i16_seqcst_monotonic
-; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[LOOP:.*]]
; CHECK: [[LOOP]]:
; CHECK: [[OLDVAL32:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
; CHECK: [[OLDVAL:%.*]] = trunc i32 %1 to i16
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i16 [[OLDVAL]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
-; CHECK: [[TRY_STORE]]:
+; CHECK: [[FENCED_STORE]]:
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: br label %[[LOOP:.*]]
+
+; CHECK: [[LOOP]]:
+; CHECK: [[LOADED_LOOP:%.*]] = phi i16 [ [[OLDVAL]], %[[FENCED_STORE]] ], [ [[OLDVAL_LOOP:%.*]], %[[RELEASED_LOAD:.*]] ]
; CHECK: [[NEWVAL32:%.*]] = zext i16 %newval to i32
; CHECK: [[TRYAGAIN:%.*]] = call i32 @llvm.arm.strex.p0i16(i32 [[NEWVAL32]], i16* %ptr)
; CHECK: [[TST:%.*]] = icmp eq i32 [[TRYAGAIN]], 0
-; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[LOOP]]
+; CHECK: br i1 [[TST]], label %[[SUCCESS_BB:.*]], label %[[RELEASED_LOAD:.*]]
+
+; CHECK: [[RELEASED_LOAD]]:
+; CHECK: [[OLDVAL32_LOOP:%.*]] = call i32 @llvm.arm.ldrex.p0i16(i16* %ptr)
+; CHECK: [[OLDVAL_LOOP]] = trunc i32 [[OLDVAL32_LOOP]] to i16
+; CHECK: [[SHOULD_STORE_LOOP:%.*]] = icmp eq i16 [[OLDVAL_LOOP]], %desired
+; CHECK: br i1 [[SHOULD_STORE_LOOP]], label %[[LOOP]], label %[[NO_STORE_BB]]
; CHECK: [[SUCCESS_BB]]:
; CHECK: call void @llvm.arm.dmb(i32 11)
; CHECK: br label %[[DONE:.*]]
; CHECK: [[NO_STORE_BB]]:
+; CHECK-NEXT: [[LOADED_NO_STORE:%.*]] = phi i16 [ [[OLDVAL]], %[[START]] ], [ [[OLDVAL_LOOP]], %[[RELEASED_LOAD]] ]
; CHECK-NEXT: call void @llvm.arm.clrex()
; CHECK-NEXT: br label %[[FAILURE_BB:.*]]
@@ -289,7 +312,8 @@ define i16 @test_cmpxchg_i16_seqcst_monotonic(i16* %ptr, i16 %desired, i16 %newv
; CHECK: [[DONE]]:
; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
-; CHECK: ret i16 [[OLDVAL]]
+; CHECK: [[LOADED:%.*]] = phi i16 [ [[LOADED_LOOP]], %[[SUCCESS_BB]] ], [ [[LOADED_NO_STORE]], %[[FAILURE_BB]] ]
+; CHECK: ret i16 [[LOADED]]
%pairold = cmpxchg i16* %ptr, i16 %desired, i16 %newval seq_cst monotonic
%old = extractvalue { i16, i1 } %pairold, 0
@@ -378,3 +402,39 @@ define i64 @test_cmpxchg_i64_monotonic_monotonic(i64* %ptr, i64 %desired, i64 %n
%old = extractvalue { i64, i1 } %pairold, 0
ret i64 %old
}
+
+define i32 @test_cmpxchg_minsize(i32* %addr, i32 %desired, i32 %new) minsize {
+; CHECK-LABEL: @test_cmpxchg_minsize
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: br label %[[START:.*]]
+
+; CHECK: [[START]]:
+; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
+; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
+; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
+
+; CHECK: [[TRY_STORE]]:
+; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
+; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
+; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[START]]
+
+; CHECK: [[SUCCESS_BB]]:
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: br label %[[END:.*]]
+
+; CHECK: [[NO_STORE_BB]]:
+; CHECK: call void @llvm.arm.clrex()
+; CHECK: br label %[[FAILURE_BB]]
+
+; CHECK: [[FAILURE_BB]]:
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: br label %[[END]]
+
+; CHECK: [[END]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
+; CHECK: ret i32 [[LOADED]]
+
+ %pair = cmpxchg i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
+ %oldval = extractvalue { i32, i1 } %pair, 0
+ ret i32 %oldval
+}
diff --git a/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll b/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll
index 7bb6ffed397d..8397182e7e8f 100644
--- a/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll
+++ b/test/Transforms/AtomicExpand/ARM/atomic-expansion-v8.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -o - -mtriple=armv8-linux-gnueabihf -atomic-expand %s | FileCheck %s
+; RUN: opt -S -o - -mtriple=armv8-linux-gnueabihf -atomic-expand %s -codegen-opt-level=1 | FileCheck %s
define i8 @test_atomic_xchg_i8(i8* %ptr, i8 %xchgend) {
; CHECK-LABEL: @test_atomic_xchg_i8
diff --git a/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll b/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll
index f9aa524fac98..375b41a26dbd 100644
--- a/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll
+++ b/test/Transforms/AtomicExpand/ARM/cmpxchg-weak.ll
@@ -1,15 +1,18 @@
-; RUN: opt -atomic-expand -S -mtriple=thumbv7s-apple-ios7.0 %s | FileCheck %s
+; RUN: opt -atomic-expand -codegen-opt-level=1 -S -mtriple=thumbv7s-apple-ios7.0 %s | FileCheck %s
define i32 @test_cmpxchg_seq_cst(i32* %addr, i32 %desired, i32 %new) {
; CHECK-LABEL: @test_cmpxchg_seq_cst
; Intrinsic for "dmb ishst" is then expected
-; CHECK: call void @llvm.arm.dmb(i32 10)
; CHECK: br label %[[START:.*]]
; CHECK: [[START]]:
; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
+
+; CHECK: [[FENCED_STORE]]:
+; CHECK: call void @llvm.arm.dmb(i32 10)
+; CHECK: br label %[[TRY_STORE:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
@@ -39,13 +42,16 @@ define i32 @test_cmpxchg_seq_cst(i32* %addr, i32 %desired, i32 %new) {
define i1 @test_cmpxchg_weak_fail(i32* %addr, i32 %desired, i32 %new) {
; CHECK-LABEL: @test_cmpxchg_weak_fail
-; CHECK: call void @llvm.arm.dmb(i32 10)
; CHECK: br label %[[START:.*]]
; CHECK: [[START]]:
; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
-; CHECK: br i1 [[SHOULD_STORE]], label %[[TRY_STORE:.*]], label %[[NO_STORE_BB:.*]]
+; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
+
+; CHECK: [[FENCED_STORE]]:
+; CHECK: call void @llvm.arm.dmb(i32 10)
+; CHECK: br label %[[TRY_STORE:.*]]
; CHECK: [[TRY_STORE]]:
; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
@@ -108,3 +114,42 @@ define i32 @test_cmpxchg_monotonic(i32* %addr, i32 %desired, i32 %new) {
%oldval = extractvalue { i32, i1 } %pair, 0
ret i32 %oldval
}
+
+define i32 @test_cmpxchg_seq_cst_minsize(i32* %addr, i32 %desired, i32 %new) minsize {
+; CHECK-LABEL: @test_cmpxchg_seq_cst_minsize
+; CHECK: br label %[[START:.*]]
+
+; CHECK: [[START]]:
+; CHECK: [[LOADED:%.*]] = call i32 @llvm.arm.ldrex.p0i32(i32* %addr)
+; CHECK: [[SHOULD_STORE:%.*]] = icmp eq i32 [[LOADED]], %desired
+; CHECK: br i1 [[SHOULD_STORE]], label %[[FENCED_STORE:.*]], label %[[NO_STORE_BB:.*]]
+
+; CHECK: [[FENCED_STORE]]:
+; CHECK: call void @llvm.arm.dmb(i32 10)
+; CHECK: br label %[[TRY_STORE:.*]]
+
+; CHECK: [[TRY_STORE]]:
+; CHECK: [[STREX:%.*]] = call i32 @llvm.arm.strex.p0i32(i32 %new, i32* %addr)
+; CHECK: [[SUCCESS:%.*]] = icmp eq i32 [[STREX]], 0
+; CHECK: br i1 [[SUCCESS]], label %[[SUCCESS_BB:.*]], label %[[FAILURE_BB:.*]]
+
+; CHECK: [[SUCCESS_BB]]:
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: br label %[[END:.*]]
+
+; CHECK: [[NO_STORE_BB]]:
+; CHECK: call void @llvm.arm.clrex()
+; CHECK: br label %[[FAILURE_BB]]
+
+; CHECK: [[FAILURE_BB]]:
+; CHECK: call void @llvm.arm.dmb(i32 11)
+; CHECK: br label %[[END]]
+
+; CHECK: [[END]]:
+; CHECK: [[SUCCESS:%.*]] = phi i1 [ true, %[[SUCCESS_BB]] ], [ false, %[[FAILURE_BB]] ]
+; CHECK: ret i32 [[LOADED]]
+
+ %pair = cmpxchg weak i32* %addr, i32 %desired, i32 %new seq_cst seq_cst
+ %oldval = extractvalue { i32, i1 } %pair, 0
+ ret i32 %oldval
+}
diff --git a/test/Transforms/AtomicExpand/SPARC/libcalls.ll b/test/Transforms/AtomicExpand/SPARC/libcalls.ll
new file mode 100644
index 000000000000..afab7a39b278
--- /dev/null
+++ b/test/Transforms/AtomicExpand/SPARC/libcalls.ll
@@ -0,0 +1,257 @@
+; RUN: opt -S %s -atomic-expand | FileCheck %s
+
+;;; NOTE: this test is actually target-independent -- any target which
+;;; doesn't support inline atomics can be used. (E.g. X86 i386 would
+;;; work, if LLVM is properly taught about what it's missing vs i586.)
+
+;target datalayout = "e-m:e-p:32:32-f64:32:64-f80:32-n8:16:32-S128"
+;target triple = "i386-unknown-unknown"
+target datalayout = "e-m:e-p:32:32-i64:64-f128:64-n32-S64"
+target triple = "sparc-unknown-unknown"
+
+;; First, check the sized calls. Except for cmpxchg, these are fairly
+;; straightforward.
+
+; CHECK-LABEL: @test_load_i16(
+; CHECK: %1 = bitcast i16* %arg to i8*
+; CHECK: %2 = call i16 @__atomic_load_2(i8* %1, i32 5)
+; CHECK: ret i16 %2
+define i16 @test_load_i16(i16* %arg) {
+ %ret = load atomic i16, i16* %arg seq_cst, align 4
+ ret i16 %ret
+}
+
+; CHECK-LABEL: @test_store_i16(
+; CHECK: %1 = bitcast i16* %arg to i8*
+; CHECK: call void @__atomic_store_2(i8* %1, i16 %val, i32 5)
+; CHECK: ret void
+define void @test_store_i16(i16* %arg, i16 %val) {
+ store atomic i16 %val, i16* %arg seq_cst, align 4
+ ret void
+}
+
+; CHECK-LABEL: @test_exchange_i16(
+; CHECK: %1 = bitcast i16* %arg to i8*
+; CHECK: %2 = call i16 @__atomic_exchange_2(i8* %1, i16 %val, i32 5)
+; CHECK: ret i16 %2
+define i16 @test_exchange_i16(i16* %arg, i16 %val) {
+ %ret = atomicrmw xchg i16* %arg, i16 %val seq_cst
+ ret i16 %ret
+}
+
+; CHECK-LABEL: @test_cmpxchg_i16(
+; CHECK: %1 = bitcast i16* %arg to i8*
+; CHECK: %2 = alloca i16, align 2
+; CHECK: %3 = bitcast i16* %2 to i8*
+; CHECK: call void @llvm.lifetime.start(i64 2, i8* %3)
+; CHECK: store i16 %old, i16* %2, align 2
+; CHECK: %4 = call zeroext i1 @__atomic_compare_exchange_2(i8* %1, i8* %3, i16 %new, i32 5, i32 0)
+; CHECK: %5 = load i16, i16* %2, align 2
+; CHECK: call void @llvm.lifetime.end(i64 2, i8* %3)
+; CHECK: %6 = insertvalue { i16, i1 } undef, i16 %5, 0
+; CHECK: %7 = insertvalue { i16, i1 } %6, i1 %4, 1
+; CHECK: %ret = extractvalue { i16, i1 } %7, 0
+; CHECK: ret i16 %ret
+define i16 @test_cmpxchg_i16(i16* %arg, i16 %old, i16 %new) {
+ %ret_succ = cmpxchg i16* %arg, i16 %old, i16 %new seq_cst monotonic
+ %ret = extractvalue { i16, i1 } %ret_succ, 0
+ ret i16 %ret
+}
+
+; CHECK-LABEL: @test_add_i16(
+; CHECK: %1 = bitcast i16* %arg to i8*
+; CHECK: %2 = call i16 @__atomic_fetch_add_2(i8* %1, i16 %val, i32 5)
+; CHECK: ret i16 %2
+define i16 @test_add_i16(i16* %arg, i16 %val) {
+ %ret = atomicrmw add i16* %arg, i16 %val seq_cst
+ ret i16 %ret
+}
+
+
+;; Now, check the output for the unsized libcalls. i128 is used for
+;; these tests because the "16" suffixed functions aren't available on
+;; 32-bit i386.
+
+; CHECK-LABEL: @test_load_i128(
+; CHECK: %1 = bitcast i128* %arg to i8*
+; CHECK: %2 = alloca i128, align 8
+; CHECK: %3 = bitcast i128* %2 to i8*
+; CHECK: call void @llvm.lifetime.start(i64 16, i8* %3)
+; CHECK: call void @__atomic_load(i32 16, i8* %1, i8* %3, i32 5)
+; CHECK: %4 = load i128, i128* %2, align 8
+; CHECK: call void @llvm.lifetime.end(i64 16, i8* %3)
+; CHECK: ret i128 %4
+define i128 @test_load_i128(i128* %arg) {
+ %ret = load atomic i128, i128* %arg seq_cst, align 16
+ ret i128 %ret
+}
+
+; CHECK-LABEL @test_store_i128(
+; CHECK: %1 = bitcast i128* %arg to i8*
+; CHECK: %2 = alloca i128, align 8
+; CHECK: %3 = bitcast i128* %2 to i8*
+; CHECK: call void @llvm.lifetime.start(i64 16, i8* %3)
+; CHECK: store i128 %val, i128* %2, align 8
+; CHECK: call void @__atomic_store(i32 16, i8* %1, i8* %3, i32 5)
+; CHECK: call void @llvm.lifetime.end(i64 16, i8* %3)
+; CHECK: ret void
+define void @test_store_i128(i128* %arg, i128 %val) {
+ store atomic i128 %val, i128* %arg seq_cst, align 16
+ ret void
+}
+
+; CHECK-LABEL: @test_exchange_i128(
+; CHECK: %1 = bitcast i128* %arg to i8*
+; CHECK: %2 = alloca i128, align 8
+; CHECK: %3 = bitcast i128* %2 to i8*
+; CHECK: call void @llvm.lifetime.start(i64 16, i8* %3)
+; CHECK: store i128 %val, i128* %2, align 8
+; CHECK: %4 = alloca i128, align 8
+; CHECK: %5 = bitcast i128* %4 to i8*
+; CHECK: call void @llvm.lifetime.start(i64 16, i8* %5)
+; CHECK: call void @__atomic_exchange(i32 16, i8* %1, i8* %3, i8* %5, i32 5)
+; CHECK: call void @llvm.lifetime.end(i64 16, i8* %3)
+; CHECK: %6 = load i128, i128* %4, align 8
+; CHECK: call void @llvm.lifetime.end(i64 16, i8* %5)
+; CHECK: ret i128 %6
+define i128 @test_exchange_i128(i128* %arg, i128 %val) {
+ %ret = atomicrmw xchg i128* %arg, i128 %val seq_cst
+ ret i128 %ret
+}
+
+; CHECK-LABEL: @test_cmpxchg_i128(
+; CHECK: %1 = bitcast i128* %arg to i8*
+; CHECK: %2 = alloca i128, align 8
+; CHECK: %3 = bitcast i128* %2 to i8*
+; CHECK: call void @llvm.lifetime.start(i64 16, i8* %3)
+; CHECK: store i128 %old, i128* %2, align 8
+; CHECK: %4 = alloca i128, align 8
+; CHECK: %5 = bitcast i128* %4 to i8*
+; CHECK: call void @llvm.lifetime.start(i64 16, i8* %5)
+; CHECK: store i128 %new, i128* %4, align 8
+; CHECK: %6 = call zeroext i1 @__atomic_compare_exchange(i32 16, i8* %1, i8* %3, i8* %5, i32 5, i32 0)
+; CHECK: call void @llvm.lifetime.end(i64 16, i8* %5)
+; CHECK: %7 = load i128, i128* %2, align 8
+; CHECK: call void @llvm.lifetime.end(i64 16, i8* %3)
+; CHECK: %8 = insertvalue { i128, i1 } undef, i128 %7, 0
+; CHECK: %9 = insertvalue { i128, i1 } %8, i1 %6, 1
+; CHECK: %ret = extractvalue { i128, i1 } %9, 0
+; CHECK: ret i128 %ret
+define i128 @test_cmpxchg_i128(i128* %arg, i128 %old, i128 %new) {
+ %ret_succ = cmpxchg i128* %arg, i128 %old, i128 %new seq_cst monotonic
+ %ret = extractvalue { i128, i1 } %ret_succ, 0
+ ret i128 %ret
+}
+
+; This one is a verbose expansion, as there is no generic
+; __atomic_fetch_add function, so it needs to expand to a cmpxchg
+; loop, which then itself expands into a libcall.
+
+; CHECK-LABEL: @test_add_i128(
+; CHECK: %1 = alloca i128, align 8
+; CHECK: %2 = alloca i128, align 8
+; CHECK: %3 = load i128, i128* %arg, align 16
+; CHECK: br label %atomicrmw.start
+; CHECK:atomicrmw.start:
+; CHECK: %loaded = phi i128 [ %3, %0 ], [ %newloaded, %atomicrmw.start ]
+; CHECK: %new = add i128 %loaded, %val
+; CHECK: %4 = bitcast i128* %arg to i8*
+; CHECK: %5 = bitcast i128* %1 to i8*
+; CHECK: call void @llvm.lifetime.start(i64 16, i8* %5)
+; CHECK: store i128 %loaded, i128* %1, align 8
+; CHECK: %6 = bitcast i128* %2 to i8*
+; CHECK: call void @llvm.lifetime.start(i64 16, i8* %6)
+; CHECK: store i128 %new, i128* %2, align 8
+; CHECK: %7 = call zeroext i1 @__atomic_compare_exchange(i32 16, i8* %4, i8* %5, i8* %6, i32 5, i32 5)
+; CHECK: call void @llvm.lifetime.end(i64 16, i8* %6)
+; CHECK: %8 = load i128, i128* %1, align 8
+; CHECK: call void @llvm.lifetime.end(i64 16, i8* %5)
+; CHECK: %9 = insertvalue { i128, i1 } undef, i128 %8, 0
+; CHECK: %10 = insertvalue { i128, i1 } %9, i1 %7, 1
+; CHECK: %success = extractvalue { i128, i1 } %10, 1
+; CHECK: %newloaded = extractvalue { i128, i1 } %10, 0
+; CHECK: br i1 %success, label %atomicrmw.end, label %atomicrmw.start
+; CHECK:atomicrmw.end:
+; CHECK: ret i128 %newloaded
+define i128 @test_add_i128(i128* %arg, i128 %val) {
+ %ret = atomicrmw add i128* %arg, i128 %val seq_cst
+ ret i128 %ret
+}
+
+;; Ensure that non-integer types get bitcast correctly on the way in and out of a libcall:
+
+; CHECK-LABEL: @test_load_double(
+; CHECK: %1 = bitcast double* %arg to i8*
+; CHECK: %2 = call i64 @__atomic_load_8(i8* %1, i32 5)
+; CHECK: %3 = bitcast i64 %2 to double
+; CHECK: ret double %3
+define double @test_load_double(double* %arg, double %val) {
+ %1 = load atomic double, double* %arg seq_cst, align 16
+ ret double %1
+}
+
+; CHECK-LABEL: @test_store_double(
+; CHECK: %1 = bitcast double* %arg to i8*
+; CHECK: %2 = bitcast double %val to i64
+; CHECK: call void @__atomic_store_8(i8* %1, i64 %2, i32 5)
+; CHECK: ret void
+define void @test_store_double(double* %arg, double %val) {
+ store atomic double %val, double* %arg seq_cst, align 16
+ ret void
+}
+
+; CHECK-LABEL: @test_cmpxchg_ptr(
+; CHECK: %1 = bitcast i16** %arg to i8*
+; CHECK: %2 = alloca i16*, align 4
+; CHECK: %3 = bitcast i16** %2 to i8*
+; CHECK: call void @llvm.lifetime.start(i64 4, i8* %3)
+; CHECK: store i16* %old, i16** %2, align 4
+; CHECK: %4 = ptrtoint i16* %new to i32
+; CHECK: %5 = call zeroext i1 @__atomic_compare_exchange_4(i8* %1, i8* %3, i32 %4, i32 5, i32 2)
+; CHECK: %6 = load i16*, i16** %2, align 4
+; CHECK: call void @llvm.lifetime.end(i64 4, i8* %3)
+; CHECK: %7 = insertvalue { i16*, i1 } undef, i16* %6, 0
+; CHECK: %8 = insertvalue { i16*, i1 } %7, i1 %5, 1
+; CHECK: %ret = extractvalue { i16*, i1 } %8, 0
+; CHECK: ret i16* %ret
+; CHECK: }
+define i16* @test_cmpxchg_ptr(i16** %arg, i16* %old, i16* %new) {
+ %ret_succ = cmpxchg i16** %arg, i16* %old, i16* %new seq_cst acquire
+ %ret = extractvalue { i16*, i1 } %ret_succ, 0
+ ret i16* %ret
+}
+
+;; ...and for a non-integer type of large size too.
+
+; CHECK-LABEL: @test_store_fp128
+; CHECK: %1 = bitcast fp128* %arg to i8*
+; CHECK: %2 = alloca fp128, align 8
+; CHECK: %3 = bitcast fp128* %2 to i8*
+; CHECK: call void @llvm.lifetime.start(i64 16, i8* %3)
+; CHECK: store fp128 %val, fp128* %2, align 8
+; CHECK: call void @__atomic_store(i32 16, i8* %1, i8* %3, i32 5)
+; CHECK: call void @llvm.lifetime.end(i64 16, i8* %3)
+; CHECK: ret void
+define void @test_store_fp128(fp128* %arg, fp128 %val) {
+ store atomic fp128 %val, fp128* %arg seq_cst, align 16
+ ret void
+}
+
+;; Unaligned loads and stores should be expanded to the generic
+;; libcall, just like large loads/stores, and not a specialized one.
+;; NOTE: atomicrmw and cmpxchg don't yet support an align attribute;
+;; when such support is added, they should also be tested here.
+
+; CHECK-LABEL: @test_unaligned_load_i16(
+; CHECK: __atomic_load(
+define i16 @test_unaligned_load_i16(i16* %arg) {
+ %ret = load atomic i16, i16* %arg seq_cst, align 1
+ ret i16 %ret
+}
+
+; CHECK-LABEL: @test_unaligned_store_i16(
+; CHECK: __atomic_store(
+define void @test_unaligned_store_i16(i16* %arg, i16 %val) {
+ store atomic i16 %val, i16* %arg seq_cst, align 1
+ ret void
+}
diff --git a/test/Transforms/AtomicExpand/SPARC/lit.local.cfg b/test/Transforms/AtomicExpand/SPARC/lit.local.cfg
new file mode 100644
index 000000000000..9a34b6578154
--- /dev/null
+++ b/test/Transforms/AtomicExpand/SPARC/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'Sparc' in config.root.targets:
+ config.unsupported = True
diff --git a/test/Transforms/AtomicExpand/SPARC/partword.ll b/test/Transforms/AtomicExpand/SPARC/partword.ll
new file mode 100644
index 000000000000..9963d17c2424
--- /dev/null
+++ b/test/Transforms/AtomicExpand/SPARC/partword.ll
@@ -0,0 +1,166 @@
+; RUN: opt -S %s -atomic-expand | FileCheck %s
+
+;; Verify the cmpxchg and atomicrmw expansions where sub-word-size
+;; instructions are not available.
+
+;;; NOTE: this test is mostly target-independent -- any target which
+;;; doesn't support cmpxchg of sub-word sizes would do.
+target datalayout = "E-m:e-i64:64-n32:64-S128"
+target triple = "sparcv9-unknown-unknown"
+
+; CHECK-LABEL: @test_cmpxchg_i8(
+; CHECK: fence seq_cst
+; CHECK: %0 = ptrtoint i8* %arg to i64
+; CHECK: %1 = and i64 %0, -4
+; CHECK: %AlignedAddr = inttoptr i64 %1 to i32*
+; CHECK: %PtrLSB = and i64 %0, 3
+; CHECK: %2 = xor i64 %PtrLSB, 3
+; CHECK: %3 = shl i64 %2, 3
+; CHECK: %ShiftAmt = trunc i64 %3 to i32
+; CHECK: %Mask = shl i32 255, %ShiftAmt
+; CHECK: %Inv_Mask = xor i32 %Mask, -1
+; CHECK: %4 = zext i8 %new to i32
+; CHECK: %5 = shl i32 %4, %ShiftAmt
+; CHECK: %6 = zext i8 %old to i32
+; CHECK: %7 = shl i32 %6, %ShiftAmt
+; CHECK: %8 = load i32, i32* %AlignedAddr
+; CHECK: %9 = and i32 %8, %Inv_Mask
+; CHECK: br label %partword.cmpxchg.loop
+; CHECK:partword.cmpxchg.loop:
+; CHECK: %10 = phi i32 [ %9, %entry ], [ %16, %partword.cmpxchg.failure ]
+; CHECK: %11 = or i32 %10, %5
+; CHECK: %12 = or i32 %10, %7
+; CHECK: %13 = cmpxchg i32* %AlignedAddr, i32 %12, i32 %11 monotonic monotonic
+; CHECK: %14 = extractvalue { i32, i1 } %13, 0
+; CHECK: %15 = extractvalue { i32, i1 } %13, 1
+; CHECK: br i1 %15, label %partword.cmpxchg.end, label %partword.cmpxchg.failure
+; CHECK:partword.cmpxchg.failure:
+; CHECK: %16 = and i32 %14, %Inv_Mask
+; CHECK: %17 = icmp ne i32 %10, %16
+; CHECK: br i1 %17, label %partword.cmpxchg.loop, label %partword.cmpxchg.end
+; CHECK:partword.cmpxchg.end:
+; CHECK: %18 = lshr i32 %14, %ShiftAmt
+; CHECK: %19 = trunc i32 %18 to i8
+; CHECK: %20 = insertvalue { i8, i1 } undef, i8 %19, 0
+; CHECK: %21 = insertvalue { i8, i1 } %20, i1 %15, 1
+; CHECK: fence seq_cst
+; CHECK: %ret = extractvalue { i8, i1 } %21, 0
+; CHECK: ret i8 %ret
+define i8 @test_cmpxchg_i8(i8* %arg, i8 %old, i8 %new) {
+entry:
+ %ret_succ = cmpxchg i8* %arg, i8 %old, i8 %new seq_cst monotonic
+ %ret = extractvalue { i8, i1 } %ret_succ, 0
+ ret i8 %ret
+}
+
+; CHECK-LABEL: @test_cmpxchg_i16(
+; CHECK: fence seq_cst
+; CHECK: %0 = ptrtoint i16* %arg to i64
+; CHECK: %1 = and i64 %0, -4
+; CHECK: %AlignedAddr = inttoptr i64 %1 to i32*
+; CHECK: %PtrLSB = and i64 %0, 3
+; CHECK: %2 = xor i64 %PtrLSB, 2
+; CHECK: %3 = shl i64 %2, 3
+; CHECK: %ShiftAmt = trunc i64 %3 to i32
+; CHECK: %Mask = shl i32 65535, %ShiftAmt
+; CHECK: %Inv_Mask = xor i32 %Mask, -1
+; CHECK: %4 = zext i16 %new to i32
+; CHECK: %5 = shl i32 %4, %ShiftAmt
+; CHECK: %6 = zext i16 %old to i32
+; CHECK: %7 = shl i32 %6, %ShiftAmt
+; CHECK: %8 = load i32, i32* %AlignedAddr
+; CHECK: %9 = and i32 %8, %Inv_Mask
+; CHECK: br label %partword.cmpxchg.loop
+; CHECK:partword.cmpxchg.loop:
+; CHECK: %10 = phi i32 [ %9, %entry ], [ %16, %partword.cmpxchg.failure ]
+; CHECK: %11 = or i32 %10, %5
+; CHECK: %12 = or i32 %10, %7
+; CHECK: %13 = cmpxchg i32* %AlignedAddr, i32 %12, i32 %11 monotonic monotonic
+; CHECK: %14 = extractvalue { i32, i1 } %13, 0
+; CHECK: %15 = extractvalue { i32, i1 } %13, 1
+; CHECK: br i1 %15, label %partword.cmpxchg.end, label %partword.cmpxchg.failure
+; CHECK:partword.cmpxchg.failure:
+; CHECK: %16 = and i32 %14, %Inv_Mask
+; CHECK: %17 = icmp ne i32 %10, %16
+; CHECK: br i1 %17, label %partword.cmpxchg.loop, label %partword.cmpxchg.end
+; CHECK:partword.cmpxchg.end:
+; CHECK: %18 = lshr i32 %14, %ShiftAmt
+; CHECK: %19 = trunc i32 %18 to i16
+; CHECK: %20 = insertvalue { i16, i1 } undef, i16 %19, 0
+; CHECK: %21 = insertvalue { i16, i1 } %20, i1 %15, 1
+; CHECK: fence seq_cst
+; CHECK: %ret = extractvalue { i16, i1 } %21, 0
+; CHECK: ret i16 %ret
+define i16 @test_cmpxchg_i16(i16* %arg, i16 %old, i16 %new) {
+entry:
+ %ret_succ = cmpxchg i16* %arg, i16 %old, i16 %new seq_cst monotonic
+ %ret = extractvalue { i16, i1 } %ret_succ, 0
+ ret i16 %ret
+}
+
+
+; CHECK-LABEL: @test_add_i16(
+; CHECK: fence seq_cst
+; CHECK: %0 = ptrtoint i16* %arg to i64
+; CHECK: %1 = and i64 %0, -4
+; CHECK: %AlignedAddr = inttoptr i64 %1 to i32*
+; CHECK: %PtrLSB = and i64 %0, 3
+; CHECK: %2 = xor i64 %PtrLSB, 2
+; CHECK: %3 = shl i64 %2, 3
+; CHECK: %ShiftAmt = trunc i64 %3 to i32
+; CHECK: %Mask = shl i32 65535, %ShiftAmt
+; CHECK: %Inv_Mask = xor i32 %Mask, -1
+; CHECK: %4 = zext i16 %val to i32
+; CHECK: %ValOperand_Shifted = shl i32 %4, %ShiftAmt
+; CHECK: %5 = load i32, i32* %AlignedAddr, align 4
+; CHECK: br label %atomicrmw.start
+; CHECK:atomicrmw.start:
+; CHECK: %loaded = phi i32 [ %5, %entry ], [ %newloaded, %atomicrmw.start ]
+; CHECK: %new = add i32 %loaded, %ValOperand_Shifted
+; CHECK: %6 = and i32 %new, %Mask
+; CHECK: %7 = and i32 %loaded, %Inv_Mask
+; CHECK: %8 = or i32 %7, %6
+; CHECK: %9 = cmpxchg i32* %AlignedAddr, i32 %loaded, i32 %8 monotonic monotonic
+; CHECK: %success = extractvalue { i32, i1 } %9, 1
+; CHECK: %newloaded = extractvalue { i32, i1 } %9, 0
+; CHECK: br i1 %success, label %atomicrmw.end, label %atomicrmw.start
+; CHECK:atomicrmw.end:
+; CHECK: %10 = lshr i32 %newloaded, %ShiftAmt
+; CHECK: %11 = trunc i32 %10 to i16
+; CHECK: fence seq_cst
+; CHECK: ret i16 %11
+define i16 @test_add_i16(i16* %arg, i16 %val) {
+entry:
+ %ret = atomicrmw add i16* %arg, i16 %val seq_cst
+ ret i16 %ret
+}
+
+; CHECK-LABEL: @test_xor_i16(
+; (I'm going to just assert on the bits that differ from add, above.)
+; CHECK:atomicrmw.start:
+; CHECK: %new = xor i32 %loaded, %ValOperand_Shifted
+; CHECK: %6 = cmpxchg i32* %AlignedAddr, i32 %loaded, i32 %new monotonic monotonic
+; CHECK:atomicrmw.end:
+define i16 @test_xor_i16(i16* %arg, i16 %val) {
+entry:
+ %ret = atomicrmw xor i16* %arg, i16 %val seq_cst
+ ret i16 %ret
+}
+
+; CHECK-LABEL: @test_min_i16(
+; CHECK:atomicrmw.start:
+; CHECK: %6 = lshr i32 %loaded, %ShiftAmt
+; CHECK: %7 = trunc i32 %6 to i16
+; CHECK: %8 = icmp sle i16 %7, %val
+; CHECK: %new = select i1 %8, i16 %7, i16 %val
+; CHECK: %9 = zext i16 %new to i32
+; CHECK: %10 = shl i32 %9, %ShiftAmt
+; CHECK: %11 = and i32 %loaded, %Inv_Mask
+; CHECK: %12 = or i32 %11, %10
+; CHECK: %13 = cmpxchg i32* %AlignedAddr, i32 %loaded, i32 %12 monotonic monotonic
+; CHECK:atomicrmw.end:
+define i16 @test_min_i16(i16* %arg, i16 %val) {
+entry:
+ %ret = atomicrmw min i16* %arg, i16 %val seq_cst
+ ret i16 %ret
+}
diff --git a/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll b/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
index 792fb1ec4f70..637d29cb6142 100644
--- a/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
+++ b/test/Transforms/AtomicExpand/X86/expand-atomic-non-integer.ll
@@ -80,3 +80,88 @@ define void @float_store_expand_addr1(float addrspace(1)* %ptr, float %v) {
ret void
}
+define void @pointer_cmpxchg_expand(i8** %ptr, i8* %v) {
+; CHECK-LABEL: @pointer_cmpxchg_expand
+; CHECK: %1 = bitcast i8** %ptr to i64*
+; CHECK: %2 = ptrtoint i8* %v to i64
+; CHECK: %3 = cmpxchg i64* %1, i64 0, i64 %2 seq_cst monotonic
+; CHECK: %4 = extractvalue { i64, i1 } %3, 0
+; CHECK: %5 = extractvalue { i64, i1 } %3, 1
+; CHECK: %6 = inttoptr i64 %4 to i8*
+; CHECK: %7 = insertvalue { i8*, i1 } undef, i8* %6, 0
+; CHECK: %8 = insertvalue { i8*, i1 } %7, i1 %5, 1
+ cmpxchg i8** %ptr, i8* null, i8* %v seq_cst monotonic
+ ret void
+}
+
+define void @pointer_cmpxchg_expand2(i8** %ptr, i8* %v) {
+; CHECK-LABEL: @pointer_cmpxchg_expand2
+; CHECK: %1 = bitcast i8** %ptr to i64*
+; CHECK: %2 = ptrtoint i8* %v to i64
+; CHECK: %3 = cmpxchg i64* %1, i64 0, i64 %2 release monotonic
+; CHECK: %4 = extractvalue { i64, i1 } %3, 0
+; CHECK: %5 = extractvalue { i64, i1 } %3, 1
+; CHECK: %6 = inttoptr i64 %4 to i8*
+; CHECK: %7 = insertvalue { i8*, i1 } undef, i8* %6, 0
+; CHECK: %8 = insertvalue { i8*, i1 } %7, i1 %5, 1
+ cmpxchg i8** %ptr, i8* null, i8* %v release monotonic
+ ret void
+}
+
+define void @pointer_cmpxchg_expand3(i8** %ptr, i8* %v) {
+; CHECK-LABEL: @pointer_cmpxchg_expand3
+; CHECK: %1 = bitcast i8** %ptr to i64*
+; CHECK: %2 = ptrtoint i8* %v to i64
+; CHECK: %3 = cmpxchg i64* %1, i64 0, i64 %2 seq_cst seq_cst
+; CHECK: %4 = extractvalue { i64, i1 } %3, 0
+; CHECK: %5 = extractvalue { i64, i1 } %3, 1
+; CHECK: %6 = inttoptr i64 %4 to i8*
+; CHECK: %7 = insertvalue { i8*, i1 } undef, i8* %6, 0
+; CHECK: %8 = insertvalue { i8*, i1 } %7, i1 %5, 1
+ cmpxchg i8** %ptr, i8* null, i8* %v seq_cst seq_cst
+ ret void
+}
+
+define void @pointer_cmpxchg_expand4(i8** %ptr, i8* %v) {
+; CHECK-LABEL: @pointer_cmpxchg_expand4
+; CHECK: %1 = bitcast i8** %ptr to i64*
+; CHECK: %2 = ptrtoint i8* %v to i64
+; CHECK: %3 = cmpxchg weak i64* %1, i64 0, i64 %2 seq_cst seq_cst
+; CHECK: %4 = extractvalue { i64, i1 } %3, 0
+; CHECK: %5 = extractvalue { i64, i1 } %3, 1
+; CHECK: %6 = inttoptr i64 %4 to i8*
+; CHECK: %7 = insertvalue { i8*, i1 } undef, i8* %6, 0
+; CHECK: %8 = insertvalue { i8*, i1 } %7, i1 %5, 1
+ cmpxchg weak i8** %ptr, i8* null, i8* %v seq_cst seq_cst
+ ret void
+}
+
+define void @pointer_cmpxchg_expand5(i8** %ptr, i8* %v) {
+; CHECK-LABEL: @pointer_cmpxchg_expand5
+; CHECK: %1 = bitcast i8** %ptr to i64*
+; CHECK: %2 = ptrtoint i8* %v to i64
+; CHECK: %3 = cmpxchg volatile i64* %1, i64 0, i64 %2 seq_cst seq_cst
+; CHECK: %4 = extractvalue { i64, i1 } %3, 0
+; CHECK: %5 = extractvalue { i64, i1 } %3, 1
+; CHECK: %6 = inttoptr i64 %4 to i8*
+; CHECK: %7 = insertvalue { i8*, i1 } undef, i8* %6, 0
+; CHECK: %8 = insertvalue { i8*, i1 } %7, i1 %5, 1
+ cmpxchg volatile i8** %ptr, i8* null, i8* %v seq_cst seq_cst
+ ret void
+}
+
+define void @pointer_cmpxchg_expand6(i8 addrspace(2)* addrspace(1)* %ptr,
+ i8 addrspace(2)* %v) {
+; CHECK-LABEL: @pointer_cmpxchg_expand6
+; CHECK: %1 = bitcast i8 addrspace(2)* addrspace(1)* %ptr to i64 addrspace(1)*
+; CHECK: %2 = ptrtoint i8 addrspace(2)* %v to i64
+; CHECK: %3 = cmpxchg i64 addrspace(1)* %1, i64 0, i64 %2 seq_cst seq_cst
+; CHECK: %4 = extractvalue { i64, i1 } %3, 0
+; CHECK: %5 = extractvalue { i64, i1 } %3, 1
+; CHECK: %6 = inttoptr i64 %4 to i8 addrspace(2)*
+; CHECK: %7 = insertvalue { i8 addrspace(2)*, i1 } undef, i8 addrspace(2)* %6, 0
+; CHECK: %8 = insertvalue { i8 addrspace(2)*, i1 } %7, i1 %5, 1
+ cmpxchg i8 addrspace(2)* addrspace(1)* %ptr, i8 addrspace(2)* null, i8 addrspace(2)* %v seq_cst seq_cst
+ ret void
+}
+
diff --git a/test/Transforms/BBVectorize/vector-sel.ll b/test/Transforms/BBVectorize/vector-sel.ll
new file mode 100644
index 000000000000..cb775ceae695
--- /dev/null
+++ b/test/Transforms/BBVectorize/vector-sel.ll
@@ -0,0 +1,33 @@
+; RUN: opt < %s -bb-vectorize -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@d = external global [1 x [10 x [1 x i16]]], align 16
+
+;CHECK-LABEL: @test
+;CHECK: %0 = select i1 %bool, <4 x i16> <i16 -2, i16 -2, i16 -2, i16 -2>, <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>
+;CHECK: %1 = select i1 %bool, <4 x i16> <i16 -2, i16 -2, i16 -2, i16 -2>, <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>
+;CHECK: %2 = shufflevector <4 x i16> %0, <4 x i16> %1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+;CHECK: %3 = shufflevector <4 x i1> %boolvec, <4 x i1> %boolvec, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+;CHECK: %4 = select <8 x i1> %3, <8 x i16> <i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3, i16 -3>, <8 x i16> %2
+define void @test() {
+entry:
+ %bool = icmp ne i32 undef, 0
+ %boolvec = icmp ne <4 x i32> undef, zeroinitializer
+ br label %body
+
+body:
+ %0 = select i1 %bool, <4 x i16> <i16 -2, i16 -2, i16 -2, i16 -2>, <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>
+ %1 = select i1 %bool, <4 x i16> <i16 -2, i16 -2, i16 -2, i16 -2>, <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>
+ %2 = select <4 x i1> %boolvec, <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>, <4 x i16> %0
+ %3 = select <4 x i1> %boolvec, <4 x i16> <i16 -3, i16 -3, i16 -3, i16 -3>, <4 x i16> %1
+ %4 = add nsw <4 x i16> %2, zeroinitializer
+ %5 = add nsw <4 x i16> %3, zeroinitializer
+ %6 = getelementptr inbounds [1 x [10 x [1 x i16]]], [1 x [10 x [1 x i16]]]* @d, i64 0, i64 0, i64 undef, i64 0
+ %7 = bitcast i16* %6 to <4 x i16>*
+ store <4 x i16> %4, <4 x i16>* %7, align 2
+ %8 = getelementptr [1 x [10 x [1 x i16]]], [1 x [10 x [1 x i16]]]* @d, i64 0, i64 0, i64 undef, i64 4
+ %9 = bitcast i16* %8 to <4 x i16>*
+ store <4 x i16> %5, <4 x i16>* %9, align 2
+ ret void
+}
diff --git a/test/Transforms/BDCE/dce-pure.ll b/test/Transforms/BDCE/dce-pure.ll
index a379fa4a0039..a487a04db611 100644
--- a/test/Transforms/BDCE/dce-pure.ll
+++ b/test/Transforms/BDCE/dce-pure.ll
@@ -1,4 +1,5 @@
; RUN: opt -bdce -S < %s | FileCheck %s
+; RUN: opt -passes=bdce -S < %s | FileCheck %s
declare i32 @strlen(i8*) readonly nounwind
diff --git a/test/Transforms/CodeGenPrepare/AMDGPU/no-sink-addrspacecast.ll b/test/Transforms/CodeGenPrepare/AMDGPU/no-sink-addrspacecast.ll
index f6f898fae21b..6cec253bbf9b 100644
--- a/test/Transforms/CodeGenPrepare/AMDGPU/no-sink-addrspacecast.ll
+++ b/test/Transforms/CodeGenPrepare/AMDGPU/no-sink-addrspacecast.ll
@@ -8,7 +8,7 @@
define void @test_sink_ptrtoint_asc(float addrspace(1)* nocapture %arg, float addrspace(1)* nocapture readonly %arg1, float addrspace(3)* %arg2) #0 {
bb:
%tmp = getelementptr inbounds float, float addrspace(3)* %arg2, i32 16
- %tmp2 = tail call i32 @llvm.r600.read.tidig.x() #1
+ %tmp2 = tail call i32 @llvm.amdgcn.workitem.id.x() #1
%tmp3 = sext i32 %tmp2 to i64
%tmp4 = getelementptr inbounds float, float addrspace(1)* %arg1, i64 %tmp3
%tmp5 = load float, float addrspace(1)* %tmp4, align 4
@@ -43,7 +43,7 @@ bb15: ; preds = %bb14, %bb8
}
declare float @llvm.fma.f32(float, float, float) #1
-declare i32 @llvm.r600.read.tidig.x() #1
+declare i32 @llvm.amdgcn.workitem.id.x() #1
attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }
diff --git a/test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll b/test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll
index 8c5e01e3634f..66bfc99d1377 100644
--- a/test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll
+++ b/test/Transforms/CodeGenPrepare/X86/catchpad-phi-cast.ll
@@ -101,10 +101,9 @@ catch:
!llvm.module.flags = !{!7, !8, !9}
!llvm.ident = !{!10}
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 254906) (llvm/trunk 254917)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: null, subprograms: !3)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 254906) (llvm/trunk 254917)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: null)
!1 = !DIFile(filename: "t.c", directory: "D:\5Csrc\5Cllvm\5Cbuild")
-!3 = !{!4}
-!4 = distinct !DISubprogram(name: "test_dbg_value", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, variables: null)
+!4 = distinct !DISubprogram(name: "test_dbg_value", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, unit: !0, variables: null)
!5 = !DISubroutineType(types: !6)
!6 = !{null}
!7 = !{i32 2, !"Dwarf Version", i32 4}
@@ -113,6 +112,6 @@ catch:
!10 = !{!"clang version 3.8.0 (trunk 254906) (llvm/trunk 254917)"}
!11 = !DILocalVariable(name: "p", scope: !4, file: !1, line: 2, type: !12)
!12 = !DIBasicType(name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
-!13 = !DIExpression()
+!13 = !DIExpression(DW_OP_deref)
!14 = !DILocation(line: 2, column: 8, scope: !4)
!15 = !DILocation(line: 3, column: 1, scope: !4)
diff --git a/test/Transforms/CodeGenPrepare/X86/fcmp-sinking.ll b/test/Transforms/CodeGenPrepare/X86/fcmp-sinking.ll
new file mode 100644
index 000000000000..94ab74f9e7bf
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/X86/fcmp-sinking.ll
@@ -0,0 +1,29 @@
+; RUN: opt %s -codegenprepare -mattr=+soft-float -S | FileCheck %s -check-prefix=CHECK -check-prefix=SOFTFP
+; RUN: opt %s -codegenprepare -mattr=-soft-float -S | FileCheck %s -check-prefix=CHECK -check-prefix=HARDFP
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: @foo
+; CHECK: entry:
+; SOFTFP: fcmp
+; HARDFP-NOT: fcmp
+; CHECK: body:
+; SOFTFP-NOT: fcmp
+; HARDFP: fcmp
+define void @foo(float %a, float %b) {
+entry:
+ %c = fcmp oeq float %a, %b
+ br label %head
+head:
+ %IND = phi i32 [ 0, %entry ], [ %IND.new, %body1 ]
+ %CMP = icmp slt i32 %IND, 1250
+ br i1 %CMP, label %body, label %tail
+body:
+ br i1 %c, label %body1, label %tail
+body1:
+ %IND.new = add i32 %IND, 1
+ br label %head
+tail:
+ ret void
+}
diff --git a/test/Transforms/CodeGenPrepare/X86/pr27536.ll b/test/Transforms/CodeGenPrepare/X86/pr27536.ll
new file mode 100644
index 000000000000..7ab1b038e80f
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/X86/pr27536.ll
@@ -0,0 +1,32 @@
+; RUN: opt -S -codegenprepare < %s | FileCheck %s
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc"
+
+@rtti = external global i8
+
+define void @test1() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ %e = alloca i8
+ %tmpcast = bitcast i8* %e to i16*
+ invoke void @_CxxThrowException(i8* null, i8* null)
+ to label %catchret.dest unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %entry
+ %0 = catchswitch within none [label %catch] unwind to caller
+
+catch: ; preds = %catch.dispatch
+ %1 = catchpad within %0 [i8* @rtti, i32 0, i16* %tmpcast]
+ catchret from %1 to label %catchret.dest
+
+catchret.dest: ; preds = %catch
+ ret void
+}
+; CHECK-LABEL: define void @test1(
+; CHECK: %[[alloca:.*]] = alloca i8
+; CHECK-NEXT: %[[bc:.*]] = bitcast i8* %[[alloca]] to i16*
+
+; CHECK: catchpad within {{.*}} [i8* @rtti, i32 0, i16* %[[bc]]]
+
+declare void @_CxxThrowException(i8*, i8*)
+
+declare i32 @__CxxFrameHandler3(...)
diff --git a/test/Transforms/CodeGenPrepare/X86/select.ll b/test/Transforms/CodeGenPrepare/X86/select.ll
index a26938ad5ee4..79b195d21ca7 100644
--- a/test/Transforms/CodeGenPrepare/X86/select.ll
+++ b/test/Transforms/CodeGenPrepare/X86/select.ll
@@ -2,8 +2,7 @@
target triple = "x86_64-unknown-unknown"
-; Nothing to sink here, but this gets converted to a branch to
-; avoid stalling an out-of-order CPU on a predictable branch.
+; Nothing to sink and convert here.
define i32 @no_sink(double %a, double* %b, i32 %x, i32 %y) {
entry:
@@ -15,11 +14,7 @@ entry:
; CHECK-LABEL: @no_sink(
; CHECK: %load = load double, double* %b, align 8
; CHECK: %cmp = fcmp olt double %load, %a
-; CHECK: br i1 %cmp, label %select.end, label %select.false
-; CHECK: select.false:
-; CHECK: br label %select.end
-; CHECK: select.end:
-; CHECK: %sel = phi i32 [ %x, %entry ], [ %y, %select.false ]
+; CHECK: %sel = select i1 %cmp, i32 %x, i32 %y
; CHECK: ret i32 %sel
}
@@ -58,7 +53,7 @@ entry:
; CHECK: %div = fdiv float %a, %b
; CHECK: br label %select.end
; CHECK: select.end:
-; CHECK: %sel = phi float [ 4.000000e+00, %entry ], [ %div, %select.false.sink ]
+; CHECK: %sel = phi float [ 4.000000e+00, %entry ], [ %div, %select.false.sink ]
; CHECK: ret float %sel
}
@@ -80,20 +75,39 @@ entry:
; CHECK: %div2 = fdiv float %b, %a
; CHECK: br label %select.end
; CHECK: select.end:
-; CHECK: %sel = phi float [ %div1, %select.true.sink ], [ %div2, %select.false.sink ]
+; CHECK: %sel = phi float [ %div1, %select.true.sink ], [ %div2, %select.false.sink ]
; CHECK: ret float %sel
}
+; But if the select is marked unpredictable, then don't turn it into a branch.
+
+define float @unpredictable_select(float %a, float %b) {
+; CHECK-LABEL: @unpredictable_select(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[DIV:%.*]] = fdiv float %a, %b
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float %a, 1.000000e+00
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], float [[DIV]], float 2.000000e+00, !unpredictable !0
+; CHECK-NEXT: ret float [[SEL]]
+;
+entry:
+ %div = fdiv float %a, %b
+ %cmp = fcmp ogt float %a, 1.0
+ %sel = select i1 %cmp, float %div, float 2.0, !unpredictable !0
+ ret float %sel
+}
+
+!0 = !{}
+
; An 'fadd' is not too expensive, so it's ok to speculate.
define float @fadd_no_sink(float %a, float %b) {
%add = fadd float %a, %b
%cmp = fcmp ogt float 6.0, %a
- %sel = select i1 %cmp, float %add, float 7.0
+ %sel = select i1 %cmp, float %add, float 7.0
ret float %sel
; CHECK-LABEL: @fadd_no_sink(
-; CHECK: %sel = select i1 %cmp, float %add, float 7.0
+; CHECK: %sel = select i1 %cmp, float %add, float 7.0
}
; Possible enhancement: sinkability is only calculated with the direct
@@ -109,7 +123,7 @@ entry:
ret float %sel
; CHECK-LABEL: @fdiv_no_sink(
-; CHECK: %sel = select i1 %cmp, float %add, float 8.0
+; CHECK: %sel = select i1 %cmp, float %add, float 8.0
}
; Do not transform the CFG if the select operands may have side effects.
diff --git a/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll b/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll
new file mode 100644
index 000000000000..5c0b5f3839d0
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/X86/sink-addrmode.ll
@@ -0,0 +1,196 @@
+; RUN: opt -S -codegenprepare < %s | FileCheck %s
+
+target datalayout =
+"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Can we sink single addressing mode computation to use?
+define void @test1(i1 %cond, i64* %base) {
+; CHECK-LABEL: @test1
+; CHECK: add i64 {{.+}}, 40
+entry:
+ %addr = getelementptr inbounds i64, i64* %base, i64 5
+ %casted = bitcast i64* %addr to i32*
+ br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+ %v = load i32, i32* %casted, align 4
+ br label %fallthrough
+
+fallthrough:
+ ret void
+}
+
+declare void @foo(i32)
+
+; Make sure sinking two copies of addressing mode into different blocks works
+define void @test2(i1 %cond, i64* %base) {
+; CHECK-LABEL: @test2
+entry:
+ %addr = getelementptr inbounds i64, i64* %base, i64 5
+ %casted = bitcast i64* %addr to i32*
+ br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+; CHECK-LABEL: if.then:
+; CHECK: add i64 {{.+}}, 40
+ %v1 = load i32, i32* %casted, align 4
+ call void @foo(i32 %v1)
+ %cmp = icmp eq i32 %v1, 0
+ br i1 %cmp, label %next, label %fallthrough
+
+next:
+; CHECK-LABEL: next:
+; CHECK: add i64 {{.+}}, 40
+ %v2 = load i32, i32* %casted, align 4
+ call void @foo(i32 %v2)
+ br label %fallthrough
+
+fallthrough:
+ ret void
+}
+
+; If we have two loads in the same block, only need one copy of addressing mode
+; - instruction selection will duplicate if needed
+define void @test3(i1 %cond, i64* %base) {
+; CHECK-LABEL: @test3
+entry:
+ %addr = getelementptr inbounds i64, i64* %base, i64 5
+ %casted = bitcast i64* %addr to i32*
+ br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+; CHECK-LABEL: if.then:
+; CHECK: add i64 {{.+}}, 40
+ %v1 = load i32, i32* %casted, align 4
+ call void @foo(i32 %v1)
+; CHECK-NOT: add i64 {{.+}}, 40
+ %v2 = load i32, i32* %casted, align 4
+ call void @foo(i32 %v2)
+ br label %fallthrough
+
+fallthrough:
+ ret void
+}
+
+; Can we still sink addressing mode if there's a cold use of the
+; address itself?
+define void @test4(i1 %cond, i64* %base) {
+; CHECK-LABEL: @test4
+entry:
+ %addr = getelementptr inbounds i64, i64* %base, i64 5
+ %casted = bitcast i64* %addr to i32*
+ br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+; CHECK-LABEL: if.then:
+; CHECK: add i64 {{.+}}, 40
+ %v1 = load i32, i32* %casted, align 4
+ call void @foo(i32 %v1)
+ %cmp = icmp eq i32 %v1, 0
+ br i1 %cmp, label %rare.1, label %fallthrough
+
+fallthrough:
+ ret void
+
+rare.1:
+; CHECK-LABEL: rare.1:
+; CHECK: add i64 {{.+}}, 40
+ call void @slowpath(i32 %v1, i32* %casted) cold
+ br label %fallthrough
+}
+
+; Negative test - don't want to duplicate addressing into hot path
+define void @test5(i1 %cond, i64* %base) {
+; CHECK-LABEL: @test5
+entry:
+; CHECK: %addr = getelementptr
+ %addr = getelementptr inbounds i64, i64* %base, i64 5
+ %casted = bitcast i64* %addr to i32*
+ br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+; CHECK-LABEL: if.then:
+; CHECK-NOT: add i64 {{.+}}, 40
+ %v1 = load i32, i32* %casted, align 4
+ call void @foo(i32 %v1)
+ %cmp = icmp eq i32 %v1, 0
+ br i1 %cmp, label %rare.1, label %fallthrough
+
+fallthrough:
+ ret void
+
+rare.1:
+ call void @slowpath(i32 %v1, i32* %casted) ;; NOT COLD
+ br label %fallthrough
+}
+
+; Negative test - opt for size
+define void @test6(i1 %cond, i64* %base) minsize {
+; CHECK-LABEL: @test6
+entry:
+; CHECK: %addr = getelementptr
+ %addr = getelementptr inbounds i64, i64* %base, i64 5
+ %casted = bitcast i64* %addr to i32*
+ br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+; CHECK-LABEL: if.then:
+; CHECK-NOT: add i64 {{.+}}, 40
+ %v1 = load i32, i32* %casted, align 4
+ call void @foo(i32 %v1)
+ %cmp = icmp eq i32 %v1, 0
+ br i1 %cmp, label %rare.1, label %fallthrough
+
+fallthrough:
+ ret void
+
+rare.1:
+ call void @slowpath(i32 %v1, i32* %casted) cold
+ br label %fallthrough
+}
+
+
+; Make sure sinking two copies of addressing mode into different blocks works
+; when there are cold paths for each.
+define void @test7(i1 %cond, i64* %base) {
+; CHECK-LABEL: @test7
+entry:
+ %addr = getelementptr inbounds i64, i64* %base, i64 5
+ %casted = bitcast i64* %addr to i32*
+ br i1 %cond, label %if.then, label %fallthrough
+
+if.then:
+; CHECK-LABEL: if.then:
+; CHECK: add i64 {{.+}}, 40
+ %v1 = load i32, i32* %casted, align 4
+ call void @foo(i32 %v1)
+ %cmp = icmp eq i32 %v1, 0
+ br i1 %cmp, label %rare.1, label %next
+
+next:
+; CHECK-LABEL: next:
+; CHECK: add i64 {{.+}}, 40
+ %v2 = load i32, i32* %casted, align 4
+ call void @foo(i32 %v2)
+ %cmp2 = icmp eq i32 %v2, 0
+ br i1 %cmp2, label %rare.1, label %fallthrough
+
+fallthrough:
+ ret void
+
+rare.1:
+; CHECK-LABEL: rare.1:
+; CHECK: add i64 {{.+}}, 40
+ call void @slowpath(i32 %v1, i32* %casted) cold
+ br label %next
+
+rare.2:
+; CHECK-LABEL: rare.2:
+; CHECK: add i64 {{.+}}, 40
+ call void @slowpath(i32 %v2, i32* %casted) cold
+ br label %fallthrough
+}
+
+
+declare void @slowpath(i32, i32*)
diff --git a/test/Transforms/CodeGenPrepare/builtin-condition.ll b/test/Transforms/CodeGenPrepare/builtin-condition.ll
new file mode 100644
index 000000000000..e18d1b0fc0e5
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/builtin-condition.ll
@@ -0,0 +1,90 @@
+; RUN: opt -codegenprepare -S < %s | FileCheck %s
+
+; #include<stdlib.h>
+; #define STATIC_BUF_SIZE 10
+; #define LARGER_BUF_SIZE 30
+;
+; size_t foo1(int flag) {
+; char *cptr;
+; char chararray[LARGER_BUF_SIZE];
+; char chararray2[STATIC_BUF_SIZE];
+; if(flag)
+; cptr = chararray2;
+; else
+; cptr = chararray;
+;
+; return __builtin_object_size(cptr, 2);
+; }
+;
+; size_t foo2(int n) {
+; char Small[10];
+; char Large[20];
+; char *Ptr = n ? Small : Large + 19;
+; return __builtin_object_size(Ptr, 0);
+; }
+;
+; void foo() {
+; size_t ret;
+; size_t ret1;
+; ret = foo1(0);
+; ret1 = foo2(0);
+; printf("\n%d %d\n", ret, ret1);
+; }
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@.str = private unnamed_addr constant [8 x i8] c"\0A%d %d\0A\00", align 1
+
+define i64 @foo1(i32 %flag) {
+entry:
+ %chararray = alloca [30 x i8], align 16
+ %chararray2 = alloca [10 x i8], align 1
+ %0 = getelementptr inbounds [30 x i8], [30 x i8]* %chararray, i64 0, i64 0
+ call void @llvm.lifetime.start(i64 30, i8* %0)
+ %1 = getelementptr inbounds [10 x i8], [10 x i8]* %chararray2, i64 0, i64 0
+ call void @llvm.lifetime.start(i64 10, i8* %1)
+ %tobool = icmp eq i32 %flag, 0
+ %cptr.0 = select i1 %tobool, i8* %0, i8* %1
+ %2 = call i64 @llvm.objectsize.i64.p0i8(i8* %cptr.0, i1 true)
+ call void @llvm.lifetime.end(i64 10, i8* %1)
+ call void @llvm.lifetime.end(i64 30, i8* %0)
+ ret i64 %2
+; CHECK-LABEL: foo1
+; CHECK: ret i64 10
+}
+
+declare void @llvm.lifetime.start(i64, i8* nocapture)
+
+declare i64 @llvm.objectsize.i64.p0i8(i8*, i1)
+
+declare void @llvm.lifetime.end(i64, i8* nocapture)
+
+define i64 @foo2(i32 %n) {
+entry:
+ %Small = alloca [10 x i8], align 1
+ %Large = alloca [20 x i8], align 16
+ %0 = getelementptr inbounds [10 x i8], [10 x i8]* %Small, i64 0, i64 0
+ call void @llvm.lifetime.start(i64 10, i8* %0)
+ %1 = getelementptr inbounds [20 x i8], [20 x i8]* %Large, i64 0, i64 0
+ call void @llvm.lifetime.start(i64 20, i8* %1)
+ %tobool = icmp ne i32 %n, 0
+ %add.ptr = getelementptr inbounds [20 x i8], [20 x i8]* %Large, i64 0, i64 19
+ %cond = select i1 %tobool, i8* %0, i8* %add.ptr
+ %2 = call i64 @llvm.objectsize.i64.p0i8(i8* %cond, i1 false)
+ call void @llvm.lifetime.end(i64 20, i8* %1)
+ call void @llvm.lifetime.end(i64 10, i8* %0)
+ ret i64 %2
+; CHECK-LABEL: foo2
+; CHECK: ret i64 10
+}
+
+define void @foo() {
+entry:
+ %call = tail call i64 @foo1(i32 0)
+ %call1 = tail call i64 @foo2(i32 0)
+ %call2 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([8 x i8], [8 x i8]* @.str, i64 0, i64 0), i64 %call, i64 %call1)
+ ret void
+}
+
+declare i32 @printf(i8* nocapture readonly, ...) \ No newline at end of file
diff --git a/test/Transforms/CodeGenPrepare/dom-tree.ll b/test/Transforms/CodeGenPrepare/dom-tree.ll
new file mode 100644
index 000000000000..b012706e3047
--- /dev/null
+++ b/test/Transforms/CodeGenPrepare/dom-tree.ll
@@ -0,0 +1,41 @@
+; RUN: opt -S -loop-unroll -codegenprepare < %s -domtree -analyze | FileCheck %s
+;
+; Checks that the dom tree is properly invalidated after an operation that will
+; invalidate it in CodeGenPrepare. If the domtree isn't properly invalidated,
+; this will likely segfault, or print badref.
+
+; CHECK-NOT: <badref>
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "armv7--linux-gnueabihf"
+
+define i32 @f(i32 %a) #0 {
+entry:
+ br label %for.body
+
+for.cond.cleanup:
+ ret i32 %or
+
+for.body:
+ %i.08 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %b.07 = phi i32 [ 0, %entry ], [ %or, %for.body ]
+ %shr = lshr i32 %a, %i.08
+ %and = and i32 %shr, 1
+ %sub = sub nuw nsw i32 31, %i.08
+ %shl = shl i32 %and, %sub
+ %or = or i32 %shl, %b.07
+ %inc = add nuw nsw i32 %i.08, 1
+ %exitcond = icmp eq i32 %inc, 32
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !3
+}
+
+attributes #0 = { norecurse nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a8" "target-features"="+dsp,+neon,+vfp3" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{i32 1, !"min_enum_size", i32 4}
+!2 = !{!"clang version 3.8.0 (http://llvm.org/git/clang.git b7441a0f42c43a8eea9e3e706be187252db747fa)"}
+!3 = distinct !{!3, !4}
+!4 = !{!"llvm.loop.unroll.full"}
diff --git a/test/Transforms/ConstProp/calls.ll b/test/Transforms/ConstProp/calls.ll
index e65d8b28fe7d..a445ac81ff27 100644
--- a/test/Transforms/ConstProp/calls.ll
+++ b/test/Transforms/ConstProp/calls.ll
@@ -176,27 +176,198 @@ define double @T() {
ret double %d
}
-define i1 @test_sse_cvt() nounwind readnone {
-; CHECK-LABEL: @test_sse_cvt(
+define i1 @test_sse_cvts_exact() nounwind readnone {
+; CHECK-LABEL: @test_sse_cvts_exact(
+; CHECK-NOT: call
+; CHECK: ret i1 true
+entry:
+ %i0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> <float 3.0, float undef, float undef, float undef>) nounwind
+ %i1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> <float 3.0, float undef, float undef, float undef>) nounwind
+ %i2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> <double 7.0, double undef>) nounwind
+ %i3 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> <double 7.0, double undef>) nounwind
+ %sum02 = add i32 %i0, %i2
+ %sum13 = add i64 %i1, %i3
+ %cmp02 = icmp eq i32 %sum02, 10
+ %cmp13 = icmp eq i64 %sum13, 10
+ %b = and i1 %cmp02, %cmp13
+ ret i1 %b
+}
+
+; TODO: Inexact values should not fold as they are dependent on rounding mode
+define i1 @test_sse_cvts_inexact() nounwind readnone {
+; CHECK-LABEL: @test_sse_cvts_inexact(
; CHECK-NOT: call
; CHECK: ret i1 true
entry:
%i0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> <float 1.75, float undef, float undef, float undef>) nounwind
- %i1 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> <float 1.75, float undef, float undef, float undef>) nounwind
- %i2 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> <float 1.75, float undef, float undef, float undef>) nounwind
- %i3 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> <float 1.75, float undef, float undef, float undef>) nounwind
- %i4 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> <double 1.75, double undef>) nounwind
- %i5 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> <double 1.75, double undef>) nounwind
- %i6 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> <double 1.75, double undef>) nounwind
- %i7 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> <double 1.75, double undef>) nounwind
- %sum11 = add i32 %i0, %i1
- %sum12 = add i32 %i4, %i5
- %sum1 = add i32 %sum11, %sum12
- %sum21 = add i64 %i2, %i3
- %sum22 = add i64 %i6, %i7
- %sum2 = add i64 %sum21, %sum22
- %sum1.sext = sext i32 %sum1 to i64
- %b = icmp eq i64 %sum1.sext, %sum2
+ %i1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> <float 1.75, float undef, float undef, float undef>) nounwind
+ %i2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> <double 1.75, double undef>) nounwind
+ %i3 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> <double 1.75, double undef>) nounwind
+ %sum02 = add i32 %i0, %i2
+ %sum13 = add i64 %i1, %i3
+ %cmp02 = icmp eq i32 %sum02, 4
+ %cmp13 = icmp eq i64 %sum13, 4
+ %b = and i1 %cmp02, %cmp13
+ ret i1 %b
+}
+
+; FLT_MAX/DBL_MAX should not fold
+define i1 @test_sse_cvts_max() nounwind readnone {
+; CHECK-LABEL: @test_sse_cvts_max(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+ %fm = bitcast <4 x i32> <i32 2139095039, i32 undef, i32 undef, i32 undef> to <4 x float>
+ %dm = bitcast <2 x i64> <i64 9218868437227405311, i64 undef> to <2 x double>
+ %i0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> %fm) nounwind
+ %i1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %fm) nounwind
+ %i2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %dm) nounwind
+ %i3 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %dm) nounwind
+ %sum02 = add i32 %i0, %i2
+ %sum13 = add i64 %i1, %i3
+ %sum02.sext = sext i32 %sum02 to i64
+ %b = icmp eq i64 %sum02.sext, %sum13
+ ret i1 %b
+}
+
+; INF should not fold
+define i1 @test_sse_cvts_inf() nounwind readnone {
+; CHECK-LABEL: @test_sse_cvts_inf(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+ %fm = bitcast <4 x i32> <i32 2139095040, i32 undef, i32 undef, i32 undef> to <4 x float>
+ %dm = bitcast <2 x i64> <i64 9218868437227405312, i64 undef> to <2 x double>
+ %i0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> %fm) nounwind
+ %i1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %fm) nounwind
+ %i2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %dm) nounwind
+ %i3 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %dm) nounwind
+ %sum02 = add i32 %i0, %i2
+ %sum13 = add i64 %i1, %i3
+ %sum02.sext = sext i32 %sum02 to i64
+ %b = icmp eq i64 %sum02.sext, %sum13
+ ret i1 %b
+}
+
+; NAN should not fold
+define i1 @test_sse_cvts_nan() nounwind readnone {
+; CHECK-LABEL: @test_sse_cvts_nan(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+ %fm = bitcast <4 x i32> <i32 2143289344, i32 undef, i32 undef, i32 undef> to <4 x float>
+ %dm = bitcast <2 x i64> <i64 9221120237041090560, i64 undef> to <2 x double>
+ %i0 = tail call i32 @llvm.x86.sse.cvtss2si(<4 x float> %fm) nounwind
+ %i1 = tail call i64 @llvm.x86.sse.cvtss2si64(<4 x float> %fm) nounwind
+ %i2 = call i32 @llvm.x86.sse2.cvtsd2si(<2 x double> %dm) nounwind
+ %i3 = call i64 @llvm.x86.sse2.cvtsd2si64(<2 x double> %dm) nounwind
+ %sum02 = add i32 %i0, %i2
+ %sum13 = add i64 %i1, %i3
+ %sum02.sext = sext i32 %sum02 to i64
+ %b = icmp eq i64 %sum02.sext, %sum13
+ ret i1 %b
+}
+
+define i1 @test_sse_cvtts_exact() nounwind readnone {
+; CHECK-LABEL: @test_sse_cvtts_exact(
+; CHECK-NOT: call
+; CHECK: ret i1 true
+entry:
+ %i0 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> <float 3.0, float undef, float undef, float undef>) nounwind
+ %i1 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> <float 3.0, float undef, float undef, float undef>) nounwind
+ %i2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> <double 7.0, double undef>) nounwind
+ %i3 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> <double 7.0, double undef>) nounwind
+ %sum02 = add i32 %i0, %i2
+ %sum13 = add i64 %i1, %i3
+ %cmp02 = icmp eq i32 %sum02, 10
+ %cmp13 = icmp eq i64 %sum13, 10
+ %b = and i1 %cmp02, %cmp13
+ ret i1 %b
+}
+
+define i1 @test_sse_cvtts_inexact() nounwind readnone {
+; CHECK-LABEL: @test_sse_cvtts_inexact(
+; CHECK-NOT: call
+; CHECK: ret i1 true
+entry:
+ %i0 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> <float 1.75, float undef, float undef, float undef>) nounwind
+ %i1 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> <float 1.75, float undef, float undef, float undef>) nounwind
+ %i2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> <double 1.75, double undef>) nounwind
+ %i3 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> <double 1.75, double undef>) nounwind
+ %sum02 = add i32 %i0, %i2
+ %sum13 = add i64 %i1, %i3
+ %cmp02 = icmp eq i32 %sum02, 2
+ %cmp13 = icmp eq i64 %sum13, 2
+ %b = and i1 %cmp02, %cmp13
+ ret i1 %b
+}
+
+; FLT_MAX/DBL_MAX should not fold
+define i1 @test_sse_cvtts_max() nounwind readnone {
+; CHECK-LABEL: @test_sse_cvtts_max(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+ %fm = bitcast <4 x i32> <i32 2139095039, i32 undef, i32 undef, i32 undef> to <4 x float>
+ %dm = bitcast <2 x i64> <i64 9218868437227405311, i64 undef> to <2 x double>
+ %i0 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %fm) nounwind
+ %i1 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %fm) nounwind
+ %i2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %dm) nounwind
+ %i3 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %dm) nounwind
+ %sum02 = add i32 %i0, %i2
+ %sum13 = add i64 %i1, %i3
+ %sum02.sext = sext i32 %sum02 to i64
+ %b = icmp eq i64 %sum02.sext, %sum13
+ ret i1 %b
+}
+
+; INF should not fold
+define i1 @test_sse_cvtts_inf() nounwind readnone {
+; CHECK-LABEL: @test_sse_cvtts_inf(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+ %fm = bitcast <4 x i32> <i32 2139095040, i32 undef, i32 undef, i32 undef> to <4 x float>
+ %dm = bitcast <2 x i64> <i64 9218868437227405312, i64 undef> to <2 x double>
+ %i0 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %fm) nounwind
+ %i1 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %fm) nounwind
+ %i2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %dm) nounwind
+ %i3 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %dm) nounwind
+ %sum02 = add i32 %i0, %i2
+ %sum13 = add i64 %i1, %i3
+ %sum02.sext = sext i32 %sum02 to i64
+ %b = icmp eq i64 %sum02.sext, %sum13
+ ret i1 %b
+}
+
+; NAN should not fold
+define i1 @test_sse_cvtts_nan() nounwind readnone {
+; CHECK-LABEL: @test_sse_cvtts_nan(
+; CHECK: call
+; CHECK: call
+; CHECK: call
+; CHECK: call
+entry:
+ %fm = bitcast <4 x i32> <i32 2143289344, i32 undef, i32 undef, i32 undef> to <4 x float>
+ %dm = bitcast <2 x i64> <i64 9221120237041090560, i64 undef> to <2 x double>
+ %i0 = tail call i32 @llvm.x86.sse.cvttss2si(<4 x float> %fm) nounwind
+ %i1 = tail call i64 @llvm.x86.sse.cvttss2si64(<4 x float> %fm) nounwind
+ %i2 = call i32 @llvm.x86.sse2.cvttsd2si(<2 x double> %dm) nounwind
+ %i3 = call i64 @llvm.x86.sse2.cvttsd2si64(<2 x double> %dm) nounwind
+ %sum02 = add i32 %i0, %i2
+ %sum13 = add i64 %i1, %i3
+ %sum02.sext = sext i32 %sum02 to i64
+ %b = icmp eq i64 %sum02.sext, %sum13
ret i1 %b
}
diff --git a/test/Transforms/ConstantHoisting/ARM/bad-cases.ll b/test/Transforms/ConstantHoisting/ARM/bad-cases.ll
new file mode 100644
index 000000000000..ffcfb2e56c95
--- /dev/null
+++ b/test/Transforms/ConstantHoisting/ARM/bad-cases.ll
@@ -0,0 +1,109 @@
+; RUN: opt -consthoist -S < %s | FileCheck %s
+target triple = "thumbv6m-none-eabi"
+
+; Allocas in the entry block get handled (for free) by
+; prologue/epilogue. Elsewhere they're fair game though.
+define void @avoid_allocas() {
+; CHECK-LABEL: @avoid_allocas
+; CHECK: %addr1 = alloca i8, i32 1000
+; CHECK: %addr2 = alloca i8, i32 1020
+
+ %addr1 = alloca i8, i32 1000
+ %addr2 = alloca i8, i32 1020
+ br label %elsewhere
+
+elsewhere:
+; CHECK: [[BASE:%.*]] = bitcast i32 1000 to i32
+; CHECK: alloca i8, i32 [[BASE]]
+; CHECK: [[NEXT:%.*]] = add i32 [[BASE]], 20
+; CHECK: alloca i8, i32 [[NEXT]]
+
+ %addr3 = alloca i8, i32 1000
+ %addr4 = alloca i8, i32 1020
+
+ ret void
+}
+
+; The case values of switch instructions are required to be constants.
+define void @avoid_switch(i32 %in) {
+; CHECK-LABEL: @avoid_switch
+; CHECK: switch i32 %in, label %default [
+; CHECK: i32 1000, label %bb1
+; CHECK: i32 1020, label %bb2
+; CHECK: ]
+
+ switch i32 %in, label %default
+ [ i32 1000, label %bb1
+ i32 1020, label %bb2 ]
+
+bb1:
+ ret void
+
+bb2:
+ ret void
+
+default:
+ ret void
+}
+
+; We don't want to convert constant divides because the benefit from converting
+; them to a mul in the backend is larger than constant materialization savings.
+define void @signed_const_division(i32 %in1, i32 %in2, i32* %addr) {
+; CHECK-LABEL: @signed_const_division
+; CHECK: %res1 = sdiv i32 %l1, 1000000000
+; CHECK: %res2 = srem i32 %l2, 1000000000
+entry:
+ br label %loop
+
+loop:
+ %l1 = phi i32 [%res1, %loop], [%in1, %entry]
+ %l2 = phi i32 [%res2, %loop], [%in2, %entry]
+ %res1 = sdiv i32 %l1, 1000000000
+ store volatile i32 %res1, i32* %addr
+ %res2 = srem i32 %l2, 1000000000
+ store volatile i32 %res2, i32* %addr
+ %again = icmp eq i32 %res1, %res2
+ br i1 %again, label %loop, label %end
+
+end:
+ ret void
+}
+
+define void @unsigned_const_division(i32 %in1, i32 %in2, i32* %addr) {
+; CHECK-LABEL: @unsigned_const_division
+; CHECK: %res1 = udiv i32 %l1, 1000000000
+; CHECK: %res2 = urem i32 %l2, 1000000000
+
+entry:
+ br label %loop
+
+loop:
+ %l1 = phi i32 [%res1, %loop], [%in1, %entry]
+ %l2 = phi i32 [%res2, %loop], [%in2, %entry]
+ %res1 = udiv i32 %l1, 1000000000
+ store volatile i32 %res1, i32* %addr
+ %res2 = urem i32 %l2, 1000000000
+ store volatile i32 %res2, i32* %addr
+ %again = icmp eq i32 %res1, %res2
+ br i1 %again, label %loop, label %end
+
+end:
+ ret void
+}
+
+;PR 28282: even when data type is larger than 64-bit, the bit width of the
+;constant operand could be smaller than 64-bit. In this case, there is no
+;benefit to hoisting the constant.
+define i32 @struct_type_test(i96 %a0, i96 %a1) {
+;CHECK-LABEL: @struct_type_test
+entry:
+;CHECK-NOT: %const = bitcast i96 32 to i96
+;CHECK: lshr0 = lshr i96 %a0, 32
+ %lshr0 = lshr i96 %a0, 32
+ %cast0 = trunc i96 %lshr0 to i32
+;CHECK: lshr1 = lshr i96 %a1, 32
+ %lshr1 = lshr i96 %a1, 32
+ %cast1 = trunc i96 %lshr1 to i32
+ %ret = add i32 %cast0, %cast1
+ ret i32 %ret
+}
diff --git a/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll b/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll
new file mode 100644
index 000000000000..6af2bb1d8acb
--- /dev/null
+++ b/test/Transforms/ConstantHoisting/ARM/const-addr-no-neg-offset.ll
@@ -0,0 +1,42 @@
+; RUN: opt -mtriple=arm-arm-none-eabi -consthoist -S < %s | FileCheck %s
+
+; There are different candidates here for the base constant: 1073876992 and
+; 1073876996. But we don't want to see the latter because it results in
+; negative offsets.
+
+define void @foo() #0 {
+entry:
+; CHECK-LABEL: @foo
+; CHECK-NOT: [[CONST1:%const_mat[0-9]*]] = add i32 %const, -4
+ %0 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
+ %or = or i32 %0, 1
+ store volatile i32 %or, i32* inttoptr (i32 1073876992 to i32*), align 4096
+ %1 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4
+ %and = and i32 %1, -117506048
+ store volatile i32 %and, i32* inttoptr (i32 1073876996 to i32*), align 4
+ %2 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
+ %and1 = and i32 %2, -17367041
+ store volatile i32 %and1, i32* inttoptr (i32 1073876996 to i32*), align 4096
+ %3 = load volatile i32, i32* inttoptr (i32 1073876992 to i32*), align 4096
+ %and2 = and i32 %3, -262145
+ store volatile i32 %and2, i32* inttoptr (i32 1073876992 to i32*), align 4096
+ %4 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4
+ %and3 = and i32 %4, -8323073
+ store volatile i32 %and3, i32* inttoptr (i32 1073876996 to i32*), align 4
+ store volatile i32 10420224, i32* inttoptr (i32 1073877000 to i32*), align 8
+ %5 = load volatile i32, i32* inttoptr (i32 1073876996 to i32*), align 4096
+ %or4 = or i32 %5, 65536
+ store volatile i32 %or4, i32* inttoptr (i32 1073876996 to i32*), align 4096
+ %6 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
+ %or6.i.i = or i32 %6, 16
+ store volatile i32 %or6.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
+ %7 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
+ %and7.i.i = and i32 %7, -4
+ store volatile i32 %and7.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
+ %8 = load volatile i32, i32* inttoptr (i32 1073881088 to i32*), align 8192
+ %or8.i.i = or i32 %8, 2
+ store volatile i32 %or8.i.i, i32* inttoptr (i32 1073881088 to i32*), align 8192
+ ret void
+}
+
+attributes #0 = { minsize norecurse nounwind optsize readnone uwtable }
diff --git a/test/Transforms/ConstantHoisting/ARM/lit.local.cfg b/test/Transforms/ConstantHoisting/ARM/lit.local.cfg
new file mode 100644
index 000000000000..236e1d344166
--- /dev/null
+++ b/test/Transforms/ConstantHoisting/ARM/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'ARM' in config.root.targets:
+ config.unsupported = True
diff --git a/test/Transforms/ConstantHoisting/X86/cast-inst.ll b/test/Transforms/ConstantHoisting/X86/cast-inst.ll
index bd1e196b6445..58d7650f91fe 100644
--- a/test/Transforms/ConstantHoisting/X86/cast-inst.ll
+++ b/test/Transforms/ConstantHoisting/X86/cast-inst.ll
@@ -1,4 +1,5 @@
; RUN: opt -S -consthoist < %s | FileCheck %s
+; RUN: opt -S -passes='consthoist' < %s | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"
diff --git a/test/Transforms/ConstantMerge/merge-both.ll b/test/Transforms/ConstantMerge/merge-both.ll
index 514c789b4701..824ad5ab1443 100644
--- a/test/Transforms/ConstantMerge/merge-both.ll
+++ b/test/Transforms/ConstantMerge/merge-both.ll
@@ -1,4 +1,4 @@
-; RUN: opt -constmerge -S < %s | FileCheck %s
+; RUN: opt -S < %s -passes=constmerge | FileCheck %s
; Test that in one run var3 is merged into var2 and var1 into var4.
; Test that we merge @var5 and @var6 into one with the higher alignment
diff --git a/test/Transforms/CorrelatedValuePropagation/basic.ll b/test/Transforms/CorrelatedValuePropagation/basic.ll
index 9d1253a1888e..9836c7f80778 100644
--- a/test/Transforms/CorrelatedValuePropagation/basic.ll
+++ b/test/Transforms/CorrelatedValuePropagation/basic.ll
@@ -199,3 +199,289 @@ out:
next:
ret void
}
+
+define i1 @arg_attribute(i8* nonnull %a) {
+; CHECK-LABEL: @arg_attribute(
+; CHECK: ret i1 false
+ %cmp = icmp eq i8* %a, null
+ br label %exit
+
+exit:
+ ret i1 %cmp
+}
+
+declare nonnull i8* @return_nonnull()
+define i1 @call_attribute() {
+; CHECK-LABEL: @call_attribute(
+; CHECK: ret i1 false
+ %a = call i8* @return_nonnull()
+ %cmp = icmp eq i8* %a, null
+ br label %exit
+
+exit:
+ ret i1 %cmp
+}
+
+define i1 @umin(i32 %a, i32 %b) {
+; CHECK-LABEL: @umin(
+entry:
+ %cmp = icmp ult i32 %a, 5
+ br i1 %cmp, label %a_guard, label %out
+
+a_guard:
+ %cmp2 = icmp ult i32 %b, 20
+ br i1 %cmp2, label %b_guard, label %out
+
+b_guard:
+ %sel_cmp = icmp ult i32 %a, %b
+ %min = select i1 %sel_cmp, i32 %a, i32 %b
+ %res = icmp eq i32 %min, 7
+ br label %next
+next:
+; CHECK: next:
+; CHECK: ret i1 false
+ ret i1 %res
+out:
+ ret i1 false
+}
+
+define i1 @smin(i32 %a, i32 %b) {
+; CHECK-LABEL: @smin(
+entry:
+ %cmp = icmp ult i32 %a, 5
+ br i1 %cmp, label %a_guard, label %out
+
+a_guard:
+ %cmp2 = icmp ult i32 %b, 20
+ br i1 %cmp2, label %b_guard, label %out
+
+b_guard:
+ %sel_cmp = icmp sle i32 %a, %b
+ %min = select i1 %sel_cmp, i32 %a, i32 %b
+ %res = icmp eq i32 %min, 7
+ br label %next
+next:
+; CHECK: next:
+; CHECK: ret i1 false
+ ret i1 %res
+out:
+ ret i1 false
+}
+
+define i1 @smax(i32 %a, i32 %b) {
+; CHECK-LABEL: @smax(
+entry:
+ %cmp = icmp sgt i32 %a, 5
+ br i1 %cmp, label %a_guard, label %out
+
+a_guard:
+ %cmp2 = icmp sgt i32 %b, 20
+ br i1 %cmp2, label %b_guard, label %out
+
+b_guard:
+ %sel_cmp = icmp sge i32 %a, %b
+ %max = select i1 %sel_cmp, i32 %a, i32 %b
+ %res = icmp eq i32 %max, 7
+ br label %next
+next:
+; CHECK: next:
+; CHECK: ret i1 false
+ ret i1 %res
+out:
+ ret i1 false
+}
+
+define i1 @umax(i32 %a, i32 %b) {
+; CHECK-LABEL: @umax(
+entry:
+ %cmp = icmp sgt i32 %a, 5
+ br i1 %cmp, label %a_guard, label %out
+
+a_guard:
+ %cmp2 = icmp sgt i32 %b, 20
+ br i1 %cmp2, label %b_guard, label %out
+
+b_guard:
+ %sel_cmp = icmp uge i32 %a, %b
+ %max = select i1 %sel_cmp, i32 %a, i32 %b
+ %res = icmp eq i32 %max, 7
+ br label %next
+next:
+; CHECK: next:
+; CHECK: ret i1 false
+ ret i1 %res
+out:
+ ret i1 false
+}
+
+define i1 @clamp_low1(i32 %a) {
+; CHECK-LABEL: @clamp_low1(
+entry:
+ %cmp = icmp sge i32 %a, 5
+ br i1 %cmp, label %a_guard, label %out
+
+a_guard:
+ %sel_cmp = icmp eq i32 %a, 5
+ %add = add i32 %a, -1
+ %sel = select i1 %sel_cmp, i32 5, i32 %a
+ %res = icmp eq i32 %sel, 4
+ br label %next
+next:
+; CHECK: next:
+; CHECK: ret i1 false
+ ret i1 %res
+out:
+ ret i1 false
+}
+
+define i1 @clamp_low2(i32 %a) {
+; CHECK-LABEL: @clamp_low2(
+entry:
+ %cmp = icmp sge i32 %a, 5
+ br i1 %cmp, label %a_guard, label %out
+
+a_guard:
+ %sel_cmp = icmp ne i32 %a, 5
+ %add = add i32 %a, -1
+ %sel = select i1 %sel_cmp, i32 %a, i32 5
+ %res = icmp eq i32 %sel, 4
+ br label %next
+next:
+; CHECK: next:
+; CHECK: ret i1 false
+ ret i1 %res
+out:
+ ret i1 false
+}
+
+define i1 @clamp_high1(i32 %a) {
+; CHECK-LABEL: @clamp_high1(
+entry:
+ %cmp = icmp sle i32 %a, 5
+ br i1 %cmp, label %a_guard, label %out
+
+a_guard:
+ %sel_cmp = icmp eq i32 %a, 5
+ %add = add i32 %a, 1
+ %sel = select i1 %sel_cmp, i32 5, i32 %a
+ %res = icmp eq i32 %sel, 6
+ br label %next
+next:
+; CHECK: next:
+; CHECK: ret i1 false
+ ret i1 %res
+out:
+ ret i1 false
+}
+
+define i1 @clamp_high2(i32 %a) {
+; CHECK-LABEL: @clamp_high2(
+entry:
+ %cmp = icmp sle i32 %a, 5
+ br i1 %cmp, label %a_guard, label %out
+
+a_guard:
+ %sel_cmp = icmp ne i32 %a, 5
+ %add = add i32 %a, 1
+ %sel = select i1 %sel_cmp, i32 %a, i32 5
+ %res = icmp eq i32 %sel, 6
+ br label %next
+next:
+; CHECK: next:
+; CHECK: ret i1 false
+ ret i1 %res
+out:
+ ret i1 false
+}
+
+; Just showing arbitrary constants work, not really a clamp
+define i1 @clamp_high3(i32 %a) {
+; CHECK-LABEL: @clamp_high3(
+entry:
+ %cmp = icmp sle i32 %a, 5
+ br i1 %cmp, label %a_guard, label %out
+
+a_guard:
+ %sel_cmp = icmp ne i32 %a, 5
+ %add = add i32 %a, 100
+ %sel = select i1 %sel_cmp, i32 %a, i32 5
+ %res = icmp eq i32 %sel, 105
+ br label %next
+next:
+; CHECK: next:
+; CHECK: ret i1 false
+ ret i1 %res
+out:
+ ret i1 false
+}
+
+define i1 @zext_unknown(i8 %a) {
+; CHECK-LABEL: @zext_unknown
+; CHECK: ret i1 true
+entry:
+ %a32 = zext i8 %a to i32
+ %cmp = icmp sle i32 %a32, 256
+ br label %exit
+exit:
+ ret i1 %cmp
+}
+
+define i1 @trunc_unknown(i32 %a) {
+; CHECK-LABEL: @trunc_unknown
+; CHECK: ret i1 true
+entry:
+ %a8 = trunc i32 %a to i8
+ %a32 = sext i8 %a8 to i32
+ %cmp = icmp sle i32 %a32, 128
+ br label %exit
+exit:
+ ret i1 %cmp
+}
+
+; TODO: missed optimization
+; Make sure we exercise non-integer inputs to unary operators (i.e. crash
+; check).
+define i1 @bitcast_unknown(float %a) {
+; CHECK-LABEL: @bitcast_unknown
+; CHECK: ret i1 %cmp
+entry:
+ %a32 = bitcast float %a to i32
+ %cmp = icmp sle i32 %a32, 128
+ br label %exit
+exit:
+ ret i1 %cmp
+}
+
+define i1 @bitcast_unknown2(i8* %p) {
+; CHECK-LABEL: @bitcast_unknown2
+; CHECK: ret i1 %cmp
+entry:
+ %p64 = ptrtoint i8* %p to i64
+ %cmp = icmp sle i64 %p64, 128
+ br label %exit
+exit:
+ ret i1 %cmp
+}
+
+
+define i1 @and_unknown(i32 %a) {
+; CHECK-LABEL: @and_unknown
+; CHECK: ret i1 true
+entry:
+ %and = and i32 %a, 128
+ %cmp = icmp sle i32 %and, 128
+ br label %exit
+exit:
+ ret i1 %cmp
+}
+
+define i1 @lshr_unknown(i32 %a) {
+; CHECK-LABEL: @lshr_unknown
+; CHECK: ret i1 true
+entry:
+ %and = lshr i32 %a, 30
+ %cmp = icmp sle i32 %and, 128
+ br label %exit
+exit:
+ ret i1 %cmp
+}
diff --git a/test/Transforms/CorrelatedValuePropagation/conflict.ll b/test/Transforms/CorrelatedValuePropagation/conflict.ll
new file mode 100644
index 000000000000..ef566856ed7c
--- /dev/null
+++ b/test/Transforms/CorrelatedValuePropagation/conflict.ll
@@ -0,0 +1,50 @@
+; RUN: opt -correlated-propagation -S < %s | FileCheck %s
+; Checks that we don't crash on conflicting facts about a value
+; (i.e. unreachable code)
+
+; Test that we can handle conflict edge facts
+define i8 @test(i8 %a) {
+; CHECK-LABEL: @test
+ %cmp1 = icmp eq i8 %a, 5
+ br i1 %cmp1, label %next, label %exit
+next:
+ %cmp2 = icmp eq i8 %a, 3
+; CHECK: br i1 false, label %dead, label %exit
+ br i1 %cmp2, label %dead, label %exit
+dead:
+; CHECK-LABEL: dead:
+; CHECK: ret i8 5
+; NOTE: undef, or 3 would be equal valid
+ ret i8 %a
+exit:
+ ret i8 0
+}
+
+declare void @llvm.assume(i1)
+
+; Test that we can handle conflicting assume vs edge facts
+define i8 @test2(i8 %a) {
+; CHECK-LABEL: @test2
+ %cmp1 = icmp eq i8 %a, 5
+ call void @llvm.assume(i1 %cmp1)
+ %cmp2 = icmp eq i8 %a, 3
+; CHECK: br i1 false, label %dead, label %exit
+ br i1 %cmp2, label %dead, label %exit
+dead:
+ ret i8 %a
+exit:
+ ret i8 0
+}
+
+define i8 @test3(i8 %a) {
+; CHECK-LABEL: @test3
+ %cmp1 = icmp eq i8 %a, 5
+ br i1 %cmp1, label %dead, label %exit
+dead:
+ %cmp2 = icmp eq i8 %a, 3
+; CHECK: call void @llvm.assume(i1 false)
+ call void @llvm.assume(i1 %cmp2)
+ ret i8 %a
+exit:
+ ret i8 0
+}
diff --git a/test/Transforms/CorrelatedValuePropagation/icmp.ll b/test/Transforms/CorrelatedValuePropagation/icmp.ll
index c2863ffda0fb..9e525a39dad5 100644
--- a/test/Transforms/CorrelatedValuePropagation/icmp.ll
+++ b/test/Transforms/CorrelatedValuePropagation/icmp.ll
@@ -1,4 +1,5 @@
; RUN: opt -correlated-propagation -S %s | FileCheck %s
+; RUN: opt -passes=correlated-propagation -S %s | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.10.0"
diff --git a/test/Transforms/CorrelatedValuePropagation/range.ll b/test/Transforms/CorrelatedValuePropagation/range.ll
index 884cc8bdc125..8ca171a286cc 100644
--- a/test/Transforms/CorrelatedValuePropagation/range.ll
+++ b/test/Transforms/CorrelatedValuePropagation/range.ll
@@ -189,3 +189,17 @@ define i1 @test10(i64* %p) {
%res = icmp eq i64 %a, 0
ret i1 %res
}
+
+@g = external global i32
+
+define i1 @test11() {
+; CHECK: @test11
+; CHECK: ret i1 true
+ %positive = load i32, i32* @g, !range !{i32 1, i32 2048}
+ %add = add i32 %positive, 1
+ %test = icmp sgt i32 %add, 0
+ br label %next
+
+next:
+ ret i1 %test
+}
diff --git a/test/Transforms/CorrelatedValuePropagation/sdiv.ll b/test/Transforms/CorrelatedValuePropagation/sdiv.ll
new file mode 100644
index 000000000000..b85dcd8c0196
--- /dev/null
+++ b/test/Transforms/CorrelatedValuePropagation/sdiv.ll
@@ -0,0 +1,54 @@
+; RUN: opt < %s -correlated-propagation -S | FileCheck %s
+
+; CHECK-LABEL: @test0(
+define void @test0(i32 %n) {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.body, %entry
+ %j.0 = phi i32 [ %n, %entry ], [ %div, %for.body ]
+ %cmp = icmp sgt i32 %j.0, 1
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+; CHECK: %div1 = udiv i32 %j.0, 2
+ %div = sdiv i32 %j.0, 2
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
+
+; CHECK-LABEL: @test1(
+define void @test1(i32 %n) {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.body, %entry
+ %j.0 = phi i32 [ %n, %entry ], [ %div, %for.body ]
+ %cmp = icmp sgt i32 %j.0, -2
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+; CHECK: %div = sdiv i32 %j.0, 2
+ %div = sdiv i32 %j.0, 2
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
+
+; CHECK-LABEL: @test2(
+define void @test2(i32 %n) {
+entry:
+ %cmp = icmp sgt i32 %n, 1
+ br i1 %cmp, label %bb, label %exit
+
+bb:
+; CHECK: %div1 = udiv i32 %n, 2
+ %div = sdiv i32 %n, 2
+ br label %exit
+
+exit:
+ ret void
+}
diff --git a/test/Transforms/CorrelatedValuePropagation/select.ll b/test/Transforms/CorrelatedValuePropagation/select.ll
index be44bdcd921d..d237521f3014 100644
--- a/test/Transforms/CorrelatedValuePropagation/select.ll
+++ b/test/Transforms/CorrelatedValuePropagation/select.ll
@@ -73,3 +73,146 @@ if.end:
ret i32 %sel
; CHECK: ret i32 1
}
+
+define i1 @test1(i32* %p, i1 %unknown) {
+; CHECK-LABEL: @test1
+ %pval = load i32, i32* %p
+ %cmp1 = icmp slt i32 %pval, 255
+ br i1 %cmp1, label %next, label %exit
+
+next:
+ %min = select i1 %unknown, i32 %pval, i32 5
+ ;; TODO: This pointless branch shouldn't be neccessary
+ br label %next2
+next2:
+; CHECK-LABEL: next2:
+; CHECK: ret i1 false
+ %res = icmp eq i32 %min, 255
+ ret i1 %res
+
+exit:
+; CHECK-LABEL: exit:
+; CHECK: ret i1 true
+ ret i1 true
+}
+
+; Check that we take a conservative meet
+define i1 @test2(i32* %p, i32 %qval, i1 %unknown) {
+; CHECK-LABEL: test2
+ %pval = load i32, i32* %p
+ %cmp1 = icmp slt i32 %pval, 255
+ br i1 %cmp1, label %next, label %exit
+
+next:
+ %min = select i1 %unknown, i32 %pval, i32 %qval
+ ;; TODO: This pointless branch shouldn't be neccessary
+ br label %next2
+next2:
+; CHECK-LABEL: next2
+; CHECK: ret i1 %res
+ %res = icmp eq i32 %min, 255
+ ret i1 %res
+
+exit:
+; CHECK-LABEL: exit:
+; CHECK: ret i1 true
+ ret i1 true
+}
+
+; Same as @test2, but for the opposite select input
+define i1 @test3(i32* %p, i32 %qval, i1 %unknown) {
+; CHECK-LABEL: test3
+ %pval = load i32, i32* %p
+ %cmp1 = icmp slt i32 %pval, 255
+ br i1 %cmp1, label %next, label %exit
+
+next:
+ %min = select i1 %unknown, i32 %qval, i32 %pval
+ ;; TODO: This pointless branch shouldn't be neccessary
+ br label %next2
+next2:
+; CHECK-LABEL: next2
+; CHECK: ret i1 %res
+ %res = icmp eq i32 %min, 255
+ ret i1 %res
+
+exit:
+; CHECK-LABEL: exit:
+; CHECK: ret i1 true
+ ret i1 true
+}
+
+; Conflicting constants (i.e. isOverdefined result)
+; NOTE: Using doubles in this version is a bit of a hack. This
+; is to get around the fact that all integers (including constants
+; and non-constants) are actually represented as constant-ranges.
+define i1 @test4(i32* %p, i32 %qval, i1 %unknown) {
+; CHECK-LABEL: test4
+ %pval = load i32, i32* %p
+ %cmp1 = icmp slt i32 %pval, 255
+ br i1 %cmp1, label %next, label %exit
+
+next:
+ %min = select i1 %unknown, double 1.0, double 0.0
+ ;; TODO: This pointless branch shouldn't be neccessary
+ br label %next2
+next2:
+; CHECK-LABEL: next2
+; CHECK: ret i1 %res
+ %res = fcmp oeq double %min, 300.0
+ ret i1 %res
+
+exit:
+; CHECK-LABEL: exit:
+; CHECK: ret i1 true
+ ret i1 true
+}
+
+;; Using the condition to clamp the result
+;;
+
+define i1 @test5(i32* %p, i1 %unknown) {
+; CHECK-LABEL: @test5
+ %pval = load i32, i32* %p
+ %cmp1 = icmp slt i32 %pval, 255
+ br i1 %cmp1, label %next, label %exit
+
+next:
+ %cond = icmp sgt i32 %pval, 0
+ %min = select i1 %cond, i32 %pval, i32 5
+ ;; TODO: This pointless branch shouldn't be neccessary
+ br label %next2
+next2:
+; CHECK-LABEL: next2:
+; CHECK: ret i1 false
+ %res = icmp eq i32 %min, -1
+ ret i1 %res
+
+exit:
+; CHECK-LABEL: exit:
+; CHECK: ret i1 true
+ ret i1 true
+}
+
+define i1 @test6(i32* %p, i1 %unknown) {
+; CHECK-LABEL: @test6
+ %pval = load i32, i32* %p
+ %cmp1 = icmp ult i32 %pval, 255
+ br i1 %cmp1, label %next, label %exit
+
+next:
+ %cond = icmp ne i32 %pval, 254
+ %sel = select i1 %cond, i32 %pval, i32 1
+ ;; TODO: This pointless branch shouldn't be neccessary
+ br label %next2
+next2:
+; CHECK-LABEL: next2:
+; CHECK: ret i1 true
+ %res = icmp slt i32 %sel, 254
+ ret i1 %res
+
+exit:
+; CHECK-LABEL: exit:
+; CHECK: ret i1 true
+ ret i1 true
+}
diff --git a/test/Transforms/CorrelatedValuePropagation/srem.ll b/test/Transforms/CorrelatedValuePropagation/srem.ll
new file mode 100644
index 000000000000..7c9548566556
--- /dev/null
+++ b/test/Transforms/CorrelatedValuePropagation/srem.ll
@@ -0,0 +1,21 @@
+; RUN: opt < %s -correlated-propagation -S | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+target triple = "thumbv7m-arm-none-eabi"
+
+define void @h(i32* nocapture %p, i32 %x) local_unnamed_addr #0 {
+entry:
+; CHECK-LABEL: @h(
+; CHECK: urem
+
+ %cmp = icmp sgt i32 %x, 0
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ %rem2 = srem i32 %x, 10
+ store i32 %rem2, i32* %p, align 4
+ br label %if.end
+
+if.end:
+ ret void
+}
diff --git a/test/Transforms/CrossDSOCFI/basic.ll b/test/Transforms/CrossDSOCFI/basic.ll
index 49b3e8f23ccf..31ce15c04c3e 100644
--- a/test/Transforms/CrossDSOCFI/basic.ll
+++ b/test/Transforms/CrossDSOCFI/basic.ll
@@ -1,88 +1,81 @@
; RUN: opt -S -cross-dso-cfi < %s | FileCheck %s
+; RUN: opt -S -passes=cross-dso-cfi < %s | FileCheck %s
-; CHECK: define void @__cfi_check(i64 %[[TYPE:.*]], i8* %[[ADDR:.*]]) align 4096
-; CHECK: switch i64 %[[TYPE]], label %[[TRAP:.*]] [
+; CHECK: define void @__cfi_check(i64 %[[TYPE:.*]], i8* %[[ADDR:.*]], i8* %[[DATA:.*]]) align 4096
+; CHECK: switch i64 %[[TYPE]], label %[[FAIL:.*]] [
; CHECK-NEXT: i64 111, label %[[L1:.*]]
; CHECK-NEXT: i64 222, label %[[L2:.*]]
; CHECK-NEXT: i64 333, label %[[L3:.*]]
; CHECK-NEXT: i64 444, label %[[L4:.*]]
; CHECK-NEXT: {{]$}}
-; CHECK: [[TRAP]]:
-; CHECK-NEXT: call void @llvm.trap()
-; CHECK-MEXT: unreachable
-
; CHECK: [[EXIT:.*]]:
; CHECK-NEXT: ret void
+; CHECK: [[FAIL]]:
+; CHECK-NEXT: call void @__cfi_check_fail(i8* %[[DATA]], i8* %[[ADDR]])
+; CHECK-NEXT: br label %[[EXIT]]
+
; CHECK: [[L1]]:
-; CHECK-NEXT: call i1 @llvm.bitset.test(i8* %[[ADDR]], metadata i64 111)
-; CHECK-NEXT: br {{.*}} label %[[EXIT]], label %[[TRAP]]
+; CHECK-NEXT: call i1 @llvm.type.test(i8* %[[ADDR]], metadata i64 111)
+; CHECK-NEXT: br {{.*}} label %[[EXIT]], label %[[FAIL]]
; CHECK: [[L2]]:
-; CHECK-NEXT: call i1 @llvm.bitset.test(i8* %[[ADDR]], metadata i64 222)
-; CHECK-NEXT: br {{.*}} label %[[EXIT]], label %[[TRAP]]
+; CHECK-NEXT: call i1 @llvm.type.test(i8* %[[ADDR]], metadata i64 222)
+; CHECK-NEXT: br {{.*}} label %[[EXIT]], label %[[FAIL]]
; CHECK: [[L3]]:
-; CHECK-NEXT: call i1 @llvm.bitset.test(i8* %[[ADDR]], metadata i64 333)
-; CHECK-NEXT: br {{.*}} label %[[EXIT]], label %[[TRAP]]
+; CHECK-NEXT: call i1 @llvm.type.test(i8* %[[ADDR]], metadata i64 333)
+; CHECK-NEXT: br {{.*}} label %[[EXIT]], label %[[FAIL]]
; CHECK: [[L4]]:
-; CHECK-NEXT: call i1 @llvm.bitset.test(i8* %[[ADDR]], metadata i64 444)
-; CHECK-NEXT: br {{.*}} label %[[EXIT]], label %[[TRAP]]
+; CHECK-NEXT: call i1 @llvm.type.test(i8* %[[ADDR]], metadata i64 444)
+; CHECK-NEXT: br {{.*}} label %[[EXIT]], label %[[FAIL]]
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
-@_ZTV1A = constant i8 0
-@_ZTI1A = constant i8 0
-@_ZTS1A = constant i8 0
-@_ZTV1B = constant i8 0
-@_ZTI1B = constant i8 0
-@_ZTS1B = constant i8 0
+@_ZTV1A = constant i8 0, !type !4, !type !5
+@_ZTV1B = constant i8 0, !type !4, !type !5, !type !6, !type !7
-define signext i8 @f11() {
+define signext i8 @f11() !type !0 !type !1 {
entry:
ret i8 1
}
-define signext i8 @f12() {
+define signext i8 @f12() !type !0 !type !1 {
entry:
ret i8 2
}
-define signext i8 @f13() {
+define signext i8 @f13() !type !0 !type !1 {
entry:
ret i8 3
}
-define i32 @f21() {
+define i32 @f21() !type !2 !type !3 {
entry:
ret i32 4
}
-define i32 @f22() {
+define i32 @f22() !type !2 !type !3 {
entry:
ret i32 5
}
-!llvm.bitsets = !{!0, !1, !2, !3, !4, !7, !8, !9, !10, !11, !12, !13, !14, !15}
-!llvm.module.flags = !{!17}
-
-!0 = !{!"_ZTSFcvE", i8 ()* @f11, i64 0}
-!1 = !{i64 111, i8 ()* @f11, i64 0}
-!2 = !{!"_ZTSFcvE", i8 ()* @f12, i64 0}
-!3 = !{i64 111, i8 ()* @f12, i64 0}
-!4 = !{!"_ZTSFcvE", i8 ()* @f13, i64 0}
-!5 = !{i64 111, i8 ()* @f13, i64 0}
-!6 = !{!"_ZTSFivE", i32 ()* @f21, i64 0}
-!7 = !{i64 222, i32 ()* @f21, i64 0}
-!8 = !{!"_ZTSFivE", i32 ()* @f22, i64 0}
-!9 = !{i64 222, i32 ()* @f22, i64 0}
-!10 = !{!"_ZTS1A", i8* @_ZTV1A, i64 16}
-!11 = !{i64 333, i8* @_ZTV1A, i64 16}
-!12 = !{!"_ZTS1A", i8* @_ZTV1B, i64 16}
-!13 = !{i64 333, i8* @_ZTV1B, i64 16}
-!14 = !{!"_ZTS1B", i8* @_ZTV1B, i64 16}
-!15 = !{i64 444, i8* @_ZTV1B, i64 16}
-!17= !{i32 4, !"Cross-DSO CFI", i32 1}
+define weak_odr hidden void @__cfi_check_fail(i8*, i8*) {
+entry:
+ ret void
+}
+
+!llvm.module.flags = !{!8}
+
+!0 = !{i64 0, !"_ZTSFcvE"}
+!1 = !{i64 0, i64 111}
+!2 = !{i64 0, !"_ZTSFivE"}
+!3 = !{i64 0, i64 222}
+!4 = !{i64 16, !"_ZTS1A"}
+!5 = !{i64 16, i64 333}
+!6 = !{i64 16, !"_ZTS1B"}
+!7 = !{i64 16, i64 444}
+!8 = !{i32 4, !"Cross-DSO CFI", i32 1}
diff --git a/test/Transforms/DCE/basic.ll b/test/Transforms/DCE/basic.ll
new file mode 100644
index 000000000000..c2a56594c91c
--- /dev/null
+++ b/test/Transforms/DCE/basic.ll
@@ -0,0 +1,11 @@
+; RUN: opt -dce -S < %s | FileCheck %s
+; RUN: opt -passes=dce -S < %s | FileCheck %s
+
+; CHECK-LABEL: @test
+define void @test() {
+; CHECK-NOT: add
+ %add = add i32 1, 2
+; CHECK-NOT: sub
+ %sub = sub i32 %add, 1
+ ret void
+}
diff --git a/test/Transforms/DCE/guards.ll b/test/Transforms/DCE/guards.ll
new file mode 100644
index 000000000000..d39c44058a7f
--- /dev/null
+++ b/test/Transforms/DCE/guards.ll
@@ -0,0 +1,11 @@
+; RUN: opt -dce -S < %s | FileCheck %s
+
+declare void @llvm.experimental.guard(i1,...)
+
+define void @f(i32 %val) {
+; CHECK-LABEL: @f(
+; CHECK-NEXT: ret void
+ %val2 = add i32 %val, 1
+ call void(i1, ...) @llvm.experimental.guard(i1 true) [ "deopt"(i32 %val2) ]
+ ret void
+}
diff --git a/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll b/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
index 10578761cd73..dfa999e1b34f 100644
--- a/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
+++ b/test/Transforms/DeadArgElim/2010-04-30-DbgInfo.ll
@@ -46,9 +46,9 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!3}
!llvm.module.flags = !{!30}
!0 = !DILocalVariable(name: "name", line: 8, arg: 1, scope: !1, file: !2, type: !6)
-!1 = distinct !DISubprogram(name: "vfs_addname", linkageName: "vfs_addname", line: 12, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !28, scope: !2, type: !4)
+!1 = distinct !DISubprogram(name: "vfs_addname", linkageName: "vfs_addname", line: 12, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, unit: !3, file: !28, scope: !2, type: !4)
!2 = !DIFile(filename: "tail.c", directory: "/Users/echeng/LLVM/radars/r7927803/")
-!3 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)", isOptimized: true, emissionKind: 0, file: !28, enums: !29, retainedTypes: !29)
+!3 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build 9999)", isOptimized: true, emissionKind: FullDebug, file: !28, enums: !29, retainedTypes: !29)
!4 = !DISubroutineType(types: !5)
!5 = !{!6, !6, !9, !9, !9}
!6 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, file: !28, scope: !2, baseType: !7)
@@ -61,7 +61,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!13 = !DILocation(line: 13, scope: !14)
!14 = distinct !DILexicalBlock(line: 12, column: 0, file: !28, scope: !1)
!15 = !DILocalVariable(name: "name", line: 17, arg: 1, scope: !16, file: !2, type: !6)
-!16 = distinct !DISubprogram(name: "add_name_internal", linkageName: "add_name_internal", line: 22, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !28, scope: !2, type: !17)
+!16 = distinct !DISubprogram(name: "add_name_internal", linkageName: "add_name_internal", line: 22, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: false, unit: !3, file: !28, scope: !2, type: !17)
!17 = !DISubroutineType(types: !18)
!18 = !{!6, !6, !9, !9, !19, !9}
!19 = !DIBasicType(tag: DW_TAG_base_type, name: "unsigned char", size: 8, align: 8, encoding: DW_ATE_unsigned_char)
diff --git a/test/Transforms/DeadArgElim/comdat.ll b/test/Transforms/DeadArgElim/comdat.ll
new file mode 100644
index 000000000000..d3752eb40748
--- /dev/null
+++ b/test/Transforms/DeadArgElim/comdat.ll
@@ -0,0 +1,14 @@
+; RUN: opt -S < %s -deadargelim | FileCheck %s
+
+$f = comdat any
+
+define void @f() comdat {
+ call void @g(i32 0)
+ ret void
+}
+
+define internal void @g(i32 %dead) comdat($f) {
+ ret void
+}
+
+; CHECK: define internal void @g() comdat($f) {
diff --git a/test/Transforms/DeadArgElim/dbginfo.ll b/test/Transforms/DeadArgElim/dbginfo.ll
index a19d4b1fd1ab..47c952d883d4 100644
--- a/test/Transforms/DeadArgElim/dbginfo.ll
+++ b/test/Transforms/DeadArgElim/dbginfo.ll
@@ -48,15 +48,14 @@ attributes #2 = { nounwind readnone }
!llvm.module.flags = !{!12, !13}
!llvm.ident = !{!14}
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 ", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.6.0 ", isOptimized: false, emissionKind: FullDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "dbg.cpp", directory: "/tmp/dbginfo")
!2 = !{}
-!3 = !{!4, !8}
-!4 = distinct !DISubprogram(name: "f2", linkageName: "_Z2f2v", line: 4, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !1, scope: !5, type: !6, variables: !2)
+!4 = distinct !DISubprogram(name: "f2", linkageName: "_Z2f2v", line: 4, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 4, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "dbg.cpp", directory: "/tmp/dbginfo")
!6 = !DISubroutineType(types: !7)
!7 = !{null}
-!8 = distinct !DISubprogram(name: "f1", linkageName: "_ZL2f1iz", line: 1, isLocal: true, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !9, variables: !2)
+!8 = distinct !DISubprogram(name: "f1", linkageName: "_ZL2f1iz", line: 1, isLocal: true, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 1, file: !1, scope: !5, type: !9, variables: !2)
!9 = !DISubroutineType(types: !10)
!10 = !{null, !11, null}
!11 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
diff --git a/test/Transforms/DeadArgElim/funclet.ll b/test/Transforms/DeadArgElim/funclet.ll
new file mode 100644
index 000000000000..36b0d3aa626d
--- /dev/null
+++ b/test/Transforms/DeadArgElim/funclet.ll
@@ -0,0 +1,29 @@
+; RUN: opt -S -deadargelim < %s | FileCheck %s
+target triple = "x86_64-pc-windows-msvc"
+
+define internal void @callee(i8*) {
+entry:
+ call void @thunk()
+ ret void
+}
+
+define void @test1() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ invoke void @thunk()
+ to label %good1 unwind label %bad1
+
+good1: ; preds = %entry-block
+ ret void
+
+bad1: ; preds = %entry-block
+ %pad1 = cleanuppad within none []
+ call void @callee(i8* null) [ "funclet"(token %pad1) ]
+ cleanupret from %pad1 unwind to caller
+}
+; CHECK-LABEL: define void @test1(
+; CHECK: %[[pad:.*]] = cleanuppad within none []
+; CHECK-NEXT: call void @callee() [ "funclet"(token %[[pad]]) ]
+
+declare void @thunk()
+
+declare i32 @__CxxFrameHandler3(...)
diff --git a/test/Transforms/DeadArgElim/returned.ll b/test/Transforms/DeadArgElim/returned.ll
index cbee026f9a37..f9d649498e4b 100644
--- a/test/Transforms/DeadArgElim/returned.ll
+++ b/test/Transforms/DeadArgElim/returned.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -deadargelim -S | FileCheck %s
+; RUN: opt < %s -passes=deadargelim -S | FileCheck %s
%Ty = type { i32, i32 }
diff --git a/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll b/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll
new file mode 100644
index 000000000000..0bcd8516acd2
--- /dev/null
+++ b/test/Transforms/DeadStoreElimination/OverwriteStoreBegin.ll
@@ -0,0 +1,90 @@
+; RUN: opt < %s -basicaa -dse -S | FileCheck %s
+
+define void @write4to7(i32* nocapture %p) {
+; CHECK-LABEL: @write4to7(
+entry:
+ %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1
+ %p3 = bitcast i32* %arrayidx0 to i8*
+; CHECK: [[GEP:%[0-9]+]] = getelementptr inbounds i8, i8* %p3, i64 4
+; CHECK: call void @llvm.memset.p0i8.i64(i8* [[GEP]], i8 0, i64 24, i32 4, i1 false)
+ call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
+ %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 1
+ store i32 1, i32* %arrayidx1, align 4
+ ret void
+}
+
+define void @write0to3(i32* nocapture %p) {
+; CHECK-LABEL: @write0to3(
+entry:
+ %p3 = bitcast i32* %p to i8*
+; CHECK: [[GEP:%[0-9]+]] = getelementptr inbounds i8, i8* %p3, i64 4
+; CHECK: call void @llvm.memset.p0i8.i64(i8* [[GEP]], i8 0, i64 24, i32 4, i1 false)
+ call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
+ store i32 1, i32* %p, align 4
+ ret void
+}
+
+define void @write0to7(i32* nocapture %p) {
+; CHECK-LABEL: @write0to7(
+entry:
+ %p3 = bitcast i32* %p to i8*
+; CHECK: [[GEP:%[0-9]+]] = getelementptr inbounds i8, i8* %p3, i64 8
+; CHECK: call void @llvm.memset.p0i8.i64(i8* [[GEP]], i8 0, i64 24, i32 4, i1 false)
+ call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 4, i1 false)
+ %p4 = bitcast i32* %p to i64*
+ store i64 1, i64* %p4, align 8
+ ret void
+}
+
+define void @write0to7_2(i32* nocapture %p) {
+; CHECK-LABEL: @write0to7_2(
+entry:
+ %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1
+ %p3 = bitcast i32* %arrayidx0 to i8*
+; CHECK: [[GEP:%[0-9]+]] = getelementptr inbounds i8, i8* %p3, i64 4
+; CHECK: call void @llvm.memset.p0i8.i64(i8* [[GEP]], i8 0, i64 24, i32 4, i1 false)
+ call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 28, i32 4, i1 false)
+ %p4 = bitcast i32* %p to i64*
+ store i64 1, i64* %p4, align 8
+ ret void
+}
+
+; We do not trim the beginning of the eariler write if the alignment of the
+; start pointer is changed.
+define void @dontwrite0to3_align8(i32* nocapture %p) {
+; CHECK-LABEL: @dontwrite0to3_align8(
+entry:
+ %p3 = bitcast i32* %p to i8*
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 8, i1 false)
+ call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 8, i1 false)
+ store i32 1, i32* %p, align 4
+ ret void
+}
+
+define void @dontwrite0to1(i32* nocapture %p) {
+; CHECK-LABEL: @dontwrite0to1(
+entry:
+ %p3 = bitcast i32* %p to i8*
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 4, i1 false)
+ call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 4, i1 false)
+ %p4 = bitcast i32* %p to i16*
+ store i16 1, i16* %p4, align 4
+ ret void
+}
+
+define void @dontwrite2to9(i32* nocapture %p) {
+; CHECK-LABEL: @dontwrite2to9(
+entry:
+ %arrayidx0 = getelementptr inbounds i32, i32* %p, i64 1
+ %p3 = bitcast i32* %arrayidx0 to i8*
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 4, i1 false)
+ call void @llvm.memset.p0i8.i64(i8* %p3, i8 0, i64 32, i32 4, i1 false)
+ %p4 = bitcast i32* %p to i16*
+ %arrayidx2 = getelementptr inbounds i16, i16* %p4, i64 1
+ %p5 = bitcast i16* %arrayidx2 to i64*
+ store i64 1, i64* %p5, align 8
+ ret void
+}
+
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
+
diff --git a/test/Transforms/DeadStoreElimination/combined-partial-overwrites.ll b/test/Transforms/DeadStoreElimination/combined-partial-overwrites.ll
new file mode 100644
index 000000000000..cb015b2d1ce9
--- /dev/null
+++ b/test/Transforms/DeadStoreElimination/combined-partial-overwrites.ll
@@ -0,0 +1,239 @@
+; RUN: opt -S -dse < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+%"struct.std::complex" = type { { float, float } }
+
+define void @_Z4testSt7complexIfE(%"struct.std::complex"* noalias nocapture sret %agg.result, i64 %c.coerce) {
+entry:
+; CHECK-LABEL: @_Z4testSt7complexIfE
+
+ %ref.tmp = alloca i64, align 8
+ %tmpcast = bitcast i64* %ref.tmp to %"struct.std::complex"*
+ %c.sroa.0.0.extract.shift = lshr i64 %c.coerce, 32
+ %c.sroa.0.0.extract.trunc = trunc i64 %c.sroa.0.0.extract.shift to i32
+ %0 = bitcast i32 %c.sroa.0.0.extract.trunc to float
+ %c.sroa.2.0.extract.trunc = trunc i64 %c.coerce to i32
+ %1 = bitcast i32 %c.sroa.2.0.extract.trunc to float
+ call void @_Z3barSt7complexIfE(%"struct.std::complex"* nonnull sret %tmpcast, i64 %c.coerce)
+ %2 = bitcast %"struct.std::complex"* %agg.result to i64*
+ %3 = load i64, i64* %ref.tmp, align 8
+ store i64 %3, i64* %2, align 4
+; CHECK-NOT: store i64
+
+ %_M_value.realp.i.i = getelementptr inbounds %"struct.std::complex", %"struct.std::complex"* %agg.result, i64 0, i32 0, i32 0
+ %4 = lshr i64 %3, 32
+ %5 = trunc i64 %4 to i32
+ %6 = bitcast i32 %5 to float
+ %_M_value.imagp.i.i = getelementptr inbounds %"struct.std::complex", %"struct.std::complex"* %agg.result, i64 0, i32 0, i32 1
+ %7 = trunc i64 %3 to i32
+ %8 = bitcast i32 %7 to float
+ %mul_ad.i.i = fmul fast float %6, %1
+ %mul_bc.i.i = fmul fast float %8, %0
+ %mul_i.i.i = fadd fast float %mul_ad.i.i, %mul_bc.i.i
+ %mul_ac.i.i = fmul fast float %6, %0
+ %mul_bd.i.i = fmul fast float %8, %1
+ %mul_r.i.i = fsub fast float %mul_ac.i.i, %mul_bd.i.i
+ store float %mul_r.i.i, float* %_M_value.realp.i.i, align 4
+ store float %mul_i.i.i, float* %_M_value.imagp.i.i, align 4
+ ret void
+; CHECK: ret void
+}
+
+declare void @_Z3barSt7complexIfE(%"struct.std::complex"* sret, i64)
+
+define void @test1(i32 *%ptr) {
+entry:
+; CHECK-LABEL: @test1
+
+ store i32 5, i32* %ptr
+ %bptr = bitcast i32* %ptr to i8*
+ store i8 7, i8* %bptr
+ %wptr = bitcast i32* %ptr to i16*
+ store i16 -30062, i16* %wptr
+ %bptr2 = getelementptr inbounds i8, i8* %bptr, i64 2
+ store i8 25, i8* %bptr2
+ %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+ store i8 47, i8* %bptr3
+ %bptr1 = getelementptr inbounds i8, i8* %bptr, i64 1
+ %wptrp = bitcast i8* %bptr1 to i16*
+ store i16 2020, i16* %wptrp, align 1
+ ret void
+
+; CHECK-NOT: store i32 5, i32* %ptr
+; CHECK-NOT: store i8 7, i8* %bptr
+; CHECK: store i16 -30062, i16* %wptr
+; CHECK-NOT: store i8 25, i8* %bptr2
+; CHECK: store i8 47, i8* %bptr3
+; CHECK: store i16 2020, i16* %wptrp, align 1
+
+; CHECK: ret void
+}
+
+define void @test2(i32 *%ptr) {
+entry:
+; CHECK-LABEL: @test2
+
+ store i32 5, i32* %ptr
+
+ %bptr = bitcast i32* %ptr to i8*
+ %bptrm1 = getelementptr inbounds i8, i8* %bptr, i64 -1
+ %bptr1 = getelementptr inbounds i8, i8* %bptr, i64 1
+ %bptr2 = getelementptr inbounds i8, i8* %bptr, i64 2
+ %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+
+ %wptr = bitcast i8* %bptr to i16*
+ %wptrm1 = bitcast i8* %bptrm1 to i16*
+ %wptr1 = bitcast i8* %bptr1 to i16*
+ %wptr2 = bitcast i8* %bptr2 to i16*
+ %wptr3 = bitcast i8* %bptr3 to i16*
+
+ store i16 1456, i16* %wptrm1, align 1
+ store i16 1346, i16* %wptr, align 1
+ store i16 1756, i16* %wptr1, align 1
+ store i16 1126, i16* %wptr2, align 1
+ store i16 5656, i16* %wptr3, align 1
+
+; CHECK-NOT: store i32 5, i32* %ptr
+
+; CHECK: store i16 1456, i16* %wptrm1, align 1
+; CHECK: store i16 1346, i16* %wptr, align 1
+; CHECK: store i16 1756, i16* %wptr1, align 1
+; CHECK: store i16 1126, i16* %wptr2, align 1
+; CHECK: store i16 5656, i16* %wptr3, align 1
+
+ ret void
+
+; CHECK: ret void
+}
+
+define signext i8 @test3(i32 *%ptr) {
+entry:
+; CHECK-LABEL: @test3
+
+ store i32 5, i32* %ptr
+
+ %bptr = bitcast i32* %ptr to i8*
+ %bptrm1 = getelementptr inbounds i8, i8* %bptr, i64 -1
+ %bptr1 = getelementptr inbounds i8, i8* %bptr, i64 1
+ %bptr2 = getelementptr inbounds i8, i8* %bptr, i64 2
+ %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+
+ %wptr = bitcast i8* %bptr to i16*
+ %wptrm1 = bitcast i8* %bptrm1 to i16*
+ %wptr1 = bitcast i8* %bptr1 to i16*
+ %wptr2 = bitcast i8* %bptr2 to i16*
+ %wptr3 = bitcast i8* %bptr3 to i16*
+
+ %v = load i8, i8* %bptr, align 1
+ store i16 1456, i16* %wptrm1, align 1
+ store i16 1346, i16* %wptr, align 1
+ store i16 1756, i16* %wptr1, align 1
+ store i16 1126, i16* %wptr2, align 1
+ store i16 5656, i16* %wptr3, align 1
+
+; CHECK: store i32 5, i32* %ptr
+
+ ret i8 %v
+
+; CHECK: ret i8 %v
+}
+
+%struct.foostruct = type {
+i32 (i8*, i8**, i32, i8, i8*)*,
+i32 (i8*, i8**, i32, i8, i8*)*,
+i32 (i8*, i8**, i32, i8, i8*)*,
+i32 (i8*, i8**, i32, i8, i8*)*,
+void (i8*, i32, i32)*
+}
+declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1)
+declare void @goFunc(%struct.foostruct*)
+declare i32 @fa(i8*, i8**, i32, i8, i8*)
+
+define void @test4() {
+entry:
+; CHECK-LABEL: @test4
+
+ %bang = alloca %struct.foostruct, align 8
+ %v1 = bitcast %struct.foostruct* %bang to i8*
+ call void @llvm.memset.p0i8.i64(i8* %v1, i8 0, i64 40, i32 8, i1 false)
+ %v2 = getelementptr inbounds %struct.foostruct, %struct.foostruct* %bang, i64 0, i32 0
+ store i32 (i8*, i8**, i32, i8, i8*)* @fa, i32 (i8*, i8**, i32, i8, i8*)** %v2, align 8
+ %v3 = getelementptr inbounds %struct.foostruct, %struct.foostruct* %bang, i64 0, i32 1
+ store i32 (i8*, i8**, i32, i8, i8*)* @fa, i32 (i8*, i8**, i32, i8, i8*)** %v3, align 8
+ %v4 = getelementptr inbounds %struct.foostruct, %struct.foostruct* %bang, i64 0, i32 2
+ store i32 (i8*, i8**, i32, i8, i8*)* @fa, i32 (i8*, i8**, i32, i8, i8*)** %v4, align 8
+ %v5 = getelementptr inbounds %struct.foostruct, %struct.foostruct* %bang, i64 0, i32 3
+ store i32 (i8*, i8**, i32, i8, i8*)* @fa, i32 (i8*, i8**, i32, i8, i8*)** %v5, align 8
+ %v6 = getelementptr inbounds %struct.foostruct, %struct.foostruct* %bang, i64 0, i32 4
+ store void (i8*, i32, i32)* null, void (i8*, i32, i32)** %v6, align 8
+ call void @goFunc(%struct.foostruct* %bang)
+ ret void
+
+; CHECK-NOT: memset
+; CHECK: ret void
+}
+
+define signext i8 @test5(i32 *%ptr) {
+entry:
+; CHECK-LABEL: @test5
+
+ store i32 0, i32* %ptr
+
+ %bptr = bitcast i32* %ptr to i8*
+ %bptr1 = getelementptr inbounds i8, i8* %bptr, i64 1
+ %bptr2 = getelementptr inbounds i8, i8* %bptr, i64 2
+ %bptr3 = getelementptr inbounds i8, i8* %bptr, i64 3
+
+ %wptr = bitcast i8* %bptr to i16*
+ %wptr1 = bitcast i8* %bptr1 to i16*
+ %wptr2 = bitcast i8* %bptr2 to i16*
+
+ store i16 65535, i16* %wptr2, align 1
+ store i16 1456, i16* %wptr1, align 1
+ store i16 1346, i16* %wptr, align 1
+
+; CHECK-NOT: store i32 0, i32* %ptr
+
+ ret i8 0
+}
+
+define signext i8 @test6(i32 *%ptr) {
+entry:
+; CHECK-LABEL: @test6
+
+ store i32 0, i32* %ptr
+
+ %bptr = bitcast i32* %ptr to i16*
+ %bptr1 = getelementptr inbounds i16, i16* %bptr, i64 0
+ %bptr2 = getelementptr inbounds i16, i16* %bptr, i64 1
+
+ store i16 1456, i16* %bptr2, align 1
+ store i16 65535, i16* %bptr1, align 1
+
+; CHECK-NOT: store i32 0, i32* %ptr
+
+ ret i8 0
+}
+
+define signext i8 @test7(i64 *%ptr) {
+entry:
+; CHECK-LABEL: @test7
+
+ store i64 0, i64* %ptr
+
+ %bptr = bitcast i64* %ptr to i16*
+ %bptr1 = getelementptr inbounds i16, i16* %bptr, i64 0
+ %bptr2 = getelementptr inbounds i16, i16* %bptr, i64 1
+ %bptr3 = getelementptr inbounds i16, i16* %bptr, i64 2
+ %bptr4 = getelementptr inbounds i16, i16* %bptr, i64 3
+
+ store i16 1346, i16* %bptr1, align 1
+ store i16 1756, i16* %bptr3, align 1
+ store i16 1456, i16* %bptr2, align 1
+ store i16 5656, i16* %bptr4, align 1
+
+; CHECK-NOT: store i64 0, i64* %ptr
+
+ ret i8 0
+}
diff --git a/test/Transforms/DeadStoreElimination/fence.ll b/test/Transforms/DeadStoreElimination/fence.ll
new file mode 100644
index 000000000000..667f94287750
--- /dev/null
+++ b/test/Transforms/DeadStoreElimination/fence.ll
@@ -0,0 +1,96 @@
+; RUN: opt -S -basicaa -dse < %s | FileCheck %s
+
+; We conservatively choose to prevent dead store elimination
+; across release or stronger fences. It's not required
+; (since there must still be a race on %addr.i), but
+; it is conservatively correct. A legal optimization
+; could hoist the second store above the fence, and then
+; DSE one of them.
+define void @test1(i32* %addr.i) {
+; CHECK-LABEL: @test1
+; CHECK: store i32 5
+; CHECK: fence
+; CHECK: store i32 5
+; CHECK: ret
+ store i32 5, i32* %addr.i, align 4
+ fence release
+ store i32 5, i32* %addr.i, align 4
+ ret void
+}
+
+; Same as previous, but with different values. If we ever optimize
+; this more aggressively, this allows us to check that the correct
+; store is retained (the 'i32 1' store in this case)
+define void @test1b(i32* %addr.i) {
+; CHECK-LABEL: @test1b
+; CHECK: store i32 42
+; CHECK: fence release
+; CHECK: store i32 1
+; CHECK: ret
+ store i32 42, i32* %addr.i, align 4
+ fence release
+ store i32 1, i32* %addr.i, align 4
+ ret void
+}
+
+; We *could* DSE across this fence, but don't. No other thread can
+; observe the order of the acquire fence and the store.
+define void @test2(i32* %addr.i) {
+; CHECK-LABEL: @test2
+; CHECK: store
+; CHECK: fence
+; CHECK: store
+; CHECK: ret
+ store i32 5, i32* %addr.i, align 4
+ fence acquire
+ store i32 5, i32* %addr.i, align 4
+ ret void
+}
+
+; We DSE stack alloc'ed and byval locations, in the presence of fences.
+; Fence does not make an otherwise thread local store visible.
+; Right now the DSE in presence of fence is only done in end blocks (with no successors),
+; but the same logic applies to other basic blocks as well.
+; The store to %addr.i can be removed since it is a byval attribute
+define void @test3(i32* byval %addr.i) {
+; CHECK-LABEL: @test3
+; CHECK-NOT: store
+; CHECK: fence
+; CHECK: ret
+ store i32 5, i32* %addr.i, align 4
+ fence release
+ ret void
+}
+
+declare void @foo(i8* nocapture %p)
+
+declare noalias i8* @malloc(i32)
+
+; DSE of stores in locations allocated through library calls.
+define void @test_nocapture() {
+; CHECK-LABEL: @test_nocapture
+; CHECK: malloc
+; CHECK: foo
+; CHECK-NOT: store
+; CHECK: fence
+ %m = call i8* @malloc(i32 24)
+ call void @foo(i8* %m)
+ store i8 4, i8* %m
+ fence release
+ ret void
+}
+
+
+; This is a full fence, but it does not make a thread local store visible.
+; We can DSE the store in presence of the fence.
+define void @fence_seq_cst() {
+; CHECK-LABEL: @fence_seq_cst
+; CHECK-NEXT: fence seq_cst
+; CHECK-NEXT: ret void
+ %P1 = alloca i32
+ store i32 0, i32* %P1, align 4
+ fence seq_cst
+ store i32 4, i32* %P1, align 4
+ ret void
+}
+
diff --git a/test/Transforms/DeadStoreElimination/inst-limits.ll b/test/Transforms/DeadStoreElimination/inst-limits.ll
index 5848ab89bc88..cf520d7e8f6e 100644
--- a/test/Transforms/DeadStoreElimination/inst-limits.ll
+++ b/test/Transforms/DeadStoreElimination/inst-limits.ll
@@ -118,7 +118,7 @@ entry:
; Insert a meaningless dbg.value intrinsic; it should have no
; effect on the working of DSE in any way.
- call void @llvm.dbg.value(metadata i32* undef, i64 0, metadata !10, metadata !DIExpression()), !dbg !DILocation(scope: !4)
+ call void @llvm.dbg.value(metadata i32 undef, i64 0, metadata !10, metadata !DIExpression()), !dbg !DILocation(scope: !4)
; CHECK: store i32 -1, i32* @x, align 4
store i32 -1, i32* @x, align 4
@@ -245,11 +245,10 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!11, !13}
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.4", isOptimized: true, emissionKind: FullDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "test.c", directory: "/home/tmp")
!2 = !{}
-!3 = !{!4}
-!4 = distinct !DISubprogram(name: "test_within_limit", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !1, scope: !5, type: !6, variables: !2)
+!4 = distinct !DISubprogram(name: "test_within_limit", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 4, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "test.c", directory: "/home/tmp")
!6 = !DISubroutineType(types: !7)
!7 = !{!8}
diff --git a/test/Transforms/DeadStoreElimination/operand-bundles.ll b/test/Transforms/DeadStoreElimination/operand-bundles.ll
new file mode 100644
index 000000000000..d71b9673ed1d
--- /dev/null
+++ b/test/Transforms/DeadStoreElimination/operand-bundles.ll
@@ -0,0 +1,43 @@
+; RUN: opt < %s -basicaa -dse -S | FileCheck %s
+
+declare noalias i8* @malloc(i64) "malloc-like"
+
+declare void @foo()
+declare void @bar(i8*)
+
+define void @test() {
+ %obj = call i8* @malloc(i64 8)
+ store i8 0, i8* %obj
+ ; don't remove store. %obj should be treated like it will be read by the @foo.
+ ; CHECK: store i8 0, i8* %obj
+ call void @foo() ["deopt" (i8* %obj)]
+ ret void
+}
+
+define void @test1() {
+ %obj = call i8* @malloc(i64 8)
+ store i8 0, i8* %obj
+ ; CHECK: store i8 0, i8* %obj
+ call void @bar(i8* nocapture %obj)
+ ret void
+}
+
+define void @test2() {
+ %obj = call i8* @malloc(i64 8)
+ store i8 0, i8* %obj
+ ; CHECK-NOT: store i8 0, i8* %obj
+ call void @foo()
+ ret void
+}
+
+define void @test3() {
+ ; CHECK-LABEL: @test3(
+ %s = alloca i64
+ ; Verify that this first store is not considered killed by the second one
+ ; since it could be observed from the deopt continuation.
+ ; CHECK: store i64 1, i64* %s
+ store i64 1, i64* %s
+ call void @foo() [ "deopt"(i64* %s) ]
+ store i64 0, i64* %s
+ ret void
+}
diff --git a/test/Transforms/DeadStoreElimination/simple.ll b/test/Transforms/DeadStoreElimination/simple.ll
index 4f6221db2454..193add112f64 100644
--- a/test/Transforms/DeadStoreElimination/simple.ll
+++ b/test/Transforms/DeadStoreElimination/simple.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -basicaa -dse -S | FileCheck %s
+; RUN: opt < %s -aa-pipeline=basic-aa -passes=dse -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/Transforms/EarlyCSE/basic.ll b/test/Transforms/EarlyCSE/basic.ll
index 8c9b74b4d0e1..fa1a7059db95 100644
--- a/test/Transforms/EarlyCSE/basic.ll
+++ b/test/Transforms/EarlyCSE/basic.ll
@@ -26,10 +26,9 @@ define void @test1(i8 %V, i32 *%P) {
; CHECK-NEXT: store volatile i32 %E
; CHECK-NEXT: store volatile i32 %E
- %G = add nuw i32 %C, %C ;; not a CSE with E
+ %G = add nuw i32 %C, %C
store volatile i32 %G, i32* %P
- ; CHECK-NEXT: %G = add nuw i32 %C, %C
- ; CHECK-NEXT: store volatile i32 %G
+ ; CHECK-NEXT: store volatile i32 %E
ret void
}
diff --git a/test/Transforms/EarlyCSE/flags.ll b/test/Transforms/EarlyCSE/flags.ll
new file mode 100644
index 000000000000..d189012e4be8
--- /dev/null
+++ b/test/Transforms/EarlyCSE/flags.ll
@@ -0,0 +1,18 @@
+; RUN: opt -early-cse -S < %s | FileCheck %s
+
+declare void @use(i1)
+
+define void @test1(float %x, float %y) {
+entry:
+ %cmp1 = fcmp nnan oeq float %y, %x
+ %cmp2 = fcmp oeq float %x, %y
+ call void @use(i1 %cmp1)
+ call void @use(i1 %cmp2)
+ ret void
+}
+
+; CHECK-LABEL: define void @test1(
+; CHECK: %[[cmp:.*]] = fcmp oeq float %y, %x
+; CHECK-NEXT: call void @use(i1 %[[cmp]])
+; CHECK-NEXT: call void @use(i1 %[[cmp]])
+; CHECK-NEXT: ret void
diff --git a/test/Transforms/EarlyCSE/guards.ll b/test/Transforms/EarlyCSE/guards.ll
new file mode 100644
index 000000000000..2cc4753969b5
--- /dev/null
+++ b/test/Transforms/EarlyCSE/guards.ll
@@ -0,0 +1,181 @@
+; RUN: opt -S -early-cse < %s | FileCheck %s
+
+declare void @llvm.experimental.guard(i1,...)
+
+define i32 @test0(i32* %ptr, i1 %cond) {
+; We can do store to load forwarding over a guard, since it does not
+; clobber memory
+
+; CHECK-LABEL: @test0(
+; CHECK-NEXT: store i32 40, i32* %ptr
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
+; CHECK-NEXT: ret i32 40
+
+ store i32 40, i32* %ptr
+ call void(i1,...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
+ %rval = load i32, i32* %ptr
+ ret i32 %rval
+}
+
+define i32 @test1(i32* %val, i1 %cond) {
+; We can CSE loads over a guard, since it does not clobber memory
+
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: %val0 = load i32, i32* %val
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
+; CHECK-NEXT: ret i32 0
+
+ %val0 = load i32, i32* %val
+ call void(i1,...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
+ %val1 = load i32, i32* %val
+ %rval = sub i32 %val0, %val1
+ ret i32 %rval
+}
+
+define i32 @test2() {
+; Guards on "true" get removed
+
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: ret i32 0
+ call void(i1, ...) @llvm.experimental.guard(i1 true) [ "deopt"() ]
+ ret i32 0
+}
+
+define i32 @test3(i32 %val) {
+; After a guard has executed the condition it was guarding is known to
+; be true.
+
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: %cond0 = icmp slt i32 %val, 40
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %cond0) [ "deopt"() ]
+; CHECK-NEXT: ret i32 -1
+
+ %cond0 = icmp slt i32 %val, 40
+ call void(i1,...) @llvm.experimental.guard(i1 %cond0) [ "deopt"() ]
+ %cond1 = icmp slt i32 %val, 40
+ call void(i1,...) @llvm.experimental.guard(i1 %cond1) [ "deopt"() ]
+
+ %cond2 = icmp slt i32 %val, 40
+ %rval = sext i1 %cond2 to i32
+ ret i32 %rval
+}
+
+define i32 @test3.unhandled(i32 %val) {
+; After a guard has executed the condition it was guarding is known to
+; be true.
+
+; CHECK-LABEL: @test3.unhandled(
+; CHECK-NEXT: %cond0 = icmp slt i32 %val, 40
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %cond0) [ "deopt"() ]
+; CHECK-NEXT: %cond1 = icmp sge i32 %val, 40
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %cond1) [ "deopt"() ]
+; CHECK-NEXT: ret i32 0
+
+; Demonstrates a case we do not yet handle (it is legal to fold %cond2
+; to false)
+ %cond0 = icmp slt i32 %val, 40
+ call void(i1,...) @llvm.experimental.guard(i1 %cond0) [ "deopt"() ]
+ %cond1 = icmp sge i32 %val, 40
+ call void(i1,...) @llvm.experimental.guard(i1 %cond1) [ "deopt"() ]
+ ret i32 0
+}
+
+define i32 @test4(i32 %val, i1 %c) {
+; Same as test3, but with some control flow involved.
+
+; CHECK-LABEL: @test4(
+; CHECK: entry:
+; CHECK-NEXT: %cond0 = icmp slt i32 %val, 40
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %cond0
+; CHECK-NEXT: br label %bb0
+
+; CHECK: bb0:
+; CHECK-NEXT: %cond2 = icmp ult i32 %val, 200
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %cond2
+; CHECK-NEXT: br i1 %c, label %left, label %right
+
+; CHECK: left:
+; CHECK-NEXT: ret i32 0
+
+; CHECK: right:
+; CHECK-NEXT: ret i32 20
+
+entry:
+ %cond0 = icmp slt i32 %val, 40
+ call void(i1,...) @llvm.experimental.guard(i1 %cond0) [ "deopt"() ]
+ %cond1 = icmp slt i32 %val, 40
+ call void(i1,...) @llvm.experimental.guard(i1 %cond1) [ "deopt"() ]
+ br label %bb0
+
+bb0:
+ %cond2 = icmp ult i32 %val, 200
+ call void(i1,...) @llvm.experimental.guard(i1 %cond2) [ "deopt"() ]
+ br i1 %c, label %left, label %right
+
+left:
+ %cond3 = icmp ult i32 %val, 200
+ call void(i1,...) @llvm.experimental.guard(i1 %cond3) [ "deopt"() ]
+ ret i32 0
+
+right:
+ ret i32 20
+}
+
+define i32 @test5(i32 %val, i1 %c) {
+; Same as test4, but the %left block has multiple predecessors.
+
+; CHECK-LABEL: @test5(
+
+; CHECK: entry:
+; CHECK-NEXT: %cond0 = icmp slt i32 %val, 40
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %cond0
+; CHECK-NEXT: br label %bb0
+
+; CHECK: bb0:
+; CHECK-NEXT: %cond2 = icmp ult i32 %val, 200
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %cond2
+; CHECK-NEXT: br i1 %c, label %left, label %right
+
+; CHECK: left:
+; CHECK-NEXT: br label %right
+
+; CHECK: right:
+; CHECK-NEXT: br label %left
+
+entry:
+ %cond0 = icmp slt i32 %val, 40
+ call void(i1,...) @llvm.experimental.guard(i1 %cond0) [ "deopt"() ]
+ %cond1 = icmp slt i32 %val, 40
+ call void(i1,...) @llvm.experimental.guard(i1 %cond1) [ "deopt"() ]
+ br label %bb0
+
+bb0:
+ %cond2 = icmp ult i32 %val, 200
+ call void(i1,...) @llvm.experimental.guard(i1 %cond2) [ "deopt"() ]
+ br i1 %c, label %left, label %right
+
+left:
+ %cond3 = icmp ult i32 %val, 200
+ call void(i1,...) @llvm.experimental.guard(i1 %cond3) [ "deopt"() ]
+ br label %right
+
+right:
+ br label %left
+}
+
+define void @test6(i1 %c, i32* %ptr) {
+; Check that we do not DSE over calls to @llvm.experimental.guard.
+; Guard intrinsics do _read_ memory, so the call to guard below needs
+; to see the store of 500 to %ptr
+
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: store i32 500, i32* %ptr
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %c) [ "deopt"() ]
+; CHECK-NEXT: store i32 600, i32* %ptr
+
+
+ store i32 500, i32* %ptr
+ call void(i1,...) @llvm.experimental.guard(i1 %c) [ "deopt"() ]
+ store i32 600, i32* %ptr
+ ret void
+}
diff --git a/test/Transforms/EarlyCSE/invariant-loads.ll b/test/Transforms/EarlyCSE/invariant-loads.ll
new file mode 100644
index 000000000000..04c7dd1372d9
--- /dev/null
+++ b/test/Transforms/EarlyCSE/invariant-loads.ll
@@ -0,0 +1,99 @@
+; RUN: opt -S -early-cse < %s | FileCheck %s
+
+declare void @clobber_and_use(i32)
+
+define void @f_0(i32* %ptr) {
+; CHECK-LABEL: @f_0(
+; CHECK: %val0 = load i32, i32* %ptr, !invariant.load !0
+; CHECK: call void @clobber_and_use(i32 %val0)
+; CHECK: call void @clobber_and_use(i32 %val0)
+; CHECK: call void @clobber_and_use(i32 %val0)
+; CHECK: ret void
+
+ %val0 = load i32, i32* %ptr, !invariant.load !{}
+ call void @clobber_and_use(i32 %val0)
+ %val1 = load i32, i32* %ptr, !invariant.load !{}
+ call void @clobber_and_use(i32 %val1)
+ %val2 = load i32, i32* %ptr, !invariant.load !{}
+ call void @clobber_and_use(i32 %val2)
+ ret void
+}
+
+define void @f_1(i32* %ptr) {
+; We can forward invariant loads to non-invariant loads, since once an
+; invariant load has executed, the location loaded from is known to be
+; unchanging.
+
+; CHECK-LABEL: @f_1(
+; CHECK: %val0 = load i32, i32* %ptr, !invariant.load !0
+; CHECK: call void @clobber_and_use(i32 %val0)
+; CHECK: call void @clobber_and_use(i32 %val0)
+
+ %val0 = load i32, i32* %ptr, !invariant.load !{}
+ call void @clobber_and_use(i32 %val0)
+ %val1 = load i32, i32* %ptr
+ call void @clobber_and_use(i32 %val1)
+ ret void
+}
+
+define void @f_2(i32* %ptr) {
+; Negative test -- we can't forward a non-invariant load into an
+; invariant load.
+
+; CHECK-LABEL: @f_2(
+; CHECK: %val0 = load i32, i32* %ptr
+; CHECK: call void @clobber_and_use(i32 %val0)
+; CHECK: %val1 = load i32, i32* %ptr, !invariant.load !0
+; CHECK: call void @clobber_and_use(i32 %val1)
+
+ %val0 = load i32, i32* %ptr
+ call void @clobber_and_use(i32 %val0)
+ %val1 = load i32, i32* %ptr, !invariant.load !{}
+ call void @clobber_and_use(i32 %val1)
+ ret void
+}
+
+define void @f_3(i1 %cond, i32* %ptr) {
+; CHECK-LABEL: @f_3(
+ %val0 = load i32, i32* %ptr, !invariant.load !{}
+ call void @clobber_and_use(i32 %val0)
+ br i1 %cond, label %left, label %right
+
+; CHECK: %val0 = load i32, i32* %ptr, !invariant.load !0
+; CHECK: left:
+; CHECK-NEXT: call void @clobber_and_use(i32 %val0)
+
+left:
+ %val1 = load i32, i32* %ptr
+ call void @clobber_and_use(i32 %val1)
+ ret void
+
+right:
+ ret void
+}
+
+define void @f_4(i1 %cond, i32* %ptr) {
+; Negative test -- can't forward %val0 to %val1 because that'll break
+; def-dominates-use.
+
+; CHECK-LABEL: @f_4(
+ br i1 %cond, label %left, label %merge
+
+left:
+; CHECK: left:
+; CHECK-NEXT: %val0 = load i32, i32* %ptr, !invariant.load !
+; CHECK-NEXT: call void @clobber_and_use(i32 %val0)
+
+ %val0 = load i32, i32* %ptr, !invariant.load !{}
+ call void @clobber_and_use(i32 %val0)
+ br label %merge
+
+merge:
+; CHECK: merge:
+; CHECK-NEXT: %val1 = load i32, i32* %ptr
+; CHECK-NEXT: call void @clobber_and_use(i32 %val1)
+
+ %val1 = load i32, i32* %ptr
+ call void @clobber_and_use(i32 %val1)
+ ret void
+}
diff --git a/test/Transforms/EliminateAvailableExternally/visibility.ll b/test/Transforms/EliminateAvailableExternally/visibility.ll
index 9966fcf30e85..f24b8ac65e16 100644
--- a/test/Transforms/EliminateAvailableExternally/visibility.ll
+++ b/test/Transforms/EliminateAvailableExternally/visibility.ll
@@ -1,4 +1,4 @@
-; RUN: opt -elim-avail-extern -S < %s | FileCheck %s
+; RUN: opt -passes=elim-avail-extern -S < %s | FileCheck %s
; CHECK: declare hidden void @f()
define available_externally hidden void @f() {
diff --git a/test/Transforms/Float2Int/basic.ll b/test/Transforms/Float2Int/basic.ll
index 7f04a594dc80..573714dbfb27 100644
--- a/test/Transforms/Float2Int/basic.ll
+++ b/test/Transforms/Float2Int/basic.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -float2int -S | FileCheck %s
+; RUN: opt < %s -passes='float2int' -S | FileCheck %s
;
; Positive tests
diff --git a/test/Transforms/FunctionAttrs/assume.ll b/test/Transforms/FunctionAttrs/assume.ll
new file mode 100644
index 000000000000..58200622eabe
--- /dev/null
+++ b/test/Transforms/FunctionAttrs/assume.ll
@@ -0,0 +1,4 @@
+; RUN: opt -S -o - -functionattrs %s | FileCheck %s
+
+; CHECK-NOT: readnone
+declare void @llvm.assume(i1)
diff --git a/test/Transforms/FunctionAttrs/comdat-ipo.ll b/test/Transforms/FunctionAttrs/comdat-ipo.ll
new file mode 100644
index 000000000000..e82d2fb92155
--- /dev/null
+++ b/test/Transforms/FunctionAttrs/comdat-ipo.ll
@@ -0,0 +1,16 @@
+; RUN: opt < %s -functionattrs -S | FileCheck %s
+
+; See PR26774
+
+; CHECK-LABEL: define void @bar(i8* readonly) {
+define void @bar(i8* readonly) {
+ call void @foo(i8* %0)
+ ret void
+}
+
+
+; CHECK-LABEL: define linkonce_odr void @foo(i8* readonly) {
+define linkonce_odr void @foo(i8* readonly) {
+ call void @bar(i8* %0)
+ ret void
+}
diff --git a/test/Transforms/FunctionAttrs/convergent.ll b/test/Transforms/FunctionAttrs/convergent.ll
new file mode 100644
index 000000000000..37886b82b872
--- /dev/null
+++ b/test/Transforms/FunctionAttrs/convergent.ll
@@ -0,0 +1,106 @@
+; RUN: opt -functionattrs -S < %s | FileCheck %s
+
+; CHECK: Function Attrs
+; CHECK-NOT: convergent
+; CHECK-NEXT: define i32 @nonleaf()
+define i32 @nonleaf() convergent {
+ %a = call i32 @leaf()
+ ret i32 %a
+}
+
+; CHECK: Function Attrs
+; CHECK-NOT: convergent
+; CHECK-NEXT: define i32 @leaf()
+define i32 @leaf() convergent {
+ ret i32 0
+}
+
+; CHECK: Function Attrs
+; CHECK-SAME: convergent
+; CHECK-NEXT: declare i32 @k()
+declare i32 @k() convergent
+
+; CHECK: Function Attrs
+; CHECK-SAME: convergent
+; CHECK-NEXT: define i32 @extern()
+define i32 @extern() convergent {
+ %a = call i32 @k() convergent
+ ret i32 %a
+}
+
+; Convergent should not be removed on the function here. Although the call is
+; not explicitly convergent, it picks up the convergent attr from the callee.
+;
+; CHECK: Function Attrs
+; CHECK-SAME: convergent
+; CHECK-NEXT: define i32 @extern_non_convergent_call()
+define i32 @extern_non_convergent_call() convergent {
+ %a = call i32 @k()
+ ret i32 %a
+}
+
+; CHECK: Function Attrs
+; CHECK-SAME: convergent
+; CHECK-NEXT: define i32 @indirect_convergent_call(
+define i32 @indirect_convergent_call(i32 ()* %f) convergent {
+ %a = call i32 %f() convergent
+ ret i32 %a
+}
+; Give indirect_non_convergent_call the norecurse attribute so we get a
+; "Function Attrs" comment in the output.
+;
+; CHECK: Function Attrs
+; CHECK-NOT: convergent
+; CHECK-NEXT: define i32 @indirect_non_convergent_call(
+define i32 @indirect_non_convergent_call(i32 ()* %f) convergent norecurse {
+ %a = call i32 %f()
+ ret i32 %a
+}
+
+; CHECK: Function Attrs
+; CHECK-SAME: convergent
+; CHECK-NEXT: declare void @llvm.nvvm.barrier0()
+declare void @llvm.nvvm.barrier0() convergent
+
+; CHECK: Function Attrs
+; CHECK-SAME: convergent
+; CHECK-NEXT: define i32 @intrinsic()
+define i32 @intrinsic() convergent {
+ ; Implicitly convergent, because the intrinsic is convergent.
+ call void @llvm.nvvm.barrier0()
+ ret i32 0
+}
+
+; CHECK: Function Attrs
+; CHECK-NOT: convergent
+; CHECK-NEXT: define i32 @recursive1()
+define i32 @recursive1() convergent {
+ %a = call i32 @recursive2() convergent
+ ret i32 %a
+}
+
+; CHECK: Function Attrs
+; CHECK-NOT: convergent
+; CHECK-NEXT: define i32 @recursive2()
+define i32 @recursive2() convergent {
+ %a = call i32 @recursive1() convergent
+ ret i32 %a
+}
+
+; CHECK: Function Attrs
+; CHECK-SAME: convergent
+; CHECK-NEXT: define i32 @noopt()
+define i32 @noopt() convergent optnone noinline {
+ %a = call i32 @noopt_friend() convergent
+ ret i32 0
+}
+
+; A function which is mutually-recursive with a convergent, optnone function
+; shouldn't have its convergent attribute stripped.
+; CHECK: Function Attrs
+; CHECK-SAME: convergent
+; CHECK-NEXT: define i32 @noopt_friend()
+define i32 @noopt_friend() convergent {
+ %a = call i32 @noopt()
+ ret i32 0
+}
diff --git a/test/Transforms/FunctionAttrs/nocapture.ll b/test/Transforms/FunctionAttrs/nocapture.ll
index 4057b2a86e3b..020d8bcd4c76 100644
--- a/test/Transforms/FunctionAttrs/nocapture.ll
+++ b/test/Transforms/FunctionAttrs/nocapture.ll
@@ -193,3 +193,28 @@ define void @test6_2(i8* %x6_2, i8* %y6_2, i8* %z6_2) {
ret void
}
+; CHECK: define void @test_cmpxchg(i32* nocapture %p)
+define void @test_cmpxchg(i32* %p) {
+ cmpxchg i32* %p, i32 0, i32 1 acquire monotonic
+ ret void
+}
+
+; CHECK: define void @test_cmpxchg_ptr(i32** nocapture %p, i32* %q)
+define void @test_cmpxchg_ptr(i32** %p, i32* %q) {
+ cmpxchg i32** %p, i32* null, i32* %q acquire monotonic
+ ret void
+}
+
+; CHECK: define void @test_atomicrmw(i32* nocapture %p)
+define void @test_atomicrmw(i32* %p) {
+ atomicrmw add i32* %p, i32 1 seq_cst
+ ret void
+}
+
+; CHECK: define void @test_volatile(i32* %x)
+define void @test_volatile(i32* %x) {
+entry:
+ %gep = getelementptr i32, i32* %x, i64 1
+ store volatile i32 0, i32* %gep, align 4
+ ret void
+}
diff --git a/test/Transforms/FunctionAttrs/norecurse.ll b/test/Transforms/FunctionAttrs/norecurse.ll
index d5a2d8208402..42de757fbad2 100644
--- a/test/Transforms/FunctionAttrs/norecurse.ll
+++ b/test/Transforms/FunctionAttrs/norecurse.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -basicaa -functionattrs -rpo-functionattrs -S | FileCheck %s
+; RUN: opt < %s -aa-pipeline=basic-aa -passes='cgscc(function-attrs),rpo-functionattrs' -S | FileCheck %s
; CHECK: define i32 @leaf() #0
define i32 @leaf() {
@@ -29,6 +30,13 @@ define i32 @extern() {
}
declare i32 @k() readnone
+; CHECK: define void @intrinsic(i8* nocapture %dest, i8* nocapture readonly %src, i32 %len) {
+define void @intrinsic(i8* %dest, i8* %src, i32 %len) {
+ call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i32 1, i1 false)
+ ret void
+}
+declare void @llvm.memcpy.p0i8.p0i8.i32(i8*, i8*, i32, i32, i1)
+
; CHECK: define internal i32 @called_by_norecurse() #0
define internal i32 @called_by_norecurse() {
%a = call i32 @k()
diff --git a/test/Transforms/FunctionAttrs/noreturn.ll b/test/Transforms/FunctionAttrs/noreturn.ll
deleted file mode 100644
index 990bea984dfe..000000000000
--- a/test/Transforms/FunctionAttrs/noreturn.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: opt < %s -functionattrs -instcombine -S | FileCheck %s
-
-define void @endless_loop() noreturn nounwind readnone ssp uwtable {
-entry:
- br label %while.body
-
-while.body:
- br label %while.body
-}
-;CHECK-LABEL: @main(
-;CHECK: endless_loop
-;CHECK: ret
-define i32 @main() noreturn nounwind ssp uwtable {
-entry:
- tail call void @endless_loop()
- unreachable
-}
-
diff --git a/test/Transforms/FunctionAttrs/operand-bundles-scc.ll b/test/Transforms/FunctionAttrs/operand-bundles-scc.ll
new file mode 100644
index 000000000000..2502ea719cab
--- /dev/null
+++ b/test/Transforms/FunctionAttrs/operand-bundles-scc.ll
@@ -0,0 +1,13 @@
+; RUN: opt -S -functionattrs < %s | FileCheck %s
+
+define void @f() {
+; CHECK-LABEL: define void @f() {
+ call void @g() [ "unknown"() ]
+ ret void
+}
+
+define void @g() {
+; CHECK-LABEL: define void @g() {
+ call void @f()
+ ret void
+}
diff --git a/test/Transforms/FunctionAttrs/optnone.ll b/test/Transforms/FunctionAttrs/optnone.ll
index 441ff4da65ec..cd08c75d75b4 100644
--- a/test/Transforms/FunctionAttrs/optnone.ll
+++ b/test/Transforms/FunctionAttrs/optnone.ll
@@ -16,11 +16,9 @@ define void @test_optnone(i8* %p) noinline optnone {
declare i8 @strlen(i8*) noinline optnone
; CHECK-LABEL: @strlen
-; CHECK: (i8*) #2
+; CHECK: (i8*) #1
; CHECK-LABEL: attributes #0
; CHECK: = { norecurse readnone }
; CHECK-LABEL: attributes #1
-; CHECK: = { noinline norecurse optnone }
-; CHECK-LABEL: attributes #2
; CHECK: = { noinline optnone }
diff --git a/test/Transforms/FunctionAttrs/readattrs.ll b/test/Transforms/FunctionAttrs/readattrs.ll
index aabdfe8d2005..65861a55fc2d 100644
--- a/test/Transforms/FunctionAttrs/readattrs.ll
+++ b/test/Transforms/FunctionAttrs/readattrs.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -functionattrs -S | FileCheck %s
+; RUN: opt < %s -aa-pipeline=basic-aa -passes='cgscc(function-attrs)' -S | FileCheck %s
@x = global i32 0
declare void @test1_1(i8* %x1_1, i8* readonly %y1_1, ...)
@@ -103,3 +104,11 @@ define <4 x i32> @test12_2(<4 x i32*> %ptrs) {
%res = call <4 x i32> @test12_1(<4 x i32*> %ptrs)
ret <4 x i32> %res
}
+
+; CHECK: define i32 @volatile_load(
+; CHECK-NOT: readonly
+; CHECK: ret
+define i32 @volatile_load(i32* %p) {
+ %load = load volatile i32, i32* %p
+ ret i32 %load
+}
diff --git a/test/Transforms/FunctionImport/Inputs/adjustable_threshold.ll b/test/Transforms/FunctionImport/Inputs/adjustable_threshold.ll
new file mode 100644
index 000000000000..fd4644d264a5
--- /dev/null
+++ b/test/Transforms/FunctionImport/Inputs/adjustable_threshold.ll
@@ -0,0 +1,37 @@
+define void @globalfunc1() {
+entry:
+ call void @trampoline()
+ ret void
+}
+; Adds an artificial level in the call graph to reduce the importing threshold
+define void @trampoline() {
+entry:
+ call void @largefunction()
+ ret void
+}
+
+define void @globalfunc2() {
+entry:
+ call void @largefunction()
+ ret void
+}
+
+
+; Size is 5: if two layers below in the call graph the threshold will be 4,
+; but if only one layer below the threshold will be 7.
+define void @largefunction() {
+ entry:
+ call void @staticfunc2()
+ call void @staticfunc2()
+ call void @staticfunc2()
+ call void @staticfunc2()
+ call void @staticfunc2()
+ ret void
+}
+
+define internal void @staticfunc2() {
+entry:
+ ret void
+}
+
+
diff --git a/test/Transforms/FunctionImport/Inputs/funcimport.ll b/test/Transforms/FunctionImport/Inputs/funcimport.ll
index 79b766b386df..fa96b8ea2663 100644
--- a/test/Transforms/FunctionImport/Inputs/funcimport.ll
+++ b/test/Transforms/FunctionImport/Inputs/funcimport.ll
@@ -75,6 +75,11 @@ entry:
ret void
}
+define linkonce void @linkoncefunc2() #0 {
+entry:
+ ret void
+}
+
define internal i32 @staticfunc() #0 {
entry:
ret i32 1
@@ -99,4 +104,46 @@ entry:
ret void
}
+define void @referencelargelinkonce() #0 {
+entry:
+ call void @linkonceodr()
+ ret void
+}
+
+; A large enough linkonce_odr function that should never be imported
+define linkonce_odr void @linkonceodr() #0 {
+entry:
+ call void @globalfunc2()
+ call void @globalfunc2()
+ call void @globalfunc2()
+ call void @globalfunc2()
+ call void @globalfunc2()
+ call void @globalfunc2()
+ call void @globalfunc2()
+ call void @globalfunc2()
+ call void @globalfunc2()
+ call void @globalfunc2()
+ call void @globalfunc2()
+ call void @globalfunc2()
+ call void @globalfunc2()
+ call void @globalfunc2()
+ call void @globalfunc2()
+ call void @globalfunc2()
+ call void @globalfunc2()
+ call void @globalfunc2()
+ call void @globalfunc2()
+ call void @globalfunc2()
+ call void @globalfunc2()
+ call void @globalfunc2()
+ call void @globalfunc2()
+ call void @globalfunc2()
+ call void @globalfunc2()
+ call void @globalfunc2()
+ call void @globalfunc2()
+ call void @globalfunc2()
+ call void @globalfunc2()
+ call void @globalfunc2()
+ ret void
+}
+
diff --git a/test/Transforms/FunctionImport/Inputs/funcimport_debug.ll b/test/Transforms/FunctionImport/Inputs/funcimport_debug.ll
index 35c62a262903..0e75924cd3b6 100644
--- a/test/Transforms/FunctionImport/Inputs/funcimport_debug.ll
+++ b/test/Transforms/FunctionImport/Inputs/funcimport_debug.ll
@@ -14,11 +14,10 @@ attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fp
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 255685) (llvm/trunk 255682)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 255685) (llvm/trunk 255682)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
!1 = !DIFile(filename: "funcimport_debug.c", directory: ".")
!2 = !{}
-!3 = !{!4}
-!4 = distinct !DISubprogram(name: "func", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, variables: !2)
+!4 = distinct !DISubprogram(name: "func", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, unit: !0, variables: !2)
!5 = !DISubroutineType(types: !6)
!6 = !{null}
!7 = !{i32 2, !"Dwarf Version", i32 4}
diff --git a/test/Transforms/FunctionImport/Inputs/inlineasm.ll b/test/Transforms/FunctionImport/Inputs/inlineasm.ll
new file mode 100644
index 000000000000..1ffc5db5f8b0
--- /dev/null
+++ b/test/Transforms/FunctionImport/Inputs/inlineasm.ll
@@ -0,0 +1,11 @@
+@myvar = internal constant i8 1, align 1
+@llvm.used = appending global [1 x i8*] [i8* @myvar], section "llvm.metadata"
+
+define void @foo(i64* %v) #0 {
+entry:
+ %v.addr = alloca i64*, align 8
+ store i64* %v, i64** %v.addr, align 8
+ %0 = load i64*, i64** %v.addr, align 8
+ call void asm sideeffect "movzbl myvar(%rip), %eax\0A\09movq %rax, $0\0A\09", "=*m,~{eax},~{dirflag},~{fpsr},~{flags}"(i64* %0) #1
+ ret void
+}
diff --git a/test/Transforms/FunctionImport/adjustable_threshold.ll b/test/Transforms/FunctionImport/adjustable_threshold.ll
new file mode 100644
index 000000000000..adb8b0dffb05
--- /dev/null
+++ b/test/Transforms/FunctionImport/adjustable_threshold.ll
@@ -0,0 +1,31 @@
+; Do setup work for all below tests: generate bitcode and combined index
+; RUN: opt -module-summary %s -o %t.bc
+; RUN: opt -module-summary %p/Inputs/adjustable_threshold.ll -o %t2.bc
+; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc
+
+; Test import with default progressive instruction factor
+; RUN: opt -function-import -summary-file %t3.thinlto.bc %t.bc -import-instr-limit=10 -S | FileCheck %s --check-prefix=CHECK --check-prefix=INSTLIM-DEFAULT
+; INSTLIM-DEFAULT: call void @staticfunc2.llvm.
+
+; Test import with a reduced progressive instruction factor
+; RUN: opt -function-import -summary-file %t3.thinlto.bc %t.bc -import-instr-limit=10 -import-instr-evolution-factor=0.5 -S | FileCheck %s --check-prefix=CHECK --check-prefix=INSTLIM-PROGRESSIVE
+; INSTLIM-PROGRESSIVE-NOT: call void @staticfunc
+
+
+
+declare void @globalfunc1()
+declare void @globalfunc2()
+
+define void @entry() {
+entry:
+; Call sites are processed in reverse order!
+
+; On the direct call, we reconsider @largefunction with a higher threshold and
+; import it
+ call void @globalfunc2()
+; When importing globalfunc1, the threshold was limited and @largefunction was
+; not imported.
+ call void @globalfunc1()
+ ret void
+}
+
diff --git a/test/Transforms/FunctionImport/funcimport.ll b/test/Transforms/FunctionImport/funcimport.ll
index 52fd53d3f31f..7f7e57b35743 100644
--- a/test/Transforms/FunctionImport/funcimport.ll
+++ b/test/Transforms/FunctionImport/funcimport.ll
@@ -1,14 +1,21 @@
; Do setup work for all below tests: generate bitcode and combined index
-; RUN: llvm-as -function-summary %s -o %t.bc
-; RUN: llvm-as -function-summary %p/Inputs/funcimport.ll -o %t2.bc
-; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc
+; RUN: opt -module-summary %s -o %t.bc
+; RUN: opt -module-summary %p/Inputs/funcimport.ll -o %t2.bc
+; RUN: llvm-lto -thinlto -print-summary-global-ids -o %t3 %t.bc %t2.bc 2>&1 | FileCheck %s --check-prefix=GUID
; Do the import now
-; RUN: opt -function-import -summary-file %t3.thinlto.bc %s -S | FileCheck %s --check-prefix=CHECK --check-prefix=INSTLIMDEF
+; RUN: opt -disable-force-link-odr -function-import -stats -print-imports -enable-import-metadata -summary-file %t3.thinlto.bc %t.bc -S 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=INSTLIMDEF
+; "-stats" requires +Asserts.
+; REQUIRES: asserts
; Test import with smaller instruction limit
-; RUN: opt -function-import -summary-file %t3.thinlto.bc %s -import-instr-limit=5 -S | FileCheck %s --check-prefix=CHECK --check-prefix=INSTLIM5
-; INSTLIM5-NOT: @staticfunc.llvm.2
+; RUN: opt -disable-force-link-odr -function-import -enable-import-metadata -summary-file %t3.thinlto.bc %t.bc -import-instr-limit=5 -S | FileCheck %s --check-prefix=CHECK --check-prefix=INSTLIM5
+; INSTLIM5-NOT: @staticfunc.llvm.
+
+; Test import with smaller instruction limit and without the -disable-force-link-odr
+; RUN: opt -function-import -summary-file %t3.thinlto.bc %t.bc -import-instr-limit=5 -S | FileCheck %s --check-prefix=INSTLIM5ODR
+; INSTLIM5ODR: define linkonce_odr void @linkonceodr() {
+
define i32 @main() #0 {
entry:
@@ -21,6 +28,8 @@ entry:
call void (...) @setfuncptr()
call void (...) @callfuncptr()
call void (...) @weakfunc()
+ call void (...) @linkoncefunc2()
+ call void (...) @referencelargelinkonce()
ret i32 0
}
@@ -32,46 +41,102 @@ declare void @weakalias(...) #1
; CHECK-DAG: declare void @analias
declare void @analias(...) #1
+; FIXME: Add this checking back when follow on fix to add alias summary
+; records is committed.
; Aliases import the aliasee function
declare void @linkoncealias(...) #1
-; CHECK-DAG: define linkonce_odr void @linkoncefunc()
-; CHECK-DAG: @linkoncealias = alias void (...), bitcast (void ()* @linkoncefunc to void (...)*
-; INSTLIMDEF-DAG: define available_externally i32 @referencestatics(i32 %i)
+; INSTLIMDEF-DAG: Import referencestatics
+; INSTLIMDEF-DAG: define available_externally i32 @referencestatics(i32 %i) !thinlto_src_module !0 {
; INSTLIM5-DAG: declare i32 @referencestatics(...)
declare i32 @referencestatics(...) #1
; The import of referencestatics will expose call to staticfunc that
; should in turn be imported as a promoted/renamed and hidden function.
; Ensure that the call is to the properly-renamed function.
-; INSTLIMDEF-DAG: %call = call i32 @staticfunc.llvm.2()
-; INSTLIMDEF-DAG: define available_externally hidden i32 @staticfunc.llvm.2()
+; INSTLIMDEF-DAG: Import staticfunc
+; INSTLIMDEF-DAG: %call = call i32 @staticfunc.llvm.
+; INSTLIMDEF-DAG: define available_externally hidden i32 @staticfunc.llvm.{{.*}} !thinlto_src_module !0 {
-; CHECK-DAG: define available_externally i32 @referenceglobals(i32 %i)
+; INSTLIMDEF-DAG: Import referenceglobals
+; CHECK-DAG: define available_externally i32 @referenceglobals(i32 %i) !thinlto_src_module !0 {
declare i32 @referenceglobals(...) #1
; The import of referenceglobals will expose call to globalfunc1 that
; should in turn be imported.
-; CHECK-DAG: define available_externally void @globalfunc1()
+; INSTLIMDEF-DAG: Import globalfunc1
+; CHECK-DAG: define available_externally void @globalfunc1() !thinlto_src_module !0
-; CHECK-DAG: define available_externally i32 @referencecommon(i32 %i)
+; INSTLIMDEF-DAG: Import referencecommon
+; CHECK-DAG: define available_externally i32 @referencecommon(i32 %i) !thinlto_src_module !0 {
declare i32 @referencecommon(...) #1
-; CHECK-DAG: define available_externally void @setfuncptr()
+; INSTLIMDEF-DAG: Import setfuncptr
+; CHECK-DAG: define available_externally void @setfuncptr() !thinlto_src_module !0 {
declare void @setfuncptr(...) #1
-; CHECK-DAG: define available_externally void @callfuncptr()
+; INSTLIMDEF-DAG: Import callfuncptr
+; CHECK-DAG: define available_externally void @callfuncptr() !thinlto_src_module !0 {
declare void @callfuncptr(...) #1
; Ensure that all uses of local variable @P which has used in setfuncptr
; and callfuncptr are to the same promoted/renamed global.
-; CHECK-DAG: @P.llvm.2 = available_externally hidden global void ()* null
-; CHECK-DAG: %0 = load void ()*, void ()** @P.llvm.2,
-; CHECK-DAG: store void ()* @staticfunc2.llvm.2, void ()** @P.llvm.2,
+; CHECK-DAG: @P.llvm.{{.*}} = external hidden global void ()*
+; CHECK-DAG: %0 = load void ()*, void ()** @P.llvm.
+; CHECK-DAG: store void ()* @staticfunc2.llvm.{{.*}}, void ()** @P.llvm.
+
+; Ensure that @referencelargelinkonce definition is pulled in, but later we
+; also check that the linkonceodr function is not.
+; CHECK-DAG: define available_externally void @referencelargelinkonce() !thinlto_src_module !0 {
+; INSTLIM5-DAG: declare void @linkonceodr()
+declare void @referencelargelinkonce(...)
; Won't import weak func
; CHECK-DAG: declare void @weakfunc(...)
declare void @weakfunc(...) #1
-; INSTLIMDEF-DAG: define available_externally hidden void @funcwithpersonality.llvm.2() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
-; INSTLIM5-DAG: declare hidden void @funcwithpersonality.llvm.2()
+; Won't import linkonce func
+; CHECK-DAG: declare void @linkoncefunc2(...)
+declare void @linkoncefunc2(...) #1
+
+; INSTLIMDEF-DAG: Import funcwithpersonality
+; INSTLIMDEF-DAG: define available_externally hidden void @funcwithpersonality.llvm.{{.*}}() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !thinlto_src_module !0 {
+; INSTLIM5-DAG: declare hidden void @funcwithpersonality.llvm.{{.*}}()
+
+; INSTLIMDEF-DAG: Import globalfunc2
+; INSTLIMDEF-DAG: 13 function-import - Number of functions imported
+; CHECK-DAG: !0 = !{!"{{.*}}/Inputs/funcimport.ll"}
+
+; The actual GUID values will depend on path to test.
+; GUID-DAG: GUID {{.*}} is weakalias
+; GUID-DAG: GUID {{.*}} is referenceglobals
+; GUID-DAG: GUID {{.*}} is weakfunc
+; GUID-DAG: GUID {{.*}} is main
+; GUID-DAG: GUID {{.*}} is referencecommon
+; GUID-DAG: GUID {{.*}} is analias
+; GUID-DAG: GUID {{.*}} is referencestatics
+; GUID-DAG: GUID {{.*}} is linkoncealias
+; GUID-DAG: GUID {{.*}} is setfuncptr
+; GUID-DAG: GUID {{.*}} is callfuncptr
+; GUID-DAG: GUID {{.*}} is funcwithpersonality
+; GUID-DAG: GUID {{.*}} is setfuncptr
+; GUID-DAG: GUID {{.*}} is staticfunc2
+; GUID-DAG: GUID {{.*}} is __gxx_personality_v0
+; GUID-DAG: GUID {{.*}} is referencestatics
+; GUID-DAG: GUID {{.*}} is globalfunc1
+; GUID-DAG: GUID {{.*}} is globalfunc2
+; GUID-DAG: GUID {{.*}} is P
+; GUID-DAG: GUID {{.*}} is staticvar
+; GUID-DAG: GUID {{.*}} is commonvar
+; GUID-DAG: GUID {{.*}} is weakalias
+; GUID-DAG: GUID {{.*}} is staticfunc
+; GUID-DAG: GUID {{.*}} is weakfunc
+; GUID-DAG: GUID {{.*}} is referenceglobals
+; GUID-DAG: GUID {{.*}} is weakvar
+; GUID-DAG: GUID {{.*}} is staticconstvar
+; GUID-DAG: GUID {{.*}} is analias
+; GUID-DAG: GUID {{.*}} is globalvar
+; GUID-DAG: GUID {{.*}} is referencecommon
+; GUID-DAG: GUID {{.*}} is linkoncealias
+; GUID-DAG: GUID {{.*}} is callfuncptr
+; GUID-DAG: GUID {{.*}} is linkoncefunc
diff --git a/test/Transforms/FunctionImport/funcimport_alias.ll b/test/Transforms/FunctionImport/funcimport_alias.ll
index 8c7f00fe37b3..7868e08d32fd 100644
--- a/test/Transforms/FunctionImport/funcimport_alias.ll
+++ b/test/Transforms/FunctionImport/funcimport_alias.ll
@@ -1,12 +1,12 @@
; Do setup work for all below tests: generate bitcode and combined index
-; RUN: llvm-as -function-summary %s -o %t.bc
-; RUN: llvm-as -function-summary %p/Inputs/funcimport_alias.ll -o %t2.bc
+; RUN: opt -module-summary %s -o %t.bc
+; RUN: opt -module-summary %p/Inputs/funcimport_alias.ll -o %t2.bc
; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc
; Do the import now. Ensures that the importer handles an external call
; from imported callanalias() to a function that is defined already in
; the dest module, but as an alias.
-; RUN: opt -function-import -summary-file %t3.thinlto.bc %s -S | FileCheck %s
+; RUN: opt -function-import -summary-file %t3.thinlto.bc %t.bc -S | FileCheck %s
define i32 @main() #0 {
entry:
diff --git a/test/Transforms/FunctionImport/funcimport_debug.ll b/test/Transforms/FunctionImport/funcimport_debug.ll
index 96b73a3f6bc7..e7c6389e90a5 100644
--- a/test/Transforms/FunctionImport/funcimport_debug.ll
+++ b/test/Transforms/FunctionImport/funcimport_debug.ll
@@ -1,23 +1,22 @@
; Do setup work for all below tests: generate bitcode and combined index
-; RUN: llvm-as -function-summary %s -o %t.bc
-; RUN: llvm-as -function-summary %p/Inputs/funcimport_debug.ll -o %t2.bc
+; RUN: opt -module-summary %s -o %t.bc
+; RUN: opt -module-summary %p/Inputs/funcimport_debug.ll -o %t2.bc
; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc
; Do the import now and confirm that metadata is linked for imported function.
-; RUN: opt -function-import -summary-file %t3.thinlto.bc %s -S | FileCheck %s
+; RUN: opt -function-import -summary-file %t3.thinlto.bc %t.bc -S | FileCheck %s
; CHECK: define available_externally void @func()
; Check that we have exactly two subprograms (that func's subprogram wasn't
; linked more than once for example), and that they are connected to
-; the subprogram list on a compute unit.
-; CHECK: !{{[0-9]+}} = distinct !DICompileUnit({{.*}} subprograms: ![[SPs1:[0-9]+]]
-; CHECK: ![[SPs1]] = !{![[MAINSP:[0-9]+]]}
-; CHECK: ![[MAINSP]] = distinct !DISubprogram(name: "main"
-; CHECK: !{{[0-9]+}} = distinct !DICompileUnit({{.*}} subprograms: ![[SPs2:[0-9]+]]
-; CHECK-NOT: ![[SPs2]] = !{{{.*}}null{{.*}}}
-; CHECK: ![[SPs2]] = !{![[FUNCSP:[0-9]+]]}
-; CHECK: ![[FUNCSP]] = distinct !DISubprogram(name: "func"
+; the correct compile unit.
+; CHECK: ![[CU1:[0-9]+]] = distinct !DICompileUnit(
+; CHECK: ![[CU2:[0-9]+]] = distinct !DICompileUnit(
+; CHECK: distinct !DISubprogram(name: "main"
+; CHECK-SAME: unit: ![[CU1]]
+; CHECK: distinct !DISubprogram(name: "func"
+; CHECK-SAME: unit: ![[CU2]]
; CHECK-NOT: distinct !DISubprogram
; ModuleID = 'funcimport_debug.o'
@@ -40,11 +39,10 @@ attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-
!llvm.module.flags = !{!8, !9}
!llvm.ident = !{!10}
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 255685) (llvm/trunk 255682)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 255685) (llvm/trunk 255682)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
!1 = !DIFile(filename: "funcimport_debug.c", directory: ".")
!2 = !{}
-!3 = !{!4}
-!4 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, isOptimized: false, variables: !2)
+!4 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, isOptimized: false, unit: !0, variables: !2)
!5 = !DISubroutineType(types: !6)
!6 = !{!7}
!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
diff --git a/test/Transforms/FunctionImport/inlineasm.ll b/test/Transforms/FunctionImport/inlineasm.ll
new file mode 100644
index 000000000000..d0516f146534
--- /dev/null
+++ b/test/Transforms/FunctionImport/inlineasm.ll
@@ -0,0 +1,19 @@
+; Do setup work for all below tests: generate bitcode and combined index
+; RUN: opt -module-summary %s -o %t.bc
+; RUN: opt -module-summary %p/Inputs/inlineasm.ll -o %t2.bc
+; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc
+
+; Attempt the import now, ensure below that the file containing inline assembly
+; is not imported from. Otherwise we would need to promote its local variable
+; used in the inline assembly, which would not see the rename.
+; RUN: opt -function-import -summary-file %t3.thinlto.bc %t.bc -S 2>&1 | FileCheck %s --check-prefix=CHECK
+
+define i32 @main() #0 {
+entry:
+ %f = alloca i64, align 8
+ call void @foo(i64* %f)
+ ret i32 0
+}
+
+; CHECK: declare void @foo(i64*)
+declare void @foo(i64*) #1
diff --git a/test/Transforms/GCOVProfiling/function-numbering.ll b/test/Transforms/GCOVProfiling/function-numbering.ll
index f94d5ad30bbc..6f10fd23cb06 100644
--- a/test/Transforms/GCOVProfiling/function-numbering.ll
+++ b/test/Transforms/GCOVProfiling/function-numbering.ll
@@ -7,6 +7,10 @@
; RUN: opt -insert-gcov-profiling -S < %t2 | FileCheck --check-prefix GCDA %s
; RUN: llvm-cov gcov -n -dump %T/function-numbering.gcno 2>&1 | FileCheck --check-prefix GCNO %s
+; RUN: rm %T/function-numbering.gcno
+
+; RUN: opt -passes=insert-gcov-profiling -S < %t2 | FileCheck --check-prefix GCDA %s
+; RUN: llvm-cov gcov -n -dump %T/function-numbering.gcno 2>&1 | FileCheck --check-prefix GCNO %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.10.0"
@@ -40,15 +44,14 @@ define void @baz() !dbg !8 {
!llvm.module.flags = !{!9, !10}
!llvm.ident = !{!11}
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 ", isOptimized: false, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 ", isOptimized: false, emissionKind: LineTablesOnly, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: ".../llvm/test/Transforms/GCOVProfiling/function-numbering.ll", directory: "")
!2 = !{}
-!3 = !{!4, !7, !8}
-!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, isOptimized: false, unit: !0, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: ".../llvm/test/Transforms/GCOVProfiling/function-numbering.ll", directory: "")
!6 = !DISubroutineType(types: !2)
-!7 = distinct !DISubprogram(name: "bar", line: 2, isLocal: false, isDefinition: true, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !2)
-!8 = distinct !DISubprogram(name: "baz", line: 3, isLocal: false, isDefinition: true, isOptimized: false, scopeLine: 3, file: !1, scope: !5, type: !6, variables: !2)
+!7 = distinct !DISubprogram(name: "bar", line: 2, isLocal: false, isDefinition: true, isOptimized: false, unit: !0, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !2)
+!8 = distinct !DISubprogram(name: "baz", line: 3, isLocal: false, isDefinition: true, isOptimized: false, unit: !0, scopeLine: 3, file: !1, scope: !5, type: !6, variables: !2)
!9 = !{i32 2, !"Dwarf Version", i32 2}
!10 = !{i32 2, !"Debug Info Version", i32 3}
!11 = !{!"clang version 3.6.0 "}
diff --git a/test/Transforms/GCOVProfiling/global-ctor.ll b/test/Transforms/GCOVProfiling/global-ctor.ll
index 47600c7bfcad..e20da85eb586 100644
--- a/test/Transforms/GCOVProfiling/global-ctor.ll
+++ b/test/Transforms/GCOVProfiling/global-ctor.ll
@@ -4,6 +4,10 @@
; RUN: not grep '_GLOBAL__sub_I_global-ctor' %T/global-ctor.gcno
; RUN: rm %T/global-ctor.gcno
+; RUN: opt -passes=insert-gcov-profiling -disable-output < %t2
+; RUN: not grep '_GLOBAL__sub_I_global-ctor' %T/global-ctor.gcno
+; RUN: rm %T/global-ctor.gcno
+
@x = global i32 0, align 4
@llvm.global_ctors = appending global [1 x { i32, void ()*, i8* }] [{ i32, void ()*, i8* } { i32 65535, void ()* @_GLOBAL__sub_I_global-ctor.ll, i8* null }]
@@ -38,15 +42,14 @@ attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "
!llvm.gcov = !{!16}
!llvm.ident = !{!12}
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 (trunk 210217)", isOptimized: false, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 (trunk 210217)", isOptimized: false, emissionKind: LineTablesOnly, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "<stdin>", directory: "/home/nlewycky")
!2 = !{}
-!3 = !{!4, !8}
-!4 = distinct !DISubprogram(name: "__cxx_global_var_init", line: 2, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !5, scope: !6, type: !7, variables: !2)
+!4 = distinct !DISubprogram(name: "__cxx_global_var_init", line: 2, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 2, file: !5, scope: !6, type: !7, variables: !2)
!5 = !DIFile(filename: "global-ctor.ll", directory: "/home/nlewycky")
!6 = !DIFile(filename: "global-ctor.ll", directory: "/home/nlewycky")
!7 = !DISubroutineType(types: !2)
-!8 = distinct !DISubprogram(name: "", linkageName: "_GLOBAL__sub_I_global-ctor.ll", isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagArtificial, isOptimized: false, file: !1, scope: !9, type: !7, variables: !2)
+!8 = distinct !DISubprogram(name: "", linkageName: "_GLOBAL__sub_I_global-ctor.ll", isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagArtificial, isOptimized: false, unit: !0, file: !1, scope: !9, type: !7, variables: !2)
!9 = !DIFile(filename: "<stdin>", directory: "/home/nlewycky")
!10 = !{i32 2, !"Dwarf Version", i32 4}
!11 = !{i32 2, !"Debug Info Version", i32 3}
diff --git a/test/Transforms/GCOVProfiling/linezero.ll b/test/Transforms/GCOVProfiling/linezero.ll
index e071c4e6dbf7..a2e5381e4dbc 100644
--- a/test/Transforms/GCOVProfiling/linezero.ll
+++ b/test/Transforms/GCOVProfiling/linezero.ll
@@ -1,6 +1,9 @@
; RUN: sed -e 's|PATTERN|%/T|g' %s | opt -insert-gcov-profiling -disable-output
; RUN: rm %T/linezero.gcno
+; RUN: sed -e 's|PATTERN|%/T|g' %s | opt -passes=insert-gcov-profiling -disable-output
+; RUN: rm %T/linezero.gcno
+
; This is a crash test.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -93,27 +96,26 @@ attributes #3 = { noreturn nounwind }
!llvm.gcov = !{!25}
!llvm.ident = !{!26}
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 (trunk 209871)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !14, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0 (trunk 209871)", isOptimized: false, emissionKind: FullDebug, file: !1, enums: !2, retainedTypes: !3, globals: !2, imports: !2)
!1 = !DIFile(filename: "<stdin>", directory: "PATTERN")
!2 = !{}
!3 = !{!4}
!4 = !DICompositeType(tag: DW_TAG_structure_type, name: "vector", line: 21, size: 8, align: 8, file: !5, elements: !6, identifier: "_ZTS6vector")
!5 = !DIFile(filename: "linezero.cc", directory: "PATTERN")
!6 = !{!7, !13}
-!7 = !DISubprogram(name: "begin", linkageName: "_ZN6vector5beginEv", line: 25, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 25, file: !5, scope: !"_ZTS6vector", type: !8)
+!7 = !DISubprogram(name: "begin", linkageName: "_ZN6vector5beginEv", line: 25, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 25, file: !5, scope: !4, type: !8)
!8 = !DISubroutineType(types: !9)
!9 = !{!10, !12}
!10 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, baseType: !11)
!11 = !DIBasicType(tag: DW_TAG_base_type, name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
-!12 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !"_ZTS6vector")
-!13 = !DISubprogram(name: "end", linkageName: "_ZN6vector3endEv", line: 26, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 26, file: !5, scope: !"_ZTS6vector", type: !8)
-!14 = !{!15, !20}
-!15 = distinct !DISubprogram(name: "test", linkageName: "_Z4testv", line: 50, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 50, file: !5, scope: !16, type: !17, variables: !2)
+!12 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, flags: DIFlagArtificial | DIFlagObjectPointer, baseType: !4)
+!13 = !DISubprogram(name: "end", linkageName: "_ZN6vector3endEv", line: 26, isLocal: false, isDefinition: false, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 26, file: !5, scope: !4, type: !8)
+!15 = distinct !DISubprogram(name: "test", linkageName: "_Z4testv", line: 50, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 50, file: !5, scope: !16, type: !17, variables: !2)
!16 = !DIFile(filename: "linezero.cc", directory: "PATTERN")
!17 = !DISubroutineType(types: !18)
!18 = !{!19}
!19 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!20 = distinct !DISubprogram(name: "f1", linkageName: "_Z2f1v", line: 54, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 54, file: !5, scope: !16, type: !21, variables: !2)
+!20 = distinct !DISubprogram(name: "f1", linkageName: "_Z2f1v", line: 54, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 54, file: !5, scope: !16, type: !21, variables: !2)
!21 = !DISubroutineType(types: !22)
!22 = !{null}
!23 = !{i32 2, !"Dwarf Version", i32 4}
@@ -122,7 +124,7 @@ attributes #3 = { noreturn nounwind }
!26 = !{!"clang version 3.5.0 (trunk 209871)"}
!27 = !DILocalVariable(name: "__range", flags: DIFlagArtificial, scope: !28, type: !29)
!28 = distinct !DILexicalBlock(line: 51, column: 0, file: !5, scope: !15)
-!29 = !DIDerivedType(tag: DW_TAG_rvalue_reference_type, baseType: !"_ZTS6vector")
+!29 = !DIDerivedType(tag: DW_TAG_rvalue_reference_type, baseType: !4)
!30 = !DILocation(line: 0, scope: !28)
!31 = !DILocation(line: 51, scope: !28)
!32 = !DILocalVariable(name: "__begin", flags: DIFlagArtificial, scope: !28, type: !10)
diff --git a/test/Transforms/GCOVProfiling/linkagename.ll b/test/Transforms/GCOVProfiling/linkagename.ll
index 65830bf78025..dc46a35a1856 100644
--- a/test/Transforms/GCOVProfiling/linkagename.ll
+++ b/test/Transforms/GCOVProfiling/linkagename.ll
@@ -4,6 +4,10 @@
; RUN: grep _Z3foov %T/linkagename.gcno
; RUN: rm %T/linkagename.gcno
+; RUN: opt -passes=insert-gcov-profiling -disable-output < %t2
+; RUN: grep _Z3foov %T/linkagename.gcno
+; RUN: rm %T/linkagename.gcno
+
define void @_Z3foov() !dbg !5 {
entry:
ret void, !dbg !8
@@ -13,12 +17,11 @@ entry:
!llvm.module.flags = !{!10}
!llvm.gcov = !{!9}
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 177323)", isOptimized: false, emissionKind: 0, file: !2, enums: !3, retainedTypes: !3, subprograms: !4, globals: !3, imports: !3)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 177323)", isOptimized: false, emissionKind: FullDebug, file: !2, enums: !3, retainedTypes: !3, globals: !3, imports: !3)
!1 = !DIFile(filename: "hello.cc", directory: "/home/nlewycky")
!2 = !DIFile(filename: "hello.cc", directory: "/home/nlewycky")
!3 = !{}
-!4 = !{!5}
-!5 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !1, type: !6, variables: !3)
+!5 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 1, file: !1, scope: !1, type: !6, variables: !3)
!6 = !DISubroutineType(types: !7)
!7 = !{null}
!8 = !DILocation(line: 1, scope: !5)
diff --git a/test/Transforms/GCOVProfiling/modules.ll b/test/Transforms/GCOVProfiling/modules.ll
index 1a8edfeedd4c..460d4dfdfa17 100644
--- a/test/Transforms/GCOVProfiling/modules.ll
+++ b/test/Transforms/GCOVProfiling/modules.ll
@@ -1,11 +1,12 @@
; RUN: opt -insert-gcov-profiling -o - < %s | llvm-dis | FileCheck -check-prefix=EMIT-ARCS %s
+; RUN: opt -passes=insert-gcov-profiling -o - < %s | llvm-dis | FileCheck -check-prefix=EMIT-ARCS %s
; EMIT-ARCS-NOT: call void @llvm_gcda_start_file
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!3, !4}
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "LLVM", isOptimized: false, runtimeVersion: 2, splitDebugFilename: "my.dwo", emissionKind: 1, enums: !2, retainedTypes: !2, subprograms: !2, globals: !2, imports: !2, dwoId: 43981)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "LLVM", isOptimized: false, runtimeVersion: 2, splitDebugFilename: "my.dwo", emissionKind: FullDebug, enums: !2, retainedTypes: !2, globals: !2, imports: !2, dwoId: 43981)
!1 = !DIFile(filename: "<stdin>", directory: "/")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 4}
diff --git a/test/Transforms/GCOVProfiling/return-block.ll b/test/Transforms/GCOVProfiling/return-block.ll
index 9b502a14bfa2..424e0b581f04 100644
--- a/test/Transforms/GCOVProfiling/return-block.ll
+++ b/test/Transforms/GCOVProfiling/return-block.ll
@@ -9,6 +9,15 @@
; But we can optionally emit it second, to match newer gcc versions.
; RUN: opt -insert-gcov-profiling -gcov-exit-block-before-body -disable-output %t2
; RUN: llvm-cov gcov -n -dump %T/return-block.gcno 2>&1 | FileCheck -check-prefix=CHECK -check-prefix=RETURN-SECOND %s
+; RUN: rm %T/return-block.gcno
+
+; By default, the return block is last.
+; RUN: opt -passes=insert-gcov-profiling -disable-output %t2
+; RUN: llvm-cov gcov -n -dump %T/return-block.gcno 2>&1 | FileCheck -check-prefix=CHECK -check-prefix=RETURN-LAST %s
+
+; But we can optionally emit it second, to match newer gcc versions.
+; RUN: opt -passes=insert-gcov-profiling -gcov-exit-block-before-body -disable-output %t2
+; RUN: llvm-cov gcov -n -dump %T/return-block.gcno 2>&1 | FileCheck -check-prefix=CHECK -check-prefix=RETURN-SECOND %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@@ -44,11 +53,10 @@ attributes #2 = { nounwind }
!llvm.module.flags = !{!11, !12}
!llvm.ident = !{!13}
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 (trunk 223182)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !8, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 (trunk 223182)", isOptimized: true, emissionKind: FullDebug, file: !1, enums: !2, retainedTypes: !2, globals: !8, imports: !2)
!1 = !DIFile(filename: ".../llvm/test/Transforms/GCOVProfiling/return-block.ll", directory: "")
!2 = !{}
-!3 = !{!4}
-!4 = distinct !DISubprogram(name: "test", line: 5, isLocal: false, isDefinition: true, isOptimized: true, scopeLine: 5, file: !1, scope: !5, type: !6, variables: !2)
+!4 = distinct !DISubprogram(name: "test", line: 5, isLocal: false, isDefinition: true, isOptimized: true, unit: !0, scopeLine: 5, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: ".../llvm/test/Transforms/GCOVProfiling/return-block.ll", directory: "")
!6 = !DISubroutineType(types: !7)
!7 = !{null}
diff --git a/test/Transforms/GCOVProfiling/version.ll b/test/Transforms/GCOVProfiling/version.ll
index 67bfb3c97612..da70ac463574 100644
--- a/test/Transforms/GCOVProfiling/version.ll
+++ b/test/Transforms/GCOVProfiling/version.ll
@@ -8,6 +8,14 @@
; RUN: head -c8 %T/version.gcno | grep '^oncg.704'
; RUN: rm %T/version.gcno
+; RUN: opt -passes=insert-gcov-profiling -disable-output < %t2
+; RUN: head -c8 %T/version.gcno | grep '^oncg.204'
+; RUN: rm %T/version.gcno
+; RUN: not opt -passes=insert-gcov-profiling -default-gcov-version=asdfasdf -disable-output < %t2
+; RUN: opt -passes=insert-gcov-profiling -default-gcov-version=407* -disable-output < %t2
+; RUN: head -c8 %T/version.gcno | grep '^oncg.704'
+; RUN: rm %T/version.gcno
+
define void @test() !dbg !5 {
ret void, !dbg !8
}
@@ -16,11 +24,10 @@ define void @test() !dbg !5 {
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!12}
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 176994)", isOptimized: false, emissionKind: 0, file: !11, enums: !3, retainedTypes: !3, subprograms: !4, globals: !3)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.3 (trunk 176994)", isOptimized: false, emissionKind: FullDebug, file: !11, enums: !3, retainedTypes: !3, globals: !3)
!2 = !DIFile(filename: "version", directory: "/usr/local/google/home/nlewycky")
!3 = !{}
-!4 = !{!5}
-!5 = distinct !DISubprogram(name: "test", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !10, scope: !6, type: !7, variables: !3)
+!5 = distinct !DISubprogram(name: "test", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 1, file: !10, scope: !6, type: !7, variables: !3)
!6 = !DIFile(filename: "<stdin>", directory: ".")
!7 = !DISubroutineType(types: !{null})
!8 = !DILocation(line: 1, scope: !5)
diff --git a/test/Transforms/GVN/2007-07-25-InfiniteLoop.ll b/test/Transforms/GVN/2007-07-25-InfiniteLoop.ll
index 0ffb34c39b4f..98fb6b3c382e 100644
--- a/test/Transforms/GVN/2007-07-25-InfiniteLoop.ll
+++ b/test/Transforms/GVN/2007-07-25-InfiniteLoop.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -gvn -S | not grep "tmp10 ="
+; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
%struct.INT2 = type { i32, i32 }
@blkshifts = external global %struct.INT2* ; <%struct.INT2**> [#uses=2]
@@ -10,5 +10,6 @@ entry:
bb: ; preds = %bb, %entry
%tmp10 = load %struct.INT2*, %struct.INT2** @blkshifts, align 4 ; <%struct.INT2*> [#uses=0]
+; CHECK-NOT: %tmp10
br label %bb
}
diff --git a/test/Transforms/GVN/2007-07-26-PhiErasure.ll b/test/Transforms/GVN/2007-07-26-PhiErasure.ll
index 82af521ab3d6..e9dab2bbac0c 100644
--- a/test/Transforms/GVN/2007-07-26-PhiErasure.ll
+++ b/test/Transforms/GVN/2007-07-26-PhiErasure.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -gvn -S | not grep phi
+; RUN: opt < %s -gvn -S | FileCheck %s
%struct..0anon = type { i32 }
%struct.FILE = type { i8*, i32, i32, i16, i16, %struct.__sbuf, i32, i8*, i32 (i8*)*, i32 (i8*, i8*, i32)*, i64 (i8*, i64, i32)*, i32 (i8*, i8*, i32)*, %struct.__sbuf, %struct.__sFILEX*, i32, [3 x i8], [1 x i8], %struct.__sbuf, i32, i64 }
@@ -26,3 +26,19 @@ bb2982.preheader: ; preds = %cond_next2943
ret i32 %tmp298316
}
+
+; CHECK: define i32 @reload(%struct.rtx_def* %first, i32 %global, %struct.FILE* %dumpfile) {
+; CHECK-NEXT: cond_next2835.1:
+; CHECK-NEXT: br label %bb2928
+; CHECK: bb2928:
+; CHECK-NEXT: br i1 false, label %bb2928.cond_next2943_crit_edge, label %cond_true2935
+; CHECK: bb2928.cond_next2943_crit_edge:
+; CHECK-NEXT: br label %cond_next2943
+; CHECK: cond_true2935:
+; CHECK-NEXT: br label %cond_next2943
+; CHECK: cond_next2943:
+; CHECK-NEXT: br i1 false, label %bb2982.preheader, label %bb2928
+; CHECK: bb2982.preheader:
+; CHECK-NEXT: %tmp298316 = load i32, i32* @n_spills, align 4
+; CHECK-NEXT: ret i32 %tmp298316
+; CHECK-NEXT: }
diff --git a/test/Transforms/GVN/2007-07-31-NoDomInherit.ll b/test/Transforms/GVN/2007-07-31-NoDomInherit.ll
index b2e4c64ee770..ebd6ea247ef5 100644
--- a/test/Transforms/GVN/2007-07-31-NoDomInherit.ll
+++ b/test/Transforms/GVN/2007-07-31-NoDomInherit.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -gvn -S | grep "tmp47 = phi i32 "
+; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
%struct.anon = type { i32 (i32, i32, i32)*, i32, i32, [3 x i32], i8*, i8*, i8* }
@debug = external constant i32 ; <i32*> [#uses=0]
@@ -214,6 +214,7 @@ bb18: ; preds = %bb49
%tmp46 = load i32, i32* %i ; <i32> [#uses=1]
call void @fix_operands( i32 %tmp46 )
%tmp47 = load i32, i32* %i ; <i32> [#uses=1]
+; CHECK: %tmp47 = phi i32 [ %tmp48, %bb18 ], [ 0, %bb17 ]
%tmp48 = add i32 %tmp47, 1 ; <i32> [#uses=1]
store i32 %tmp48, i32* %i
br label %bb49
diff --git a/test/Transforms/GVN/2007-07-31-RedundantPhi.ll b/test/Transforms/GVN/2007-07-31-RedundantPhi.ll
index b285560e6a99..11ec736d2dff 100644
--- a/test/Transforms/GVN/2007-07-31-RedundantPhi.ll
+++ b/test/Transforms/GVN/2007-07-31-RedundantPhi.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -gvn -S | not grep "tmp701 ="
+; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
@img_width = external global i16 ; <i16*> [#uses=2]
@@ -18,5 +18,6 @@ cond_false470: ; preds = %cond_next449
cond_next698: ; preds = %cond_true492
%tmp701 = load i16, i16* @img_width, align 2 ; <i16> [#uses=0]
+; CHECK-NOT: %tmp701 =
ret i32 0
}
diff --git a/test/Transforms/GVN/2008-02-12-UndefLoad.ll b/test/Transforms/GVN/2008-02-12-UndefLoad.ll
index a1aed867ec63..0e6b17c4aa50 100644
--- a/test/Transforms/GVN/2008-02-12-UndefLoad.ll
+++ b/test/Transforms/GVN/2008-02-12-UndefLoad.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -gvn -S | not grep load
+; RUN: opt < %s -gvn -S | FileCheck %s
; PR1996
%struct.anon = type { i32, i8, i8, i8, i8 }
@@ -9,6 +9,7 @@ entry:
%tmp = getelementptr %struct.anon, %struct.anon* %c, i32 0, i32 0 ; <i32*> [#uses=1]
%tmp1 = getelementptr i32, i32* %tmp, i32 1 ; <i32*> [#uses=2]
%tmp2 = load i32, i32* %tmp1, align 4 ; <i32> [#uses=1]
+; CHECK-NOT: load
%tmp3 = or i32 %tmp2, 11 ; <i32> [#uses=1]
%tmp4 = and i32 %tmp3, -21 ; <i32> [#uses=1]
store i32 %tmp4, i32* %tmp1, align 4
diff --git a/test/Transforms/GVN/2008-07-02-Unreachable.ll b/test/Transforms/GVN/2008-07-02-Unreachable.ll
index d9932644da3e..0cd80e8da0c7 100644
--- a/test/Transforms/GVN/2008-07-02-Unreachable.ll
+++ b/test/Transforms/GVN/2008-07-02-Unreachable.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -gvn -S | grep "ret i8 [%]tmp3"
+; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
; PR2503
@g_3 = external global i8 ; <i8*> [#uses=2]
@@ -29,6 +29,7 @@ forinc: ; preds = %forbody
afterfor: ; preds = %forcond, %forcond.thread
%tmp10 = load i8, i8* @g_3 ; <i8> [#uses=0]
ret i8 %tmp10
+; CHECK: ret i8 %tmp3
ifend: ; preds = %afterfor, %ifthen
ret i8 0
diff --git a/test/Transforms/GVN/2008-12-09-SelfRemove.ll b/test/Transforms/GVN/2008-12-09-SelfRemove.ll
index d8ab1bae48e9..b0468b4def6b 100644
--- a/test/Transforms/GVN/2008-12-09-SelfRemove.ll
+++ b/test/Transforms/GVN/2008-12-09-SelfRemove.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -gvn -S | grep getelementptr | count 1
+; RUN: opt < %s -gvn -S | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9.5"
@@ -24,3 +24,15 @@ bb21: ; preds = %bb21, %bb
return: ; preds = %entry
ret void
}
+
+; CHECK: define void @d_print_mod_list(%struct.d_print_info* %dpi, %struct.d_print_mod* %mods, i32 %suffix) #0 {
+; CHECK: entry:
+; CHECK: %0 = getelementptr %struct.d_print_info, %struct.d_print_info* %dpi, i32 0, i32 1
+; CHECK: br i1 false, label %return, label %bb
+; CHECK: bb:
+; CHECK: br label %bb21
+; CHECK: bb21:
+; CHECK: br label %bb21
+; CHECK: return:
+; CHECK: ret void
+; CHECK: }
diff --git a/test/Transforms/GVN/2009-02-17-LoadPRECrash.ll b/test/Transforms/GVN/PRE/2009-02-17-LoadPRECrash.ll
index 808f28c674c5..808f28c674c5 100644
--- a/test/Transforms/GVN/2009-02-17-LoadPRECrash.ll
+++ b/test/Transforms/GVN/PRE/2009-02-17-LoadPRECrash.ll
diff --git a/test/Transforms/GVN/2009-06-17-InvalidPRE.ll b/test/Transforms/GVN/PRE/2009-06-17-InvalidPRE.ll
index bf0a234da4ac..ec592b0e9e84 100644
--- a/test/Transforms/GVN/2009-06-17-InvalidPRE.ll
+++ b/test/Transforms/GVN/PRE/2009-06-17-InvalidPRE.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -gvn -enable-load-pre -S | not grep pre1
+; RUN: opt < %s -gvn -enable-load-pre -S | FileCheck %s
+; CHECK-NOT: pre1
; GVN load pre was hoisting the loads at %13 and %16 up to bb4.outer.
; This is invalid as it bypasses the check for %m.0.ph==null in bb4.
; ModuleID = 'mbuf.c'
diff --git a/test/Transforms/GVN/2011-06-01-NonLocalMemdepMiscompile.ll b/test/Transforms/GVN/PRE/2011-06-01-NonLocalMemdepMiscompile.ll
index 0769575759ba..0769575759ba 100644
--- a/test/Transforms/GVN/2011-06-01-NonLocalMemdepMiscompile.ll
+++ b/test/Transforms/GVN/PRE/2011-06-01-NonLocalMemdepMiscompile.ll
diff --git a/test/Transforms/GVN/PRE/atomic.ll b/test/Transforms/GVN/PRE/atomic.ll
new file mode 100644
index 000000000000..509acd613e95
--- /dev/null
+++ b/test/Transforms/GVN/PRE/atomic.ll
@@ -0,0 +1,503 @@
+; RUN: opt -basicaa -gvn -S < %s | FileCheck %s
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-macosx10.7.0"
+
+@x = common global i32 0, align 4
+@y = common global i32 0, align 4
+
+; GVN across unordered store (allowed)
+define i32 @test1() nounwind uwtable ssp {
+; CHECK-LABEL: test1
+; CHECK: add i32 %x, %x
+entry:
+ %x = load i32, i32* @y
+ store atomic i32 %x, i32* @x unordered, align 4
+ %y = load i32, i32* @y
+ %z = add i32 %x, %y
+ ret i32 %z
+}
+
+; GVN across unordered load (allowed)
+define i32 @test3() nounwind uwtable ssp {
+; CHECK-LABEL: test3
+; CHECK: add i32 %x, %x
+entry:
+ %x = load i32, i32* @y
+ %y = load atomic i32, i32* @x unordered, align 4
+ %z = load i32, i32* @y
+ %a = add i32 %x, %z
+ %b = add i32 %y, %a
+ ret i32 %b
+}
+
+; GVN load to unordered load (allowed)
+define i32 @test5() nounwind uwtable ssp {
+; CHECK-LABEL: test5
+; CHECK: add i32 %x, %x
+entry:
+ %x = load atomic i32, i32* @x unordered, align 4
+ %y = load i32, i32* @x
+ %z = add i32 %x, %y
+ ret i32 %z
+}
+
+; GVN unordered load to load (unordered load must not be removed)
+define i32 @test6() nounwind uwtable ssp {
+; CHECK-LABEL: test6
+; CHECK: load atomic i32, i32* @x unordered
+entry:
+ %x = load i32, i32* @x
+ %x2 = load atomic i32, i32* @x unordered, align 4
+ %x3 = add i32 %x, %x2
+ ret i32 %x3
+}
+
+; GVN across release-acquire pair (forbidden)
+define i32 @test7() nounwind uwtable ssp {
+; CHECK-LABEL: test7
+; CHECK: add i32 %x, %y
+entry:
+ %x = load i32, i32* @y
+ store atomic i32 %x, i32* @x release, align 4
+ %w = load atomic i32, i32* @x acquire, align 4
+ %y = load i32, i32* @y
+ %z = add i32 %x, %y
+ ret i32 %z
+}
+
+; GVN across monotonic store (allowed)
+define i32 @test9() nounwind uwtable ssp {
+; CHECK-LABEL: test9
+; CHECK: add i32 %x, %x
+entry:
+ %x = load i32, i32* @y
+ store atomic i32 %x, i32* @x monotonic, align 4
+ %y = load i32, i32* @y
+ %z = add i32 %x, %y
+ ret i32 %z
+}
+
+; GVN of an unordered across monotonic load (not allowed)
+define i32 @test10() nounwind uwtable ssp {
+; CHECK-LABEL: test10
+; CHECK: add i32 %x, %y
+entry:
+ %x = load atomic i32, i32* @y unordered, align 4
+ %clobber = load atomic i32, i32* @x monotonic, align 4
+ %y = load atomic i32, i32* @y monotonic, align 4
+ %z = add i32 %x, %y
+ ret i32 %z
+}
+
+define i32 @PR22708(i1 %flag) {
+; CHECK-LABEL: PR22708
+entry:
+ br i1 %flag, label %if.then, label %if.end
+
+if.then:
+ store i32 43, i32* @y, align 4
+; CHECK: store i32 43, i32* @y, align 4
+ br label %if.end
+
+if.end:
+ load atomic i32, i32* @x acquire, align 4
+ %load = load i32, i32* @y, align 4
+; CHECK: load atomic i32, i32* @x acquire, align 4
+; CHECK: load i32, i32* @y, align 4
+ ret i32 %load
+}
+
+; CHECK-LABEL: @test12(
+; Can't remove a load over a ordering barrier
+define i32 @test12(i1 %B, i32* %P1, i32* %P2) {
+ %load0 = load i32, i32* %P1
+ %1 = load atomic i32, i32* %P2 seq_cst, align 4
+ %load1 = load i32, i32* %P1
+ %sel = select i1 %B, i32 %load0, i32 %load1
+ ret i32 %sel
+ ; CHECK: load i32, i32* %P1
+ ; CHECK: load i32, i32* %P1
+}
+
+; CHECK-LABEL: @test13(
+; atomic to non-atomic forwarding is legal
+define i32 @test13(i32* %P1) {
+ %a = load atomic i32, i32* %P1 seq_cst, align 4
+ %b = load i32, i32* %P1
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK: load atomic i32, i32* %P1
+ ; CHECK: ret i32 0
+}
+
+; CHECK-LABEL: @test13b(
+define i32 @test13b(i32* %P1) {
+ store atomic i32 0, i32* %P1 unordered, align 4
+ %b = load i32, i32* %P1
+ ret i32 %b
+ ; CHECK: ret i32 0
+}
+
+; CHECK-LABEL: @test14(
+; atomic to unordered atomic forwarding is legal
+define i32 @test14(i32* %P1) {
+ %a = load atomic i32, i32* %P1 seq_cst, align 4
+ %b = load atomic i32, i32* %P1 unordered, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK: load atomic i32, i32* %P1 seq_cst
+ ; CHECK-NEXT: ret i32 0
+}
+
+; CHECK-LABEL: @test15(
+; implementation restriction: can't forward to stonger
+; than unordered
+define i32 @test15(i32* %P1, i32* %P2) {
+ %a = load atomic i32, i32* %P1 seq_cst, align 4
+ %b = load atomic i32, i32* %P1 seq_cst, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK: load atomic i32, i32* %P1
+ ; CHECK: load atomic i32, i32* %P1
+}
+
+; CHECK-LABEL: @test16(
+; forwarding non-atomic to atomic is wrong! (However,
+; it would be legal to use the later value in place of the
+; former in this particular example. We just don't
+; do that right now.)
+define i32 @test16(i32* %P1, i32* %P2) {
+ %a = load i32, i32* %P1, align 4
+ %b = load atomic i32, i32* %P1 unordered, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK: load i32, i32* %P1
+ ; CHECK: load atomic i32, i32* %P1
+}
+
+; CHECK-LABEL: @test16b(
+define i32 @test16b(i32* %P1) {
+ store i32 0, i32* %P1
+ %b = load atomic i32, i32* %P1 unordered, align 4
+ ret i32 %b
+ ; CHECK: load atomic i32, i32* %P1
+}
+
+; Can't DSE across a full fence
+define void @fence_seq_cst_store(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @fence_seq_cst_store(
+; CHECK: store
+; CHECK: store atomic
+; CHECK: store
+ store i32 0, i32* %P1, align 4
+ store atomic i32 0, i32* %P2 seq_cst, align 4
+ store i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can't DSE across a full fence
+define void @fence_seq_cst(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @fence_seq_cst(
+; CHECK: store
+; CHECK: fence seq_cst
+; CHECK: store
+ store i32 0, i32* %P1, align 4
+ fence seq_cst
+ store i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can't DSE across a full singlethread fence
+define void @fence_seq_cst_st(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @fence_seq_cst_st(
+; CHECK: store
+; CHECK: fence singlethread seq_cst
+; CHECK: store
+ store i32 0, i32* %P1, align 4
+ fence singlethread seq_cst
+ store i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can't DSE across a full fence
+define void @fence_asm_sideeffect(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @fence_asm_sideeffect(
+; CHECK: store
+; CHECK: call void asm sideeffect
+; CHECK: store
+ store i32 0, i32* %P1, align 4
+ call void asm sideeffect "", ""()
+ store i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can't DSE across a full fence
+define void @fence_asm_memory(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @fence_asm_memory(
+; CHECK: store
+; CHECK: call void asm
+; CHECK: store
+ store i32 0, i32* %P1, align 4
+ call void asm "", "~{memory}"()
+ store i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can't remove a volatile load
+define i32 @volatile_load(i32* %P1, i32* %P2) {
+ %a = load i32, i32* %P1, align 4
+ %b = load volatile i32, i32* %P1, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK-LABEL: @volatile_load(
+ ; CHECK: load i32, i32* %P1
+ ; CHECK: load volatile i32, i32* %P1
+}
+
+; Can't remove redundant volatile loads
+define i32 @redundant_volatile_load(i32* %P1, i32* %P2) {
+ %a = load volatile i32, i32* %P1, align 4
+ %b = load volatile i32, i32* %P1, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK-LABEL: @redundant_volatile_load(
+ ; CHECK: load volatile i32, i32* %P1
+ ; CHECK: load volatile i32, i32* %P1
+ ; CHECK: sub
+}
+
+; Can't DSE a volatile store
+define void @volatile_store(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @volatile_store(
+; CHECK: store volatile
+; CHECK: store
+ store volatile i32 0, i32* %P1, align 4
+ store i32 3, i32* %P1, align 4
+ ret void
+}
+
+; Can't DSE a redundant volatile store
+define void @redundant_volatile_store(i32* %P1, i32* %P2) {
+; CHECK-LABEL: @redundant_volatile_store(
+; CHECK: store volatile
+; CHECK: store volatile
+ store volatile i32 0, i32* %P1, align 4
+ store volatile i32 0, i32* %P1, align 4
+ ret void
+}
+
+; Can value forward from volatiles
+define i32 @test20(i32* %P1, i32* %P2) {
+ %a = load volatile i32, i32* %P1, align 4
+ %b = load i32, i32* %P1, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+ ; CHECK-LABEL: @test20(
+ ; CHECK: load volatile i32, i32* %P1
+ ; CHECK: ret i32 0
+}
+
+; We're currently conservative about widening
+define i64 @widen1(i32* %P1) {
+ ; CHECK-LABEL: @widen1(
+ ; CHECK: load atomic i32, i32* %P1
+ ; CHECK: load atomic i64, i64* %p2
+ %p2 = bitcast i32* %P1 to i64*
+ %a = load atomic i32, i32* %P1 unordered, align 4
+ %b = load atomic i64, i64* %p2 unordered, align 4
+ %a64 = sext i32 %a to i64
+ %res = sub i64 %a64, %b
+ ret i64 %res
+}
+
+; narrowing does work
+define i64 @narrow(i32* %P1) {
+ ; CHECK-LABEL: @narrow(
+ ; CHECK: load atomic i64, i64* %p2
+ ; CHECK-NOT: load atomic i32, i32* %P1
+ %p2 = bitcast i32* %P1 to i64*
+ %a64 = load atomic i64, i64* %p2 unordered, align 4
+ %b = load atomic i32, i32* %P1 unordered, align 4
+ %b64 = sext i32 %b to i64
+ %res = sub i64 %a64, %b64
+ ret i64 %res
+}
+
+; Missed optimization, we don't yet optimize ordered loads
+define i64 @narrow2(i32* %P1) {
+ ; CHECK-LABEL: @narrow2(
+ ; CHECK: load atomic i64, i64* %p2
+ ; CHECK: load atomic i32, i32* %P1
+ %p2 = bitcast i32* %P1 to i64*
+ %a64 = load atomic i64, i64* %p2 acquire, align 4
+ %b = load atomic i32, i32* %P1 acquire, align 4
+ %b64 = sext i32 %b to i64
+ %res = sub i64 %a64, %b64
+ ret i64 %res
+}
+
+; Note: The cross block FRE testing is deliberately light. All of the tricky
+; bits of legality are shared code with the block-local FRE above. These
+; are here only to show that we haven't obviously broken anything.
+
+; unordered atomic to unordered atomic
+define i32 @non_local_fre(i32* %P1) {
+; CHECK-LABEL: @non_local_fre(
+; CHECK: load atomic i32, i32* %P1
+; CHECK: ret i32 0
+; CHECK: ret i32 0
+ %a = load atomic i32, i32* %P1 unordered, align 4
+ %cmp = icmp eq i32 %a, 0
+ br i1 %cmp, label %early, label %next
+early:
+ ret i32 %a
+next:
+ %b = load atomic i32, i32* %P1 unordered, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+}
+
+; unordered atomic to non-atomic
+define i32 @non_local_fre2(i32* %P1) {
+; CHECK-LABEL: @non_local_fre2(
+; CHECK: load atomic i32, i32* %P1
+; CHECK: ret i32 0
+; CHECK: ret i32 0
+ %a = load atomic i32, i32* %P1 unordered, align 4
+ %cmp = icmp eq i32 %a, 0
+ br i1 %cmp, label %early, label %next
+early:
+ ret i32 %a
+next:
+ %b = load i32, i32* %P1
+ %res = sub i32 %a, %b
+ ret i32 %res
+}
+
+; Can't forward ordered atomics.
+define i32 @non_local_fre3(i32* %P1) {
+; CHECK-LABEL: @non_local_fre3(
+; CHECK: load atomic i32, i32* %P1 acquire
+; CHECK: ret i32 0
+; CHECK: load atomic i32, i32* %P1 acquire
+; CHECK: ret i32 %res
+ %a = load atomic i32, i32* %P1 acquire, align 4
+ %cmp = icmp eq i32 %a, 0
+ br i1 %cmp, label %early, label %next
+early:
+ ret i32 %a
+next:
+ %b = load atomic i32, i32* %P1 acquire, align 4
+ %res = sub i32 %a, %b
+ ret i32 %res
+}
+
+declare void @clobber()
+
+; unordered atomic to unordered atomic
+define i32 @non_local_pre(i32* %P1) {
+; CHECK-LABEL: @non_local_pre(
+; CHECK: load atomic i32, i32* %P1 unordered
+; CHECK: load atomic i32, i32* %P1 unordered
+; CHECK: %b = phi i32 [ %b.pre, %early ], [ %a, %0 ]
+; CHECK: ret i32 %b
+ %a = load atomic i32, i32* %P1 unordered, align 4
+ %cmp = icmp eq i32 %a, 0
+ br i1 %cmp, label %early, label %next
+early:
+ call void @clobber()
+ br label %next
+next:
+ %b = load atomic i32, i32* %P1 unordered, align 4
+ ret i32 %b
+}
+
+; unordered atomic to non-atomic
+define i32 @non_local_pre2(i32* %P1) {
+; CHECK-LABEL: @non_local_pre2(
+; CHECK: load atomic i32, i32* %P1 unordered
+; CHECK: load i32, i32* %P1
+; CHECK: %b = phi i32 [ %b.pre, %early ], [ %a, %0 ]
+; CHECK: ret i32 %b
+ %a = load atomic i32, i32* %P1 unordered, align 4
+ %cmp = icmp eq i32 %a, 0
+ br i1 %cmp, label %early, label %next
+early:
+ call void @clobber()
+ br label %next
+next:
+ %b = load i32, i32* %P1
+ ret i32 %b
+}
+
+; non-atomic to unordered atomic - can't forward!
+define i32 @non_local_pre3(i32* %P1) {
+; CHECK-LABEL: @non_local_pre3(
+; CHECK: %a = load i32, i32* %P1
+; CHECK: %b = load atomic i32, i32* %P1 unordered
+; CHECK: ret i32 %b
+ %a = load i32, i32* %P1
+ %cmp = icmp eq i32 %a, 0
+ br i1 %cmp, label %early, label %next
+early:
+ call void @clobber()
+ br label %next
+next:
+ %b = load atomic i32, i32* %P1 unordered, align 4
+ ret i32 %b
+}
+
+; ordered atomic to ordered atomic - can't forward
+define i32 @non_local_pre4(i32* %P1) {
+; CHECK-LABEL: @non_local_pre4(
+; CHECK: %a = load atomic i32, i32* %P1 seq_cst
+; CHECK: %b = load atomic i32, i32* %P1 seq_cst
+; CHECK: ret i32 %b
+ %a = load atomic i32, i32* %P1 seq_cst, align 4
+ %cmp = icmp eq i32 %a, 0
+ br i1 %cmp, label %early, label %next
+early:
+ call void @clobber()
+ br label %next
+next:
+ %b = load atomic i32, i32* %P1 seq_cst, align 4
+ ret i32 %b
+}
+
+; can't remove volatile on any path
+define i32 @non_local_pre5(i32* %P1) {
+; CHECK-LABEL: @non_local_pre5(
+; CHECK: %a = load atomic i32, i32* %P1 seq_cst
+; CHECK: %b = load volatile i32, i32* %P1
+; CHECK: ret i32 %b
+ %a = load atomic i32, i32* %P1 seq_cst, align 4
+ %cmp = icmp eq i32 %a, 0
+ br i1 %cmp, label %early, label %next
+early:
+ call void @clobber()
+ br label %next
+next:
+ %b = load volatile i32, i32* %P1
+ ret i32 %b
+}
+
+
+; ordered atomic to unordered atomic
+define i32 @non_local_pre6(i32* %P1) {
+; CHECK-LABEL: @non_local_pre6(
+; CHECK: load atomic i32, i32* %P1 seq_cst
+; CHECK: load atomic i32, i32* %P1 unordered
+; CHECK: %b = phi i32 [ %b.pre, %early ], [ %a, %0 ]
+; CHECK: ret i32 %b
+ %a = load atomic i32, i32* %P1 seq_cst, align 4
+ %cmp = icmp eq i32 %a, 0
+ br i1 %cmp, label %early, label %next
+early:
+ call void @clobber()
+ br label %next
+next:
+ %b = load atomic i32, i32* %P1 unordered, align 4
+ ret i32 %b
+}
+
diff --git a/test/Transforms/GVN/invariant-load.ll b/test/Transforms/GVN/PRE/invariant-load.ll
index f74fd3392c18..f74fd3392c18 100644
--- a/test/Transforms/GVN/invariant-load.ll
+++ b/test/Transforms/GVN/PRE/invariant-load.ll
diff --git a/test/Transforms/GVN/PRE/load-metadata.ll b/test/Transforms/GVN/PRE/load-metadata.ll
new file mode 100644
index 000000000000..3294cda7ee90
--- /dev/null
+++ b/test/Transforms/GVN/PRE/load-metadata.ll
@@ -0,0 +1,24 @@
+; RUN: opt -S -gvn < %s | FileCheck %s
+
+define i32 @test1(i32* %p, i1 %C) {
+; CHECK-LABEL: @test1(
+block1:
+ br i1 %C, label %block2, label %block3
+
+block2:
+ br label %block4
+; CHECK: block2:
+; CHECK-NEXT: load i32, i32* %p, !range !0, !invariant.group !1
+
+block3:
+ store i32 0, i32* %p
+ br label %block4
+
+block4:
+ %PRE = load i32, i32* %p, !range !0, !invariant.group !1
+ ret i32 %PRE
+}
+
+
+!0 = !{i32 40, i32 100}
+!1 = !{!"magic ptr"}
diff --git a/test/Transforms/GVN/load-pre-align.ll b/test/Transforms/GVN/PRE/load-pre-align.ll
index 1198cafaeed9..1198cafaeed9 100644
--- a/test/Transforms/GVN/load-pre-align.ll
+++ b/test/Transforms/GVN/PRE/load-pre-align.ll
diff --git a/test/Transforms/GVN/load-pre-licm.ll b/test/Transforms/GVN/PRE/load-pre-licm.ll
index d14b01caf779..d14b01caf779 100644
--- a/test/Transforms/GVN/load-pre-licm.ll
+++ b/test/Transforms/GVN/PRE/load-pre-licm.ll
diff --git a/test/Transforms/GVN/load-pre-nonlocal.ll b/test/Transforms/GVN/PRE/load-pre-nonlocal.ll
index e0e886653076..1e5e0912cd72 100644
--- a/test/Transforms/GVN/load-pre-nonlocal.ll
+++ b/test/Transforms/GVN/PRE/load-pre-nonlocal.ll
@@ -92,6 +92,7 @@ if.end:
!6 = !{!"int", !3, i64 0}
!llvm.module.flags = !{!7, !8, !9}
+!llvm.dbg.cu = !{!18}
!7 = !{i32 2, !"Dwarf Version", i32 4}
!8 = !{i32 2, !"Debug Info Version", i32 3}
!9 = !{i32 1, !"PIC Level", i32 2}
@@ -99,9 +100,12 @@ if.end:
!10 = !{}
!11 = !DISubroutineType(types: !10)
!12 = !DIFile(filename: "test.cpp", directory: "/tmp")
-!13 = distinct !DISubprogram(name: "test", scope: !12, file: !12, line: 99, type: !11, isLocal: false, isDefinition: true, scopeLine: 100, flags: DIFlagPrototyped, isOptimized: false, variables: !10)
+!13 = distinct !DISubprogram(name: "test", scope: !12, file: !12, line: 99, type: !11, isLocal: false, isDefinition: true, scopeLine: 100, flags: DIFlagPrototyped, isOptimized: false, unit: !18, variables: !10)
!14 = !DILocation(line: 100, column: 1, scope: !13)
!15 = !DILocation(line: 101, column: 1, scope: !13)
!16 = !DILocation(line: 102, column: 1, scope: !13)
!17 = !DILocation(line: 103, column: 1, scope: !13)
-
+!18 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang",
+ file: !12,
+ isOptimized: true, flags: "-O2",
+ splitDebugFilename: "abc.debug", emissionKind: 2)
diff --git a/test/Transforms/GVN/local-pre.ll b/test/Transforms/GVN/PRE/local-pre.ll
index 2c92699dca91..943f351f17a9 100644
--- a/test/Transforms/GVN/local-pre.ll
+++ b/test/Transforms/GVN/PRE/local-pre.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -gvn -enable-pre -S | grep "b.pre"
+; RUN: opt < %s -gvn -enable-pre -S | FileCheck %s
define i32 @main(i32 %p, i32 %q) {
block1:
@@ -11,8 +11,12 @@ block2:
block3:
br label %block4
+; CHECK: %.pre = add i32 %p, 1
+; CHECK-NEXT: br label %block4
block4:
%b = add i32 %p, 1
ret i32 %b
+; CHECK: %b.pre-phi = phi i32 [ %.pre, %block3 ], [ %a, %block2 ]
+; CHECK-NEXT: ret i32 %b.pre-phi
}
diff --git a/test/Transforms/GVN/lpre-call-wrap-2.ll b/test/Transforms/GVN/PRE/lpre-call-wrap-2.ll
index 5dc779ef40d1..5dc779ef40d1 100644
--- a/test/Transforms/GVN/lpre-call-wrap-2.ll
+++ b/test/Transforms/GVN/PRE/lpre-call-wrap-2.ll
diff --git a/test/Transforms/GVN/lpre-call-wrap.ll b/test/Transforms/GVN/PRE/lpre-call-wrap.ll
index 274830549394..274830549394 100644
--- a/test/Transforms/GVN/lpre-call-wrap.ll
+++ b/test/Transforms/GVN/PRE/lpre-call-wrap.ll
diff --git a/test/Transforms/GVN/phi-translate.ll b/test/Transforms/GVN/PRE/phi-translate.ll
index 67036ab9746c..42335486910c 100644
--- a/test/Transforms/GVN/phi-translate.ll
+++ b/test/Transforms/GVN/PRE/phi-translate.ll
@@ -37,6 +37,7 @@ end:
}
!llvm.module.flags = !{!0, !1, !2}
+!llvm.dbg.cu = !{!12}
!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = !{i32 1, !"PIC Level", i32 2}
@@ -44,9 +45,13 @@ end:
!3 = !{}
!4 = !DISubroutineType(types: !3)
!5 = !DIFile(filename: "a.cc", directory: "/tmp")
-!6 = distinct !DISubprogram(name: "foo", scope: !5, file: !5, line: 42, type: !4, isLocal: false, isDefinition: true, scopeLine: 43, flags: DIFlagPrototyped, isOptimized: false, variables: !3)
+!6 = distinct !DISubprogram(name: "foo", scope: !5, file: !5, line: 42, type: !4, isLocal: false, isDefinition: true, scopeLine: 43, flags: DIFlagPrototyped, isOptimized: false, unit: !12, variables: !3)
!7 = !DILocation(line: 43, column: 1, scope: !6)
!8 = !DILocation(line: 44, column: 1, scope: !6)
!9 = !DILocation(line: 45, column: 1, scope: !6)
!10 = !DILocation(line: 46, column: 1, scope: !6)
!11 = !DILocation(line: 47, column: 1, scope: !6)
+!12 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang",
+ file: !5,
+ isOptimized: true, flags: "-O2",
+ splitDebugFilename: "abc.debug", emissionKind: 2)
diff --git a/test/Transforms/GVN/pre-basic-add.ll b/test/Transforms/GVN/PRE/pre-basic-add.ll
index fa4e2e3abded..f4000c56c358 100644
--- a/test/Transforms/GVN/pre-basic-add.ll
+++ b/test/Transforms/GVN/PRE/pre-basic-add.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -gvn -enable-pre -S | grep ".pre"
+; RUN: opt < %s -gvn -enable-pre -S | FileCheck %s
@H = common global i32 0 ; <i32*> [#uses=2]
@G = common global i32 0 ; <i32*> [#uses=1]
@@ -12,6 +12,7 @@ entry:
bb: ; preds = %entry
%3 = add i32 %0, 42 ; <i32> [#uses=1]
+; CHECK: %.pre = add i32 %0, 42
store i32 %3, i32* @G, align 4
br label %bb1
@@ -20,6 +21,10 @@ bb1: ; preds = %bb, %entry
store i32 %4, i32* @H, align 4
br label %return
+; CHECK: %.pre-phi = phi i32 [ %.pre, %entry.bb1_crit_edge ], [ %3, %bb ]
+; CHECK-NEXT: store i32 %.pre-phi, i32* @H, align 4
+; CHECK-NEXT: ret i32 0
+
return: ; preds = %bb1
ret i32 0
}
diff --git a/test/Transforms/GVN/pre-gep-load.ll b/test/Transforms/GVN/PRE/pre-gep-load.ll
index a46dc22ade89..9eec8bb6455b 100644
--- a/test/Transforms/GVN/pre-gep-load.ll
+++ b/test/Transforms/GVN/PRE/pre-gep-load.ll
@@ -1,4 +1,6 @@
; RUN: opt < %s -basicaa -gvn -enable-load-pre -S | FileCheck %s
+; RUN: opt < %s -aa-pipeline=basic-aa -passes=gvn -enable-load-pre -S | FileCheck %s
+
target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
target triple = "aarch64--linux-gnu"
@@ -34,7 +36,7 @@ sw.bb2: ; preds = %if.end, %entry
%arrayidx5 = getelementptr inbounds double, double* %2, i64 %idxprom3
%3 = load double, double* %arrayidx5, align 8
; CHECK: sw.bb2:
-; CHECK-NEXT-NOT: sext
+; CHECK-NOT: sext
; CHECK-NEXT: phi double [
; CHECK-NOT: load
%sub6 = fsub double 3.000000e+00, %3
diff --git a/test/Transforms/GVN/pre-load.ll b/test/Transforms/GVN/PRE/pre-load.ll
index 685df24f62b6..685df24f62b6 100644
--- a/test/Transforms/GVN/pre-load.ll
+++ b/test/Transforms/GVN/PRE/pre-load.ll
diff --git a/test/Transforms/GVN/pre-no-cost-phi.ll b/test/Transforms/GVN/PRE/pre-no-cost-phi.ll
index 4c5afa12ddc1..4c5afa12ddc1 100644
--- a/test/Transforms/GVN/pre-no-cost-phi.ll
+++ b/test/Transforms/GVN/PRE/pre-no-cost-phi.ll
diff --git a/test/Transforms/GVN/pre-single-pred.ll b/test/Transforms/GVN/PRE/pre-single-pred.ll
index 0df45cf5c1c0..0df45cf5c1c0 100644
--- a/test/Transforms/GVN/pre-single-pred.ll
+++ b/test/Transforms/GVN/PRE/pre-single-pred.ll
diff --git a/test/Transforms/GVN/preserve-tbaa.ll b/test/Transforms/GVN/PRE/preserve-tbaa.ll
index 19467eeff404..19467eeff404 100644
--- a/test/Transforms/GVN/preserve-tbaa.ll
+++ b/test/Transforms/GVN/PRE/preserve-tbaa.ll
diff --git a/test/Transforms/GVN/rle-phi-translate.ll b/test/Transforms/GVN/PRE/rle-phi-translate.ll
index 7402e1a1fc00..7402e1a1fc00 100644
--- a/test/Transforms/GVN/rle-phi-translate.ll
+++ b/test/Transforms/GVN/PRE/rle-phi-translate.ll
diff --git a/test/Transforms/GVN/PRE/rle-semidominated.ll b/test/Transforms/GVN/PRE/rle-semidominated.ll
new file mode 100644
index 000000000000..f9704d3efbff
--- /dev/null
+++ b/test/Transforms/GVN/PRE/rle-semidominated.ll
@@ -0,0 +1,36 @@
+; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
+
+define i32 @main(i32* %p, i32 %x, i32 %y) {
+block1:
+ %z = load i32, i32* %p
+ %cmp = icmp eq i32 %x, %y
+ br i1 %cmp, label %block2, label %block3
+
+block2:
+ br label %block4
+
+block3:
+ %b = bitcast i32 0 to i32
+ store i32 %b, i32* %p
+ br label %block4
+
+block4:
+ %DEAD = load i32, i32* %p
+ ret i32 %DEAD
+}
+
+; CHECK: define i32 @main(i32* %p, i32 %x, i32 %y) {
+; CHECK-NEXT: block1:
+; CHECK-NOT: %z = load i32, i32* %p
+; CHECK-NEXT: %cmp = icmp eq i32 %x, %y
+; CHECK-NEXT: br i1 %cmp, label %block2, label %block3
+; CHECK: block2:
+; CHECK-NEXT: %DEAD.pre = load i32, i32* %p
+; CHECK-NEXT: br label %block4
+; CHECK: block3:
+; CHECK-NEXT: store i32 0, i32* %p
+; CHECK-NEXT: br label %block4
+; CHECK: block4:
+; CHECK-NEXT: %DEAD = phi i32 [ 0, %block3 ], [ %DEAD.pre, %block2 ]
+; CHECK-NEXT: ret i32 %DEAD
+; CHECK-NEXT: }
diff --git a/test/Transforms/GVN/rle.ll b/test/Transforms/GVN/PRE/rle.ll
index 3f42135b4092..3f42135b4092 100644
--- a/test/Transforms/GVN/rle.ll
+++ b/test/Transforms/GVN/PRE/rle.ll
diff --git a/test/Transforms/GVN/volatile.ll b/test/Transforms/GVN/PRE/volatile.ll
index b31058db4ea8..ccc5bbfa48e4 100644
--- a/test/Transforms/GVN/volatile.ll
+++ b/test/Transforms/GVN/PRE/volatile.ll
@@ -152,6 +152,16 @@ exit:
ret i32 %add
}
+define i32 @test9(i32* %V) {
+entry:
+ %load = load volatile i32, i32* %V, !range !0
+ ret i32 %load
+}
+; CHECK-LABEL: test9
+; CHECK: load volatile
+; CHECK: ret i32 0
+
declare void @use(i32) readonly
declare void @clobber(i32* %p, i32* %q)
+!0 = !{ i32 0, i32 1 }
diff --git a/test/Transforms/GVN/assume-equal.ll b/test/Transforms/GVN/assume-equal.ll
index f9304a8fc7c6..d423c1685e1d 100644
--- a/test/Transforms/GVN/assume-equal.ll
+++ b/test/Transforms/GVN/assume-equal.ll
@@ -226,6 +226,46 @@ bb3:
ret i32 17
}
+; This test checks if GVN can do the constant propagation correctly
+; when there are multiple uses of the same assume value in the
+; basic block that has a loop back-edge pointing to itself.
+;
+; CHECK-LABEL: define i32 @_Z1il(i32 %val, i1 %k)
+define i32 @_Z1il(i32 %val, i1 %k) {
+ br label %next
+
+next:
+; CHECK: tail call void @llvm.assume(i1 %k)
+; CHECK-NEXT: %cmp = icmp eq i32 %val, 50
+ tail call void @llvm.assume(i1 %k)
+ tail call void @llvm.assume(i1 %k)
+ %cmp = icmp eq i32 %val, 50
+ br i1 %cmp, label %next, label %meh
+
+meh:
+ ret i32 0
+}
+
+; This test checks if GVN can prevent the constant propagation correctly
+; in the successor blocks that are not dominated by the basic block
+; with the assume instruction.
+;
+; CHECK-LABEL: define i1 @_z1im(i32 %val, i1 %k, i1 %j)
+define i1 @_z1im(i32 %val, i1 %k, i1 %j) {
+ br i1 %j, label %next, label %meh
+
+next:
+; CHECK: tail call void @llvm.assume(i1 %k)
+; CHECK-NEXT: br label %meh
+ tail call void @llvm.assume(i1 %k)
+ tail call void @llvm.assume(i1 %k)
+ br label %meh
+
+meh:
+; CHECK: ret i1 %k
+ ret i1 %k
+}
+
declare noalias i8* @_Znwm(i64)
declare void @_ZN1AC1Ev(%struct.A*)
declare void @llvm.assume(i1)
diff --git a/test/Transforms/GVN/atomic.ll b/test/Transforms/GVN/atomic.ll
deleted file mode 100644
index 11b54f39756b..000000000000
--- a/test/Transforms/GVN/atomic.ll
+++ /dev/null
@@ -1,109 +0,0 @@
-; RUN: opt -basicaa -gvn -S < %s | FileCheck %s
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-macosx10.7.0"
-
-@x = common global i32 0, align 4
-@y = common global i32 0, align 4
-
-; GVN across unordered store (allowed)
-define i32 @test1() nounwind uwtable ssp {
-; CHECK-LABEL: test1
-; CHECK: add i32 %x, %x
-entry:
- %x = load i32, i32* @y
- store atomic i32 %x, i32* @x unordered, align 4
- %y = load i32, i32* @y
- %z = add i32 %x, %y
- ret i32 %z
-}
-
-; GVN across unordered load (allowed)
-define i32 @test3() nounwind uwtable ssp {
-; CHECK-LABEL: test3
-; CHECK: add i32 %x, %x
-entry:
- %x = load i32, i32* @y
- %y = load atomic i32, i32* @x unordered, align 4
- %z = load i32, i32* @y
- %a = add i32 %x, %z
- %b = add i32 %y, %a
- ret i32 %b
-}
-
-; GVN load to unordered load (allowed)
-define i32 @test5() nounwind uwtable ssp {
-; CHECK-LABEL: test5
-; CHECK: add i32 %x, %x
-entry:
- %x = load atomic i32, i32* @x unordered, align 4
- %y = load i32, i32* @x
- %z = add i32 %x, %y
- ret i32 %z
-}
-
-; GVN unordered load to load (unordered load must not be removed)
-define i32 @test6() nounwind uwtable ssp {
-; CHECK-LABEL: test6
-; CHECK: load atomic i32, i32* @x unordered
-entry:
- %x = load i32, i32* @x
- %x2 = load atomic i32, i32* @x unordered, align 4
- %x3 = add i32 %x, %x2
- ret i32 %x3
-}
-
-; GVN across release-acquire pair (forbidden)
-define i32 @test7() nounwind uwtable ssp {
-; CHECK-LABEL: test7
-; CHECK: add i32 %x, %y
-entry:
- %x = load i32, i32* @y
- store atomic i32 %x, i32* @x release, align 4
- %w = load atomic i32, i32* @x acquire, align 4
- %y = load i32, i32* @y
- %z = add i32 %x, %y
- ret i32 %z
-}
-
-; GVN across monotonic store (allowed)
-define i32 @test9() nounwind uwtable ssp {
-; CHECK-LABEL: test9
-; CHECK: add i32 %x, %x
-entry:
- %x = load i32, i32* @y
- store atomic i32 %x, i32* @x monotonic, align 4
- %y = load i32, i32* @y
- %z = add i32 %x, %y
- ret i32 %z
-}
-
-; GVN of an unordered across monotonic load (not allowed)
-define i32 @test10() nounwind uwtable ssp {
-; CHECK-LABEL: test10
-; CHECK: add i32 %x, %y
-entry:
- %x = load atomic i32, i32* @y unordered, align 4
- %clobber = load atomic i32, i32* @x monotonic, align 4
- %y = load atomic i32, i32* @y monotonic, align 4
- %z = add i32 %x, %y
- ret i32 %z
-}
-
-define i32 @PR22708(i1 %flag) {
-; CHECK-LABEL: PR22708
-entry:
- br i1 %flag, label %if.then, label %if.end
-
-if.then:
- store i32 43, i32* @y, align 4
-; CHECK: store i32 43, i32* @y, align 4
- br label %if.end
-
-if.end:
- load atomic i32, i32* @x acquire, align 4
- %load = load i32, i32* @y, align 4
-; CHECK: load atomic i32, i32* @x acquire, align 4
-; CHECK: load i32, i32* @y, align 4
- ret i32 %load
-}
diff --git a/test/Transforms/GVN/basic.ll b/test/Transforms/GVN/basic.ll
index 6f4aace45fde..44b321306319 100644
--- a/test/Transforms/GVN/basic.ll
+++ b/test/Transforms/GVN/basic.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -gvn -S | not grep "%z2 ="
+; RUN: opt < %s -gvn -S | FileCheck %s
+; RUN: opt < %s -passes=gvn -S | FileCheck %s
define i32 @main() {
block1:
@@ -8,3 +9,8 @@ block2:
%z2 = bitcast i32 0 to i32
ret i32 %z2
}
+
+; CHECK: define i32 @main() {
+; CHECK-NEXT: block1:
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
diff --git a/test/Transforms/GVN/big-endian.ll b/test/Transforms/GVN/big-endian.ll
new file mode 100644
index 000000000000..faaf3ef465b1
--- /dev/null
+++ b/test/Transforms/GVN/big-endian.ll
@@ -0,0 +1,40 @@
+; RUN: opt -gvn -S < %s | FileCheck %s
+
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-unknown-linux-gnu"
+
+;; Make sure we use correct bit shift based on storage size for
+;; loads reusing a load value.
+define i64 @test1({ i1, i8 }* %predA, { i1, i8 }* %predB) {
+; CHECK-LABEL: @test1
+; CHECK: [[V1:%.*]] = load i16, i16* %{{.*}}
+; CHECK: [[V2:%.*]] = lshr i16 [[V1]], 8
+; CHECK: trunc i16 [[V2]] to i1
+
+ %valueLoadA.fca.0.gep = getelementptr inbounds { i1, i8 }, { i1, i8 }* %predA, i64 0, i32 0
+ %valueLoadA.fca.0.load = load i1, i1* %valueLoadA.fca.0.gep, align 8
+ %valueLoadB.fca.0.gep = getelementptr inbounds { i1, i8 }, { i1, i8 }* %predB, i64 0, i32 0
+ %valueLoadB.fca.0.load = load i1, i1* %valueLoadB.fca.0.gep, align 8
+ %isTrue = and i1 %valueLoadA.fca.0.load, %valueLoadB.fca.0.load
+ %valueLoadA.fca.1.gep = getelementptr inbounds { i1, i8 }, { i1, i8 }* %predA, i64 0, i32 1
+ %valueLoadA.fca.1.load = load i8, i8* %valueLoadA.fca.1.gep, align 1
+ %isNotNullA = icmp ne i8 %valueLoadA.fca.1.load, 0
+ %valueLoadB.fca.1.gep = getelementptr inbounds { i1, i8 }, { i1, i8 }* %predB, i64 0, i32 1
+ %valueLoadB.fca.1.load = load i8, i8* %valueLoadB.fca.1.gep, align 1
+ %isNotNullB = icmp ne i8 %valueLoadB.fca.1.load, 0
+ %isNotNull = and i1 %isNotNullA, %isNotNullB
+ %isTrueAndNotNull = and i1 %isTrue, %isNotNull
+ %ret = zext i1 %isTrueAndNotNull to i64
+ ret i64 %ret
+}
+
+;; And likewise for loads reusing a store value.
+define i1 @test2(i8 %V, i8* %P) {
+; CHECK-LABEL: @test2
+; CHECK-NOT: lshr
+ store i8 %V, i8* %P
+ %P2 = bitcast i8* %P to i1*
+ %A = load i1, i1* %P2
+ ret i1 %A
+}
+
diff --git a/test/Transforms/GVN/bitcast-of-call.ll b/test/Transforms/GVN/bitcast-of-call.ll
index 55b4b6e9d317..930e4d7373d3 100644
--- a/test/Transforms/GVN/bitcast-of-call.ll
+++ b/test/Transforms/GVN/bitcast-of-call.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -gvn -S | not grep tmp2
+; RUN: opt < %s -gvn -S | FileCheck %s
; PR2213
define i32* @f(i8* %x) {
@@ -6,6 +6,7 @@ entry:
%tmp = call i8* @m( i32 12 ) ; <i8*> [#uses=2]
%tmp1 = bitcast i8* %tmp to i32* ; <i32*> [#uses=0]
%tmp2 = bitcast i8* %tmp to i32* ; <i32*> [#uses=0]
+; CHECK-NOT: %tmp2
ret i32* %tmp2
}
diff --git a/test/Transforms/GVN/calls-nonlocal.ll b/test/Transforms/GVN/calls-nonlocal.ll
index 24ef2e9ec412..d3c03d6c78d7 100644
--- a/test/Transforms/GVN/calls-nonlocal.ll
+++ b/test/Transforms/GVN/calls-nonlocal.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -basicaa -gvn -S | grep strlen | count 2
+; Two occurrences of strlen should be zapped.
+; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9"
@@ -46,4 +47,29 @@ return: ; preds = %bb27
ret i32 %tmp.0
}
+; CHECK: define i32 @test(i32 %g, i8* %P) #0 {
+; CHECK: entry:
+; CHECK: %tmp2 = call i32 @strlen(i8* %P) #1
+; CHECK: %tmp3 = icmp eq i32 %tmp2, 100
+; CHECK: %tmp34 = zext i1 %tmp3 to i8
+; CHECK: br i1 %tmp3, label %bb, label %bb6
+; CHECK: bb:
+; CHECK: br label %bb27
+; CHECK: bb6:
+; CHECK: %tmp8 = add i32 %g, 42
+; CHECK: br i1 false, label %bb14, label %bb16
+; CHECK: bb14:
+; CHECK: br label %bb27
+; CHECK: bb16:
+; CHECK: %tmp18 = mul i32 %tmp8, 2
+; CHECK: br i1 false, label %bb24, label %bb26
+; CHECK: bb24:
+; CHECK: br label %bb27
+; CHECK: bb26:
+; CHECK: br label %bb27
+; CHECK: bb27:
+; CHECK: %tmp.0 = phi i32 [ 11, %bb26 ], [ undef, %bb24 ], [ undef, %bb14 ], [ %g, %bb ]
+; CHECK: ret i32 %tmp.0
+; CHECK: }
+
declare i32 @strlen(i8*) nounwind readonly
diff --git a/test/Transforms/GVN/calls-readonly.ll b/test/Transforms/GVN/calls-readonly.ll
index ba8ad661eabf..35b69d49ac71 100644
--- a/test/Transforms/GVN/calls-readonly.ll
+++ b/test/Transforms/GVN/calls-readonly.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -gvn -S | grep "call.*strlen" | count 1
+; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
; Should delete the second call to strlen even though the intervening strchr call exists.
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
@@ -24,6 +24,22 @@ bb1: ; preds = %bb, %entry
ret i8* %6
}
+; CHECK: define i8* @test(i8* %P, i8* %Q, i32 %x, i32 %y) #0 {
+; CHECK: entry:
+; CHECK-NEXT: %0 = tail call i32 @strlen(i8* %P)
+; CHECK-NEXT: %1 = icmp eq i32 %0, 0
+; CHECK-NEXT: br i1 %1, label %bb, label %bb1
+; CHECK: bb:
+; CHECK-NEXT: %2 = sdiv i32 %x, %y
+; CHECK-NEXT: br label %bb1
+; CHECK: bb1:
+; CHECK-NEXT: %x_addr.0 = phi i32 [ %2, %bb ], [ %x, %entry ]
+; CHECK-NEXT: %3 = tail call i8* @strchr(i8* %Q, i32 97)
+; CHECK-NEXT: %4 = add i32 %x_addr.0, %0
+; CHECK-NEXT: %5 = getelementptr i8, i8* %3, i32 %x_addr.0
+; CHECK-NEXT: ret i8* %5
+; CHECK: }
+
declare i32 @strlen(i8*) nounwind readonly
declare i8* @strchr(i8*, i32) nounwind readonly
diff --git a/test/Transforms/GVN/fence.ll b/test/Transforms/GVN/fence.ll
new file mode 100644
index 000000000000..f68861dad1ac
--- /dev/null
+++ b/test/Transforms/GVN/fence.ll
@@ -0,0 +1,69 @@
+; RUN: opt -S -basicaa -gvn < %s | FileCheck %s
+
+; We can value forward across the fence since we can (semantically)
+; reorder the following load before the fence.
+define i32 @test(i32* %addr.i) {
+; CHECK-LABEL: @test
+; CHECK: store
+; CHECK: fence
+; CHECK-NOT: load
+; CHECK: ret
+ store i32 5, i32* %addr.i, align 4
+ fence release
+ %a = load i32, i32* %addr.i, align 4
+ ret i32 %a
+}
+
+; Same as above
+define i32 @test2(i32* %addr.i) {
+; CHECK-LABEL: @test2
+; CHECK-NEXT: fence
+; CHECK-NOT: load
+; CHECK: ret
+ %a = load i32, i32* %addr.i, align 4
+ fence release
+ %a2 = load i32, i32* %addr.i, align 4
+ %res = sub i32 %a, %a2
+ ret i32 %res
+}
+
+; We can not value forward across an acquire barrier since we might
+; be synchronizing with another thread storing to the same variable
+; followed by a release fence. This is not so much enforcing an
+; ordering property (though it is that too), but a liveness
+; property. We expect to eventually see the value stored by
+; another thread when spinning on that location.
+define i32 @test3(i32* noalias %addr.i, i32* noalias %otheraddr) {
+; CHECK-LABEL: @test3
+; CHECK: load
+; CHECK: fence
+; CHECK: load
+; CHECK: ret i32 %res
+ ; the following code is intended to model the unrolling of
+ ; two iterations in a spin loop of the form:
+ ; do { fence acquire; tmp = *%addr.i; } while (!tmp);
+ ; It's hopefully clear that allowing PRE to turn this into:
+ ; if (!*%addr.i) while(true) {} would be unfortunate
+ fence acquire
+ %a = load i32, i32* %addr.i, align 4
+ fence acquire
+ %a2 = load i32, i32* %addr.i, align 4
+ %res = sub i32 %a, %a2
+ ret i32 %res
+}
+
+; Another example of why forwarding across an acquire fence is problematic
+; can be seen in a normal locking operation. Say we had:
+; *p = 5; unlock(l); lock(l); use(p);
+; forwarding the store to p would be invalid. A reasonable implementation
+; of unlock and lock might be:
+; unlock() { atomicrmw sub %l, 1 unordered; fence release }
+; lock() {
+; do {
+; %res = cmpxchg %p, 0, 1, monotonic monotonic
+; } while(!%res.success)
+; fence acquire;
+; }
+; Given we chose to forward across the release fence, we clearly can't forward
+; across the acquire fence as well.
+
diff --git a/test/Transforms/GVN/flags.ll b/test/Transforms/GVN/flags.ll
new file mode 100644
index 000000000000..1b44905bb5c7
--- /dev/null
+++ b/test/Transforms/GVN/flags.ll
@@ -0,0 +1,18 @@
+; RUN: opt -gvn -S < %s | FileCheck %s
+
+declare void @use(i1)
+
+define void @test1(float %x, float %y) {
+entry:
+ %cmp1 = fcmp nnan oeq float %y, %x
+ %cmp2 = fcmp oeq float %x, %y
+ call void @use(i1 %cmp1)
+ call void @use(i1 %cmp2)
+ ret void
+}
+
+; CHECK-LABEL: define void @test1(
+; CHECK: %[[cmp:.*]] = fcmp oeq float %y, %x
+; CHECK-NEXT: call void @use(i1 %[[cmp]])
+; CHECK-NEXT: call void @use(i1 %[[cmp]])
+; CHECK-NEXT: ret void
diff --git a/test/Transforms/GVN/fold-const-expr.ll b/test/Transforms/GVN/fold-const-expr.ll
new file mode 100644
index 000000000000..562f39c4e8a1
--- /dev/null
+++ b/test/Transforms/GVN/fold-const-expr.ll
@@ -0,0 +1,99 @@
+; GVN failed to do constant expression folding and expanded
+; them unfolded in many places, producing exponentially large const
+; expressions. As a result, the compilation never finished.
+; This test checks that we are folding constant expressions.
+; PR 28418
+; RUN: opt -gvn -S < %s | FileCheck %s
+
+%2 = type { i32, i32, i32, i32, i32 }
+define i32 @_Z16vector3util_mainv(i32 %x, i32 %y) {
+ %tmp1 = alloca %2, align 4
+ %tmp114 = getelementptr inbounds %2, %2* %tmp1, i64 0, i32 1
+ %tmp115 = bitcast i32* %tmp114 to <4 x i32>*
+ store <4 x i32> <i32 234567891, i32 345678912, i32 456789123, i32 0>, <4 x i32>* %tmp115, align 4
+ %tmp1683 = getelementptr inbounds %2, %2* %tmp1, i64 0, i32 1
+ %tmp1688 = load i32, i32* %tmp1683, align 4
+ %tmp1693 = shl i32 %tmp1688, 5
+ %tmp1694 = xor i32 %tmp1693, %tmp1688
+ %tmp1695 = lshr i32 %tmp1694, 7
+ %tmp1696 = xor i32 %tmp1695, %tmp1694
+ %tmp1697 = shl i32 %tmp1696, 22
+ %tmp1698 = xor i32 %tmp1697, %tmp1696
+ %tmp1707 = shl i32 %tmp1698, 5
+ %tmp1708 = xor i32 %tmp1707, %tmp1698
+ %tmp1709 = lshr i32 %tmp1708, 7
+ %tmp1710 = xor i32 %tmp1709, %tmp1708
+ %tmp1711 = shl i32 %tmp1710, 22
+ %tmp1712 = xor i32 %tmp1711, %tmp1710
+ %tmp1721 = shl i32 %tmp1712, 5
+ %tmp1722 = xor i32 %tmp1721, %tmp1712
+ %tmp1723 = lshr i32 %tmp1722, 7
+ %tmp1724 = xor i32 %tmp1723, %tmp1722
+ %tmp1725 = shl i32 %tmp1724, 22
+ %tmp1726 = xor i32 %tmp1725, %tmp1724
+ %tmp1735 = shl i32 %tmp1726, 5
+ %tmp1736 = xor i32 %tmp1735, %tmp1726
+ %tmp1737 = lshr i32 %tmp1736, 7
+ %tmp1738 = xor i32 %tmp1737, %tmp1736
+ %tmp1739 = shl i32 %tmp1738, 22
+ %tmp1740 = xor i32 %tmp1739, %tmp1738
+ store i32 %tmp1740, i32* %tmp1683, align 4
+; CHECK: store i32 310393545, i32* %tmp114, align 4
+ %tmp1756 = getelementptr inbounds %2, %2* %tmp1, i64 0, i32 1
+ %tmp1761 = load i32, i32* %tmp1756, align 4
+ %tmp1766 = shl i32 %tmp1761, 5
+ %tmp1767 = xor i32 %tmp1766, %tmp1761
+ %tmp1768 = lshr i32 %tmp1767, 7
+ %tmp1769 = xor i32 %tmp1768, %tmp1767
+ %tmp1770 = shl i32 %tmp1769, 22
+ %tmp1771 = xor i32 %tmp1770, %tmp1769
+ %tmp1780 = shl i32 %tmp1771, 5
+ %tmp1781 = xor i32 %tmp1780, %tmp1771
+ %tmp1782 = lshr i32 %tmp1781, 7
+ %tmp1783 = xor i32 %tmp1782, %tmp1781
+ %tmp1784 = shl i32 %tmp1783, 22
+ %tmp1785 = xor i32 %tmp1784, %tmp1783
+ %tmp1794 = shl i32 %tmp1785, 5
+ %tmp1795 = xor i32 %tmp1794, %tmp1785
+ %tmp1796 = lshr i32 %tmp1795, 7
+ %tmp1797 = xor i32 %tmp1796, %tmp1795
+ %tmp1798 = shl i32 %tmp1797, 22
+ %tmp1799 = xor i32 %tmp1798, %tmp1797
+ %tmp1808 = shl i32 %tmp1799, 5
+ %tmp1809 = xor i32 %tmp1808, %tmp1799
+ %tmp1810 = lshr i32 %tmp1809, 7
+ %tmp1811 = xor i32 %tmp1810, %tmp1809
+ %tmp1812 = shl i32 %tmp1811, 22
+ %tmp1813 = xor i32 %tmp1812, %tmp1811
+ store i32 %tmp1813, i32* %tmp1756, align 4
+; CHECK: store i32 -383584258, i32* %tmp114, align 4
+ %tmp2645 = getelementptr inbounds %2, %2* %tmp1, i64 0, i32 1
+ %tmp2650 = load i32, i32* %tmp2645, align 4
+ %tmp2655 = shl i32 %tmp2650, 5
+ %tmp2656 = xor i32 %tmp2655, %tmp2650
+ %tmp2657 = lshr i32 %tmp2656, 7
+ %tmp2658 = xor i32 %tmp2657, %tmp2656
+ %tmp2659 = shl i32 %tmp2658, 22
+ %tmp2660 = xor i32 %tmp2659, %tmp2658
+ %tmp2669 = shl i32 %tmp2660, 5
+ %tmp2670 = xor i32 %tmp2669, %tmp2660
+ %tmp2671 = lshr i32 %tmp2670, 7
+ %tmp2672 = xor i32 %tmp2671, %tmp2670
+ %tmp2673 = shl i32 %tmp2672, 22
+ %tmp2674 = xor i32 %tmp2673, %tmp2672
+ %tmp2683 = shl i32 %tmp2674, 5
+ %tmp2684 = xor i32 %tmp2683, %tmp2674
+ %tmp2685 = lshr i32 %tmp2684, 7
+ %tmp2686 = xor i32 %tmp2685, %tmp2684
+ %tmp2687 = shl i32 %tmp2686, 22
+ %tmp2688 = xor i32 %tmp2687, %tmp2686
+ %tmp2697 = shl i32 %tmp2688, 5
+ %tmp2698 = xor i32 %tmp2697, %tmp2688
+ %tmp2699 = lshr i32 %tmp2698, 7
+ %tmp2700 = xor i32 %tmp2699, %tmp2698
+ %tmp2701 = shl i32 %tmp2700, 22
+ %tmp2702 = xor i32 %tmp2701, %tmp2700
+ store i32 %tmp2702, i32* %tmp2645, align 4
+; CHECK: store i32 -57163022, i32* %tmp114, align 4
+ ret i32 0
+}
diff --git a/test/Transforms/GVN/hoist-pr20242.ll b/test/Transforms/GVN/hoist-pr20242.ll
new file mode 100644
index 000000000000..b91f18a5cd64
--- /dev/null
+++ b/test/Transforms/GVN/hoist-pr20242.ll
@@ -0,0 +1,74 @@
+; RUN: opt -gvn-hoist -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Check that all "or" expressions are hoisted.
+; CHECK-LABEL: @encode
+; CHECK: or i32
+; CHECK-NOT: or i32
+
+define i8* @encode(i8* %p, i32 %v) {
+entry:
+ %p.addr = alloca i8*, align 8
+ %v.addr = alloca i32, align 4
+ store i8* %p, i8** %p.addr, align 8
+ store i32 %v, i32* %v.addr, align 4
+ %0 = load i32, i32* %v.addr, align 4
+ %cmp = icmp ult i32 %0, 23
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ %1 = load i32, i32* %v.addr, align 4
+ %or = or i32 %1, 128
+ %conv = trunc i32 %or to i8
+ %2 = load i8*, i8** %p.addr, align 8
+ %incdec.ptr = getelementptr inbounds i8, i8* %2, i32 1
+ store i8* %incdec.ptr, i8** %p.addr, align 8
+ store i8 %conv, i8* %2, align 1
+ br label %if.end15
+
+if.else: ; preds = %entry
+ %3 = load i32, i32* %v.addr, align 4
+ %cmp1 = icmp ult i32 %3, 42
+ br i1 %cmp1, label %if.then3, label %if.else9
+
+if.then3: ; preds = %if.else
+ %4 = load i32, i32* %v.addr, align 4
+ %or4 = or i32 %4, 128
+ %conv5 = trunc i32 %or4 to i8
+ %5 = load i8*, i8** %p.addr, align 8
+ %incdec.ptr6 = getelementptr inbounds i8, i8* %5, i32 1
+ store i8* %incdec.ptr6, i8** %p.addr, align 8
+ store i8 %conv5, i8* %5, align 1
+ %6 = load i32, i32* %v.addr, align 4
+ %conv7 = trunc i32 %6 to i8
+ %7 = load i8*, i8** %p.addr, align 8
+ %incdec.ptr8 = getelementptr inbounds i8, i8* %7, i32 1
+ store i8* %incdec.ptr8, i8** %p.addr, align 8
+ store i8 %conv7, i8* %7, align 1
+ br label %if.end
+
+if.else9: ; preds = %if.else
+ %8 = load i32, i32* %v.addr, align 4
+ %or10 = or i32 %8, 128
+ %conv11 = trunc i32 %or10 to i8
+ %9 = load i8*, i8** %p.addr, align 8
+ %incdec.ptr12 = getelementptr inbounds i8, i8* %9, i32 1
+ store i8* %incdec.ptr12, i8** %p.addr, align 8
+ store i8 %conv11, i8* %9, align 1
+ %10 = load i32, i32* %v.addr, align 4
+ %shr = lshr i32 %10, 7
+ %conv13 = trunc i32 %shr to i8
+ %11 = load i8*, i8** %p.addr, align 8
+ %incdec.ptr14 = getelementptr inbounds i8, i8* %11, i32 1
+ store i8* %incdec.ptr14, i8** %p.addr, align 8
+ store i8 %conv13, i8* %11, align 1
+ br label %if.end
+
+if.end: ; preds = %if.else9, %if.then3
+ br label %if.end15
+
+if.end15: ; preds = %if.end, %if.then
+ %12 = load i8*, i8** %p.addr, align 8
+ ret i8* %12
+}
diff --git a/test/Transforms/GVN/hoist-pr22005.ll b/test/Transforms/GVN/hoist-pr22005.ll
new file mode 100644
index 000000000000..9299f4f48e55
--- /dev/null
+++ b/test/Transforms/GVN/hoist-pr22005.ll
@@ -0,0 +1,30 @@
+; RUN: opt -gvn-hoist -S < %s | FileCheck %s
+target datalayout = "e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Check that all "sub" expressions are hoisted.
+; CHECK-LABEL: @fun
+; CHECK: sub i64
+; CHECK-NOT: sub i64
+
+define i64 @fun(i8* %out, i8* %end) {
+ %1 = icmp ult i8* %out, %end
+ br i1 %1, label %2, label %6
+
+; <label>:2 ; preds = %0
+ %3 = ptrtoint i8* %end to i64
+ %4 = ptrtoint i8* %out to i64
+ %5 = sub i64 %3, %4
+ br label %10
+
+; <label>:6 ; preds = %0
+ %7 = ptrtoint i8* %out to i64
+ %8 = ptrtoint i8* %end to i64
+ %9 = sub i64 %8, %7
+ br label %10
+
+; <label>:10 ; preds = %6, %2
+ %.in = phi i64 [ %5, %2 ], [ %9, %6 ]
+ %11 = add i64 %.in, 257
+ ret i64 %11
+}
diff --git a/test/Transforms/GVN/hoist-pr28606.ll b/test/Transforms/GVN/hoist-pr28606.ll
new file mode 100644
index 000000000000..2c588283ea91
--- /dev/null
+++ b/test/Transforms/GVN/hoist-pr28606.ll
@@ -0,0 +1,50 @@
+; RUN: opt -gvn-hoist -S < %s | FileCheck %s
+
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686-pc-windows-msvc18.0.0"
+
+%struct.S = type { i8* }
+
+declare void @f(<{ %struct.S }>* inalloca)
+
+
+; Check that we don't clone the %x alloca and insert it in the live range of
+; %argmem, which would break the inalloca contract.
+;
+; CHECK-LABEL: @test
+; CHECK: alloca i8
+; CHECK: stacksave
+; CHECK: alloca inalloca
+; CHECK-NOT: alloca i8
+
+; Check that store instructions are hoisted.
+; CHECK: store i8
+; CHECK-NOT: store i8
+; CHECK: stackrestore
+
+define void @test(i1 %b) {
+entry:
+ %x = alloca i8
+ %inalloca.save = call i8* @llvm.stacksave()
+ %argmem = alloca inalloca <{ %struct.S }>, align 4
+ %0 = getelementptr inbounds <{ %struct.S }>, <{ %struct.S }>* %argmem, i32 0, i32 0
+ br i1 %b, label %true, label %false
+
+true:
+ %p = getelementptr inbounds %struct.S, %struct.S* %0, i32 0, i32 0
+ store i8* %x, i8** %p, align 4
+ br label %exit
+
+false:
+ %p2 = getelementptr inbounds %struct.S, %struct.S* %0, i32 0, i32 0
+ store i8* %x, i8** %p2, align 4
+ br label %exit
+
+exit:
+ call void @f(<{ %struct.S }>* inalloca %argmem)
+ call void @llvm.stackrestore(i8* %inalloca.save)
+ ret void
+}
+
+declare i8* @llvm.stacksave()
+declare void @llvm.stackrestore(i8*)
diff --git a/test/Transforms/GVN/hoist.ll b/test/Transforms/GVN/hoist.ll
new file mode 100644
index 000000000000..9c2c425a1a72
--- /dev/null
+++ b/test/Transforms/GVN/hoist.ll
@@ -0,0 +1,691 @@
+; RUN: opt -gvn-hoist -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@GlobalVar = internal global float 1.000000e+00
+
+; Check that all scalar expressions are hoisted.
+;
+; CHECK-LABEL: @scalarsHoisting
+; CHECK: fsub
+; CHECK: fsub
+; CHECK: fmul
+; CHECK: fmul
+; CHECK-NOT: fmul
+; CHECK-NOT: fsub
+define float @scalarsHoisting(float %d, float %min, float %max, float %a) {
+entry:
+ %div = fdiv float 1.000000e+00, %d
+ %cmp = fcmp oge float %div, 0.000000e+00
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ %sub = fsub float %min, %a
+ %mul = fmul float %sub, %div
+ %sub1 = fsub float %max, %a
+ %mul2 = fmul float %sub1, %div
+ br label %if.end
+
+if.else: ; preds = %entry
+ %sub3 = fsub float %max, %a
+ %mul4 = fmul float %sub3, %div
+ %sub5 = fsub float %min, %a
+ %mul6 = fmul float %sub5, %div
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ]
+ %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ]
+ %add = fadd float %tmax.0, %tmin.0
+ ret float %add
+}
+
+; Check that all loads and scalars depending on the loads are hoisted.
+; Check that getelementptr computation gets hoisted before the load.
+;
+; CHECK-LABEL: @readsAndScalarsHoisting
+; CHECK: load
+; CHECK: load
+; CHECK: load
+; CHECK: fsub
+; CHECK: fsub
+; CHECK: fmul
+; CHECK: fmul
+; CHECK-NOT: load
+; CHECK-NOT: fmul
+; CHECK-NOT: fsub
+define float @readsAndScalarsHoisting(float %d, float* %min, float* %max, float* %a) {
+entry:
+ %div = fdiv float 1.000000e+00, %d
+ %cmp = fcmp oge float %div, 0.000000e+00
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ %A = getelementptr float, float* %min, i32 1
+ %0 = load float, float* %A, align 4
+ %1 = load float, float* %a, align 4
+ %sub = fsub float %0, %1
+ %mul = fmul float %sub, %div
+ %2 = load float, float* %max, align 4
+ %sub1 = fsub float %2, %1
+ %mul2 = fmul float %sub1, %div
+ br label %if.end
+
+if.else: ; preds = %entry
+ %3 = load float, float* %max, align 4
+ %4 = load float, float* %a, align 4
+ %sub3 = fsub float %3, %4
+ %mul4 = fmul float %sub3, %div
+ %B = getelementptr float, float* %min, i32 1
+ %5 = load float, float* %B, align 4
+ %sub5 = fsub float %5, %4
+ %mul6 = fmul float %sub5, %div
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ]
+ %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ]
+ %add = fadd float %tmax.0, %tmin.0
+ ret float %add
+}
+
+; Check that we do not hoist loads after a store: the first two loads will be
+; hoisted, and then the third load will not be hoisted.
+;
+; CHECK-LABEL: @readsAndWrites
+; CHECK: load
+; CHECK: load
+; CHECK: fsub
+; CHECK: fmul
+; CHECK: store
+; CHECK: load
+; CHECK: fsub
+; CHECK: fmul
+; CHECK: load
+; CHECK: fsub
+; CHECK: fmul
+; CHECK-NOT: load
+; CHECK-NOT: fmul
+; CHECK-NOT: fsub
+define float @readsAndWrites(float %d, float* %min, float* %max, float* %a) {
+entry:
+ %div = fdiv float 1.000000e+00, %d
+ %cmp = fcmp oge float %div, 0.000000e+00
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ %0 = load float, float* %min, align 4
+ %1 = load float, float* %a, align 4
+ store float %0, float* @GlobalVar
+ %sub = fsub float %0, %1
+ %mul = fmul float %sub, %div
+ %2 = load float, float* %max, align 4
+ %sub1 = fsub float %2, %1
+ %mul2 = fmul float %sub1, %div
+ br label %if.end
+
+if.else: ; preds = %entry
+ %3 = load float, float* %max, align 4
+ %4 = load float, float* %a, align 4
+ %sub3 = fsub float %3, %4
+ %mul4 = fmul float %sub3, %div
+ %5 = load float, float* %min, align 4
+ %sub5 = fsub float %5, %4
+ %mul6 = fmul float %sub5, %div
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ]
+ %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ]
+ %add = fadd float %tmax.0, %tmin.0
+ ret float %add
+}
+
+; Check that we do hoist loads when the store is above the insertion point.
+;
+; CHECK-LABEL: @readsAndWriteAboveInsertPt
+; CHECK: load
+; CHECK: load
+; CHECK: load
+; CHECK: fsub
+; CHECK: fsub
+; CHECK: fmul
+; CHECK: fmul
+; CHECK-NOT: load
+; CHECK-NOT: fmul
+; CHECK-NOT: fsub
+define float @readsAndWriteAboveInsertPt(float %d, float* %min, float* %max, float* %a) {
+entry:
+ %div = fdiv float 1.000000e+00, %d
+ store float 0.000000e+00, float* @GlobalVar
+ %cmp = fcmp oge float %div, 0.000000e+00
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ %0 = load float, float* %min, align 4
+ %1 = load float, float* %a, align 4
+ %sub = fsub float %0, %1
+ %mul = fmul float %sub, %div
+ %2 = load float, float* %max, align 4
+ %sub1 = fsub float %2, %1
+ %mul2 = fmul float %sub1, %div
+ br label %if.end
+
+if.else: ; preds = %entry
+ %3 = load float, float* %max, align 4
+ %4 = load float, float* %a, align 4
+ %sub3 = fsub float %3, %4
+ %mul4 = fmul float %sub3, %div
+ %5 = load float, float* %min, align 4
+ %sub5 = fsub float %5, %4
+ %mul6 = fmul float %sub5, %div
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ]
+ %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ]
+ %add = fadd float %tmax.0, %tmin.0
+ ret float %add
+}
+
+; Check that dependent expressions are hoisted.
+; CHECK-LABEL: @dependentScalarsHoisting
+; CHECK: fsub
+; CHECK: fadd
+; CHECK: fdiv
+; CHECK: fmul
+; CHECK-NOT: fsub
+; CHECK-NOT: fadd
+; CHECK-NOT: fdiv
+; CHECK-NOT: fmul
+define float @dependentScalarsHoisting(float %a, float %b, i1 %c) {
+entry:
+ br i1 %c, label %if.then, label %if.else
+
+if.then:
+ %d = fsub float %b, %a
+ %e = fadd float %d, %a
+ %f = fdiv float %e, %a
+ %g = fmul float %f, %a
+ br label %if.end
+
+if.else:
+ %h = fsub float %b, %a
+ %i = fadd float %h, %a
+ %j = fdiv float %i, %a
+ %k = fmul float %j, %a
+ br label %if.end
+
+if.end:
+ %r = phi float [ %g, %if.then ], [ %k, %if.else ]
+ ret float %r
+}
+
+; Check that all independent expressions are hoisted.
+; CHECK-LABEL: @independentScalarsHoisting
+; CHECK: fadd
+; CHECK: fsub
+; CHECK: fdiv
+; CHECK: fmul
+; CHECK-NOT: fsub
+; CHECK-NOT: fdiv
+; CHECK-NOT: fmul
+define float @independentScalarsHoisting(float %a, float %b, i1 %c) {
+entry:
+ br i1 %c, label %if.then, label %if.else
+
+if.then:
+ %d = fadd float %b, %a
+ %e = fsub float %b, %a
+ %f = fdiv float %b, %a
+ %g = fmul float %b, %a
+ br label %if.end
+
+if.else:
+ %i = fadd float %b, %a
+ %h = fsub float %b, %a
+ %j = fdiv float %b, %a
+ %k = fmul float %b, %a
+ br label %if.end
+
+if.end:
+ %p = phi float [ %d, %if.then ], [ %i, %if.else ]
+ %q = phi float [ %e, %if.then ], [ %h, %if.else ]
+ %r = phi float [ %f, %if.then ], [ %j, %if.else ]
+ %s = phi float [ %g, %if.then ], [ %k, %if.else ]
+ %t = fadd float %p, %q
+ %u = fadd float %r, %s
+ %v = fadd float %t, %u
+ ret float %v
+}
+
+; Check that we hoist load and scalar expressions in triangles.
+; CHECK-LABEL: @triangleHoisting
+; CHECK: load
+; CHECK: load
+; CHECK: load
+; CHECK: fsub
+; CHECK: fsub
+; CHECK: fmul
+; CHECK: fmul
+; CHECK-NOT: load
+; CHECK-NOT: fmul
+; CHECK-NOT: fsub
+define float @triangleHoisting(float %d, float* %min, float* %max, float* %a) {
+entry:
+ %div = fdiv float 1.000000e+00, %d
+ %cmp = fcmp oge float %div, 0.000000e+00
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %0 = load float, float* %min, align 4
+ %1 = load float, float* %a, align 4
+ %sub = fsub float %0, %1
+ %mul = fmul float %sub, %div
+ %2 = load float, float* %max, align 4
+ %sub1 = fsub float %2, %1
+ %mul2 = fmul float %sub1, %div
+ br label %if.end
+
+if.end: ; preds = %entry
+ %p1 = phi float [ %mul2, %if.then ], [ 0.000000e+00, %entry ]
+ %p2 = phi float [ %mul, %if.then ], [ 0.000000e+00, %entry ]
+ %3 = load float, float* %max, align 4
+ %4 = load float, float* %a, align 4
+ %sub3 = fsub float %3, %4
+ %mul4 = fmul float %sub3, %div
+ %5 = load float, float* %min, align 4
+ %sub5 = fsub float %5, %4
+ %mul6 = fmul float %sub5, %div
+
+ %x = fadd float %p1, %mul6
+ %y = fadd float %p2, %mul4
+ %z = fadd float %x, %y
+ ret float %z
+}
+
+; Check that we hoist load and scalar expressions in dominator.
+; CHECK-LABEL: @dominatorHoisting
+; CHECK: load
+; CHECK: load
+; CHECK: fsub
+; CHECK: fmul
+; CHECK: load
+; CHECK: fsub
+; CHECK: fmul
+; CHECK-NOT: load
+; CHECK-NOT: fmul
+; CHECK-NOT: fsub
+define float @dominatorHoisting(float %d, float* %min, float* %max, float* %a) {
+entry:
+ %div = fdiv float 1.000000e+00, %d
+ %0 = load float, float* %min, align 4
+ %1 = load float, float* %a, align 4
+ %sub = fsub float %0, %1
+ %mul = fmul float %sub, %div
+ %2 = load float, float* %max, align 4
+ %sub1 = fsub float %2, %1
+ %mul2 = fmul float %sub1, %div
+ %cmp = fcmp oge float %div, 0.000000e+00
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %3 = load float, float* %max, align 4
+ %4 = load float, float* %a, align 4
+ %sub3 = fsub float %3, %4
+ %mul4 = fmul float %sub3, %div
+ %5 = load float, float* %min, align 4
+ %sub5 = fsub float %5, %4
+ %mul6 = fmul float %sub5, %div
+ br label %if.end
+
+if.end: ; preds = %entry
+ %p1 = phi float [ %mul4, %if.then ], [ 0.000000e+00, %entry ]
+ %p2 = phi float [ %mul6, %if.then ], [ 0.000000e+00, %entry ]
+
+ %x = fadd float %p1, %mul2
+ %y = fadd float %p2, %mul
+ %z = fadd float %x, %y
+ ret float %z
+}
+
+; Check that we hoist load and scalar expressions in dominator.
+; CHECK-LABEL: @domHoisting
+; CHECK: load
+; CHECK: load
+; CHECK: fsub
+; CHECK: fmul
+; CHECK: load
+; CHECK: fsub
+; CHECK: fmul
+; CHECK-NOT: load
+; CHECK-NOT: fmul
+; CHECK-NOT: fsub
+define float @domHoisting(float %d, float* %min, float* %max, float* %a) {
+entry:
+ %div = fdiv float 1.000000e+00, %d
+ %0 = load float, float* %min, align 4
+ %1 = load float, float* %a, align 4
+ %sub = fsub float %0, %1
+ %mul = fmul float %sub, %div
+ %2 = load float, float* %max, align 4
+ %sub1 = fsub float %2, %1
+ %mul2 = fmul float %sub1, %div
+ %cmp = fcmp oge float %div, 0.000000e+00
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then:
+ %3 = load float, float* %max, align 4
+ %4 = load float, float* %a, align 4
+ %sub3 = fsub float %3, %4
+ %mul4 = fmul float %sub3, %div
+ %5 = load float, float* %min, align 4
+ %sub5 = fsub float %5, %4
+ %mul6 = fmul float %sub5, %div
+ br label %if.end
+
+if.else:
+ %6 = load float, float* %max, align 4
+ %7 = load float, float* %a, align 4
+ %sub9 = fsub float %6, %7
+ %mul10 = fmul float %sub9, %div
+ %8 = load float, float* %min, align 4
+ %sub12 = fsub float %8, %7
+ %mul13 = fmul float %sub12, %div
+ br label %if.end
+
+if.end:
+ %p1 = phi float [ %mul4, %if.then ], [ %mul10, %if.else ]
+ %p2 = phi float [ %mul6, %if.then ], [ %mul13, %if.else ]
+
+ %x = fadd float %p1, %mul2
+ %y = fadd float %p2, %mul
+ %z = fadd float %x, %y
+ ret float %z
+}
+
+; Check that we do not hoist loads past stores within a same basic block.
+; CHECK-LABEL: @noHoistInSingleBBWithStore
+; CHECK: load
+; CHECK: store
+; CHECK: load
+; CHECK: store
+define i32 @noHoistInSingleBBWithStore() {
+entry:
+ %D = alloca i32, align 4
+ %0 = bitcast i32* %D to i8*
+ %bf = load i8, i8* %0, align 4
+ %bf.clear = and i8 %bf, -3
+ store i8 %bf.clear, i8* %0, align 4
+ %bf1 = load i8, i8* %0, align 4
+ %bf.clear1 = and i8 %bf1, 1
+ store i8 %bf.clear1, i8* %0, align 4
+ ret i32 0
+}
+
+; Check that we do not hoist loads past calls within a same basic block.
+; CHECK-LABEL: @noHoistInSingleBBWithCall
+; CHECK: load
+; CHECK: call
+; CHECK: load
+declare void @foo()
+define i32 @noHoistInSingleBBWithCall() {
+entry:
+ %D = alloca i32, align 4
+ %0 = bitcast i32* %D to i8*
+ %bf = load i8, i8* %0, align 4
+ %bf.clear = and i8 %bf, -3
+ call void @foo()
+ %bf1 = load i8, i8* %0, align 4
+ %bf.clear1 = and i8 %bf1, 1
+ ret i32 0
+}
+
+; Check that we do not hoist loads past stores in any branch of a diamond.
+; CHECK-LABEL: @noHoistInDiamondWithOneStore1
+; CHECK: fdiv
+; CHECK: fcmp
+; CHECK: br
+define float @noHoistInDiamondWithOneStore1(float %d, float* %min, float* %max, float* %a) {
+entry:
+ %div = fdiv float 1.000000e+00, %d
+ %cmp = fcmp oge float %div, 0.000000e+00
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ store float 0.000000e+00, float* @GlobalVar
+ %0 = load float, float* %min, align 4
+ %1 = load float, float* %a, align 4
+ %sub = fsub float %0, %1
+ %mul = fmul float %sub, %div
+ %2 = load float, float* %max, align 4
+ %sub1 = fsub float %2, %1
+ %mul2 = fmul float %sub1, %div
+ br label %if.end
+
+if.else: ; preds = %entry
+ ; There are no side effects on the if.else branch.
+ %3 = load float, float* %max, align 4
+ %4 = load float, float* %a, align 4
+ %sub3 = fsub float %3, %4
+ %mul4 = fmul float %sub3, %div
+ %5 = load float, float* %min, align 4
+ %sub5 = fsub float %5, %4
+ %mul6 = fmul float %sub5, %div
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ]
+ %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ]
+
+ %6 = load float, float* %max, align 4
+ %7 = load float, float* %a, align 4
+ %sub6 = fsub float %6, %7
+ %mul7 = fmul float %sub6, %div
+ %8 = load float, float* %min, align 4
+ %sub8 = fsub float %8, %7
+ %mul9 = fmul float %sub8, %div
+
+ %add = fadd float %tmax.0, %tmin.0
+ ret float %add
+}
+
+; Check that we do not hoist loads past stores from half diamond.
+; CHECK-LABEL: @noHoistInHalfDiamondPastStore
+; CHECK: load
+; CHECK-NEXT: load
+; CHECK-NEXT: store
+; CHECK-NEXT: br
+; CHECK: load
+; CHECK: load
+; CHECK: load
+; CHECK: br
+define float @noHoistInHalfDiamondPastStore(float %d, float* %min, float* %max, float* %a) {
+entry:
+ %div = fdiv float 1.000000e+00, %d
+ %cmp = fcmp oge float %div, 0.000000e+00
+ %0 = load float, float* %min, align 4
+ %1 = load float, float* %a, align 4
+
+ ; Loads should not be hoisted above this store.
+ store float 0.000000e+00, float* @GlobalVar
+
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ ; There are no side effects on the if.then branch.
+ %2 = load float, float* %max, align 4
+ %3 = load float, float* %a, align 4
+ %sub3 = fsub float %2, %3
+ %mul4 = fmul float %sub3, %div
+ %4 = load float, float* %min, align 4
+ %sub5 = fsub float %4, %3
+ %mul6 = fmul float %sub5, %div
+ br label %if.end
+
+if.end:
+ %tmax.0 = phi float [ %mul4, %if.then ], [ %0, %entry ]
+ %tmin.0 = phi float [ %mul6, %if.then ], [ %1, %entry ]
+
+ %add = fadd float %tmax.0, %tmin.0
+ ret float %add
+}
+
+; Check that we do not hoist loads past a store in any branch of a diamond.
+; CHECK-LABEL: @noHoistInDiamondWithOneStore2
+; CHECK: fdiv
+; CHECK: fcmp
+; CHECK: br
+define float @noHoistInDiamondWithOneStore2(float %d, float* %min, float* %max, float* %a) {
+entry:
+ %div = fdiv float 1.000000e+00, %d
+ %cmp = fcmp oge float %div, 0.000000e+00
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ ; There are no side effects on the if.then branch.
+ %0 = load float, float* %min, align 4
+ %1 = load float, float* %a, align 4
+ %sub = fsub float %0, %1
+ %mul = fmul float %sub, %div
+ %2 = load float, float* %max, align 4
+ %sub1 = fsub float %2, %1
+ %mul2 = fmul float %sub1, %div
+ br label %if.end
+
+if.else: ; preds = %entry
+ store float 0.000000e+00, float* @GlobalVar
+ %3 = load float, float* %max, align 4
+ %4 = load float, float* %a, align 4
+ %sub3 = fsub float %3, %4
+ %mul4 = fmul float %sub3, %div
+ %5 = load float, float* %min, align 4
+ %sub5 = fsub float %5, %4
+ %mul6 = fmul float %sub5, %div
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ %tmax.0 = phi float [ %mul2, %if.then ], [ %mul6, %if.else ]
+ %tmin.0 = phi float [ %mul, %if.then ], [ %mul4, %if.else ]
+
+ %6 = load float, float* %max, align 4
+ %7 = load float, float* %a, align 4
+ %sub6 = fsub float %6, %7
+ %mul7 = fmul float %sub6, %div
+ %8 = load float, float* %min, align 4
+ %sub8 = fsub float %8, %7
+ %mul9 = fmul float %sub8, %div
+
+ %add = fadd float %tmax.0, %tmin.0
+ ret float %add
+}
+
+; Check that we do not hoist loads outside a loop containing stores.
+; CHECK-LABEL: @noHoistInLoopsWithStores
+; CHECK: fdiv
+; CHECK: fcmp
+; CHECK: br
+define float @noHoistInLoopsWithStores(float %d, float* %min, float* %max, float* %a) {
+entry:
+ %div = fdiv float 1.000000e+00, %d
+ %cmp = fcmp oge float %div, 0.000000e+00
+ br i1 %cmp, label %do.body, label %if.else
+
+do.body:
+ %0 = load float, float* %min, align 4
+ %1 = load float, float* %a, align 4
+
+ ; It is unsafe to hoist the loads outside the loop because of the store.
+ store float 0.000000e+00, float* @GlobalVar
+
+ %sub = fsub float %0, %1
+ %mul = fmul float %sub, %div
+ %2 = load float, float* %max, align 4
+ %sub1 = fsub float %2, %1
+ %mul2 = fmul float %sub1, %div
+ br label %while.cond
+
+while.cond:
+ %cmp1 = fcmp oge float %mul2, 0.000000e+00
+ br i1 %cmp1, label %if.end, label %do.body
+
+if.else:
+ %3 = load float, float* %max, align 4
+ %4 = load float, float* %a, align 4
+ %sub3 = fsub float %3, %4
+ %mul4 = fmul float %sub3, %div
+ %5 = load float, float* %min, align 4
+ %sub5 = fsub float %5, %4
+ %mul6 = fmul float %sub5, %div
+ br label %if.end
+
+if.end:
+ %tmax.0 = phi float [ %mul2, %while.cond ], [ %mul6, %if.else ]
+ %tmin.0 = phi float [ %mul, %while.cond ], [ %mul4, %if.else ]
+
+ %add = fadd float %tmax.0, %tmin.0
+ ret float %add
+}
+
+; Check that we hoist stores: all the instructions from the then branch
+; should be hoisted.
+; CHECK-LABEL: @hoistStores
+; CHECK: zext
+; CHECK: trunc
+; CHECK: getelementptr
+; CHECK: load
+; CHECK: getelementptr
+; CHECK: store
+; CHECK: load
+; CHECK: load
+; CHECK: zext
+; CHECK: add
+; CHECK: store
+; CHECK: br
+; CHECK: if.then
+; CHECK: br
+
+%struct.foo = type { i16* }
+
+define void @hoistStores(%struct.foo* %s, i32* %coord, i1 zeroext %delta) {
+entry:
+ %frombool = zext i1 %delta to i8
+ %tobool = trunc i8 %frombool to i1
+ br i1 %tobool, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ %p = getelementptr inbounds %struct.foo, %struct.foo* %s, i32 0, i32 0
+ %0 = load i16*, i16** %p, align 8
+ %incdec.ptr = getelementptr inbounds i16, i16* %0, i32 1
+ store i16* %incdec.ptr, i16** %p, align 8
+ %1 = load i16, i16* %0, align 2
+ %conv = zext i16 %1 to i32
+ %2 = load i32, i32* %coord, align 4
+ %add = add i32 %2, %conv
+ store i32 %add, i32* %coord, align 4
+ br label %if.end
+
+if.else: ; preds = %entry
+ %p1 = getelementptr inbounds %struct.foo, %struct.foo* %s, i32 0, i32 0
+ %3 = load i16*, i16** %p1, align 8
+ %incdec.ptr2 = getelementptr inbounds i16, i16* %3, i32 1
+ store i16* %incdec.ptr2, i16** %p1, align 8
+ %4 = load i16, i16* %3, align 2
+ %conv3 = zext i16 %4 to i32
+ %5 = load i32, i32* %coord, align 4
+ %add4 = add i32 %5, %conv3
+ store i32 %add4, i32* %coord, align 4
+ %6 = load i16*, i16** %p1, align 8
+ %incdec.ptr6 = getelementptr inbounds i16, i16* %6, i32 1
+ store i16* %incdec.ptr6, i16** %p1, align 8
+ %7 = load i16, i16* %6, align 2
+ %conv7 = zext i16 %7 to i32
+ %shl = shl i32 %conv7, 8
+ %8 = load i32, i32* %coord, align 4
+ %add8 = add i32 %8, %shl
+ store i32 %add8, i32* %coord, align 4
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ ret void
+}
diff --git a/test/Transforms/GVN/load-constant-mem.ll b/test/Transforms/GVN/load-constant-mem.ll
index f870485630fa..a61c83d33047 100644
--- a/test/Transforms/GVN/load-constant-mem.ll
+++ b/test/Transforms/GVN/load-constant-mem.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -gvn -instcombine -S | grep "ret i32 0"
+; RUN: opt < %s -basicaa -gvn -instcombine -S | FileCheck %s
; PR4189
@G = external constant [4 x i32]
@@ -11,3 +11,9 @@ entry:
%C = sub i32 %A, %B
ret i32 %C
}
+
+; CHECK: define i32 @test(i8* %p, i32 %i) #0 {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i8 4, i8* %p, align 1
+; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: }
diff --git a/test/Transforms/GVN/nonescaping-malloc.ll b/test/Transforms/GVN/nonescaping-malloc.ll
index f83b317fade4..250e27c88f45 100644
--- a/test/Transforms/GVN/nonescaping-malloc.ll
+++ b/test/Transforms/GVN/nonescaping-malloc.ll
@@ -1,7 +1,9 @@
; REQUIRES: asserts
-; RUN: opt < %s -basicaa -gvn -stats -disable-output 2>&1 | grep "Number of loads deleted"
+; RUN: opt < %s -basicaa -gvn -stats -disable-output 2>&1 | FileCheck %s
; rdar://7363102
+; CHECK: Number of loads deleted
+
; GVN should be able to eliminate load %tmp22.i, because it is redundant with
; load %tmp8.i. This requires being able to prove that %tmp7.i doesn't
; alias the malloc'd value %tmp.i20.i.i, which it can do since %tmp7.i
diff --git a/test/Transforms/GVN/pr28562.ll b/test/Transforms/GVN/pr28562.ll
new file mode 100644
index 000000000000..b34be3153096
--- /dev/null
+++ b/test/Transforms/GVN/pr28562.ll
@@ -0,0 +1,9 @@
+; RUN: opt -S -gvn < %s | FileCheck %s
+define i32* @test1(i32* %a) {
+ %x1 = getelementptr inbounds i32, i32* %a, i32 10
+ %x2 = getelementptr i32, i32* %a, i32 10
+ ret i32* %x2
+; CHECK-LABEL: @test1(
+; CHECK: %[[x:.*]] = getelementptr i32, i32* %a, i32 10
+; CHECK: ret i32* %[[x]]
+}
diff --git a/test/Transforms/GVN/pr28626.ll b/test/Transforms/GVN/pr28626.ll
new file mode 100644
index 000000000000..7930e6948253
--- /dev/null
+++ b/test/Transforms/GVN/pr28626.ll
@@ -0,0 +1,42 @@
+; RUN: opt -S -gvn-hoist < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @test1(i1 %a, i1** %d) {
+entry:
+ %0 = load i1*, i1** %d, align 8
+ br i1 %a, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ br label %if.end
+
+if.else: ; preds = %entry
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ %c.0 = phi i1 [ 1, %if.then ], [ 0, %if.else ]
+ br i1 %c.0, label %if.then2, label %if.else3
+
+if.then2: ; preds = %if.end
+ %rc = getelementptr inbounds i1, i1* %0, i64 0
+ store i1 %c.0, i1* %rc, align 4
+ br label %if.end6
+
+if.else3: ; preds = %if.end
+ %rc5 = getelementptr inbounds i1, i1* %0, i64 0
+ store i1 %c.0, i1* %rc5, align 4
+ br label %if.end6
+
+if.end6: ; preds = %if.else3, %if.then2
+ ret void
+}
+
+; CHECK-LABEL: define void @test1(
+; CHECK: %[[load:.*]] = load i1*, i1** %d, align 8
+; CHECK: %[[phi:.*]] = phi i1 [ true, {{.*}} ], [ false, {{.*}} ]
+
+; CHECK: %[[gep0:.*]] = getelementptr inbounds i1, i1* %[[load]], i64 0
+; CHECK: store i1 %[[phi]], i1* %[[gep0]], align 4
+
+; Check that store instructions are hoisted.
+; CHECK-NOT: store \ No newline at end of file
diff --git a/test/Transforms/GVN/rle-must-alias.ll b/test/Transforms/GVN/rle-must-alias.ll
index bf8b139a2e86..e5fafd9d6b31 100644
--- a/test/Transforms/GVN/rle-must-alias.ll
+++ b/test/Transforms/GVN/rle-must-alias.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -basicaa -gvn -S | grep "DEAD = phi i32 "
+; RUN: opt < %s -basicaa -gvn -S | FileCheck %s
; GVN should eliminate the fully redundant %9 GEP which
; allows DEAD to be removed. This is PR3198.
@@ -33,6 +33,7 @@ bb1: ; preds = %entry
bb3: ; preds = %bb1, %bb
%9 = getelementptr [100 x i32], [100 x i32]* @H, i32 0, i32 %i ; <i32*> [#uses=1]
%DEAD = load i32, i32* %9, align 4 ; <i32> [#uses=1]
+; CHECK: %DEAD = phi i32 [ 0, %bb1 ], [ %4, %bb ]
ret i32 %DEAD
bb4: ; preds = %bb1
diff --git a/test/Transforms/GVN/rle-semidominated.ll b/test/Transforms/GVN/rle-semidominated.ll
deleted file mode 100644
index f80d040c93eb..000000000000
--- a/test/Transforms/GVN/rle-semidominated.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: opt < %s -basicaa -gvn -S | grep "DEAD = phi i32 "
-
-define i32 @main(i32* %p, i32 %x, i32 %y) {
-block1:
- %z = load i32, i32* %p
- %cmp = icmp eq i32 %x, %y
- br i1 %cmp, label %block2, label %block3
-
-block2:
- br label %block4
-
-block3:
- %b = bitcast i32 0 to i32
- store i32 %b, i32* %p
- br label %block4
-
-block4:
- %DEAD = load i32, i32* %p
- ret i32 %DEAD
-}
diff --git a/test/Transforms/GVN/volatile-nonvolatile.ll b/test/Transforms/GVN/volatile-nonvolatile.ll
new file mode 100644
index 000000000000..fa5159fbba4b
--- /dev/null
+++ b/test/Transforms/GVN/volatile-nonvolatile.ll
@@ -0,0 +1,61 @@
+; RUN: opt -tbaa -gvn -S < %s | FileCheck %s
+
+%struct.t = type { i32* }
+
+; The loaded address and the location of the address itself are not aliased,
+; so the second reload is not necessary. Check that it can be eliminated.
+; CHECK-LABEL: test1
+; CHECK: load
+; CHECK-NOT: load
+define void @test1(%struct.t* nocapture readonly %p, i32 %v) #0 {
+entry:
+ %m = getelementptr inbounds %struct.t, %struct.t* %p, i32 0, i32 0
+ %0 = load i32*, i32** %m, align 4, !tbaa !1
+ store volatile i32 %v, i32* %0, align 4, !tbaa !6
+ %1 = load i32*, i32** %m, align 4, !tbaa !1
+ store volatile i32 %v, i32* %1, align 4, !tbaa !6
+ ret void
+}
+
+; The store via the loaded address may overwrite the address itself.
+; Make sure that both loads remain.
+; CHECK-LABEL: test2
+; CHECK: load
+; CHECK: store
+; CHECK: load
+define void @test2(%struct.t* nocapture readonly %p, i32 %v) #0 {
+entry:
+ %m = getelementptr inbounds %struct.t, %struct.t* %p, i32 0, i32 0
+ %0 = load i32*, i32** %m, align 4, !tbaa !1
+ store volatile i32 %v, i32* %0, align 4, !tbaa !1
+ %1 = load i32*, i32** %m, align 4, !tbaa !1
+ store volatile i32 %v, i32* %1, align 4, !tbaa !1
+ ret void
+}
+
+; The loads are ordered and non-monotonic. Although they are not aliased to
+; the stores, make sure both are preserved.
+; CHECK-LABEL: test3
+; CHECK: load
+; CHECK: store
+; CHECK: load
+define void @test3(%struct.t* nocapture readonly %p, i32 %v) #0 {
+entry:
+ %m = getelementptr inbounds %struct.t, %struct.t* %p, i32 0, i32 0
+ %0 = load atomic i32*, i32** %m acquire, align 4, !tbaa !1
+ store volatile i32 %v, i32* %0, align 4, !tbaa !6
+ %1 = load atomic i32*, i32** %m acquire, align 4, !tbaa !1
+ store volatile i32 %v, i32* %1, align 4, !tbaa !6
+ ret void
+}
+
+attributes #0 = { norecurse nounwind }
+
+!1 = !{!2, !3, i64 0}
+!2 = !{!"", !3, i64 0}
+!3 = !{!"any pointer", !4, i64 0}
+!4 = !{!"omnipotent char", !5, i64 0}
+!5 = !{!"Simple C/C++ TBAA"}
+!6 = !{!7, !7, i64 0}
+!7 = !{!"int", !4, i64 0}
+
diff --git a/test/Transforms/GlobalDCE/2002-08-17-FunctionDGE.ll b/test/Transforms/GlobalDCE/2002-08-17-FunctionDGE.ll
index 49a87d902305..3da0fd5f14ba 100644
--- a/test/Transforms/GlobalDCE/2002-08-17-FunctionDGE.ll
+++ b/test/Transforms/GlobalDCE/2002-08-17-FunctionDGE.ll
@@ -1,8 +1,9 @@
; Make sure that functions are removed successfully if they are referred to by
; a global that is dead. Make sure any globals they refer to die as well.
-; RUN: opt < %s -globaldce -S | not grep foo
+; RUN: opt < %s -globaldce -S | FileCheck %s
+; CHECK-NOT: foo
;; Unused, kills %foo
@b = internal global i32 ()* @foo ; <i32 ()**> [#uses=0]
diff --git a/test/Transforms/GlobalDCE/2002-08-17-WorkListTest.ll b/test/Transforms/GlobalDCE/2002-08-17-WorkListTest.ll
index 58d3f4c7d840..4e45e2bb468a 100644
--- a/test/Transforms/GlobalDCE/2002-08-17-WorkListTest.ll
+++ b/test/Transforms/GlobalDCE/2002-08-17-WorkListTest.ll
@@ -1,7 +1,9 @@
; This testcase tests that a worklist is being used, and that globals can be
; removed if they are the subject of a constexpr and ConstantPointerRef
-; RUN: opt < %s -globaldce -S | not grep global
+; RUN: opt < %s -globaldce -S | FileCheck %s
+
+; CHECK-NOT: global
@t0 = internal global [4 x i8] c"foo\00" ; <[4 x i8]*> [#uses=1]
@t1 = internal global [4 x i8] c"bar\00" ; <[4 x i8]*> [#uses=1]
diff --git a/test/Transforms/GlobalDCE/2003-07-01-SelfReference.ll b/test/Transforms/GlobalDCE/2003-07-01-SelfReference.ll
index 781c57e4ad47..0b1b2792453e 100644
--- a/test/Transforms/GlobalDCE/2003-07-01-SelfReference.ll
+++ b/test/Transforms/GlobalDCE/2003-07-01-SelfReference.ll
@@ -1,5 +1,7 @@
; distilled from 255.vortex
-; RUN: opt < %s -globaldce -S | not grep testfunc
+; RUN: opt < %s -globaldce -S | FileCheck %s
+
+; CHECK-NOT: testfunc
declare i1 ()* @getfunc()
diff --git a/test/Transforms/GlobalDCE/2003-10-09-PreserveWeakGlobals.ll b/test/Transforms/GlobalDCE/2003-10-09-PreserveWeakGlobals.ll
index 5b2c97f0f85e..ded316597211 100644
--- a/test/Transforms/GlobalDCE/2003-10-09-PreserveWeakGlobals.ll
+++ b/test/Transforms/GlobalDCE/2003-10-09-PreserveWeakGlobals.ll
@@ -1,6 +1,6 @@
; Weak variables should be preserved by global DCE!
-; RUN: opt < %s -globaldce -S | grep @A
-
+; RUN: opt < %s -globaldce -S | FileCheck %s
+; CHECK: @A
@A = weak global i32 54
diff --git a/test/Transforms/GlobalDCE/basicvariabletest.ll b/test/Transforms/GlobalDCE/basicvariabletest.ll
index a97b66de2c63..ae8ce3f7fd06 100644
--- a/test/Transforms/GlobalDCE/basicvariabletest.ll
+++ b/test/Transforms/GlobalDCE/basicvariabletest.ll
@@ -1,5 +1,6 @@
-; RUN: opt < %s -globaldce -S | not grep global
+; RUN: opt < %s -passes=globaldce -S | FileCheck %s
+; CHECK-NOT: global
@X = external global i32
@Y = internal global i32 7
diff --git a/test/Transforms/GlobalDCE/externally_available.ll b/test/Transforms/GlobalDCE/externally_available.ll
index cc88cb10dcc6..fca49b29ec8e 100644
--- a/test/Transforms/GlobalDCE/externally_available.ll
+++ b/test/Transforms/GlobalDCE/externally_available.ll
@@ -1,10 +1,12 @@
-; RUN: opt < %s -globaldce -S | not grep test_
+; RUN: opt < %s -globaldce -S | FileCheck %s
; test_function should not be emitted to the .s file.
+; CHECK-NOT: test_function
define available_externally i32 @test_function() {
ret i32 4
}
; test_global should not be emitted to the .s file.
+; CHECK-NOT: test_global
@test_global = available_externally global i32 4
diff --git a/test/Transforms/GlobalDCE/global-ifunc.ll b/test/Transforms/GlobalDCE/global-ifunc.ll
new file mode 100644
index 000000000000..8022452c3485
--- /dev/null
+++ b/test/Transforms/GlobalDCE/global-ifunc.ll
@@ -0,0 +1,13 @@
+; RUN: opt -S -globaldce < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@if = ifunc void (), void ()* @fn
+
+define internal void @fn() {
+entry:
+ ret void
+}
+
+; CHECK-DAG: @if = ifunc void (), void ()* @fn
+; CHECK-DAG: define internal void @fn(
diff --git a/test/Transforms/GlobalMerge/basic.ll b/test/Transforms/GlobalMerge/basic.ll
new file mode 100644
index 000000000000..598d917f74a6
--- /dev/null
+++ b/test/Transforms/GlobalMerge/basic.ll
@@ -0,0 +1,20 @@
+; RUN: opt -global-merge -global-merge-max-offset=100 -S -o - %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: @_MergedGlobals = private global { i32, i32 } { i32 1, i32 2 }
+
+; CHECK: @a = internal alias i32, getelementptr inbounds ({ i32, i32 }, { i32, i32 }* @_MergedGlobals, i32 0, i32 0)
+@a = internal global i32 1
+
+; CHECK: @b = internal alias i32, getelementptr inbounds ({ i32, i32 }, { i32, i32 }* @_MergedGlobals, i32 0, i32 1)
+@b = internal global i32 2
+
+define void @use() {
+ ; CHECK: load i32, i32* getelementptr inbounds ({ i32, i32 }, { i32, i32 }* @_MergedGlobals, i32 0, i32 0)
+ %x = load i32, i32* @a
+ ; CHECK: load i32, i32* getelementptr inbounds ({ i32, i32 }, { i32, i32 }* @_MergedGlobals, i32 0, i32 1)
+ %y = load i32, i32* @b
+ ret void
+}
diff --git a/test/Transforms/GlobalOpt/2007-05-13-Crash.ll b/test/Transforms/GlobalOpt/2007-05-13-Crash.ll
index 90ba15c37074..bed4fec981ef 100644
--- a/test/Transforms/GlobalOpt/2007-05-13-Crash.ll
+++ b/test/Transforms/GlobalOpt/2007-05-13-Crash.ll
@@ -8,7 +8,7 @@ target triple = "i686-apple-darwin8"
%struct.__builtin_CFString = type { i32*, i32, i8*, i32 }
@_ZZ19SFLGetVisibilityKeyvE19_kSFLLVisibilityKey = internal global %struct.__CFString* null ; <%struct.__CFString**> [#uses=2]
@_ZZ22SFLGetAlwaysVisibleKeyvE22_kSFLLAlwaysVisibleKey = internal global %struct.__CFString* null ; <%struct.__CFString**> [#uses=7]
-internal constant %struct.__builtin_CFString {
+@0 = internal constant %struct.__builtin_CFString {
i32* getelementptr ([0 x i32], [0 x i32]* @__CFConstantStringClassReference, i32 0, i32 0),
i32 1992,
i8* getelementptr ([14 x i8], [14 x i8]* @.str, i32 0, i32 0),
diff --git a/test/Transforms/GlobalOpt/2008-01-13-OutOfRangeSROA.ll b/test/Transforms/GlobalOpt/2008-01-13-OutOfRangeSROA.ll
index 8df7050b38a7..4adc9607c75b 100644
--- a/test/Transforms/GlobalOpt/2008-01-13-OutOfRangeSROA.ll
+++ b/test/Transforms/GlobalOpt/2008-01-13-OutOfRangeSROA.ll
@@ -1,6 +1,8 @@
-; RUN: opt < %s -globalopt -S | grep "16 x .31 x double.. zeroinitializer"
+; RUN: opt < %s -globalopt -S | FileCheck %s
-; The 'X' indices could be larger than 31. Do not SROA the outer indices of this array.
+; The 'X' indices could be larger than 31. Do not SROA the outer
+; indices of this array.
+; CHECK: @mm = {{.*}} [16 x [31 x double]] zeroinitializer
@mm = internal global [16 x [31 x double]] zeroinitializer, align 32
define void @test(i32 %X) {
diff --git a/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll b/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll
index ac663604854d..7818e5d642c1 100644
--- a/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll
+++ b/test/Transforms/GlobalOpt/2008-01-29-VolatileGlobal.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -globalopt -S | grep "load volatile"
+; RUN: opt < %s -globalopt -S | FileCheck %s
+; CHECK: load volatile
@t0.1441 = internal global double 0x3FD5555555555555, align 8 ; <double*> [#uses=1]
define double @foo() nounwind {
diff --git a/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll b/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll
index 934d928aa88e..c3a6d7b57165 100644
--- a/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll
+++ b/test/Transforms/GlobalOpt/2008-04-26-SROA-Global-Align.ll
@@ -2,9 +2,11 @@
; alignments. Elements 0 and 2 must be 16-byte aligned, and element
; 1 must be at least 8 byte aligned (but could be more).
-; RUN: opt < %s -globalopt -S | grep "@G.0 = internal unnamed_addr global .*align 16"
-; RUN: opt < %s -globalopt -S | grep "@G.1 = internal unnamed_addr global .*align 8"
-; RUN: opt < %s -globalopt -S | grep "@G.2 = internal unnamed_addr global .*align 16"
+; RUN: opt < %s -globalopt -S | FileCheck %s
+; CHECK: @G.0 = internal unnamed_addr global {{.*}}align 16
+; CHECK: @G.1 = internal unnamed_addr global {{.*}}align 8
+; CHECK: @G.2 = internal unnamed_addr global {{.*}}align 16
+
; rdar://5891920
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
diff --git a/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll b/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll
index d602df1e701c..b9d2d993240e 100644
--- a/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll
+++ b/test/Transforms/GlobalOpt/2008-07-17-addrspace.ll
@@ -2,9 +2,11 @@
; values. This used to crash, because globalopt forgot to put the new var in the
; same address space as the old one.
-; RUN: opt < %s -globalopt -S > %t
+; RUN: opt < %s -globalopt -S | FileCheck %s
+
; Check that the new global values still have their address space
-; RUN: cat %t | grep 'addrspace.*global'
+; CHECK: addrspace(1) global
+; CHECK: addrspace(1) global
@struct = internal addrspace(1) global { i32, i32 } zeroinitializer
@array = internal addrspace(1) global [ 2 x i32 ] zeroinitializer
@@ -24,5 +26,3 @@ define void @bar(i32 %R) {
store i32 %R, i32 addrspace(1) * getelementptr ({ i32, i32 }, { i32, i32 } addrspace(1) * @struct, i32 0, i32 0)
ret void
}
-
-
diff --git a/test/Transforms/GlobalOpt/2009-01-13-phi-user.ll b/test/Transforms/GlobalOpt/2009-01-13-phi-user.ll
index 21ec5267b973..7ad24b92a2da 100644
--- a/test/Transforms/GlobalOpt/2009-01-13-phi-user.ll
+++ b/test/Transforms/GlobalOpt/2009-01-13-phi-user.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -globalopt -S | grep "phi.*@head"
+; RUN: opt < %s -globalopt -S | FileCheck %s
+; CHECK: phi{{.*}}@head
; PR3321
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/GlobalOpt/2009-03-05-dbg.ll b/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
index f74f2081dc20..3f6e269a960b 100644
--- a/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
+++ b/test/Transforms/GlobalOpt/2009-03-05-dbg.ll
@@ -1,5 +1,6 @@
; REQUIRES: asserts
-; RUN: opt < %s -globalopt -stats -disable-output 2>&1 | grep "1 globalopt - Number of global vars shrunk to booleans"
+; RUN: opt < %s -globalopt -stats -disable-output 2>&1 | FileCheck %s
+; CHECK: 1 globalopt - Number of global vars shrunk to booleans
@Stop = internal global i32 0 ; <i32*> [#uses=3]
@@ -53,13 +54,12 @@ return: ; preds = %bb2
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
-!llvm.dbg.gv = !{!0}
-
+!llvm.dbg.cu = !{!1}
!0 = !DIGlobalVariable(name: "Stop", line: 2, isLocal: true, isDefinition: true, scope: !1, file: !1, type: !2, variable: i32* @Stop)
-!1 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !20, enums: !21, retainedTypes: !21)
+!1 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: FullDebug, file: !20, enums: !21, retainedTypes: !21, globals: !{!0})
!2 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!3 = !DILocalVariable(name: "i", line: 4, arg: 1, scope: !4, file: !1, type: !2)
-!4 = distinct !DISubprogram(name: "foo", linkageName: "foo", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !1, type: !5)
+!4 = distinct !DISubprogram(name: "foo", linkageName: "foo", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, unit: !1, scope: !1, type: !5)
!5 = !DISubroutineType(types: !6)
!6 = !{!2, !2}
!7 = !DILocation(line: 5, scope: !8)
@@ -70,7 +70,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!12 = !DILocation(line: 11, scope: !8)
!13 = !DILocation(line: 14, scope: !14)
!14 = distinct !DILexicalBlock(line: 0, column: 0, file: !20, scope: !15)
-!15 = distinct !DISubprogram(name: "bar", linkageName: "bar", line: 13, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scope: !1, type: !16)
+!15 = distinct !DISubprogram(name: "bar", linkageName: "bar", line: 13, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, unit: !1, scope: !1, type: !16)
!16 = !DISubroutineType(types: !17)
!17 = !{!2}
!18 = !DILocation(line: 15, scope: !14)
diff --git a/test/Transforms/GlobalOpt/2009-03-06-Anonymous.ll b/test/Transforms/GlobalOpt/2009-03-06-Anonymous.ll
index 930a96e2182f..d5836ea9254a 100644
--- a/test/Transforms/GlobalOpt/2009-03-06-Anonymous.ll
+++ b/test/Transforms/GlobalOpt/2009-03-06-Anonymous.ll
@@ -1,9 +1,9 @@
; RUN: opt < %s -globalopt -S | FileCheck %s
-global i32 0
+@0 = global i32 0
; CHECK-DAG: @0 = internal global i32 0
-private global i32 0
+@1 = private global i32 0
; CHECK-DAG: @1 = private global i32 0
define i32* @2() {
diff --git a/test/Transforms/GlobalOpt/2009-03-07-PromotePtrToBool.ll b/test/Transforms/GlobalOpt/2009-03-07-PromotePtrToBool.ll
index 469fa765c7af..bec4891a496a 100644
--- a/test/Transforms/GlobalOpt/2009-03-07-PromotePtrToBool.ll
+++ b/test/Transforms/GlobalOpt/2009-03-07-PromotePtrToBool.ll
@@ -1,6 +1,8 @@
-; RUN: opt < %s -globalopt -S | grep "@X = internal unnamed_addr global i32"
+; RUN: opt < %s -globalopt -S | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin7"
+
+; CHECK: @X = internal unnamed_addr global i32
@X = internal global i32* null ; <i32**> [#uses=2]
@Y = internal global i32 0 ; <i32*> [#uses=1]
diff --git a/test/Transforms/GlobalOpt/2010-10-19-WeakOdr.ll b/test/Transforms/GlobalOpt/2010-10-19-WeakOdr.ll
index ad5b440a5abe..c88dc1c2d124 100644
--- a/test/Transforms/GlobalOpt/2010-10-19-WeakOdr.ll
+++ b/test/Transforms/GlobalOpt/2010-10-19-WeakOdr.ll
@@ -2,7 +2,7 @@
; PR8389: Globals with weak_odr linkage type must not be modified
-; CHECK: weak_odr global i32 0
+; CHECK: weak_odr local_unnamed_addr global i32 0
@SomeVar = weak_odr global i32 0
diff --git a/test/Transforms/GlobalOpt/GSROA-section.ll b/test/Transforms/GlobalOpt/GSROA-section.ll
new file mode 100644
index 000000000000..a439fa0797de
--- /dev/null
+++ b/test/Transforms/GlobalOpt/GSROA-section.ll
@@ -0,0 +1,30 @@
+; This test lets globalopt split the global struct and array into different
+; values. The pass needs to preserve section attribute.
+
+; RUN: opt < %s -globalopt -S | FileCheck %s
+; Check that the new global values still have their section assignment.
+; CHECK: @struct
+; CHECK: section ".foo"
+; CHECK: @array
+; CHECK-NOT: section ".foo"
+
+@struct = internal global { i32, i32 } zeroinitializer, section ".foo"
+@array = internal global [ 2 x i32 ] zeroinitializer
+
+define i32 @foo() {
+ %A = load i32, i32* getelementptr ({ i32, i32 }, { i32, i32 }* @struct, i32 0, i32 0)
+ %B = load i32, i32* getelementptr ([ 2 x i32 ], [ 2 x i32 ]* @array, i32 0, i32 0)
+ ; Use the loaded values, so they won't get removed completely
+ %R = add i32 %A, %B
+ ret i32 %R
+}
+
+; We put stores in a different function, so that the global variables won't get
+; optimized away completely.
+define void @bar(i32 %R) {
+ store i32 %R, i32* getelementptr ([ 2 x i32 ], [ 2 x i32 ]* @array, i32 0, i32 0)
+ store i32 %R, i32* getelementptr ({ i32, i32 }, { i32, i32 }* @struct, i32 0, i32 0)
+ ret void
+}
+
+
diff --git a/test/Transforms/GlobalOpt/MallocSROA-section.ll b/test/Transforms/GlobalOpt/MallocSROA-section.ll
new file mode 100644
index 000000000000..75b3cfec1373
--- /dev/null
+++ b/test/Transforms/GlobalOpt/MallocSROA-section.ll
@@ -0,0 +1,28 @@
+; RUN: opt -globalopt -S < %s | FileCheck %s
+; CHECK: @Y.f0
+; CHECK: section ".foo"
+; CHECK: @Y.f1
+; CHECK: section ".foo"
+
+%struct.xyz = type { double, i32 }
+
+@Y = internal global %struct.xyz* null ,section ".foo" ; <%struct.xyz**> [#uses=2]
+@numf2s = external global i32 ; <i32*> [#uses=1]
+
+define void @init_net() {
+entry:
+ %0 = load i32, i32* @numf2s, align 4 ; <i32> [#uses=1]
+ %mallocsize2 = shl i32 %0, 4 ; <i32> [#uses=1]
+ %malloccall3 = tail call i8* @malloc(i32 %mallocsize2) ; <i8*> [#uses=1]
+ %1 = bitcast i8* %malloccall3 to %struct.xyz* ; <%struct.xyz*> [#uses=1]
+ store %struct.xyz* %1, %struct.xyz** @Y, align 8
+ ret void
+}
+
+define void @load_train() {
+entry:
+ %0 = load %struct.xyz*, %struct.xyz** @Y, align 8 ; <%struct.xyz*> [#uses=0]
+ ret void
+}
+
+declare noalias i8* @malloc(i32)
diff --git a/test/Transforms/GlobalOpt/SROA-section.ll b/test/Transforms/GlobalOpt/SROA-section.ll
new file mode 100644
index 000000000000..1589608a67aa
--- /dev/null
+++ b/test/Transforms/GlobalOpt/SROA-section.ll
@@ -0,0 +1,27 @@
+; Verify that section assignment is copied during SROA
+; RUN: opt < %s -globalopt -S | FileCheck %s
+; CHECK: @G.0
+; CHECK: section ".foo"
+; CHECK: @G.1
+; CHECK: section ".foo"
+; CHECK: @G.2
+; CHECK: section ".foo"
+
+%T = type { double, double, double }
+@G = internal global %T zeroinitializer, align 16, section ".foo"
+
+define void @test() {
+ store double 1.0, double* getelementptr (%T, %T* @G, i32 0, i32 0), align 16
+ store double 2.0, double* getelementptr (%T, %T* @G, i32 0, i32 1), align 8
+ store double 3.0, double* getelementptr (%T, %T* @G, i32 0, i32 2), align 16
+ ret void
+}
+
+define double @test2() {
+ %V1 = load double, double* getelementptr (%T, %T* @G, i32 0, i32 0), align 16
+ %V2 = load double, double* getelementptr (%T, %T* @G, i32 0, i32 1), align 8
+ %V3 = load double, double* getelementptr (%T, %T* @G, i32 0, i32 2), align 16
+ %R = fadd double %V1, %V2
+ %R2 = fadd double %R, %V3
+ ret double %R2
+}
diff --git a/test/Transforms/GlobalOpt/alias-used-address-space.ll b/test/Transforms/GlobalOpt/alias-used-address-space.ll
index 367f375ec900..08081b89ac68 100644
--- a/test/Transforms/GlobalOpt/alias-used-address-space.ll
+++ b/test/Transforms/GlobalOpt/alias-used-address-space.ll
@@ -16,7 +16,7 @@ target datalayout = "p:32:32:32-p1:16:16:16"
; CHECK-DAG: @llvm.compiler.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(1)* @ia to i8*)], section "llvm.metadata"
@sameAsUsed = global [1 x i8*] [i8* addrspacecast(i8 addrspace(1)* @ca to i8*)]
-; CHECK-DAG: @sameAsUsed = global [1 x i8*] [i8* addrspacecast (i8 addrspace(1)* @c to i8*)]
+; CHECK-DAG: @sameAsUsed = local_unnamed_addr global [1 x i8*] [i8* addrspacecast (i8 addrspace(1)* @c to i8*)]
@ca = internal alias i8, i8 addrspace(1)* @c
; CHECK: @ca = internal alias i8, i8 addrspace(1)* @c
diff --git a/test/Transforms/GlobalOpt/alias-used.ll b/test/Transforms/GlobalOpt/alias-used.ll
index 9ced3974ee87..91601fb99270 100644
--- a/test/Transforms/GlobalOpt/alias-used.ll
+++ b/test/Transforms/GlobalOpt/alias-used.ll
@@ -7,16 +7,16 @@
@ia = internal alias i8, i8* @i
@llvm.used = appending global [3 x i8*] [i8* bitcast (void ()* @fa to i8*), i8* bitcast (void ()* @f to i8*), i8* @ca], section "llvm.metadata"
-; CHECK-DAG: @llvm.used = appending global [3 x i8*] [i8* @ca, i8* bitcast (void ()* @fa to i8*), i8* bitcast (void ()* @f to i8*)], section "llvm.metadata"
+; CHECK-DAG: @llvm.used = appending global [3 x i8*] [i8* @ca, i8* bitcast (void ()* @f to i8*), i8* bitcast (void ()* @fa to i8*)], section "llvm.metadata"
@llvm.compiler.used = appending global [4 x i8*] [i8* bitcast (void ()* @fa3 to i8*), i8* bitcast (void ()* @fa to i8*), i8* @ia, i8* @i], section "llvm.metadata"
; CHECK-DAG: @llvm.compiler.used = appending global [2 x i8*] [i8* bitcast (void ()* @fa3 to i8*), i8* @ia], section "llvm.metadata"
@sameAsUsed = global [3 x i8*] [i8* bitcast (void ()* @fa to i8*), i8* bitcast (void ()* @f to i8*), i8* @ca]
-; CHECK-DAG: @sameAsUsed = global [3 x i8*] [i8* bitcast (void ()* @f to i8*), i8* bitcast (void ()* @f to i8*), i8* @c]
+; CHECK-DAG: @sameAsUsed = local_unnamed_addr global [3 x i8*] [i8* bitcast (void ()* @f to i8*), i8* bitcast (void ()* @f to i8*), i8* @c]
@other = global i32* bitcast (void ()* @fa to i32*)
-; CHECK-DAG: @other = global i32* bitcast (void ()* @f to i32*)
+; CHECK-DAG: @other = local_unnamed_addr global i32* bitcast (void ()* @f to i32*)
@fa = internal alias void (), void ()* @f
; CHECK: @fa = internal alias void (), void ()* @f
@@ -45,3 +45,22 @@ define i8* @g2() {
define i8* @h() {
ret i8* @ca
}
+
+; Check that GlobalOpt doesn't try to resolve aliases with GEP operands.
+
+%struct.S = type { i32, i32, i32 }
+@s = global %struct.S { i32 1, i32 2, i32 3 }, align 4
+
+@alias1 = alias i32, i32* getelementptr inbounds (%struct.S, %struct.S* @s, i64 0, i32 1)
+@alias2 = alias i32, i32* getelementptr inbounds (%struct.S, %struct.S* @s, i64 0, i32 2)
+
+; CHECK: load i32, i32* @alias1, align 4
+; CHECK: load i32, i32* @alias2, align 4
+
+define i32 @foo1() {
+entry:
+ %0 = load i32, i32* @alias1, align 4
+ %1 = load i32, i32* @alias2, align 4
+ %add = add nsw i32 %1, %0
+ ret i32 %add
+}
diff --git a/test/Transforms/GlobalOpt/assume.ll b/test/Transforms/GlobalOpt/assume.ll
index 3f3157a38fbb..b15106bc83ac 100644
--- a/test/Transforms/GlobalOpt/assume.ll
+++ b/test/Transforms/GlobalOpt/assume.ll
@@ -1,6 +1,6 @@
; RUN: opt -S -globalopt < %s | FileCheck %s
-; CHECK: @tmp = global i32 42
+; CHECK: @tmp = local_unnamed_addr global i32 42
@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }]
@tmp = global i32 0
diff --git a/test/Transforms/GlobalOpt/basictest.ll b/test/Transforms/GlobalOpt/basictest.ll
index 4c25e4f9f407..d5294820abe9 100644
--- a/test/Transforms/GlobalOpt/basictest.ll
+++ b/test/Transforms/GlobalOpt/basictest.ll
@@ -1,9 +1,10 @@
-; RUN: opt < %s -globalopt -S | not grep global
+; RUN: opt < %s -globalopt -S | FileCheck %s
+; RUN: opt < %s -passes=globalopt -S | FileCheck %s
+; CHECK-NOT: global
@X = internal global i32 4 ; <i32*> [#uses=1]
define i32 @foo() {
%V = load i32, i32* @X ; <i32> [#uses=1]
ret i32 %V
}
-
diff --git a/test/Transforms/GlobalOpt/constantexpr-dangle.ll b/test/Transforms/GlobalOpt/constantexpr-dangle.ll
index fbbfe69c15ab..3917bfff4119 100644
--- a/test/Transforms/GlobalOpt/constantexpr-dangle.ll
+++ b/test/Transforms/GlobalOpt/constantexpr-dangle.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -instcombine -globalopt -S | \
-; RUN: grep "internal fastcc float @foo"
+; RUN: opt < %s -instcombine -globalopt -S | FileCheck %s
+; CHECK: internal fastcc float @foo
define internal float @foo() {
ret float 0.000000e+00
@@ -10,4 +10,3 @@ define float @bar() {
%tmp2 = fmul float %tmp1, 1.000000e+01 ; <float> [#uses=1]
ret float %tmp2
}
-
diff --git a/test/Transforms/GlobalOpt/constantfold-initializers.ll b/test/Transforms/GlobalOpt/constantfold-initializers.ll
index d0c18812cd94..3c20353d157b 100644
--- a/test/Transforms/GlobalOpt/constantfold-initializers.ll
+++ b/test/Transforms/GlobalOpt/constantfold-initializers.ll
@@ -4,13 +4,13 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
@.str91250 = global [3 x i8] zeroinitializer
-; CHECK: @A = global i1 false
+; CHECK: @A = local_unnamed_addr global i1 false
@A = global i1 icmp ne (i64 sub nsw (i64 ptrtoint (i8* getelementptr inbounds ([3 x i8], [3 x i8]* @.str91250, i64 0, i64 1) to i64), i64 ptrtoint ([3 x i8]* @.str91250 to i64)), i64 1)
; PR11352
@xs = global [2 x i32] zeroinitializer, align 4
-; CHECK: @xs = global [2 x i32] [i32 1, i32 1]
+; CHECK: @xs = local_unnamed_addr global [2 x i32] [i32 1, i32 1]
; PR12642
%PR12642.struct = type { i8 }
@@ -32,7 +32,7 @@ entry:
@f = internal global %closure zeroinitializer, align 4
@m = global i32 0, align 4
; CHECK-NOT: @f
-; CHECK: @m = global i32 13
+; CHECK: @m = local_unnamed_addr global i32 13
define internal i32 @test2_helper(%closure* %this, i32 %b) {
entry:
@@ -53,7 +53,7 @@ entry:
; PR19955
@dllimportptr = global i32* null, align 4
-; CHECK: @dllimportptr = global i32* null, align 4
+; CHECK: @dllimportptr = local_unnamed_addr global i32* null, align 4
@dllimportvar = external dllimport global i32
define internal void @test3() {
entry:
@@ -62,7 +62,7 @@ entry:
}
@dllexportptr = global i32* null, align 4
-; CHECK: @dllexportptr = global i32* @dllexportvar, align 4
+; CHECK: @dllexportptr = local_unnamed_addr global i32* @dllexportvar, align 4
@dllexportvar = dllexport global i32 0, align 4
; CHECK: @dllexportvar = dllexport global i32 20, align 4
define internal void @test4() {
@@ -83,7 +83,7 @@ entry:
@test6_v1 = internal global { i32, i32 } { i32 42, i32 0 }, align 8
@test6_v2 = global i32 0, align 4
-; CHECK: @test6_v2 = global i32 42, align 4
+; CHECK: @test6_v2 = local_unnamed_addr global i32 42, align 4
define internal void @test6() {
%load = load { i32, i32 }, { i32, i32 }* @test6_v1, align 8
%xv0 = extractvalue { i32, i32 } %load, 0
diff --git a/test/Transforms/GlobalOpt/ctor-list-opt-inbounds.ll b/test/Transforms/GlobalOpt/ctor-list-opt-inbounds.ll
index 63dc783ae196..b969345710d5 100644
--- a/test/Transforms/GlobalOpt/ctor-list-opt-inbounds.ll
+++ b/test/Transforms/GlobalOpt/ctor-list-opt-inbounds.ll
@@ -3,8 +3,8 @@
; Don't get fooled by the inbounds keyword; it doesn't change
; the computed address.
-; CHECK: @H = global i32 2
-; CHECK: @I = global i32 2
+; CHECK: @H = local_unnamed_addr global i32 2
+; CHECK: @I = local_unnamed_addr global i32 2
@llvm.global_ctors = appending global [1 x { i32, void ()* }] [ { i32, void ()* } { i32 65535, void ()* @CTOR } ]
@addr = external global i32
diff --git a/test/Transforms/GlobalOpt/deadglobal-2.ll b/test/Transforms/GlobalOpt/deadglobal-2.ll
index 6b8717ee1831..92c0f994f367 100644
--- a/test/Transforms/GlobalOpt/deadglobal-2.ll
+++ b/test/Transforms/GlobalOpt/deadglobal-2.ll
@@ -1,7 +1,7 @@
-; RUN: opt < %s -globalopt -S | not grep internal
-
+; RUN: opt < %s -globalopt -S | FileCheck %s
; This is a harder case to delete as the GEP has a variable index.
+; CHECK-NOT: internal
@G = internal global [4 x i32] zeroinitializer
define void @foo(i32 %X) {
diff --git a/test/Transforms/GlobalOpt/globalsra-partial.ll b/test/Transforms/GlobalOpt/globalsra-partial.ll
index 9abcce6f4931..6f24128c42b5 100644
--- a/test/Transforms/GlobalOpt/globalsra-partial.ll
+++ b/test/Transforms/GlobalOpt/globalsra-partial.ll
@@ -1,10 +1,11 @@
; In this case, the global can only be broken up by one level.
-; RUN: opt < %s -globalopt -S | not grep 12345
+; RUN: opt < %s -globalopt -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
@G = internal global { i32, [4 x float] } zeroinitializer ; <{ i32, [4 x float] }*> [#uses=3]
+; CHECK-NOT: 12345
define void @onlystore() {
store i32 12345, i32* getelementptr ({ i32, [4 x float] }, { i32, [4 x float] }* @G, i32 0, i32 0)
ret void
@@ -21,4 +22,3 @@ define float @readval(i32 %i) {
%V = load float, float* %Ptr ; <float> [#uses=1]
ret float %V
}
-
diff --git a/test/Transforms/GlobalOpt/globalsra-unknown-index.ll b/test/Transforms/GlobalOpt/globalsra-unknown-index.ll
index 5194b2ca7413..4607373ab2c7 100644
--- a/test/Transforms/GlobalOpt/globalsra-unknown-index.ll
+++ b/test/Transforms/GlobalOpt/globalsra-unknown-index.ll
@@ -1,12 +1,10 @@
-; RUN: opt < %s -globalopt -S > %t
-; RUN: grep "@Y = internal unnamed_addr global \[3 x [%]struct.X\] zeroinitializer" %t
-; RUN: grep load %t | count 6
-; RUN: grep "add i32 [%]a, [%]b" %t | count 3
+; RUN: opt < %s -globalopt -S | FileCheck %s
; globalopt should not sra the global, because it can't see the index.
%struct.X = type { [3 x i32], [3 x i32] }
+; CHECK: @Y = internal unnamed_addr global [3 x %struct.X] zeroinitializer
@Y = internal global [3 x %struct.X] zeroinitializer
@addr = external global i8
@@ -15,6 +13,11 @@ define void @frob() {
store i32 1, i32* getelementptr inbounds ([3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 ptrtoint (i8* @addr to i64)), align 4
ret void
}
+
+; CHECK-LABEL: @borf
+; CHECK: %a = load
+; CHECK: %b = load
+; CHECK: add i32 %a, %b
define i32 @borf(i64 %i, i64 %j) {
%p = getelementptr inbounds [3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 0, i32 0, i64 0
%a = load i32, i32* %p
@@ -23,6 +26,11 @@ define i32 @borf(i64 %i, i64 %j) {
%c = add i32 %a, %b
ret i32 %c
}
+
+; CHECK-LABEL: @borg
+; CHECK: %a = load
+; CHECK: %b = load
+; CHECK: add i32 %a, %b
define i32 @borg(i64 %i, i64 %j) {
%p = getelementptr inbounds [3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 1, i32 0, i64 1
%a = load i32, i32* %p
@@ -31,6 +39,11 @@ define i32 @borg(i64 %i, i64 %j) {
%c = add i32 %a, %b
ret i32 %c
}
+
+; CHECK-LABEL: @borh
+; CHECK: %a = load
+; CHECK: %b = load
+; CHECK: add i32 %a, %b
define i32 @borh(i64 %i, i64 %j) {
%p = getelementptr inbounds [3 x %struct.X], [3 x %struct.X]* @Y, i64 0, i64 2, i32 0, i64 2
%a = load i32, i32* %p
diff --git a/test/Transforms/GlobalOpt/globalsra.ll b/test/Transforms/GlobalOpt/globalsra.ll
index d593fcd9e15e..8098ec84b2e7 100644
--- a/test/Transforms/GlobalOpt/globalsra.ll
+++ b/test/Transforms/GlobalOpt/globalsra.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -globalopt -S | not grep global
+; RUN: opt < %s -globalopt -S | FileCheck %s
+; CHECK-NOT: global
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
@G = internal global { i32, float, { double } } {
@@ -22,3 +23,23 @@ define double @constantize() {
ret double %X
}
+@G2 = internal constant { i32, float, { double } } {
+ i32 1,
+ float 1.000000e+00,
+ { double } { double 1.727000e+01 } } ; <{ i32, float, { double } }*> [#uses=3]
+
+define void @onlystore2() {
+ store i32 123, i32* getelementptr ({ i32, float, { double } }, { i32, float, { double } }* @G2, i32 0, i32 0)
+ ret void
+}
+
+define float @storeinit2() {
+ store float 1.000000e+00, float* getelementptr ({ i32, float, { double } }, { i32, float, { double } }* @G2, i32 0, i32 1)
+ %X = load float, float* getelementptr ({ i32, float, { double } }, { i32, float, { double } }* @G2, i32 0, i32 1) ; <float> [#uses=1]
+ ret float %X
+}
+
+define double @constantize2() {
+ %X = load double, double* getelementptr ({ i32, float, { double } }, { i32, float, { double } }* @G2, i32 0, i32 2, i32 0) ; <double> [#uses=1]
+ ret double %X
+}
diff --git a/test/Transforms/GlobalOpt/heap-sra-phi.ll b/test/Transforms/GlobalOpt/heap-sra-phi.ll
index bf9ca7b5311e..995f9dc142c7 100644
--- a/test/Transforms/GlobalOpt/heap-sra-phi.ll
+++ b/test/Transforms/GlobalOpt/heap-sra-phi.ll
@@ -1,5 +1,6 @@
-; RUN: opt < %s -globalopt -S | grep "tmp.f1 = phi i32. "
-; RUN: opt < %s -globalopt -S | grep "tmp.f0 = phi i32. "
+; RUN: opt < %s -globalopt -S | FileCheck %s
+; CHECK: tmp.f1 = phi i32*
+; CHECK: tmp.f0 = phi i32*
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
%struct.foo = type { i32, i32 }
diff --git a/test/Transforms/GlobalOpt/invariant.group.barrier.ll b/test/Transforms/GlobalOpt/invariant.group.barrier.ll
index 54d91d408019..80cd411afdc7 100644
--- a/test/Transforms/GlobalOpt/invariant.group.barrier.ll
+++ b/test/Transforms/GlobalOpt/invariant.group.barrier.ll
@@ -2,8 +2,8 @@
; This test is hint, what could globalOpt optimize and what it can't
; FIXME: @tmp and @tmp2 can be safely set to 42
-; CHECK: @tmp = global i32 0
-; CHECK: @tmp2 = global i32 0
+; CHECK: @tmp = local_unnamed_addr global i32 0
+; CHECK: @tmp2 = local_unnamed_addr global i32 0
; CHECK: @tmp3 = global i32 0
@tmp = global i32 0
diff --git a/test/Transforms/GlobalOpt/invoke.ll b/test/Transforms/GlobalOpt/invoke.ll
index a0f7890a985b..a30199397351 100644
--- a/test/Transforms/GlobalOpt/invoke.ll
+++ b/test/Transforms/GlobalOpt/invoke.ll
@@ -2,7 +2,7 @@
; rdar://11022897
; Globalopt should be able to evaluate an invoke.
-; CHECK: @tmp = global i32 1
+; CHECK: @tmp = local_unnamed_addr global i32 1
@llvm.global_ctors = appending global [1 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_a }]
@tmp = global i32 0
diff --git a/test/Transforms/GlobalOpt/iterate.ll b/test/Transforms/GlobalOpt/iterate.ll
index f7f34fc39d9a..8c6543b685d3 100644
--- a/test/Transforms/GlobalOpt/iterate.ll
+++ b/test/Transforms/GlobalOpt/iterate.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -globalopt -S | not grep %G
+; RUN: opt < %s -globalopt -S | FileCheck %s
+; CHECK-NOT: %G
@G = internal global i32 0 ; <i32*> [#uses=1]
@H = internal global { i32* } { i32* @G } ; <{ i32* }*> [#uses=1]
@@ -8,4 +9,3 @@ define i32 @loadg() {
%GV = load i32, i32* %G ; <i32> [#uses=1]
ret i32 %GV
}
-
diff --git a/test/Transforms/GlobalOpt/memcpy.ll b/test/Transforms/GlobalOpt/memcpy.ll
index 437142e8c6e1..e47497114b23 100644
--- a/test/Transforms/GlobalOpt/memcpy.ll
+++ b/test/Transforms/GlobalOpt/memcpy.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -globalopt -S | \
-; RUN: grep "G1 = internal unnamed_addr constant"
+; RUN: opt < %s -globalopt -S | FileCheck %s
+; CHECK: G1 = internal unnamed_addr constant
@G1 = internal global [58 x i8] c"asdlfkajsdlfkajsd;lfkajds;lfkjasd;flkajsd;lkfja;sdlkfjasd\00" ; <[58 x i8]*> [#uses=1]
diff --git a/test/Transforms/GlobalOpt/phi-select.ll b/test/Transforms/GlobalOpt/phi-select.ll
index e5cb9819ca0a..86b017c9cb34 100644
--- a/test/Transforms/GlobalOpt/phi-select.ll
+++ b/test/Transforms/GlobalOpt/phi-select.ll
@@ -1,7 +1,8 @@
; Test that PHI nodes and select instructions do not necessarily make stuff
; non-constant.
-; RUN: opt < %s -globalopt -S | not grep global
+; RUN: opt < %s -globalopt -S | FileCheck %s
+; CHECK-NOT: global
@X = internal global i32 4 ; <i32*> [#uses=2]
@Y = internal global i32 5 ; <i32*> [#uses=2]
@@ -24,8 +25,3 @@ Cont: ; preds = %T, %0
%V = load i32, i32* %P ; <i32> [#uses=1]
ret i32 %V
}
-
-
-
-
-
diff --git a/test/Transforms/GlobalOpt/pr21191.ll b/test/Transforms/GlobalOpt/pr21191.ll
index 34e15cb3404a..9e201b888be3 100644
--- a/test/Transforms/GlobalOpt/pr21191.ll
+++ b/test/Transforms/GlobalOpt/pr21191.ll
@@ -6,12 +6,12 @@ $c = comdat any
define linkonce_odr void @foo() comdat($c) {
ret void
}
-; CHECK: define linkonce_odr void @foo() comdat($c)
+; CHECK: define linkonce_odr void @foo() local_unnamed_addr comdat($c)
define linkonce_odr void @bar() comdat($c) {
ret void
}
-; CHECK: define linkonce_odr void @bar() comdat($c)
+; CHECK: define linkonce_odr void @bar() local_unnamed_addr comdat($c)
define void @zed() {
call void @foo()
diff --git a/test/Transforms/GlobalOpt/storepointer-compare.ll b/test/Transforms/GlobalOpt/storepointer-compare.ll
index bcf7cafd831c..969443569c20 100644
--- a/test/Transforms/GlobalOpt/storepointer-compare.ll
+++ b/test/Transforms/GlobalOpt/storepointer-compare.ll
@@ -1,5 +1,5 @@
-; RUN: opt < %s -globalopt -S | \
-; RUN: grep "call void @Actual"
+; RUN: opt < %s -globalopt -S | FileCheck %s
+; CHECK: call void @Actual
; Check that a comparison does not prevent an indirect call from being made
; direct. The global will still remain, but indirect call elim is still good.
@@ -27,4 +27,3 @@ DoCall: ; preds = %0
isNull: ; preds = %0
ret void
}
-
diff --git a/test/Transforms/GlobalOpt/storepointer.ll b/test/Transforms/GlobalOpt/storepointer.ll
index 9003004d431d..8edaa64a62be 100644
--- a/test/Transforms/GlobalOpt/storepointer.ll
+++ b/test/Transforms/GlobalOpt/storepointer.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -globalopt -S | not grep global
+; RUN: opt < %s -globalopt -S | FileCheck %s
+; CHECK-NOT: global
@G = internal global void ()* null ; <void ()**> [#uses=2]
@@ -16,4 +17,3 @@ define void @doit() {
call void %FP( )
ret void
}
-
diff --git a/test/Transforms/GlobalOpt/trivialstore.ll b/test/Transforms/GlobalOpt/trivialstore.ll
index f907d8382d57..9a490879a41f 100644
--- a/test/Transforms/GlobalOpt/trivialstore.ll
+++ b/test/Transforms/GlobalOpt/trivialstore.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -globalopt -S | not grep G
+; RUN: opt < %s -globalopt -S | FileCheck %s
+; CHECK-NOT: G
@G = internal global i32 17 ; <i32*> [#uses=3]
@@ -16,4 +17,3 @@ define internal void @dead() {
store i32 123, i32* @G
ret void
}
-
diff --git a/test/Transforms/GlobalOpt/undef-init.ll b/test/Transforms/GlobalOpt/undef-init.ll
index d304821225ea..71fad343e56d 100644
--- a/test/Transforms/GlobalOpt/undef-init.ll
+++ b/test/Transforms/GlobalOpt/undef-init.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -globalopt -S | not grep store
+; RUN: opt < %s -globalopt -S | FileCheck %s
+; CHECK-NOT: store
@llvm.global_ctors = appending global [1 x { i32, void ()* }] [ { i32, void ()* } { i32 65535, void ()* @_GLOBAL__I__Z3foov } ] ; <[1 x { i32, void ()* }]*> [#uses=0]
@X.0 = internal global i32 undef ; <i32*> [#uses=2]
@@ -14,4 +15,3 @@ entry:
store i32 1, i32* @X.0
ret void
}
-
diff --git a/test/Transforms/GlobalOpt/unnamed-addr.ll b/test/Transforms/GlobalOpt/unnamed-addr.ll
index de436c62a347..9f11f1bd92c2 100644
--- a/test/Transforms/GlobalOpt/unnamed-addr.ll
+++ b/test/Transforms/GlobalOpt/unnamed-addr.ll
@@ -10,7 +10,7 @@
; CHECK: @b = internal global i32 0, align 4
; CHECK: @c = internal unnamed_addr global i32 0, align 4
; CHECK: @d = internal unnamed_addr constant [4 x i8] c"foo\00", align 1
-; CHECK: @e = linkonce_odr global i32 0
+; CHECK: @e = linkonce_odr local_unnamed_addr global i32 0
; CHECK: define internal fastcc void @used_internal() unnamed_addr {
define internal void @used_internal() {
diff --git a/test/Transforms/GuardWidening/basic.ll b/test/Transforms/GuardWidening/basic.ll
new file mode 100644
index 000000000000..5b746a5d7ddd
--- /dev/null
+++ b/test/Transforms/GuardWidening/basic.ll
@@ -0,0 +1,381 @@
+; RUN: opt -S -guard-widening < %s | FileCheck %s
+; RUN: opt -S -passes=guard-widening < %s | FileCheck %s
+
+declare void @llvm.experimental.guard(i1,...)
+
+; Basic test case: we wide the first check to check both the
+; conditions.
+define void @f_0(i1 %cond_0, i1 %cond_1) {
+; CHECK-LABEL: @f_0(
+entry:
+; CHECK: %wide.chk = and i1 %cond_0, %cond_1
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ]
+; CHECK: ret void
+
+ call void(i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ]
+ call void(i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
+ ret void
+}
+
+; Same as @f_0, but with using a more general notion of postdominance.
+define void @f_1(i1 %cond_0, i1 %cond_1) {
+; CHECK-LABEL: @f_1(
+entry:
+; CHECK: %wide.chk = and i1 %cond_0, %cond_1
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ]
+; CHECK: br i1 undef, label %left, label %right
+
+ call void(i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ]
+ br i1 undef, label %left, label %right
+
+left:
+ br label %merge
+
+right:
+ br label %merge
+
+merge:
+; CHECK: merge:
+; CHECK-NOT: call void (i1, ...) @llvm.experimental.guard(
+; CHECK: ret void
+ call void(i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
+ ret void
+}
+
+; Like @f_1, but we have some code we need to hoist before we can
+; widen a dominanting check.
+define void @f_2(i32 %a, i32 %b) {
+; CHECK-LABEL: @f_2(
+entry:
+; CHECK: %cond_0 = icmp ult i32 %a, 10
+; CHECK: %cond_1 = icmp ult i32 %b, 10
+; CHECK: %wide.chk = and i1 %cond_0, %cond_1
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ]
+; CHECK: br i1 undef, label %left, label %right
+
+ %cond_0 = icmp ult i32 %a, 10
+ call void(i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ]
+ br i1 undef, label %left, label %right
+
+left:
+ br label %merge
+
+right:
+ br label %merge
+
+merge:
+ %cond_1 = icmp ult i32 %b, 10
+ call void(i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
+ ret void
+}
+
+; Negative test: don't hoist stuff out of control flow
+; indiscriminately, since that can make us do more work than needed.
+define void @f_3(i32 %a, i32 %b) {
+; CHECK-LABEL: @f_3(
+entry:
+; CHECK: %cond_0 = icmp ult i32 %a, 10
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ]
+; CHECK: br i1 undef, label %left, label %right
+
+ %cond_0 = icmp ult i32 %a, 10
+ call void(i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ]
+ br i1 undef, label %left, label %right
+
+left:
+; CHECK: left:
+; CHECK: %cond_1 = icmp ult i32 %b, 10
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
+; CHECK: ret void
+
+ %cond_1 = icmp ult i32 %b, 10
+ call void(i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
+ ret void
+
+right:
+ ret void
+}
+
+; But hoisting out of control flow is fine if it makes a loop computed
+; condition loop invariant. This behavior may require some tuning in
+; the future.
+define void @f_4(i32 %a, i32 %b) {
+; CHECK-LABEL: @f_4(
+entry:
+; CHECK: %cond_0 = icmp ult i32 %a, 10
+; CHECK: %cond_1 = icmp ult i32 %b, 10
+; CHECK: %wide.chk = and i1 %cond_0, %cond_1
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ]
+; CHECK: br i1 undef, label %loop, label %leave
+
+ %cond_0 = icmp ult i32 %a, 10
+ call void(i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ]
+ br i1 undef, label %loop, label %leave
+
+loop:
+ %cond_1 = icmp ult i32 %b, 10
+ call void(i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
+ br i1 undef, label %loop, label %leave
+
+leave:
+ ret void
+}
+
+; Hoisting out of control flow is also fine if we can widen the
+; dominating check without doing any extra work.
+define void @f_5(i32 %a) {
+; CHECK-LABEL: @f_5(
+entry:
+; CHECK: %wide.chk = icmp uge i32 %a, 11
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ]
+; CHECK: br i1 undef, label %left, label %right
+
+ %cond_0 = icmp ugt i32 %a, 7
+ call void(i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ]
+ br i1 undef, label %left, label %right
+
+left:
+ %cond_1 = icmp ugt i32 %a, 10
+ call void(i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
+ ret void
+
+right:
+ ret void
+}
+
+; Negative test: the load from %a can be safely speculated to before
+; the first guard, but there is no guarantee that it will produce the
+; same value.
+define void @f_6(i1* dereferenceable(32) %a, i1* %b, i1 %unknown) {
+; CHECK-LABEL: @f_6(
+; CHECK: call void (i1, ...) @llvm.experimental.guard(
+; CHECK: call void (i1, ...) @llvm.experimental.guard(
+; CHECK: ret void
+entry:
+ %cond_0 = load i1, i1* %a
+ call void(i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ]
+ store i1 %unknown, i1* %b
+ %cond_1 = load i1, i1* %a
+ call void(i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
+ ret void
+}
+
+; All else equal, we try to widen the earliest guard we can. This
+; heuristic can use some tuning.
+define void @f_7(i32 %a, i1* %cond_buf) {
+; CHECK-LABEL: @f_7(
+entry:
+; CHECK: %cond_1 = load volatile i1, i1* %cond_buf
+; CHECK: %cond_3 = icmp ult i32 %a, 7
+; CHECK: %wide.chk = and i1 %cond_1, %cond_3
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ]
+; CHECK: %cond_2 = load volatile i1, i1* %cond_buf
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %cond_2) [ "deopt"() ]
+; CHECK: br i1 undef, label %left, label %right
+
+ %cond_1 = load volatile i1, i1* %cond_buf
+ call void(i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
+ %cond_2 = load volatile i1, i1* %cond_buf
+ call void(i1, ...) @llvm.experimental.guard(i1 %cond_2) [ "deopt"() ]
+ br i1 undef, label %left, label %right
+
+left:
+ %cond_3 = icmp ult i32 %a, 7
+ call void(i1, ...) @llvm.experimental.guard(i1 %cond_3) [ "deopt"() ]
+ br label %left
+
+right:
+ ret void
+}
+
+; In this case the earliest dominating guard is in a loop, and we
+; don't want to put extra work in there. This heuristic can use some
+; tuning.
+define void @f_8(i32 %a, i1 %cond_1, i1 %cond_2) {
+; CHECK-LABEL: @f_8(
+entry:
+ br label %loop
+
+loop:
+ call void(i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
+ br i1 undef, label %loop, label %leave
+
+leave:
+; CHECK: leave:
+; CHECK: %cond_3 = icmp ult i32 %a, 7
+; CHECK: %wide.chk = and i1 %cond_2, %cond_3
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ]
+; CHECK: br i1 undef, label %loop2, label %leave2
+
+ call void(i1, ...) @llvm.experimental.guard(i1 %cond_2) [ "deopt"() ]
+ br i1 undef, label %loop2, label %leave2
+
+loop2:
+ %cond_3 = icmp ult i32 %a, 7
+ call void(i1, ...) @llvm.experimental.guard(i1 %cond_3) [ "deopt"() ]
+ br label %loop2
+
+leave2:
+ ret void
+}
+
+; In cases like these where there isn't any "obviously profitable"
+; widening sites, we refuse to do anything.
+define void @f_9(i32 %a, i1 %cond_0, i1 %cond_1) {
+; CHECK-LABEL: @f_9(
+entry:
+ br label %first_loop
+
+first_loop:
+; CHECK: first_loop:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ]
+; CHECK: br i1 undef, label %first_loop, label %second_loop
+
+ call void(i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ]
+ br i1 undef, label %first_loop, label %second_loop
+
+second_loop:
+; CHECK: second_loop:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
+; CHECK: br label %second_loop
+
+ call void(i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
+ br label %second_loop
+}
+
+; Same situation as in @f_9: no "obviously profitable" widening sites,
+; so we refuse to do anything.
+define void @f_10(i32 %a, i1 %cond_0, i1 %cond_1) {
+; CHECK-LABEL: @f_10(
+entry:
+ br label %loop
+
+loop:
+; CHECK: loop:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ]
+; CHECK: br i1 undef, label %loop, label %no_loop
+
+ call void(i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ]
+ br i1 undef, label %loop, label %no_loop
+
+no_loop:
+; CHECK: no_loop:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
+; CHECK: ret void
+ call void(i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
+ ret void
+}
+
+; With guards in loops, we're okay hoisting out the guard into the
+; containing loop.
+define void @f_11(i32 %a, i1 %cond_0, i1 %cond_1) {
+; CHECK-LABEL: @f_11(
+entry:
+ br label %inner
+
+inner:
+; CHECK: inner:
+; CHECK: %wide.chk = and i1 %cond_0, %cond_1
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ]
+; CHECK: br i1 undef, label %inner, label %outer
+
+ call void(i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ]
+ br i1 undef, label %inner, label %outer
+
+outer:
+ call void(i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
+ br label %inner
+}
+
+; Checks that we are adequately guarded against exponential-time
+; behavior when hoisting code.
+define void @f_12(i32 %a0) {
+; CHECK-LABEL: @f_12
+
+; Eliding the earlier 29 multiplications for brevity
+; CHECK: %a30 = mul i32 %a29, %a29
+; CHECK-NEXT: %cond = trunc i32 %a30 to i1
+; CHECK-NEXT: %wide.chk = and i1 true, %cond
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ]
+; CHECK-NEXT: ret void
+
+entry:
+ call void(i1, ...) @llvm.experimental.guard(i1 true) [ "deopt"() ]
+ %a1 = mul i32 %a0, %a0
+ %a2 = mul i32 %a1, %a1
+ %a3 = mul i32 %a2, %a2
+ %a4 = mul i32 %a3, %a3
+ %a5 = mul i32 %a4, %a4
+ %a6 = mul i32 %a5, %a5
+ %a7 = mul i32 %a6, %a6
+ %a8 = mul i32 %a7, %a7
+ %a9 = mul i32 %a8, %a8
+ %a10 = mul i32 %a9, %a9
+ %a11 = mul i32 %a10, %a10
+ %a12 = mul i32 %a11, %a11
+ %a13 = mul i32 %a12, %a12
+ %a14 = mul i32 %a13, %a13
+ %a15 = mul i32 %a14, %a14
+ %a16 = mul i32 %a15, %a15
+ %a17 = mul i32 %a16, %a16
+ %a18 = mul i32 %a17, %a17
+ %a19 = mul i32 %a18, %a18
+ %a20 = mul i32 %a19, %a19
+ %a21 = mul i32 %a20, %a20
+ %a22 = mul i32 %a21, %a21
+ %a23 = mul i32 %a22, %a22
+ %a24 = mul i32 %a23, %a23
+ %a25 = mul i32 %a24, %a24
+ %a26 = mul i32 %a25, %a25
+ %a27 = mul i32 %a26, %a26
+ %a28 = mul i32 %a27, %a27
+ %a29 = mul i32 %a28, %a28
+ %a30 = mul i32 %a29, %a29
+ %cond = trunc i32 %a30 to i1
+ call void(i1, ...) @llvm.experimental.guard(i1 %cond) [ "deopt"() ]
+ ret void
+}
+
+define void @f_13(i32 %a) {
+; CHECK-LABEL: @f_13(
+entry:
+; CHECK: %wide.chk = icmp ult i32 %a, 10
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk) [ "deopt"() ]
+; CHECK: br i1 undef, label %left, label %right
+
+ %cond_0 = icmp ult i32 %a, 14
+ call void(i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ]
+ br i1 undef, label %left, label %right
+
+left:
+ %cond_1 = icmp slt i32 %a, 10
+ call void(i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
+ ret void
+
+right:
+ ret void
+}
+
+define void @f_14(i32 %a) {
+; CHECK-LABEL: @f_14(
+entry:
+; CHECK: %cond_0 = icmp ult i32 %a, 14
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ]
+; CHECK: br i1 undef, label %left, label %right
+
+ %cond_0 = icmp ult i32 %a, 14
+ call void(i1, ...) @llvm.experimental.guard(i1 %cond_0) [ "deopt"() ]
+ br i1 undef, label %left, label %right
+
+left:
+; CHECK: left:
+; CHECK: %cond_1 = icmp sgt i32 %a, 10
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
+
+ %cond_1 = icmp sgt i32 %a, 10
+ call void(i1, ...) @llvm.experimental.guard(i1 %cond_1) [ "deopt"() ]
+ ret void
+
+right:
+ ret void
+}
diff --git a/test/Transforms/GuardWidening/range-check-merging.ll b/test/Transforms/GuardWidening/range-check-merging.ll
new file mode 100644
index 000000000000..6440dadce1e8
--- /dev/null
+++ b/test/Transforms/GuardWidening/range-check-merging.ll
@@ -0,0 +1,235 @@
+; RUN: opt -S -guard-widening < %s | FileCheck %s
+
+declare void @llvm.experimental.guard(i1,...)
+
+define void @f_0(i32 %x, i32* %length_buf) {
+; CHECK-LABEL: @f_0(
+; CHECK-NOT: @llvm.experimental.guard
+; CHECK: %wide.chk2 = and i1 %chk3, %chk0
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk2) [ "deopt"() ]
+; CHECK: ret void
+entry:
+ %length = load i32, i32* %length_buf, !range !0
+ %chk0 = icmp ult i32 %x, %length
+ call void(i1, ...) @llvm.experimental.guard(i1 %chk0) [ "deopt"() ]
+
+ %x.inc1 = add i32 %x, 1
+ %chk1 = icmp ult i32 %x.inc1, %length
+ call void(i1, ...) @llvm.experimental.guard(i1 %chk1) [ "deopt"() ]
+
+ %x.inc2 = add i32 %x, 2
+ %chk2 = icmp ult i32 %x.inc2, %length
+ call void(i1, ...) @llvm.experimental.guard(i1 %chk2) [ "deopt"() ]
+
+ %x.inc3 = add i32 %x, 3
+ %chk3 = icmp ult i32 %x.inc3, %length
+ call void(i1, ...) @llvm.experimental.guard(i1 %chk3) [ "deopt"() ]
+ ret void
+}
+
+define void @f_1(i32 %x, i32* %length_buf) {
+; CHECK-LABEL: @f_1(
+; CHECK-NOT: llvm.experimental.guard
+; CHECK: %wide.chk2 = and i1 %chk3, %chk0
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk2) [ "deopt"() ]
+; CHECK: ret void
+entry:
+ %length = load i32, i32* %length_buf, !range !0
+ %chk0 = icmp ult i32 %x, %length
+ call void(i1, ...) @llvm.experimental.guard(i1 %chk0) [ "deopt"() ]
+
+ %x.inc1 = add i32 %x, 1
+ %chk1 = icmp ult i32 %x.inc1, %length
+ call void(i1, ...) @llvm.experimental.guard(i1 %chk1) [ "deopt"() ]
+
+ %x.inc2 = add i32 %x.inc1, 2
+ %chk2 = icmp ult i32 %x.inc2, %length
+ call void(i1, ...) @llvm.experimental.guard(i1 %chk2) [ "deopt"() ]
+
+ %x.inc3 = add i32 %x.inc2, 3
+ %chk3 = icmp ult i32 %x.inc3, %length
+ call void(i1, ...) @llvm.experimental.guard(i1 %chk3) [ "deopt"() ]
+ ret void
+}
+
+define void @f_2(i32 %a, i32* %length_buf) {
+; CHECK-LABEL: @f_2(
+; CHECK-NOT: llvm.experimental.guard
+; CHECK: %wide.chk2 = and i1 %chk3, %chk0
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk2) [ "deopt"() ]
+; CHECK: ret void
+entry:
+ %x = and i32 %a, 4294967040 ;; 4294967040 == 0xffffff00
+ %length = load i32, i32* %length_buf, !range !0
+ %chk0 = icmp ult i32 %x, %length
+ call void(i1, ...) @llvm.experimental.guard(i1 %chk0) [ "deopt"() ]
+
+ %x.inc1 = or i32 %x, 1
+ %chk1 = icmp ult i32 %x.inc1, %length
+ call void(i1, ...) @llvm.experimental.guard(i1 %chk1) [ "deopt"() ]
+
+ %x.inc2 = or i32 %x, 2
+ %chk2 = icmp ult i32 %x.inc2, %length
+ call void(i1, ...) @llvm.experimental.guard(i1 %chk2) [ "deopt"() ]
+
+ %x.inc3 = or i32 %x, 3
+ %chk3 = icmp ult i32 %x.inc3, %length
+ call void(i1, ...) @llvm.experimental.guard(i1 %chk3) [ "deopt"() ]
+ ret void
+}
+
+define void @f_3(i32 %a, i32* %length_buf) {
+; CHECK-LABEL: @f_3(
+; CHECK-NOT: llvm.experimental.guard
+; CHECK: %wide.chk2 = and i1 %chk3, %chk0
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk2) [ "deopt"() ]
+; CHECK: ret void
+entry:
+ %x = and i32 %a, 4294967040 ;; 4294967040 == 0xffffff00
+ %length = load i32, i32* %length_buf, !range !0
+ %chk0 = icmp ult i32 %x, %length
+ call void(i1, ...) @llvm.experimental.guard(i1 %chk0) [ "deopt"() ]
+
+ %x.inc1 = add i32 %x, 1
+ %chk1 = icmp ult i32 %x.inc1, %length
+ call void(i1, ...) @llvm.experimental.guard(i1 %chk1) [ "deopt"() ]
+
+ %x.inc2 = or i32 %x.inc1, 2
+ %chk2 = icmp ult i32 %x.inc2, %length
+ call void(i1, ...) @llvm.experimental.guard(i1 %chk2) [ "deopt"() ]
+
+ %x.inc3 = add i32 %x.inc2, 3
+ %chk3 = icmp ult i32 %x.inc3, %length
+ call void(i1, ...) @llvm.experimental.guard(i1 %chk3) [ "deopt"() ]
+ ret void
+}
+
+define void @f_4(i32 %x, i32* %length_buf) {
+; CHECK-LABEL: @f_4(
+; CHECK-NOT: llvm.experimental.guard
+
+; Note: we NOT guarding on "and i1 %chk3, %chk0", that would be incorrect.
+; CHECK: %wide.chk2 = and i1 %chk3, %chk1
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk2) [ "deopt"() ]
+; CHECK: ret void
+entry:
+ %length = load i32, i32* %length_buf, !range !0
+ %chk0 = icmp ult i32 %x, %length
+ call void(i1, ...) @llvm.experimental.guard(i1 %chk0) [ "deopt"() ]
+
+ %x.inc1 = add i32 %x, -1024
+ %chk1 = icmp ult i32 %x.inc1, %length
+ call void(i1, ...) @llvm.experimental.guard(i1 %chk1) [ "deopt"() ]
+
+ %x.inc2 = add i32 %x, 2
+ %chk2 = icmp ult i32 %x.inc2, %length
+ call void(i1, ...) @llvm.experimental.guard(i1 %chk2) [ "deopt"() ]
+
+ %x.inc3 = add i32 %x, 3
+ %chk3 = icmp ult i32 %x.inc3, %length
+ call void(i1, ...) @llvm.experimental.guard(i1 %chk3) [ "deopt"() ]
+ ret void
+}
+
+define void @f_5(i32 %x, i32* %length_buf) {
+; CHECK-LABEL: @f_5(
+; CHECK-NOT: llvm.experimental.guard
+; CHECK: %wide.chk2 = and i1 %chk1, %chk2
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk2) [ "deopt"() ]
+; CHECK: ret void
+entry:
+ %length = load i32, i32* %length_buf, !range !0
+ %chk0 = icmp ult i32 %x, %length
+ call void(i1, ...) @llvm.experimental.guard(i1 %chk0) [ "deopt"() ]
+
+ %x.inc1 = add i32 %x, 1
+ %chk1 = icmp ult i32 %x.inc1, %length
+ call void(i1, ...) @llvm.experimental.guard(i1 %chk1) [ "deopt"() ]
+
+ %x.inc2 = add i32 %x.inc1, -200
+ %chk2 = icmp ult i32 %x.inc2, %length
+ call void(i1, ...) @llvm.experimental.guard(i1 %chk2) [ "deopt"() ]
+
+ %x.inc3 = add i32 %x.inc2, 3
+ %chk3 = icmp ult i32 %x.inc3, %length
+ call void(i1, ...) @llvm.experimental.guard(i1 %chk3) [ "deopt"() ]
+ ret void
+}
+
+
+; Negative test: we can't merge these checks into
+;
+; (%x + -2147483647) u< L && (%x + 3) u< L
+;
+; because if %length == INT_MAX and %x == -3 then
+;
+; (%x + -2147483647) == i32 2147483646 u< L (L is 2147483647)
+; (%x + 3) == 0 u< L
+;
+; But (%x + 2) == -1 is not u< L
+;
+define void @f_6(i32 %x, i32* %length_buf) {
+; CHECK-LABEL: @f_6(
+; CHECK-NOT: llvm.experimental.guard
+; CHECK: %wide.chk = and i1 %chk0, %chk1
+; CHECK: %wide.chk1 = and i1 %wide.chk, %chk2
+; CHECK: %wide.chk2 = and i1 %wide.chk1, %chk3
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 %wide.chk2) [ "deopt"() ]
+entry:
+ %length = load i32, i32* %length_buf, !range !0
+ %chk0 = icmp ult i32 %x, %length
+ call void(i1, ...) @llvm.experimental.guard(i1 %chk0) [ "deopt"() ]
+
+ %x.inc1 = add i32 %x, -2147483647 ;; -2147483647 == (i32 INT_MIN)+1 == -(i32 INT_MAX)
+ %chk1 = icmp ult i32 %x.inc1, %length
+ call void(i1, ...) @llvm.experimental.guard(i1 %chk1) [ "deopt"() ]
+
+ %x.inc2 = add i32 %x, 2
+ %chk2 = icmp ult i32 %x.inc2, %length
+ call void(i1, ...) @llvm.experimental.guard(i1 %chk2) [ "deopt"() ]
+
+ %x.inc3 = add i32 %x, 3
+ %chk3 = icmp ult i32 %x.inc3, %length
+ call void(i1, ...) @llvm.experimental.guard(i1 %chk3) [ "deopt"() ]
+ ret void
+}
+
+
+define void @f_7(i32 %x, i32* %length_buf) {
+; CHECK-LABEL: @f_7(
+
+; CHECK: [[COND_0:%[^ ]+]] = and i1 %chk3.b, %chk0.b
+; CHECK: [[COND_1:%[^ ]+]] = and i1 %chk0.a, [[COND_0]]
+; CHECK: [[COND_2:%[^ ]+]] = and i1 %chk3.a, [[COND_1]]
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 [[COND_2]]) [ "deopt"() ]
+
+entry:
+ %length_a = load volatile i32, i32* %length_buf, !range !0
+ %length_b = load volatile i32, i32* %length_buf, !range !0
+ %chk0.a = icmp ult i32 %x, %length_a
+ %chk0.b = icmp ult i32 %x, %length_b
+ %chk0 = and i1 %chk0.a, %chk0.b
+ call void(i1, ...) @llvm.experimental.guard(i1 %chk0) [ "deopt"() ]
+
+ %x.inc1 = add i32 %x, 1
+ %chk1.a = icmp ult i32 %x.inc1, %length_a
+ %chk1.b = icmp ult i32 %x.inc1, %length_b
+ %chk1 = and i1 %chk1.a, %chk1.b
+ call void(i1, ...) @llvm.experimental.guard(i1 %chk1) [ "deopt"() ]
+
+ %x.inc2 = add i32 %x, 2
+ %chk2.a = icmp ult i32 %x.inc2, %length_a
+ %chk2.b = icmp ult i32 %x.inc2, %length_b
+ %chk2 = and i1 %chk2.a, %chk2.b
+ call void(i1, ...) @llvm.experimental.guard(i1 %chk2) [ "deopt"() ]
+
+ %x.inc3 = add i32 %x, 3
+ %chk3.a = icmp ult i32 %x.inc3, %length_a
+ %chk3.b = icmp ult i32 %x.inc3, %length_b
+ %chk3 = and i1 %chk3.a, %chk3.b
+ call void(i1, ...) @llvm.experimental.guard(i1 %chk3) [ "deopt"() ]
+ ret void
+}
+
+
+!0 = !{i32 0, i32 2147483648}
diff --git a/test/Transforms/IPConstantProp/comdat-ipo.ll b/test/Transforms/IPConstantProp/comdat-ipo.ll
new file mode 100644
index 000000000000..6c4c44c33e25
--- /dev/null
+++ b/test/Transforms/IPConstantProp/comdat-ipo.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s -ipconstprop -S | FileCheck %s
+
+; See PR26774
+
+define i32 @baz() {
+ ret i32 10
+}
+
+; We can const-prop @baz's return value *into* @foo, but cannot
+; constprop @foo's return value into bar.
+
+define linkonce_odr i32 @foo() {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: %val = call i32 @baz()
+; CHECK-NEXT: ret i32 10
+
+ %val = call i32 @baz()
+ ret i32 %val
+}
+
+define i32 @bar() {
+; CHECK-LABEL: @bar(
+; CHECK-NEXT: %val = call i32 @foo()
+; CHECK-NEXT: ret i32 %val
+
+ %val = call i32 @foo()
+ ret i32 %val
+}
diff --git a/test/Transforms/IPConstantProp/fp-bc-icmp-const-fold.ll b/test/Transforms/IPConstantProp/fp-bc-icmp-const-fold.ll
new file mode 100644
index 000000000000..8f97225ca446
--- /dev/null
+++ b/test/Transforms/IPConstantProp/fp-bc-icmp-const-fold.ll
@@ -0,0 +1,52 @@
+; RUN: opt -S -ipsccp < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+define void @test(i32 signext %n) {
+
+; CHECK-LABEL: @test
+
+entry:
+ br i1 undef, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ ret void
+
+if.end: ; preds = %entry
+ br i1 undef, label %if.then2, label %if.end4
+
+if.then2: ; preds = %if.end
+ unreachable
+
+if.end4: ; preds = %if.end
+ %sub.n = select i1 undef, i32 undef, i32 %n
+ switch i32 %sub.n, label %if.else14 [
+ i32 0, label %if.then9
+ i32 1, label %if.then12
+ ]
+
+if.then9: ; preds = %if.end4
+ unreachable
+
+if.then12: ; preds = %if.end4
+ unreachable
+
+if.else14: ; preds = %if.end4
+ br label %do.body
+
+do.body: ; preds = %do.body, %if.else14
+ %scale.0 = phi ppc_fp128 [ 0xM3FF00000000000000000000000000000, %if.else14 ], [ %scale.0, %do.body ]
+ br i1 undef, label %do.body, label %if.then33
+
+if.then33: ; preds = %do.body
+ br i1 undef, label %_ZN5boost4math4signIgEEiRKT_.exit30, label %cond.false.i28
+
+cond.false.i28: ; preds = %if.then33
+ %0 = bitcast ppc_fp128 %scale.0 to i128
+ %tobool.i26 = icmp slt i128 %0, 0
+ br label %_ZN5boost4math4signIgEEiRKT_.exit30
+
+_ZN5boost4math4signIgEEiRKT_.exit30: ; preds = %cond.false.i28, %if.then33
+ unreachable
+}
+
diff --git a/test/Transforms/IPConstantProp/global.ll b/test/Transforms/IPConstantProp/global.ll
index d3ba14658f6e..5e34696d5662 100644
--- a/test/Transforms/IPConstantProp/global.ll
+++ b/test/Transforms/IPConstantProp/global.ll
@@ -1,3 +1,4 @@
+; RUN: opt < %s -S -passes=ipsccp | FileCheck %s
; RUN: opt < %s -S -ipsccp | FileCheck %s
@_ZL6test1g = internal global i32 42, align 4
diff --git a/test/Transforms/IRCE/conjunctive-checks.ll b/test/Transforms/IRCE/conjunctive-checks.ll
new file mode 100644
index 000000000000..be5cfef8f0ea
--- /dev/null
+++ b/test/Transforms/IRCE/conjunctive-checks.ll
@@ -0,0 +1,99 @@
+; RUN: opt -S -irce < %s | FileCheck %s
+
+define void @f_0(i32 *%arr, i32 *%a_len_ptr, i32 %n, i1* %cond_buf) {
+; CHECK-LABEL: @f_0(
+
+; CHECK-LABEL: loop.preheader:
+; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
+; CHECK: [[not_safe_range_end:[^ ]+]] = sub i32 3, %len
+; CHECK: [[not_exit_main_loop_at_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_n]], [[not_safe_range_end]]
+; CHECK: [[not_exit_main_loop_at_hiclamp:[^ ]+]] = select i1 [[not_exit_main_loop_at_hiclamp_cmp]], i32 [[not_n]], i32 [[not_safe_range_end]]
+; CHECK: [[exit_main_loop_at_hiclamp:[^ ]+]] = sub i32 -1, [[not_exit_main_loop_at_hiclamp]]
+; CHECK: [[exit_main_loop_at_loclamp_cmp:[^ ]+]] = icmp sgt i32 [[exit_main_loop_at_hiclamp]], 0
+; CHECK: [[exit_main_loop_at_loclamp:[^ ]+]] = select i1 [[exit_main_loop_at_loclamp_cmp]], i32 [[exit_main_loop_at_hiclamp]], i32 0
+; CHECK: [[enter_main_loop:[^ ]+]] = icmp slt i32 0, [[exit_main_loop_at_loclamp]]
+; CHECK: br i1 [[enter_main_loop]], label %loop, label %main.pseudo.exit
+
+ entry:
+ %len = load i32, i32* %a_len_ptr, !range !0
+ %first.itr.check = icmp sgt i32 %n, 0
+ br i1 %first.itr.check, label %loop, label %exit
+
+ loop:
+ %idx = phi i32 [ 0, %entry ] , [ %idx.next, %in.bounds ]
+ %idx.next = add i32 %idx, 1
+ %idx.for.abc = add i32 %idx, 4
+ %abc.actual = icmp slt i32 %idx.for.abc, %len
+ %cond = load volatile i1, i1* %cond_buf
+ %abc = and i1 %cond, %abc.actual
+ br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1
+
+; CHECK: loop:
+; CHECK: %cond = load volatile i1, i1* %cond_buf
+; CHECK: %abc = and i1 %cond, true
+; CHECK: br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1
+
+ in.bounds:
+ %addr = getelementptr i32, i32* %arr, i32 %idx.for.abc
+ store i32 0, i32* %addr
+ %next = icmp slt i32 %idx.next, %n
+ br i1 %next, label %loop, label %exit
+
+ out.of.bounds:
+ ret void
+
+ exit:
+ ret void
+}
+
+define void @f_1(
+ i32* %arr_a, i32* %a_len_ptr, i32* %arr_b, i32* %b_len_ptr, i32 %n) {
+; CHECK-LABEL: @f_1(
+
+; CHECK-LABEL: loop.preheader:
+; CHECK: [[not_len_b:[^ ]+]] = sub i32 -1, %len.b
+; CHECK: [[not_len_a:[^ ]+]] = sub i32 -1, %len.a
+; CHECK: [[smax_not_len_cond:[^ ]+]] = icmp sgt i32 [[not_len_b]], [[not_len_a]]
+; CHECK: [[smax_not_len:[^ ]+]] = select i1 [[smax_not_len_cond]], i32 [[not_len_b]], i32 [[not_len_a]]
+; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
+; CHECK: [[not_upper_limit_cond_loclamp:[^ ]+]] = icmp sgt i32 [[smax_not_len]], [[not_n]]
+; CHECK: [[not_upper_limit_loclamp:[^ ]+]] = select i1 [[not_upper_limit_cond_loclamp]], i32 [[smax_not_len]], i32 [[not_n]]
+; CHECK: [[upper_limit_loclamp:[^ ]+]] = sub i32 -1, [[not_upper_limit_loclamp]]
+; CHECK: [[upper_limit_cmp:[^ ]+]] = icmp sgt i32 [[upper_limit_loclamp]], 0
+; CHECK: [[upper_limit:[^ ]+]] = select i1 [[upper_limit_cmp]], i32 [[upper_limit_loclamp]], i32 0
+
+ entry:
+ %len.a = load i32, i32* %a_len_ptr, !range !0
+ %len.b = load i32, i32* %b_len_ptr, !range !0
+ %first.itr.check = icmp sgt i32 %n, 0
+ br i1 %first.itr.check, label %loop, label %exit
+
+ loop:
+ %idx = phi i32 [ 0, %entry ] , [ %idx.next, %in.bounds ]
+ %idx.next = add i32 %idx, 1
+ %abc.a = icmp slt i32 %idx, %len.a
+ %abc.b = icmp slt i32 %idx, %len.b
+ %abc = and i1 %abc.a, %abc.b
+ br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1
+
+; CHECK: loop:
+; CHECK: %abc = and i1 true, true
+; CHECK: br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1
+
+ in.bounds:
+ %addr.a = getelementptr i32, i32* %arr_a, i32 %idx
+ store i32 0, i32* %addr.a
+ %addr.b = getelementptr i32, i32* %arr_b, i32 %idx
+ store i32 -1, i32* %addr.b
+ %next = icmp slt i32 %idx.next, %n
+ br i1 %next, label %loop, label %exit
+
+ out.of.bounds:
+ ret void
+
+ exit:
+ ret void
+}
+
+!0 = !{i32 0, i32 2147483647}
+!1 = !{!"branch_weights", i32 64, i32 4}
diff --git a/test/Transforms/IRCE/decrementing-loop.ll b/test/Transforms/IRCE/decrementing-loop.ll
index 6ff3e76c7546..a3a03b182a50 100644
--- a/test/Transforms/IRCE/decrementing-loop.ll
+++ b/test/Transforms/IRCE/decrementing-loop.ll
@@ -28,7 +28,6 @@ define void @decrementing_loop(i32 *%arr, i32 *%a_len_ptr, i32 %n) {
ret void
; CHECK: loop.preheader:
-; CHECK: [[indvar_start:[^ ]+]] = add i32 %n, -1
; CHECK: [[not_len:[^ ]+]] = sub i32 -1, %len
; CHECK: [[not_n:[^ ]+]] = sub i32 -1, %n
; CHECK: [[not_len_hiclamp_cmp:[^ ]+]] = icmp sgt i32 [[not_len]], [[not_n]]
diff --git a/test/Transforms/IRCE/only-lower-check.ll b/test/Transforms/IRCE/only-lower-check.ll
index 69abc89c1a66..428c3aabdec8 100644
--- a/test/Transforms/IRCE/only-lower-check.ll
+++ b/test/Transforms/IRCE/only-lower-check.ll
@@ -4,7 +4,7 @@
; CHECK-NEXT: InductiveRangeCheck:
; CHECK-NEXT: Kind: RANGE_CHECK_LOWER
; CHECK-NEXT: Offset: (-1 + %n) Scale: -1 Length: (null)
-; CHECK-NEXT: Branch: br i1 %abc, label %in.bounds, label %out.of.bounds
+; CHECK-NEXT: CheckUse: br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1 Operand: 0
; CHECK-NEXT: irce: in function only_lower_check: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
define void @only_lower_check(i32 *%arr, i32 *%a_len_ptr, i32 %n) {
diff --git a/test/Transforms/IRCE/only-upper-check.ll b/test/Transforms/IRCE/only-upper-check.ll
index dda3f3f6dd87..8e3e1ffe99b1 100644
--- a/test/Transforms/IRCE/only-upper-check.ll
+++ b/test/Transforms/IRCE/only-upper-check.ll
@@ -4,7 +4,7 @@
; CHECK-NEXT:InductiveRangeCheck:
; CHECK-NEXT: Kind: RANGE_CHECK_UPPER
; CHECK-NEXT: Offset: %offset Scale: 1 Length: %len = load i32, i32* %a_len_ptr, !range !0
-; CHECK-NEXT: Branch: br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1
+; CHECK-NEXT: CheckUse: br i1 %abc, label %in.bounds, label %out.of.bounds, !prof !1 Operand: 0
; CHECK-NEXT: irce: in function incrementing: constrained Loop at depth 1 containing: %loop<header><exiting>,%in.bounds<latch><exiting>
define void @incrementing(i32 *%arr, i32 *%a_len_ptr, i32 %n, i32 %offset) {
diff --git a/test/Transforms/IndVarSimplify/AMDGPU/lit.local.cfg b/test/Transforms/IndVarSimplify/AMDGPU/lit.local.cfg
new file mode 100644
index 000000000000..2a665f06be72
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/AMDGPU/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'AMDGPU' in config.root.targets:
+ config.unsupported = True
diff --git a/test/Transforms/IndVarSimplify/AMDGPU/no-widen-to-i64.ll b/test/Transforms/IndVarSimplify/AMDGPU/no-widen-to-i64.ll
new file mode 100644
index 000000000000..aa4fb8e68eb3
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/AMDGPU/no-widen-to-i64.ll
@@ -0,0 +1,98 @@
+; RUN: opt -S -mtriple=amdgcn-unknown-amdhsa -indvars %s | FileCheck %s
+
+; Bug 21148
+
+; Induction variables should not be widened for 64-bit integers,
+; despite being a legal type.
+;
+; The cost of basic arithmetic instructions on a 64-bit integer are
+; twice as expensive as that on a 32-bit integer, or split into 2
+; 32-bit components.
+
+target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+
+; CHECK-LABEL: @indvar_32_bit(
+; CHECK-NOT: sext i32
+; CHECK: phi i32
+define void @indvar_32_bit(i32 %n, i32* nocapture %output) {
+entry:
+ %cmp5 = icmp sgt i32 %n, 0
+ br i1 %cmp5, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.06 = phi i32 [ 0, %for.body.preheader ], [ %add, %for.body ]
+ %mul = mul nsw i32 %i.06, %i.06
+ %tmp0 = sext i32 %i.06 to i64
+ %arrayidx = getelementptr inbounds i32, i32* %output, i64 %tmp0
+ store i32 %mul, i32* %arrayidx, align 4
+ %add = add nsw i32 %i.06, 3
+ %cmp = icmp slt i32 %add, %n
+ br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+}
+
+; CHECK-LABEL: @no_promote_i32(
+; CHECK-NOT: sext i32
+; CHECK: br
+; CHECK-NOT: shl i64
+; CHECK-NOT: ashr i64
+; CHECK-NOT: mul nsw i64
+; CHECK-NOT: add nsw i64
+define void @no_promote_i32(i32 addrspace(1)* %out, i32 %a, i32 %b) {
+entry:
+ br label %for.body
+
+for.body:
+ %inc = phi i32 [ 0, %entry ], [ %inc.i, %for.body ]
+ %tmp0 = add i32 %a, %inc
+ %shl = shl i32 %inc, 8
+ %shr = ashr exact i32 %shl, 8
+ %mul = mul nsw i32 %shr, %a
+ %add = add nsw i32 %mul, %b
+ %tmp1 = sext i32 %add to i64
+ %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 %tmp1
+ store i32 %tmp0, i32 addrspace(1)* %arrayidx1, align 4
+ %inc.i = add nsw i32 %inc, 1
+ %cmp = icmp slt i32 %inc.i, 16
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+; FIXME: This should really be promoted to i64, since it will need to
+; be legalized anyway.
+
+; CHECK-LABEL: @indvar_48_bit(
+define void @indvar_48_bit(i48 %n, i48* nocapture %output) {
+entry:
+ %cmp5 = icmp sgt i48 %n, 0
+ br i1 %cmp5, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.06 = phi i48 [ 0, %for.body.preheader ], [ %add, %for.body ]
+ %mul = mul nsw i48 %i.06, %i.06
+ %tmp0 = sext i48 %i.06 to i64
+ %arrayidx = getelementptr inbounds i48, i48* %output, i64 %tmp0
+ store i48 %mul, i48* %arrayidx, align 4
+ %add = add nsw i48 %i.06, 3
+ %cmp = icmp slt i48 %add, %n
+ br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+}
diff --git a/test/Transforms/IndVarSimplify/backedge-on-min-max.ll b/test/Transforms/IndVarSimplify/backedge-on-min-max.ll
index bb26ca5bafb0..bc846c49a8a4 100644
--- a/test/Transforms/IndVarSimplify/backedge-on-min-max.ll
+++ b/test/Transforms/IndVarSimplify/backedge-on-min-max.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -indvars -S | FileCheck %s
+; RUN: opt -lcssa -loop-simplify -S < %s | opt -S -passes='require<targetir>,require<scalar-evolution>,require<domtree>,loop(indvars)'
;; --- signed ---
diff --git a/test/Transforms/IndVarSimplify/elim-extend.ll b/test/Transforms/IndVarSimplify/elim-extend.ll
index 98701c338520..8daac23ea74d 100644
--- a/test/Transforms/IndVarSimplify/elim-extend.ll
+++ b/test/Transforms/IndVarSimplify/elim-extend.ll
@@ -22,7 +22,7 @@ loop:
store i8 0, i8* %postadr
%postivnsw = add nsw i32 %ivnsw, 1
%postofsnsw = sext i32 %postivnsw to i64
- %postadrnsw = getelementptr i8, i8* %base, i64 %postofsnsw
+ %postadrnsw = getelementptr inbounds i8, i8* %base, i64 %postofsnsw
store i8 0, i8* %postadrnsw
%cond = icmp sgt i32 %limit, %iv
br i1 %cond, label %loop, label %exit
diff --git a/test/Transforms/IndVarSimplify/iv-widen.ll b/test/Transforms/IndVarSimplify/iv-widen.ll
index ccf9fa0aa0ac..bf635903fdf2 100644
--- a/test/Transforms/IndVarSimplify/iv-widen.ll
+++ b/test/Transforms/IndVarSimplify/iv-widen.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -indvars -S | FileCheck %s
+; RUN: opt -lcssa -loop-simplify -S < %s | opt -S -passes='require<targetir>,require<scalar-evolution>,require<domtree>,loop(indvars)'
; Provide legal integer types.
target datalayout = "n8:16:32:64"
diff --git a/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll b/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll
index feb4b35a20a7..52743073d55c 100644
--- a/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll
+++ b/test/Transforms/IndVarSimplify/lftr-address-space-pointers.ll
@@ -11,7 +11,7 @@ entry:
br i1 %cmp1, label %for.body, label %for.end
; Make sure the added GEP has the right index type
-; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(2)* %base, i8 %0
+; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(2)* %base, i8 %idx.trunc
; CHECK: for.body:
; CHECK: phi i8 addrspace(2)*
@@ -43,7 +43,7 @@ entry:
br i1 %cmp1, label %for.body, label %for.end
; Make sure the added GEP has the right index type
-; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(3)* %base, i16 %0
+; CHECK: %lftr.limit = getelementptr i8, i8 addrspace(3)* %base, i16 %idx.trunc
; CHECK: for.body:
; CHECK: phi i8 addrspace(3)*
diff --git a/test/Transforms/IndVarSimplify/loop_evaluate10.ll b/test/Transforms/IndVarSimplify/loop_evaluate10.ll
index e51a3410e35a..bb33a6404c21 100644
--- a/test/Transforms/IndVarSimplify/loop_evaluate10.ll
+++ b/test/Transforms/IndVarSimplify/loop_evaluate10.ll
@@ -48,6 +48,4 @@ bb5: ; preds = %bb3
ret i32 0
}
-declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind
-
declare void @abort() noreturn nounwind
diff --git a/test/Transforms/IndVarSimplify/overflow-intrinsics.ll b/test/Transforms/IndVarSimplify/overflow-intrinsics.ll
new file mode 100644
index 000000000000..7715abc8ada5
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/overflow-intrinsics.ll
@@ -0,0 +1,137 @@
+; RUN: opt -S -indvars < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @f_sadd(i8* %a) {
+; CHECK-LABEL: @f_sadd(
+entry:
+ br label %for.body
+
+for.cond.cleanup: ; preds = %cont
+ ret void
+
+for.body: ; preds = %entry, %cont
+ %i.04 = phi i32 [ 0, %entry ], [ %2, %cont ]
+ %idxprom = sext i32 %i.04 to i64
+ %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
+ store i8 0, i8* %arrayidx, align 1
+ %0 = tail call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i.04, i32 1)
+ %1 = extractvalue { i32, i1 } %0, 1
+; CHECK: for.body:
+; CHECK-NOT: @llvm.sadd.with.overflow
+; CHECK: br i1 false, label %trap, label %cont, !nosanitize !0
+ br i1 %1, label %trap, label %cont, !nosanitize !{}
+
+trap: ; preds = %for.body
+ tail call void @llvm.trap() #2, !nosanitize !{}
+ unreachable, !nosanitize !{}
+
+cont: ; preds = %for.body
+ %2 = extractvalue { i32, i1 } %0, 0
+ %cmp = icmp slt i32 %2, 16
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+
+define void @f_uadd(i8* %a) {
+; CHECK-LABEL: @f_uadd(
+entry:
+ br label %for.body
+
+for.cond.cleanup: ; preds = %cont
+ ret void
+
+for.body: ; preds = %entry, %cont
+ %i.04 = phi i32 [ 0, %entry ], [ %2, %cont ]
+ %idxprom = sext i32 %i.04 to i64
+ %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
+ store i8 0, i8* %arrayidx, align 1
+ %0 = tail call { i32, i1 } @llvm.uadd.with.overflow.i32(i32 %i.04, i32 1)
+ %1 = extractvalue { i32, i1 } %0, 1
+; CHECK: for.body:
+; CHECK-NOT: @llvm.uadd.with.overflow
+; CHECK: br i1 false, label %trap, label %cont, !nosanitize !0
+ br i1 %1, label %trap, label %cont, !nosanitize !{}
+
+trap: ; preds = %for.body
+ tail call void @llvm.trap(), !nosanitize !{}
+ unreachable, !nosanitize !{}
+
+cont: ; preds = %for.body
+ %2 = extractvalue { i32, i1 } %0, 0
+ %cmp = icmp slt i32 %2, 16
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+
+define void @f_ssub(i8* nocapture %a) {
+; CHECK-LABEL: @f_ssub(
+entry:
+ br label %for.body
+
+for.cond.cleanup: ; preds = %cont
+ ret void
+
+for.body: ; preds = %entry, %cont
+ %i.04 = phi i32 [ 15, %entry ], [ %2, %cont ]
+ %idxprom = sext i32 %i.04 to i64
+ %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
+ store i8 0, i8* %arrayidx, align 1
+ %0 = tail call { i32, i1 } @llvm.ssub.with.overflow.i32(i32 %i.04, i32 1)
+ %1 = extractvalue { i32, i1 } %0, 1
+; CHECK: for.body:
+; CHECK-NOT: @llvm.ssub.with.overflow.i32
+; CHECK: br i1 false, label %trap, label %cont, !nosanitize !0
+ br i1 %1, label %trap, label %cont, !nosanitize !{}
+
+trap: ; preds = %for.body
+ tail call void @llvm.trap(), !nosanitize !{}
+ unreachable, !nosanitize !{}
+
+cont: ; preds = %for.body
+ %2 = extractvalue { i32, i1 } %0, 0
+ %cmp = icmp sgt i32 %2, -1
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+
+define void @f_usub(i8* nocapture %a) {
+; CHECK-LABEL: @f_usub(
+entry:
+ br label %for.body
+
+for.cond.cleanup: ; preds = %cont
+ ret void
+
+for.body: ; preds = %entry, %cont
+ %i.04 = phi i32 [ 15, %entry ], [ %2, %cont ]
+ %idxprom = sext i32 %i.04 to i64
+ %arrayidx = getelementptr inbounds i8, i8* %a, i64 %idxprom
+ store i8 0, i8* %arrayidx, align 1
+ %0 = tail call { i32, i1 } @llvm.usub.with.overflow.i32(i32 %i.04, i32 1)
+ %1 = extractvalue { i32, i1 } %0, 1
+
+; It is theoretically possible to prove this, but SCEV cannot
+; represent non-unsigned-wrapping subtraction operations.
+
+; CHECK: for.body:
+; CHECK: [[COND:%[^ ]+]] = extractvalue { i32, i1 } %1, 1
+; CHECK-NEXT: br i1 [[COND]], label %trap, label %cont, !nosanitize !0
+ br i1 %1, label %trap, label %cont, !nosanitize !{}
+
+trap: ; preds = %for.body
+ tail call void @llvm.trap(), !nosanitize !{}
+ unreachable, !nosanitize !{}
+
+cont: ; preds = %for.body
+ %2 = extractvalue { i32, i1 } %0, 0
+ %cmp = icmp sgt i32 %2, -1
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+
+declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32) nounwind readnone
+declare { i32, i1 } @llvm.uadd.with.overflow.i32(i32, i32) nounwind readnone
+declare { i32, i1 } @llvm.ssub.with.overflow.i32(i32, i32) nounwind readnone
+declare { i32, i1 } @llvm.usub.with.overflow.i32(i32, i32) nounwind readnone
+declare { i32, i1 } @llvm.smul.with.overflow.i32(i32, i32) nounwind readnone
+declare { i32, i1 } @llvm.umul.with.overflow.i32(i32, i32) nounwind readnone
+
+declare void @llvm.trap() #2
diff --git a/test/Transforms/IndVarSimplify/overflowcheck.ll b/test/Transforms/IndVarSimplify/overflowcheck.ll
deleted file mode 100644
index c3c033dfaece..000000000000
--- a/test/Transforms/IndVarSimplify/overflowcheck.ll
+++ /dev/null
@@ -1,56 +0,0 @@
-; RUN: opt < %s -indvars -liv-reduce -S | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx"
-
-; CHECK-LABEL: @addwithoverflow
-; CHECK-LABEL: loop1:
-; CHECK-NOT: zext
-; CHECK: add nsw
-; CHECK: @llvm.sadd.with.overflow
-; CHECK-LABEL: loop2:
-; CHECK-NOT: extractvalue
-; CHECK: add nuw
-; CHECK: @llvm.sadd.with.overflow
-; CHECK-LABEL: loop3:
-; CHECK-NOT: extractvalue
-; CHECK: ret
-define i64 @addwithoverflow(i32 %n, i64* %a) {
-entry:
- br label %loop0
-
-loop0:
- %i = phi i32 [ 0, %entry ], [ %i1val, %loop3 ]
- %s = phi i32 [ 0, %entry ], [ %addsval, %loop3 ]
- %bc = icmp ult i32 %i, %n
- br i1 %bc, label %loop1, label %exit
-
-loop1:
- %zxt = zext i32 %i to i64
- %ofs = shl nuw nsw i64 %zxt, 3
- %gep = getelementptr i64, i64* %a, i64 %zxt
- %v = load i64, i64* %gep, align 8
- %truncv = trunc i64 %v to i32
- %adds = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %s, i32 %truncv)
- %ovflows = extractvalue { i32, i1 } %adds, 1
- br i1 %ovflows, label %exit, label %loop2
-
-loop2:
- %addsval = extractvalue { i32, i1 } %adds, 0
- %i1 = call { i32, i1 } @llvm.sadd.with.overflow.i32(i32 %i, i32 1)
- %i1check = extractvalue { i32, i1 } %i1, 1
- br i1 %i1check, label %exit, label %loop3
-
-loop3:
- %i1val = extractvalue { i32, i1 } %i1, 0
- %test = icmp slt i32 %i1val, %n
- br i1 %test, label %return, label %loop0
-
-return:
- %ret = zext i32 %addsval to i64
- ret i64 %ret
-
-exit:
- unreachable
-}
-
-declare { i32, i1 } @llvm.sadd.with.overflow.i32(i32, i32)
diff --git a/test/Transforms/IndVarSimplify/pr24783.ll b/test/Transforms/IndVarSimplify/pr24783.ll
index 637cb1e196c5..2c19aada35ef 100644
--- a/test/Transforms/IndVarSimplify/pr24783.ll
+++ b/test/Transforms/IndVarSimplify/pr24783.ll
@@ -6,9 +6,6 @@ target triple = "powerpc64-unknown-linux-gnu"
define void @f(i32* %end.s, i8** %loc, i32 %p) {
; CHECK-LABEL: @f(
entry:
-; CHECK: [[P_SEXT:%[0-9a-z]+]] = sext i32 %p to i64
-; CHECK: [[END:%[0-9a-z]+]] = getelementptr i32, i32* %end.s, i64 [[P_SEXT]]
-
%end = getelementptr inbounds i32, i32* %end.s, i32 %p
%init = bitcast i32* %end.s to i8*
br label %while.body.i
@@ -22,7 +19,7 @@ while.body.i:
loop.exit:
; CHECK: loop.exit:
-; CHECK: [[END_BCASTED:%[a-z0-9]+]] = bitcast i32* %scevgep to i8*
+; CHECK: [[END_BCASTED:%[a-z0-9]+]] = bitcast i32* %end to i8*
; CHECK: store i8* [[END_BCASTED]], i8** %loc
%ptr.inc.lcssa = phi i8* [ %ptr.inc, %while.body.i ]
store i8* %ptr.inc.lcssa, i8** %loc
diff --git a/test/Transforms/IndVarSimplify/pr25576.ll b/test/Transforms/IndVarSimplify/pr25576.ll
new file mode 100644
index 000000000000..c9ebc479535b
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/pr25576.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -indvars < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @fn1() {
+; CHECK-LABEL: @fn1(
+entry:
+ br label %for.cond.loopexit
+
+for.cond.loopexit: ; preds = %for.inc7, %for.cond.loopexit, %entry
+ %c.1.lcssa = phi i32 [ %inc8, %for.inc7 ], [ 0, %for.cond.loopexit ], [ 0, %entry ]
+ br i1 undef, label %for.cond.loopexit, label %for.cond4.preheader
+
+for.cond4.preheader: ; preds = %for.inc7, %for.cond.loopexit
+ %c.17 = phi i32 [ %inc8, %for.inc7 ], [ 0, %for.cond.loopexit ]
+ br label %for.body6
+
+for.body6: ; preds = %for.body6, %for.cond4.preheader
+ %inc14 = phi i32 [ 0, %for.cond4.preheader ], [ %inc, %for.body6 ]
+ %idxprom = zext i32 %inc14 to i64
+ %inc = add i32 %inc14, 1
+ %cmp5 = icmp ult i32 %inc, 2
+ br i1 %cmp5, label %for.body6, label %for.inc7
+
+for.inc7: ; preds = %for.body6
+ %inc.lcssa = phi i32 [ %inc, %for.body6 ]
+ %inc8 = add i32 %c.17, 1
+ %cmp = icmp ult i32 %inc8, %inc.lcssa
+ br i1 %cmp, label %for.cond4.preheader, label %for.cond.loopexit
+}
diff --git a/test/Transforms/IndVarSimplify/pr26973.ll b/test/Transforms/IndVarSimplify/pr26973.ll
new file mode 100644
index 000000000000..8bad303ce88c
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/pr26973.ll
@@ -0,0 +1,33 @@
+; RUN: opt -S -indvars < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+@a = common global double* null, align 8
+@b = common global double 0.000000e+00, align 8
+
+define void @fn1(i32 %p1) {
+; CHECK-LABEL: @fn1(
+entry:
+ %ld = load double*, double** @a, align 8
+ br label %outer.loop
+
+outer.loop:
+ %iv.outer = phi i32 [ %p1, %entry ], [ %iv.outer.dec, %outer.be ]
+ %idxprom = sext i32 %iv.outer to i64
+ %arrayidx = getelementptr inbounds double, double* %ld, i64 %idxprom
+ %arrayidx.bc = bitcast double* %arrayidx to i64*
+ br label %inner.loop
+
+inner.loop:
+ %iv.inner = phi i32 [ %iv.outer, %outer.loop ], [ %iv.inner.dec, %inner.loop ]
+ %ld.arr = load i64, i64* %arrayidx.bc, align 8
+ store i64 %ld.arr, i64* bitcast (double* @b to i64*), align 8
+ %iv.inner.dec = add nsw i32 %iv.inner, -1
+ %cmp = icmp slt i32 %iv.outer, %iv.inner.dec
+ br i1 %cmp, label %outer.be, label %inner.loop
+
+outer.be:
+ %iv.outer.dec = add nsw i32 %iv.outer, -1
+ br label %outer.loop
+}
diff --git a/test/Transforms/IndVarSimplify/pr26974.ll b/test/Transforms/IndVarSimplify/pr26974.ll
new file mode 100644
index 000000000000..28a736441cd0
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/pr26974.ll
@@ -0,0 +1,60 @@
+; RUN: opt -indvars -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; indvars will try to replace %b.0.lcssa with %t.1. If it does this,
+; it will break LCSSA.
+
+@c = external global i32, align 4
+
+; CHECK-LABEL: @fn1
+define void @fn1() {
+entry:
+ br label %for.body
+
+for.cond1.preheader: ; preds = %for.body
+ %0 = load i32, i32* @c, align 4
+ br i1 undef, label %for.cond1.us.preheader, label %for.cond1
+
+for.cond1.us.preheader: ; preds = %for.cond1.preheader
+ br label %for.cond1.us
+
+for.cond1.us: ; preds = %for.cond1.us, %for.cond1.us.preheader
+ br label %for.cond1.us
+
+for.body: ; preds = %for.body, %entry
+ br i1 undef, label %for.body, label %for.cond1.preheader
+
+for.cond1: ; preds = %for.cond1.preheader
+ br i1 true, label %for.body9.lr.ph, label %for.cond13.preheader
+
+for.body9.lr.ph: ; preds = %for.cond1
+ br i1 undef, label %for.body9.us.preheader, label %for.body9
+
+for.body9.us.preheader: ; preds = %for.body9.lr.ph
+ br label %for.body9.us
+
+for.body9.us: ; preds = %for.body9.us, %for.body9.us.preheader
+ br label %for.body9.us
+
+for.cond13.preheader: ; preds = %for.body9, %for.cond1
+ %b.0.lcssa = phi i32 [ %0, %for.body9 ], [ 0, %for.cond1 ]
+ br label %for.cond13
+
+for.body9: ; preds = %for.body9.lr.ph
+ br label %for.cond13.preheader
+
+for.cond13: ; preds = %for.cond13, %for.cond13.preheader
+ %d.1 = phi i32 [ %t.1, %for.cond13 ], [ %0, %for.cond13.preheader ]
+ %t.1 = phi i32 [ %b.0.lcssa, %for.cond13 ], [ %0, %for.cond13.preheader ]
+ br i1 undef, label %for.cond18.preheader, label %for.cond13
+
+for.cond18.preheader: ; preds = %for.cond13
+ br label %for.cond18
+
+for.cond18: ; preds = %for.cond18, %for.cond18.preheader
+ %b.1 = phi i32 [ %xor, %for.cond18 ], [ %b.0.lcssa, %for.cond18.preheader ]
+ %add = add nsw i32 %b.1, %d.1
+ %xor = xor i32 %add, %b.1
+ br label %for.cond18
+}
diff --git a/test/Transforms/IndVarSimplify/pr27133.ll b/test/Transforms/IndVarSimplify/pr27133.ll
new file mode 100644
index 000000000000..1262407ea826
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/pr27133.ll
@@ -0,0 +1,38 @@
+; RUN: opt -indvars -S < %s | FileCheck %s
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc18.0.0"
+
+define i32 @fn2() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %c.0 = phi i32 [ %inc, %for.inc ], [ 0, %entry ]
+; CHECK: %[[WIDE:.*]] = phi i64
+; CHECK: %[[NORM:.*]] = phi i32
+; CHECK: invoke void @fn1(i64 %[[WIDE]])
+ %idxprom = sext i32 %c.0 to i64
+ invoke void @fn1(i64 %idxprom)
+ to label %for.inc unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %for.cond
+ %c.0.lcssa = phi i32 [ %c.0, %for.cond ]
+; CHECK: %[[LCSSA:.*]] = phi i32 [ %[[NORM]],
+ %0 = catchswitch within none [label %catch] unwind to caller
+
+catch: ; preds = %catch.dispatch
+ %1 = catchpad within %0 [i8* null, i32 64, i8* null]
+ catchret from %1 to label %exit
+
+exit:
+; CHECK: ret i32 %[[LCSSA]]
+ ret i32 %c.0.lcssa
+
+for.inc: ; preds = %for.cond
+ %inc = add nsw nuw i32 %c.0, 1
+ br label %for.cond
+}
+
+declare void @fn1(i64 %idxprom)
+
+declare i32 @__CxxFrameHandler3(...)
diff --git a/test/Transforms/IndVarSimplify/rewrite-loop-exit-value.ll b/test/Transforms/IndVarSimplify/rewrite-loop-exit-value.ll
new file mode 100644
index 000000000000..6993946b9514
--- /dev/null
+++ b/test/Transforms/IndVarSimplify/rewrite-loop-exit-value.ll
@@ -0,0 +1,63 @@
+; RUN: opt -indvars -instcombine -S < %s | FileCheck %s
+
+;; Test that loop's exit value is rewritten to its initial
+;; value from loop preheader
+define i32 @test1(i32* %var) {
+; CHECK-LABEL: @test1
+entry:
+ %cond = icmp eq i32* %var, null
+ br label %header
+
+header:
+ %phi_indvar = phi i32 [0, %entry], [%indvar, %loop]
+ br i1 %cond, label %loop, label %exit
+
+loop:
+ %indvar = add i32 %phi_indvar, 1
+ br label %header
+
+exit:
+; CHECK: ret i32 0
+ ret i32 %phi_indvar
+}
+
+;; Test that we can not rewrite loop exit value if it's not
+;; a phi node (%indvar is an add instruction in this test).
+define i32 @test2(i32* %var) {
+; CHECK-LABEL: @test2
+entry:
+ %cond = icmp eq i32* %var, null
+ br label %header
+
+header:
+ %phi_indvar = phi i32 [0, %entry], [%indvar, %header]
+ %indvar = add i32 %phi_indvar, 1
+ br i1 %cond, label %header, label %exit
+
+exit:
+; CHECK: ret i32 %indvar
+ ret i32 %indvar
+}
+
+;; Test that we can not rewrite loop exit value if the condition
+;; is not in loop header.
+define i32 @test3(i32* %var) {
+; CHECK-LABEL: @test3
+entry:
+ %cond1 = icmp eq i32* %var, null
+ br label %header
+
+header:
+ %phi_indvar = phi i32 [0, %entry], [%indvar, %header], [%indvar, %body]
+ %indvar = add i32 %phi_indvar, 1
+ %cond2 = icmp eq i32 %indvar, 10
+ br i1 %cond2, label %header, label %body
+
+body:
+ br i1 %cond1, label %header, label %exit
+
+exit:
+; CHECK: ret i32 %phi_indvar
+ ret i32 %phi_indvar
+}
+
diff --git a/test/Transforms/IndVarSimplify/sharpen-range.ll b/test/Transforms/IndVarSimplify/sharpen-range.ll
index c103da9cec70..e9fac3900a55 100644
--- a/test/Transforms/IndVarSimplify/sharpen-range.ll
+++ b/test/Transforms/IndVarSimplify/sharpen-range.ll
@@ -1,4 +1,5 @@
;; RUN: opt -S < %s -indvars | FileCheck %s
+; RUN: opt -lcssa -loop-simplify -S < %s | opt -S -passes='require<targetir>,require<scalar-evolution>,require<domtree>,loop(indvars)'
;; Check if llvm can narrow !range metadata based on loop entry
;; predicates.
diff --git a/test/Transforms/IndVarSimplify/sink-trapping.ll b/test/Transforms/IndVarSimplify/sink-trapping.ll
index a18000c5f8a8..d6e049507604 100644
--- a/test/Transforms/IndVarSimplify/sink-trapping.ll
+++ b/test/Transforms/IndVarSimplify/sink-trapping.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -indvars -S | FileCheck %s --check-prefix=CHECK
+; RUN: opt < %s -indvars -S | FileCheck %s
declare i1 @b()
diff --git a/test/Transforms/IndVarSimplify/udiv.ll b/test/Transforms/IndVarSimplify/udiv.ll
index 45c703c99dde..b3f2c2a6a667 100644
--- a/test/Transforms/IndVarSimplify/udiv.ll
+++ b/test/Transforms/IndVarSimplify/udiv.ll
@@ -127,12 +127,12 @@ declare i32 @atoi(i8* nocapture) nounwind readonly
declare i32 @printf(i8* nocapture, ...) nounwind
-; IndVars shouldn't be afraid to emit a udiv here, since there's a udiv in
-; the original code.
+; IndVars doesn't emit a udiv in for.body.preheader since SCEVExpander::expand will
+; find out there's already a udiv in the original code.
; CHECK-LABEL: @foo(
; CHECK: for.body.preheader:
-; CHECK-NEXT: udiv
+; CHECK-NOT: udiv
define void @foo(double* %p, i64 %n) nounwind {
entry:
diff --git a/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll b/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll
index 7272ef1ec135..30dcbf500883 100644
--- a/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll
+++ b/test/Transforms/IndVarSimplify/ult-sub-to-eq.ll
@@ -32,15 +32,9 @@ for.end: ; preds = %for.body, %entry
; CHECK-LABEL: @test1(
-; First check that we move the sub into the preheader, it doesn't have to be
-; executed if %cmp4 == false
-; CHECK: for.body.preheader:
-; CHECK: sub i32 %data_len, %sample
-; CHECK: br label %for.body
-
-; Second, check that we turn the IV test into an eq.
+; check that we turn the IV test into an eq.
; CHECK: %lftr.wideiv = trunc i64 %indvars.iv.next to i32
-; CHECK: %exitcond = icmp ne i32 %lftr.wideiv, %0
+; CHECK: %exitcond = icmp ne i32 %lftr.wideiv, %sub
; CHECK: br i1 %exitcond, label %for.body, label %for.end.loopexit
}
diff --git a/test/Transforms/InferFunctionAttrs/annotate.ll b/test/Transforms/InferFunctionAttrs/annotate.ll
index 1cb7ab137c02..039114d0cb98 100644
--- a/test/Transforms/InferFunctionAttrs/annotate.ll
+++ b/test/Transforms/InferFunctionAttrs/annotate.ll
@@ -1,20 +1,8 @@
-; RUN: opt < %s -inferattrs -S | FileCheck %s
-; RUN: opt < %s -passes=inferattrs -S | FileCheck %s
-; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -inferattrs -S | FileCheck -check-prefix=CHECK-POSIX %s
-
-declare i8* @fopen(i8*, i8*)
-; CHECK: declare noalias i8* @fopen(i8* nocapture readonly, i8* nocapture readonly) [[G0:#[0-9]]]
-
-declare i8 @strlen(i8*)
-; CHECK: declare i8 @strlen(i8* nocapture) [[G1:#[0-9]]]
-
-declare i32* @realloc(i32*, i32)
-; CHECK: declare noalias i32* @realloc(i32* nocapture, i32) [[G0]]
-
-; Test deliberately wrong declaration
-
-declare i32 @strcpy(...)
-; CHECK: declare i32 @strcpy(...)
+; RUN: opt < %s -mtriple=x86_64-- -inferattrs -S | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-- -passes=inferattrs -S | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -inferattrs -S | FileCheck -check-prefix=CHECK -check-prefix=CHECK-DARWIN %s
+; RUN: opt < %s -mtriple=x86_64-unknown-linux -inferattrs -S | FileCheck -check-prefix=CHECK -check-prefix=CHECK-LINUX %s
+; RUN: opt < %s -mtriple=nvptx -inferattrs -S | FileCheck -check-prefix=CHECK-NVPTX %s
; operator new routines
declare i8* @_Znwj(i64)
@@ -22,14 +10,867 @@ declare i8* @_Znwj(i64)
declare i8* @_Znwm(i64)
; CHECK: declare noalias nonnull i8* @_Znwm(i64)
+declare i32 @__nvvm_reflect(i8*)
+; CHECK-NVPTX: declare i32 @__nvvm_reflect(i8*) [[G0:#[0-9]+]]
+; CHECK-NVPTX: attributes [[G0]] = { nounwind readnone }
+
+
+; Check all the libc functions (thereby also exercising the prototype check).
+; Note that it's OK to modify these as attributes might be missing. These checks
+; reflect the currently inferred attributes.
+
+; Use an opaque pointer type for all the (possibly opaque) structs.
+%opaque = type opaque
+
+; CHECK: declare double @__cospi(double)
+declare double @__cospi(double)
+
+; CHECK: declare float @__cospif(float)
+declare float @__cospif(float)
+
+; CHECK: declare double @__sinpi(double)
+declare double @__sinpi(double)
+
+; CHECK: declare float @__sinpif(float)
+declare float @__sinpif(float)
+
+; CHECK: declare i32 @abs(i32)
+declare i32 @abs(i32)
+
+; CHECK: declare i32 @access(i8* nocapture readonly, i32) [[G0:#[0-9]+]]
+declare i32 @access(i8*, i32)
+
+; CHECK: declare double @acos(double)
+declare double @acos(double)
+
+; CHECK: declare float @acosf(float)
+declare float @acosf(float)
+
+; CHECK: declare double @acosh(double)
+declare double @acosh(double)
+
+; CHECK: declare float @acoshf(float)
+declare float @acoshf(float)
+
+; CHECK: declare x86_fp80 @acoshl(x86_fp80)
+declare x86_fp80 @acoshl(x86_fp80)
+
+; CHECK: declare x86_fp80 @acosl(x86_fp80)
+declare x86_fp80 @acosl(x86_fp80)
+
+; CHECK: declare double @asin(double)
+declare double @asin(double)
+
+; CHECK: declare float @asinf(float)
+declare float @asinf(float)
+
+; CHECK: declare double @asinh(double)
+declare double @asinh(double)
+
+; CHECK: declare float @asinhf(float)
+declare float @asinhf(float)
+
+; CHECK: declare x86_fp80 @asinhl(x86_fp80)
+declare x86_fp80 @asinhl(x86_fp80)
+
+; CHECK: declare x86_fp80 @asinl(x86_fp80)
+declare x86_fp80 @asinl(x86_fp80)
+
+; CHECK: declare double @atan(double)
+declare double @atan(double)
+
+; CHECK: declare double @atan2(double, double)
+declare double @atan2(double, double)
+
+; CHECK: declare float @atan2f(float, float)
+declare float @atan2f(float, float)
+
+; CHECK: declare x86_fp80 @atan2l(x86_fp80, x86_fp80)
+declare x86_fp80 @atan2l(x86_fp80, x86_fp80)
+
+; CHECK: declare float @atanf(float)
+declare float @atanf(float)
+
+; CHECK: declare double @atanh(double)
+declare double @atanh(double)
+
+; CHECK: declare float @atanhf(float)
+declare float @atanhf(float)
+
+; CHECK: declare x86_fp80 @atanhl(x86_fp80)
+declare x86_fp80 @atanhl(x86_fp80)
+
+; CHECK: declare x86_fp80 @atanl(x86_fp80)
+declare x86_fp80 @atanl(x86_fp80)
+
+; CHECK: declare double @atof(i8* nocapture) [[G1:#[0-9]+]]
+declare double @atof(i8*)
+
+; CHECK: declare i32 @atoi(i8* nocapture) [[G1]]
+declare i32 @atoi(i8*)
+
+; CHECK: declare i64 @atol(i8* nocapture) [[G1]]
+declare i64 @atol(i8*)
+
+; CHECK: declare i64 @atoll(i8* nocapture) [[G1]]
+declare i64 @atoll(i8*)
+
+; CHECK: declare i32 @bcmp(i8* nocapture, i8* nocapture, i64) [[G1]]
+declare i32 @bcmp(i8*, i8*, i64)
+
+; CHECK: declare void @bcopy(i8* nocapture readonly, i8* nocapture, i64) [[G0]]
+declare void @bcopy(i8*, i8*, i64)
+
+; CHECK: declare void @bzero(i8* nocapture, i64) [[G0]]
+declare void @bzero(i8*, i64)
+
+; CHECK: declare noalias i8* @calloc(i64, i64) [[G0]]
+declare i8* @calloc(i64, i64)
+
+; CHECK: declare double @cbrt(double)
+declare double @cbrt(double)
+
+; CHECK: declare float @cbrtf(float)
+declare float @cbrtf(float)
+
+; CHECK: declare x86_fp80 @cbrtl(x86_fp80)
+declare x86_fp80 @cbrtl(x86_fp80)
+
+; CHECK: declare double @ceil(double)
+declare double @ceil(double)
+
+; CHECK: declare float @ceilf(float)
+declare float @ceilf(float)
+
+; CHECK: declare x86_fp80 @ceill(x86_fp80)
+declare x86_fp80 @ceill(x86_fp80)
+
+; CHECK: declare i32 @chmod(i8* nocapture readonly, i16 zeroext) [[G0]]
+declare i32 @chmod(i8*, i16 zeroext)
+
+; CHECK: declare i32 @chown(i8* nocapture readonly, i32, i32) [[G0]]
+declare i32 @chown(i8*, i32, i32)
+
+; CHECK: declare void @clearerr(%opaque* nocapture) [[G0]]
+declare void @clearerr(%opaque*)
+
+; CHECK: declare i32 @closedir(%opaque* nocapture) [[G0]]
+declare i32 @closedir(%opaque*)
+
+; CHECK: declare double @copysign(double, double)
+declare double @copysign(double, double)
+
+; CHECK: declare float @copysignf(float, float)
+declare float @copysignf(float, float)
+
+; CHECK: declare x86_fp80 @copysignl(x86_fp80, x86_fp80)
+declare x86_fp80 @copysignl(x86_fp80, x86_fp80)
+
+; CHECK: declare double @cos(double)
+declare double @cos(double)
+
+; CHECK: declare float @cosf(float)
+declare float @cosf(float)
+
+; CHECK: declare double @cosh(double)
+declare double @cosh(double)
+
+; CHECK: declare float @coshf(float)
+declare float @coshf(float)
+
+; CHECK: declare x86_fp80 @coshl(x86_fp80)
+declare x86_fp80 @coshl(x86_fp80)
+
+; CHECK: declare x86_fp80 @cosl(x86_fp80)
+declare x86_fp80 @cosl(x86_fp80)
+
+; CHECK: declare i8* @ctermid(i8* nocapture) [[G0]]
+declare i8* @ctermid(i8*)
+
+; CHECK: declare double @exp(double)
+declare double @exp(double)
+
+; CHECK: declare double @exp2(double)
+declare double @exp2(double)
+
+; CHECK: declare float @exp2f(float)
+declare float @exp2f(float)
+
+; CHECK: declare x86_fp80 @exp2l(x86_fp80)
+declare x86_fp80 @exp2l(x86_fp80)
+
+; CHECK: declare float @expf(float)
+declare float @expf(float)
+
+; CHECK: declare x86_fp80 @expl(x86_fp80)
+declare x86_fp80 @expl(x86_fp80)
+
+; CHECK: declare double @expm1(double)
+declare double @expm1(double)
+
+; CHECK: declare float @expm1f(float)
+declare float @expm1f(float)
+
+; CHECK: declare x86_fp80 @expm1l(x86_fp80)
+declare x86_fp80 @expm1l(x86_fp80)
+
+; CHECK: declare double @fabs(double)
+declare double @fabs(double)
+
+; CHECK: declare float @fabsf(float)
+declare float @fabsf(float)
+
+; CHECK: declare x86_fp80 @fabsl(x86_fp80)
+declare x86_fp80 @fabsl(x86_fp80)
+
+; CHECK: declare i32 @fclose(%opaque* nocapture) [[G0]]
+declare i32 @fclose(%opaque*)
+
+; CHECK: declare noalias %opaque* @fdopen(i32, i8* nocapture readonly) [[G0]]
+declare %opaque* @fdopen(i32, i8*)
+
+; CHECK: declare i32 @feof(%opaque* nocapture) [[G0]]
+declare i32 @feof(%opaque*)
+
+; CHECK: declare i32 @ferror(%opaque* nocapture) [[G1]]
+declare i32 @ferror(%opaque*)
+
+; CHECK: declare i32 @fflush(%opaque* nocapture) [[G0]]
+declare i32 @fflush(%opaque*)
+
+; CHECK: declare i32 @ffs(i32)
+declare i32 @ffs(i32)
+
+; CHECK: declare i32 @ffsl(i64)
+declare i32 @ffsl(i64)
+
+; CHECK: declare i32 @ffsll(i64)
+declare i32 @ffsll(i64)
+
+; CHECK: declare i32 @fgetc(%opaque* nocapture) [[G0]]
+declare i32 @fgetc(%opaque*)
+
+; CHECK: declare i32 @fgetpos(%opaque* nocapture, i64* nocapture) [[G0]]
+declare i32 @fgetpos(%opaque*, i64*)
+
+; CHECK: declare i8* @fgets(i8*, i32, %opaque* nocapture) [[G0]]
+declare i8* @fgets(i8*, i32, %opaque*)
+
+; CHECK: declare i32 @fileno(%opaque* nocapture) [[G0]]
+declare i32 @fileno(%opaque*)
+
+; CHECK: declare void @flockfile(%opaque* nocapture) [[G0]]
+declare void @flockfile(%opaque*)
+
+; CHECK: declare double @floor(double)
+declare double @floor(double)
+
+; CHECK: declare float @floorf(float)
+declare float @floorf(float)
+
+; CHECK: declare x86_fp80 @floorl(x86_fp80)
+declare x86_fp80 @floorl(x86_fp80)
+
+; CHECK: declare i32 @fls(i32)
+declare i32 @fls(i32)
+
+; CHECK: declare i32 @flsl(i64)
+declare i32 @flsl(i64)
+
+; CHECK: declare i32 @flsll(i64)
+declare i32 @flsll(i64)
+
+; CHECK: declare double @fmax(double, double)
+declare double @fmax(double, double)
+
+; CHECK: declare float @fmaxf(float, float)
+declare float @fmaxf(float, float)
+
+; CHECK: declare x86_fp80 @fmaxl(x86_fp80, x86_fp80)
+declare x86_fp80 @fmaxl(x86_fp80, x86_fp80)
+
+; CHECK: declare double @fmin(double, double)
+declare double @fmin(double, double)
+
+; CHECK: declare float @fminf(float, float)
+declare float @fminf(float, float)
+
+; CHECK: declare x86_fp80 @fminl(x86_fp80, x86_fp80)
+declare x86_fp80 @fminl(x86_fp80, x86_fp80)
+
+; CHECK: declare double @fmod(double, double)
+declare double @fmod(double, double)
+
+; CHECK: declare float @fmodf(float, float)
+declare float @fmodf(float, float)
+
+; CHECK: declare x86_fp80 @fmodl(x86_fp80, x86_fp80)
+declare x86_fp80 @fmodl(x86_fp80, x86_fp80)
+
+; CHECK: declare noalias %opaque* @fopen(i8* nocapture readonly, i8* nocapture readonly) [[G0]]
+declare %opaque* @fopen(i8*, i8*)
+
+; CHECK: declare i32 @fprintf(%opaque* nocapture, i8* nocapture readonly, ...) [[G0]]
+declare i32 @fprintf(%opaque*, i8*, ...)
+
+; CHECK: declare i32 @fputc(i32, %opaque* nocapture) [[G0]]
+declare i32 @fputc(i32, %opaque*)
+
+; CHECK: declare i32 @fputs(i8* nocapture readonly, %opaque* nocapture) [[G0]]
+declare i32 @fputs(i8*, %opaque*)
+
+; CHECK: declare i64 @fread(i8* nocapture, i64, i64, %opaque* nocapture) [[G0]]
+declare i64 @fread(i8*, i64, i64, %opaque*)
+
+; CHECK: declare void @free(i8* nocapture) [[G0]]
+declare void @free(i8*)
+
+; CHECK: declare double @frexp(double, i32* nocapture) [[G0]]
+declare double @frexp(double, i32*)
+
+; CHECK: declare float @frexpf(float, i32* nocapture) [[G0]]
+declare float @frexpf(float, i32*)
+
+; CHECK: declare x86_fp80 @frexpl(x86_fp80, i32* nocapture) [[G0]]
+declare x86_fp80 @frexpl(x86_fp80, i32*)
+
+; CHECK: declare i32 @fscanf(%opaque* nocapture, i8* nocapture readonly, ...) [[G0]]
+declare i32 @fscanf(%opaque*, i8*, ...)
+
+; CHECK: declare i32 @fseek(%opaque* nocapture, i64, i32) [[G0]]
+declare i32 @fseek(%opaque*, i64, i32)
+
+; CHECK: declare i32 @fseeko(%opaque* nocapture, i64, i32) [[G0]]
+declare i32 @fseeko(%opaque*, i64, i32)
+
+; CHECK-LINUX: declare i32 @fseeko64(%opaque* nocapture, i64, i32) [[G0]]
+declare i32 @fseeko64(%opaque*, i64, i32)
+
+; CHECK: declare i32 @fsetpos(%opaque* nocapture, i64*) [[G0]]
+declare i32 @fsetpos(%opaque*, i64*)
+
+; CHECK: declare i32 @fstat(i32, %opaque* nocapture) [[G0]]
+declare i32 @fstat(i32, %opaque*)
+
+; CHECK-LINUX: declare i32 @fstat64(i32, %opaque* nocapture) [[G0]]
+declare i32 @fstat64(i32, %opaque*)
+
+; CHECK: declare i32 @fstatvfs(i32, %opaque* nocapture) [[G0]]
+declare i32 @fstatvfs(i32, %opaque*)
+
+; CHECK-LINUX: declare i32 @fstatvfs64(i32, %opaque* nocapture) [[G0]]
+declare i32 @fstatvfs64(i32, %opaque*)
+
+; CHECK: declare i64 @ftell(%opaque* nocapture) [[G0]]
+declare i64 @ftell(%opaque*)
+
+; CHECK: declare i64 @ftello(%opaque* nocapture) [[G0]]
+declare i64 @ftello(%opaque*)
+
+; CHECK-LINUX: declare i64 @ftello64(%opaque* nocapture) [[G0]]
+declare i64 @ftello64(%opaque*)
+
+; CHECK: declare i32 @ftrylockfile(%opaque* nocapture) [[G0]]
+declare i32 @ftrylockfile(%opaque*)
+
+; CHECK: declare void @funlockfile(%opaque* nocapture) [[G0]]
+declare void @funlockfile(%opaque*)
+
+; CHECK: declare i64 @fwrite(i8* nocapture, i64, i64, %opaque* nocapture) [[G0]]
+declare i64 @fwrite(i8*, i64, i64, %opaque*)
+
+; CHECK: declare i32 @getc(%opaque* nocapture) [[G0]]
+declare i32 @getc(%opaque*)
+
+; CHECK: declare i32 @getc_unlocked(%opaque* nocapture) [[G0]]
+declare i32 @getc_unlocked(%opaque*)
+
+; CHECK: declare i32 @getchar()
+declare i32 @getchar()
+
+; CHECK: declare i8* @getenv(i8* nocapture) [[G1]]
+declare i8* @getenv(i8*)
+
+; CHECK: declare i32 @getitimer(i32, %opaque* nocapture) [[G0]]
+declare i32 @getitimer(i32, %opaque*)
+
+; CHECK: declare i32 @getlogin_r(i8* nocapture, i64) [[G0]]
+declare i32 @getlogin_r(i8*, i64)
+
+; CHECK: declare %opaque* @getpwnam(i8* nocapture readonly) [[G0]]
+declare %opaque* @getpwnam(i8*)
+
+; CHECK: declare i8* @gets(i8*)
+declare i8* @gets(i8*)
+
+; CHECK: declare i32 @gettimeofday(%opaque* nocapture, i8* nocapture) [[G0]]
+declare i32 @gettimeofday(%opaque*, i8*)
+
+; CHECK: declare i32 @isascii(i32)
+declare i32 @isascii(i32)
+
+; CHECK: declare i32 @isdigit(i32)
+declare i32 @isdigit(i32)
+
+; CHECK: declare i64 @labs(i64)
+declare i64 @labs(i64)
+
+; CHECK: declare i32 @lchown(i8* nocapture readonly, i32, i32) [[G0]]
+declare i32 @lchown(i8*, i32, i32)
+
+; CHECK: declare double @ldexp(double, i32)
+declare double @ldexp(double, i32)
+
+; CHECK: declare float @ldexpf(float, i32)
+declare float @ldexpf(float, i32)
+
+; CHECK: declare x86_fp80 @ldexpl(x86_fp80, i32)
+declare x86_fp80 @ldexpl(x86_fp80, i32)
+
+; CHECK: declare i64 @llabs(i64)
+declare i64 @llabs(i64)
+
+; CHECK: declare double @log(double)
+declare double @log(double)
+
+; CHECK: declare double @log10(double)
+declare double @log10(double)
+
+; CHECK: declare float @log10f(float)
+declare float @log10f(float)
+
+; CHECK: declare x86_fp80 @log10l(x86_fp80)
+declare x86_fp80 @log10l(x86_fp80)
+
+; CHECK: declare double @log1p(double)
+declare double @log1p(double)
+
+; CHECK: declare float @log1pf(float)
+declare float @log1pf(float)
+
+; CHECK: declare x86_fp80 @log1pl(x86_fp80)
+declare x86_fp80 @log1pl(x86_fp80)
+
+; CHECK: declare double @log2(double)
+declare double @log2(double)
+
+; CHECK: declare float @log2f(float)
+declare float @log2f(float)
+
+; CHECK: declare x86_fp80 @log2l(x86_fp80)
+declare x86_fp80 @log2l(x86_fp80)
+
+; CHECK: declare double @logb(double)
+declare double @logb(double)
+
+; CHECK: declare float @logbf(float)
+declare float @logbf(float)
+
+; CHECK: declare x86_fp80 @logbl(x86_fp80)
+declare x86_fp80 @logbl(x86_fp80)
+
+; CHECK: declare float @logf(float)
+declare float @logf(float)
+
+; CHECK: declare x86_fp80 @logl(x86_fp80)
+declare x86_fp80 @logl(x86_fp80)
+
+; CHECK: declare i32 @lstat(i8* nocapture readonly, %opaque* nocapture) [[G0]]
+declare i32 @lstat(i8*, %opaque*)
+
+; CHECK-LINUX: declare i32 @lstat64(i8* nocapture readonly, %opaque* nocapture) [[G0]]
+declare i32 @lstat64(i8*, %opaque*)
+
+; CHECK: declare noalias i8* @malloc(i64) [[G0]]
+declare i8* @malloc(i64)
+
+; CHECK-LINUX: declare noalias i8* @memalign(i64, i64)
+declare i8* @memalign(i64, i64)
+
+; CHECK: declare i8* @memccpy(i8*, i8* nocapture readonly, i32, i64) [[G0]]
+declare i8* @memccpy(i8*, i8*, i32, i64)
+
+; CHECK: declare i8* @memchr(i8*, i32, i64) [[G1]]
+declare i8* @memchr(i8*, i32, i64)
+
+; CHECK: declare i32 @memcmp(i8* nocapture, i8* nocapture, i64) [[G1]]
+declare i32 @memcmp(i8*, i8*, i64)
+
+; CHECK: declare i8* @memcpy(i8*, i8* nocapture readonly, i64) [[G0]]
+declare i8* @memcpy(i8*, i8*, i64)
+
+; CHECK: declare i8* @memmove(i8*, i8* nocapture readonly, i64) [[G0]]
+declare i8* @memmove(i8*, i8*, i64)
+
+; CHECK: declare i8* @memset(i8*, i32, i64)
+declare i8* @memset(i8*, i32, i64)
+
+; CHECK: declare i32 @mkdir(i8* nocapture readonly, i16 zeroext) [[G0]]
+declare i32 @mkdir(i8*, i16 zeroext)
+
+; CHECK: declare i64 @mktime(%opaque* nocapture) [[G0]]
+declare i64 @mktime(%opaque*)
+
+; CHECK: declare double @modf(double, double* nocapture) [[G0]]
+declare double @modf(double, double*)
+
+; CHECK: declare float @modff(float, float* nocapture) [[G0]]
+declare float @modff(float, float*)
+
+; CHECK: declare x86_fp80 @modfl(x86_fp80, x86_fp80* nocapture) [[G0]]
+declare x86_fp80 @modfl(x86_fp80, x86_fp80*)
+
+; CHECK: declare double @nearbyint(double)
+declare double @nearbyint(double)
+
+; CHECK: declare float @nearbyintf(float)
+declare float @nearbyintf(float)
+
+; CHECK: declare x86_fp80 @nearbyintl(x86_fp80)
+declare x86_fp80 @nearbyintl(x86_fp80)
+
+; CHECK: declare i32 @open(i8* nocapture readonly, i32, ...)
+declare i32 @open(i8*, i32, ...)
+
+; CHECK-LINUX: declare i32 @open64(i8* nocapture readonly, i32, ...)
+declare i32 @open64(i8*, i32, ...)
+
+; CHECK: declare noalias %opaque* @opendir(i8* nocapture readonly) [[G0]]
+declare %opaque* @opendir(i8*)
+
+; CHECK: declare i32 @pclose(%opaque* nocapture) [[G0]]
+declare i32 @pclose(%opaque*)
+
+; CHECK: declare void @perror(i8* nocapture readonly) [[G0]]
+declare void @perror(i8*)
+
+; CHECK: declare noalias %opaque* @popen(i8* nocapture readonly, i8* nocapture readonly) [[G0]]
+declare %opaque* @popen(i8*, i8*)
+
+; CHECK: declare i32 @posix_memalign(i8**, i64, i64)
+declare i32 @posix_memalign(i8**, i64, i64)
+
+; CHECK: declare double @pow(double, double)
+declare double @pow(double, double)
+
+; CHECK: declare float @powf(float, float)
+declare float @powf(float, float)
+
+; CHECK: declare x86_fp80 @powl(x86_fp80, x86_fp80)
+declare x86_fp80 @powl(x86_fp80, x86_fp80)
+
+; CHECK: declare i64 @pread(i32, i8* nocapture, i64, i64)
+declare i64 @pread(i32, i8*, i64, i64)
+
+; CHECK: declare i32 @printf(i8* nocapture readonly, ...) [[G0]]
+declare i32 @printf(i8*, ...)
+
+; CHECK: declare i32 @putc(i32, %opaque* nocapture) [[G0]]
+declare i32 @putc(i32, %opaque*)
+
+; CHECK: declare i32 @putchar(i32)
+declare i32 @putchar(i32)
+
+; CHECK: declare i32 @puts(i8* nocapture readonly) [[G0]]
+declare i32 @puts(i8*)
+
+; CHECK: declare i64 @pwrite(i32, i8* nocapture readonly, i64, i64)
+declare i64 @pwrite(i32, i8*, i64, i64)
+
+; CHECK: declare void @qsort(i8*, i64, i64, i32 (i8*, i8*)* nocapture)
+declare void @qsort(i8*, i64, i64, i32 (i8*, i8*)*)
+
+; CHECK: declare i64 @read(i32, i8* nocapture, i64)
+declare i64 @read(i32, i8*, i64)
+
+; CHECK: declare i64 @readlink(i8* nocapture readonly, i8* nocapture, i64) [[G0]]
+declare i64 @readlink(i8*, i8*, i64)
+
+; CHECK: declare noalias i8* @realloc(i8* nocapture, i64) [[G0]]
+declare i8* @realloc(i8*, i64)
+
+; CHECK: declare i8* @reallocf(i8*, i64)
+declare i8* @reallocf(i8*, i64)
+
+; CHECK: declare i8* @realpath(i8* nocapture readonly, i8*)
+declare i8* @realpath(i8*, i8*)
+
+; CHECK: declare i32 @remove(i8* nocapture readonly) [[G0]]
+declare i32 @remove(i8*)
+
+; CHECK: declare i32 @rename(i8* nocapture readonly, i8* nocapture readonly) [[G0]]
+declare i32 @rename(i8*, i8*)
+
+; CHECK: declare void @rewind(%opaque* nocapture) [[G0]]
+declare void @rewind(%opaque*)
+
+; CHECK: declare double @rint(double)
+declare double @rint(double)
+
+; CHECK: declare float @rintf(float)
+declare float @rintf(float)
+
+; CHECK: declare x86_fp80 @rintl(x86_fp80)
+declare x86_fp80 @rintl(x86_fp80)
+
+; CHECK: declare i32 @rmdir(i8* nocapture readonly) [[G0]]
+declare i32 @rmdir(i8*)
+
+; CHECK: declare double @round(double)
+declare double @round(double)
+
+; CHECK: declare float @roundf(float)
+declare float @roundf(float)
+
+; CHECK: declare x86_fp80 @roundl(x86_fp80)
+declare x86_fp80 @roundl(x86_fp80)
+
+; CHECK: declare i32 @scanf(i8* nocapture readonly, ...) [[G0]]
+declare i32 @scanf(i8*, ...)
+
+; CHECK: declare void @setbuf(%opaque* nocapture, i8*) [[G0]]
+declare void @setbuf(%opaque*, i8*)
+
+; CHECK: declare i32 @setitimer(i32, %opaque* nocapture readonly, %opaque* nocapture) [[G0]]
+declare i32 @setitimer(i32, %opaque*, %opaque*)
+
+; CHECK: declare i32 @setvbuf(%opaque* nocapture, i8*, i32, i64) [[G0]]
+declare i32 @setvbuf(%opaque*, i8*, i32, i64)
+
+; CHECK: declare double @sin(double)
+declare double @sin(double)
+
+; CHECK: declare float @sinf(float)
+declare float @sinf(float)
+
+; CHECK: declare double @sinh(double)
+declare double @sinh(double)
+
+; CHECK: declare float @sinhf(float)
+declare float @sinhf(float)
+
+; CHECK: declare x86_fp80 @sinhl(x86_fp80)
+declare x86_fp80 @sinhl(x86_fp80)
+
+; CHECK: declare x86_fp80 @sinl(x86_fp80)
+declare x86_fp80 @sinl(x86_fp80)
+
+; CHECK: declare i32 @snprintf(i8* nocapture, i64, i8* nocapture readonly, ...) [[G0]]
+declare i32 @snprintf(i8*, i64, i8*, ...)
+
+; CHECK: declare i32 @sprintf(i8* nocapture, i8* nocapture readonly, ...) [[G0]]
+declare i32 @sprintf(i8*, i8*, ...)
+
+; CHECK: declare double @sqrt(double)
+declare double @sqrt(double)
+
+; CHECK: declare float @sqrtf(float)
+declare float @sqrtf(float)
+
+; CHECK: declare x86_fp80 @sqrtl(x86_fp80)
+declare x86_fp80 @sqrtl(x86_fp80)
+
+; CHECK: declare i32 @sscanf(i8* nocapture readonly, i8* nocapture readonly, ...) [[G0]]
+declare i32 @sscanf(i8*, i8*, ...)
+
+; CHECK: declare i32 @stat(i8* nocapture readonly, %opaque* nocapture) [[G0]]
+declare i32 @stat(i8*, %opaque*)
+
+; CHECK-LINUX: declare i32 @stat64(i8* nocapture readonly, %opaque* nocapture) [[G0]]
+declare i32 @stat64(i8*, %opaque*)
+
+; CHECK: declare i32 @statvfs(i8* nocapture readonly, %opaque* nocapture) [[G0]]
+declare i32 @statvfs(i8*, %opaque*)
+
+; CHECK-LINUX: declare i32 @statvfs64(i8* nocapture readonly, %opaque* nocapture) [[G0]]
+declare i32 @statvfs64(i8*, %opaque*)
+
+; CHECK: declare i8* @stpcpy(i8*, i8* nocapture readonly) [[G0]]
+declare i8* @stpcpy(i8*, i8*)
+
+; CHECK: declare i8* @stpncpy(i8*, i8* nocapture readonly, i64) [[G0]]
+declare i8* @stpncpy(i8*, i8*, i64)
+
+; CHECK: declare i32 @strcasecmp(i8* nocapture, i8* nocapture) [[G1]]
+declare i32 @strcasecmp(i8*, i8*)
+
+; CHECK: declare i8* @strcat(i8*, i8* nocapture readonly) [[G0]]
+declare i8* @strcat(i8*, i8*)
+
+; CHECK: declare i8* @strchr(i8*, i32) [[G1]]
+declare i8* @strchr(i8*, i32)
+
+; CHECK: declare i32 @strcmp(i8* nocapture, i8* nocapture) [[G1]]
+declare i32 @strcmp(i8*, i8*)
+
+; CHECK: declare i32 @strcoll(i8* nocapture, i8* nocapture) [[G1]]
+declare i32 @strcoll(i8*, i8*)
+
+; CHECK: declare i8* @strcpy(i8*, i8* nocapture readonly) [[G0]]
+declare i8* @strcpy(i8*, i8*)
+
+; CHECK: declare i64 @strcspn(i8* nocapture, i8* nocapture) [[G1]]
+declare i64 @strcspn(i8*, i8*)
+
+; CHECK: declare noalias i8* @strdup(i8* nocapture readonly) [[G0]]
+declare i8* @strdup(i8*)
+
+; CHECK: declare i64 @strlen(i8* nocapture) [[G1]]
+declare i64 @strlen(i8*)
+
+; CHECK: declare i32 @strncasecmp(i8* nocapture, i8* nocapture, i64) [[G1]]
+declare i32 @strncasecmp(i8*, i8*, i64)
+
+; CHECK: declare i8* @strncat(i8*, i8* nocapture readonly, i64) [[G0]]
+declare i8* @strncat(i8*, i8*, i64)
+
+; CHECK: declare i32 @strncmp(i8* nocapture, i8* nocapture, i64) [[G1]]
+declare i32 @strncmp(i8*, i8*, i64)
+
+; CHECK: declare i8* @strncpy(i8*, i8* nocapture readonly, i64) [[G0]]
+declare i8* @strncpy(i8*, i8*, i64)
+
+; CHECK: declare noalias i8* @strndup(i8* nocapture readonly, i64) [[G0]]
+declare i8* @strndup(i8*, i64)
+
+; CHECK: declare i64 @strnlen(i8*, i64)
+declare i64 @strnlen(i8*, i64)
+
+; CHECK: declare i8* @strpbrk(i8*, i8* nocapture) [[G1]]
+declare i8* @strpbrk(i8*, i8*)
+
+; CHECK: declare i8* @strrchr(i8*, i32) [[G1]]
+declare i8* @strrchr(i8*, i32)
+
+; CHECK: declare i64 @strspn(i8* nocapture, i8* nocapture) [[G1]]
+declare i64 @strspn(i8*, i8*)
+
+; CHECK: declare i8* @strstr(i8*, i8* nocapture) [[G1]]
+declare i8* @strstr(i8*, i8*)
+
+; CHECK: declare double @strtod(i8* readonly, i8** nocapture) [[G0]]
+declare double @strtod(i8*, i8**)
+
+; CHECK: declare float @strtof(i8* readonly, i8** nocapture) [[G0]]
+declare float @strtof(i8*, i8**)
+
+; CHECK: declare i8* @strtok(i8*, i8* nocapture readonly) [[G0]]
+declare i8* @strtok(i8*, i8*)
+
+; CHECK: declare i8* @strtok_r(i8*, i8* nocapture readonly, i8**) [[G0]]
+declare i8* @strtok_r(i8*, i8*, i8**)
+
+; CHECK: declare i64 @strtol(i8* readonly, i8** nocapture, i32) [[G0]]
+declare i64 @strtol(i8*, i8**, i32)
+
+; CHECK: declare x86_fp80 @strtold(i8* readonly, i8** nocapture) [[G0]]
+declare x86_fp80 @strtold(i8*, i8**)
+
+; CHECK: declare i64 @strtoll(i8* readonly, i8** nocapture, i32) [[G0]]
+declare i64 @strtoll(i8*, i8**, i32)
+
+; CHECK: declare i64 @strtoul(i8* readonly, i8** nocapture, i32) [[G0]]
+declare i64 @strtoul(i8*, i8**, i32)
+
+; CHECK: declare i64 @strtoull(i8* readonly, i8** nocapture, i32) [[G0]]
+declare i64 @strtoull(i8*, i8**, i32)
+
+; CHECK: declare i64 @strxfrm(i8* nocapture, i8* nocapture readonly, i64) [[G0]]
+declare i64 @strxfrm(i8*, i8*, i64)
+
+; CHECK: declare i32 @system(i8* nocapture readonly)
+declare i32 @system(i8*)
+
+; CHECK: declare double @tan(double)
+declare double @tan(double)
+
+; CHECK: declare float @tanf(float)
+declare float @tanf(float)
+
+; CHECK: declare double @tanh(double)
+declare double @tanh(double)
+
+; CHECK: declare float @tanhf(float)
+declare float @tanhf(float)
+
+; CHECK: declare x86_fp80 @tanhl(x86_fp80)
+declare x86_fp80 @tanhl(x86_fp80)
+
+; CHECK: declare x86_fp80 @tanl(x86_fp80)
+declare x86_fp80 @tanl(x86_fp80)
+
+; CHECK: declare i64 @times(%opaque* nocapture) [[G0]]
+declare i64 @times(%opaque*)
+
+; CHECK: declare noalias %opaque* @tmpfile() [[G0]]
+declare %opaque* @tmpfile()
+
+; CHECK-LINUX: declare noalias %opaque* @tmpfile64() [[G0]]
+declare %opaque* @tmpfile64()
+
+; CHECK: declare i32 @toascii(i32)
+declare i32 @toascii(i32)
+
+; CHECK: declare double @trunc(double)
+declare double @trunc(double)
+
+; CHECK: declare float @truncf(float)
+declare float @truncf(float)
+
+; CHECK: declare x86_fp80 @truncl(x86_fp80)
+declare x86_fp80 @truncl(x86_fp80)
+
+; CHECK: declare i32 @uname(%opaque* nocapture) [[G0]]
+declare i32 @uname(%opaque*)
+
+; CHECK: declare i32 @ungetc(i32, %opaque* nocapture) [[G0]]
+declare i32 @ungetc(i32, %opaque*)
+
+; CHECK: declare i32 @unlink(i8* nocapture readonly) [[G0]]
+declare i32 @unlink(i8*)
+
+; CHECK: declare i32 @unsetenv(i8* nocapture readonly) [[G0]]
+declare i32 @unsetenv(i8*)
+
+; CHECK: declare i32 @utime(i8* nocapture readonly, %opaque* nocapture readonly) [[G0]]
+declare i32 @utime(i8*, %opaque*)
+
+; CHECK: declare i32 @utimes(i8* nocapture readonly, %opaque* nocapture readonly) [[G0]]
+declare i32 @utimes(i8*, %opaque*)
+
+; CHECK: declare noalias i8* @valloc(i64) [[G0]]
+declare i8* @valloc(i64)
+
+; CHECK: declare i32 @vfprintf(%opaque* nocapture, i8* nocapture readonly, %opaque*) [[G0]]
+declare i32 @vfprintf(%opaque*, i8*, %opaque*)
+
+; CHECK: declare i32 @vfscanf(%opaque* nocapture, i8* nocapture readonly, %opaque*) [[G0]]
+declare i32 @vfscanf(%opaque*, i8*, %opaque*)
+
+; CHECK: declare i32 @vprintf(i8* nocapture readonly, %opaque*) [[G0]]
+declare i32 @vprintf(i8*, %opaque*)
+
+; CHECK: declare i32 @vscanf(i8* nocapture readonly, %opaque*) [[G0]]
+declare i32 @vscanf(i8*, %opaque*)
+
+; CHECK: declare i32 @vsnprintf(i8* nocapture, i64, i8* nocapture readonly, %opaque*) [[G0]]
+declare i32 @vsnprintf(i8*, i64, i8*, %opaque*)
+
+; CHECK: declare i32 @vsprintf(i8* nocapture, i8* nocapture readonly, %opaque*) [[G0]]
+declare i32 @vsprintf(i8*, i8*, %opaque*)
+
+; CHECK: declare i32 @vsscanf(i8* nocapture readonly, i8* nocapture readonly, %opaque*) [[G0]]
+declare i32 @vsscanf(i8*, i8*, %opaque*)
+
+; CHECK: declare i64 @write(i32, i8* nocapture readonly, i64)
+declare i64 @write(i32, i8*, i64)
+
+
+; memset_pattern16 isn't available everywhere.
+; CHECK-DARWIN: declare void @memset_pattern16(i8* nocapture, i8* nocapture readonly, i64) [[G2:#[0-9]+]]
declare void @memset_pattern16(i8*, i8*, i64)
-; CHECK: declare void @memset_pattern16(i8*, i8*, i64)
-; CHECK-POSIX: declare void @memset_pattern16(i8*, i8* readonly, i64) [[G2:#[0-9]+]]
-declare i32 @gettimeofday(i8*, i8*)
-; CHECK-POSIX: declare i32 @gettimeofday(i8* nocapture, i8* nocapture) [[G0:#[0-9]+]]
; CHECK: attributes [[G0]] = { nounwind }
; CHECK: attributes [[G1]] = { nounwind readonly }
-; CHECK-POSIX: attributes [[G0]] = { nounwind }
-; CHECK-POSIX: attributes [[G2]] = { argmemonly }
+; CHECK-DARWIN: attributes [[G2]] = { argmemonly }
diff --git a/test/Transforms/InferFunctionAttrs/no-proto.ll b/test/Transforms/InferFunctionAttrs/no-proto.ll
new file mode 100644
index 000000000000..256f5c356b81
--- /dev/null
+++ b/test/Transforms/InferFunctionAttrs/no-proto.ll
@@ -0,0 +1,850 @@
+; RUN: opt < %s -mtriple=x86_64-unknown-linux -inferattrs -S | FileCheck %s
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -inferattrs -S | FileCheck %s
+
+; Check that we don't modify libc functions with invalid prototypes.
+
+; CHECK: declare void @__cospi(...)
+declare void @__cospi(...)
+
+; CHECK: declare void @__cospif(...)
+declare void @__cospif(...)
+
+; CHECK: declare void @__sinpi(...)
+declare void @__sinpi(...)
+
+; CHECK: declare void @__sinpif(...)
+declare void @__sinpif(...)
+
+; CHECK: declare void @abs(...)
+declare void @abs(...)
+
+; CHECK: declare void @access(...)
+declare void @access(...)
+
+; CHECK: declare void @acos(...)
+declare void @acos(...)
+
+; CHECK: declare void @acosf(...)
+declare void @acosf(...)
+
+; CHECK: declare void @acosh(...)
+declare void @acosh(...)
+
+; CHECK: declare void @acoshf(...)
+declare void @acoshf(...)
+
+; CHECK: declare void @acoshl(...)
+declare void @acoshl(...)
+
+; CHECK: declare void @acosl(...)
+declare void @acosl(...)
+
+; CHECK: declare void @asin(...)
+declare void @asin(...)
+
+; CHECK: declare void @asinf(...)
+declare void @asinf(...)
+
+; CHECK: declare void @asinh(...)
+declare void @asinh(...)
+
+; CHECK: declare void @asinhf(...)
+declare void @asinhf(...)
+
+; CHECK: declare void @asinhl(...)
+declare void @asinhl(...)
+
+; CHECK: declare void @asinl(...)
+declare void @asinl(...)
+
+; CHECK: declare void @atan(...)
+declare void @atan(...)
+
+; CHECK: declare void @atan2(...)
+declare void @atan2(...)
+
+; CHECK: declare void @atan2f(...)
+declare void @atan2f(...)
+
+; CHECK: declare void @atan2l(...)
+declare void @atan2l(...)
+
+; CHECK: declare void @atanf(...)
+declare void @atanf(...)
+
+; CHECK: declare void @atanh(...)
+declare void @atanh(...)
+
+; CHECK: declare void @atanhf(...)
+declare void @atanhf(...)
+
+; CHECK: declare void @atanhl(...)
+declare void @atanhl(...)
+
+; CHECK: declare void @atanl(...)
+declare void @atanl(...)
+
+; CHECK: declare void @atof(...)
+declare void @atof(...)
+
+; CHECK: declare void @atoi(...)
+declare void @atoi(...)
+
+; CHECK: declare void @atol(...)
+declare void @atol(...)
+
+; CHECK: declare void @atoll(...)
+declare void @atoll(...)
+
+; CHECK: declare void @bcmp(...)
+declare void @bcmp(...)
+
+; CHECK: declare void @bcopy(...)
+declare void @bcopy(...)
+
+; CHECK: declare void @bzero(...)
+declare void @bzero(...)
+
+; CHECK: declare void @calloc(...)
+declare void @calloc(...)
+
+; CHECK: declare void @cbrt(...)
+declare void @cbrt(...)
+
+; CHECK: declare void @cbrtf(...)
+declare void @cbrtf(...)
+
+; CHECK: declare void @cbrtl(...)
+declare void @cbrtl(...)
+
+; CHECK: declare void @ceil(...)
+declare void @ceil(...)
+
+; CHECK: declare void @ceilf(...)
+declare void @ceilf(...)
+
+; CHECK: declare void @ceill(...)
+declare void @ceill(...)
+
+; CHECK: declare void @chmod(...)
+declare void @chmod(...)
+
+; CHECK: declare void @chown(...)
+declare void @chown(...)
+
+; CHECK: declare void @clearerr(...)
+declare void @clearerr(...)
+
+; CHECK: declare void @closedir(...)
+declare void @closedir(...)
+
+; CHECK: declare void @copysign(...)
+declare void @copysign(...)
+
+; CHECK: declare void @copysignf(...)
+declare void @copysignf(...)
+
+; CHECK: declare void @copysignl(...)
+declare void @copysignl(...)
+
+; CHECK: declare void @cos(...)
+declare void @cos(...)
+
+; CHECK: declare void @cosf(...)
+declare void @cosf(...)
+
+; CHECK: declare void @cosh(...)
+declare void @cosh(...)
+
+; CHECK: declare void @coshf(...)
+declare void @coshf(...)
+
+; CHECK: declare void @coshl(...)
+declare void @coshl(...)
+
+; CHECK: declare void @cosl(...)
+declare void @cosl(...)
+
+; CHECK: declare void @ctermid(...)
+declare void @ctermid(...)
+
+; CHECK: declare void @exp(...)
+declare void @exp(...)
+
+; CHECK: declare void @exp2(...)
+declare void @exp2(...)
+
+; CHECK: declare void @exp2f(...)
+declare void @exp2f(...)
+
+; CHECK: declare void @exp2l(...)
+declare void @exp2l(...)
+
+; CHECK: declare void @expf(...)
+declare void @expf(...)
+
+; CHECK: declare void @expl(...)
+declare void @expl(...)
+
+; CHECK: declare void @expm1(...)
+declare void @expm1(...)
+
+; CHECK: declare void @expm1f(...)
+declare void @expm1f(...)
+
+; CHECK: declare void @expm1l(...)
+declare void @expm1l(...)
+
+; CHECK: declare void @fabs(...)
+declare void @fabs(...)
+
+; CHECK: declare void @fabsf(...)
+declare void @fabsf(...)
+
+; CHECK: declare void @fabsl(...)
+declare void @fabsl(...)
+
+; CHECK: declare void @fclose(...)
+declare void @fclose(...)
+
+; CHECK: declare void @fdopen(...)
+declare void @fdopen(...)
+
+; CHECK: declare void @feof(...)
+declare void @feof(...)
+
+; CHECK: declare void @ferror(...)
+declare void @ferror(...)
+
+; CHECK: declare void @fflush(...)
+declare void @fflush(...)
+
+; CHECK: declare void @ffs(...)
+declare void @ffs(...)
+
+; CHECK: declare void @ffsl(...)
+declare void @ffsl(...)
+
+; CHECK: declare void @ffsll(...)
+declare void @ffsll(...)
+
+; CHECK: declare void @fgetc(...)
+declare void @fgetc(...)
+
+; CHECK: declare void @fgetpos(...)
+declare void @fgetpos(...)
+
+; CHECK: declare void @fgets(...)
+declare void @fgets(...)
+
+; CHECK: declare void @fileno(...)
+declare void @fileno(...)
+
+; CHECK: declare void @flockfile(...)
+declare void @flockfile(...)
+
+; CHECK: declare void @floor(...)
+declare void @floor(...)
+
+; CHECK: declare void @floorf(...)
+declare void @floorf(...)
+
+; CHECK: declare void @floorl(...)
+declare void @floorl(...)
+
+; CHECK: declare void @fls(...)
+declare void @fls(...)
+
+; CHECK: declare void @flsl(...)
+declare void @flsl(...)
+
+; CHECK: declare void @flsll(...)
+declare void @flsll(...)
+
+; CHECK: declare void @fmax(...)
+declare void @fmax(...)
+
+; CHECK: declare void @fmaxf(...)
+declare void @fmaxf(...)
+
+; CHECK: declare void @fmaxl(...)
+declare void @fmaxl(...)
+
+; CHECK: declare void @fmin(...)
+declare void @fmin(...)
+
+; CHECK: declare void @fminf(...)
+declare void @fminf(...)
+
+; CHECK: declare void @fminl(...)
+declare void @fminl(...)
+
+; CHECK: declare void @fmod(...)
+declare void @fmod(...)
+
+; CHECK: declare void @fmodf(...)
+declare void @fmodf(...)
+
+; CHECK: declare void @fmodl(...)
+declare void @fmodl(...)
+
+; CHECK: declare void @fopen(...)
+declare void @fopen(...)
+
+; CHECK: declare void @fprintf(...)
+declare void @fprintf(...)
+
+; CHECK: declare void @fputc(...)
+declare void @fputc(...)
+
+; CHECK: declare void @fputs(...)
+declare void @fputs(...)
+
+; CHECK: declare void @fread(...)
+declare void @fread(...)
+
+; CHECK: declare void @free(...)
+declare void @free(...)
+
+; CHECK: declare void @frexp(...)
+declare void @frexp(...)
+
+; CHECK: declare void @frexpf(...)
+declare void @frexpf(...)
+
+; CHECK: declare void @frexpl(...)
+declare void @frexpl(...)
+
+; CHECK: declare void @fscanf(...)
+declare void @fscanf(...)
+
+; CHECK: declare void @fseek(...)
+declare void @fseek(...)
+
+; CHECK: declare void @fseeko(...)
+declare void @fseeko(...)
+
+; CHECK: declare void @fseeko64(...)
+declare void @fseeko64(...)
+
+; CHECK: declare void @fsetpos(...)
+declare void @fsetpos(...)
+
+; CHECK: declare void @fstat(...)
+declare void @fstat(...)
+
+; CHECK: declare void @fstat64(...)
+declare void @fstat64(...)
+
+; CHECK: declare void @fstatvfs(...)
+declare void @fstatvfs(...)
+
+; CHECK: declare void @fstatvfs64(...)
+declare void @fstatvfs64(...)
+
+; CHECK: declare void @ftell(...)
+declare void @ftell(...)
+
+; CHECK: declare void @ftello(...)
+declare void @ftello(...)
+
+; CHECK: declare void @ftello64(...)
+declare void @ftello64(...)
+
+; CHECK: declare void @ftrylockfile(...)
+declare void @ftrylockfile(...)
+
+; CHECK: declare void @funlockfile(...)
+declare void @funlockfile(...)
+
+; CHECK: declare void @fwrite(...)
+declare void @fwrite(...)
+
+; CHECK: declare void @getc(...)
+declare void @getc(...)
+
+; CHECK: declare void @getc_unlocked(...)
+declare void @getc_unlocked(...)
+
+; CHECK: declare void @getchar(...)
+declare void @getchar(...)
+
+; CHECK: declare void @getenv(...)
+declare void @getenv(...)
+
+; CHECK: declare void @getitimer(...)
+declare void @getitimer(...)
+
+; CHECK: declare void @getlogin_r(...)
+declare void @getlogin_r(...)
+
+; CHECK: declare void @getpwnam(...)
+declare void @getpwnam(...)
+
+; CHECK: declare void @gets(...)
+declare void @gets(...)
+
+; CHECK: declare void @gettimeofday(...)
+declare void @gettimeofday(...)
+
+; CHECK: declare void @isascii(...)
+declare void @isascii(...)
+
+; CHECK: declare void @isdigit(...)
+declare void @isdigit(...)
+
+; CHECK: declare void @labs(...)
+declare void @labs(...)
+
+; CHECK: declare void @lchown(...)
+declare void @lchown(...)
+
+; CHECK: declare void @ldexp(...)
+declare void @ldexp(...)
+
+; CHECK: declare void @ldexpf(...)
+declare void @ldexpf(...)
+
+; CHECK: declare void @ldexpl(...)
+declare void @ldexpl(...)
+
+; CHECK: declare void @llabs(...)
+declare void @llabs(...)
+
+; CHECK: declare void @log(...)
+declare void @log(...)
+
+; CHECK: declare void @log10(...)
+declare void @log10(...)
+
+; CHECK: declare void @log10f(...)
+declare void @log10f(...)
+
+; CHECK: declare void @log10l(...)
+declare void @log10l(...)
+
+; CHECK: declare void @log1p(...)
+declare void @log1p(...)
+
+; CHECK: declare void @log1pf(...)
+declare void @log1pf(...)
+
+; CHECK: declare void @log1pl(...)
+declare void @log1pl(...)
+
+; CHECK: declare void @log2(...)
+declare void @log2(...)
+
+; CHECK: declare void @log2f(...)
+declare void @log2f(...)
+
+; CHECK: declare void @log2l(...)
+declare void @log2l(...)
+
+; CHECK: declare void @logb(...)
+declare void @logb(...)
+
+; CHECK: declare void @logbf(...)
+declare void @logbf(...)
+
+; CHECK: declare void @logbl(...)
+declare void @logbl(...)
+
+; CHECK: declare void @logf(...)
+declare void @logf(...)
+
+; CHECK: declare void @logl(...)
+declare void @logl(...)
+
+; CHECK: declare void @lstat(...)
+declare void @lstat(...)
+
+; CHECK: declare void @lstat64(...)
+declare void @lstat64(...)
+
+; CHECK: declare void @malloc(...)
+declare void @malloc(...)
+
+; CHECK: declare void @memalign(...)
+declare void @memalign(...)
+
+; CHECK: declare void @memccpy(...)
+declare void @memccpy(...)
+
+; CHECK: declare void @memchr(...)
+declare void @memchr(...)
+
+; CHECK: declare void @memcmp(...)
+declare void @memcmp(...)
+
+; CHECK: declare void @memcpy(...)
+declare void @memcpy(...)
+
+; CHECK: declare void @memmove(...)
+declare void @memmove(...)
+
+; CHECK: declare void @memset(...)
+declare void @memset(...)
+
+; CHECK: declare void @memset_pattern16(...)
+declare void @memset_pattern16(...)
+
+; CHECK: declare void @mkdir(...)
+declare void @mkdir(...)
+
+; CHECK: declare void @mktime(...)
+declare void @mktime(...)
+
+; CHECK: declare void @modf(...)
+declare void @modf(...)
+
+; CHECK: declare void @modff(...)
+declare void @modff(...)
+
+; CHECK: declare void @modfl(...)
+declare void @modfl(...)
+
+; CHECK: declare void @nearbyint(...)
+declare void @nearbyint(...)
+
+; CHECK: declare void @nearbyintf(...)
+declare void @nearbyintf(...)
+
+; CHECK: declare void @nearbyintl(...)
+declare void @nearbyintl(...)
+
+; CHECK: declare void @open(...)
+declare void @open(...)
+
+; CHECK: declare void @open64(...)
+declare void @open64(...)
+
+; CHECK: declare void @opendir(...)
+declare void @opendir(...)
+
+; CHECK: declare void @pclose(...)
+declare void @pclose(...)
+
+; CHECK: declare void @perror(...)
+declare void @perror(...)
+
+; CHECK: declare void @popen(...)
+declare void @popen(...)
+
+; CHECK: declare void @posix_memalign(...)
+declare void @posix_memalign(...)
+
+; CHECK: declare void @pow(...)
+declare void @pow(...)
+
+; CHECK: declare void @powf(...)
+declare void @powf(...)
+
+; CHECK: declare void @powl(...)
+declare void @powl(...)
+
+; CHECK: declare void @pread(...)
+declare void @pread(...)
+
+; CHECK: declare void @printf(...)
+declare void @printf(...)
+
+; CHECK: declare void @putc(...)
+declare void @putc(...)
+
+; CHECK: declare void @putchar(...)
+declare void @putchar(...)
+
+; CHECK: declare void @puts(...)
+declare void @puts(...)
+
+; CHECK: declare void @pwrite(...)
+declare void @pwrite(...)
+
+; CHECK: declare void @qsort(...)
+declare void @qsort(...)
+
+; CHECK: declare void @read(...)
+declare void @read(...)
+
+; CHECK: declare void @readlink(...)
+declare void @readlink(...)
+
+; CHECK: declare void @realloc(...)
+declare void @realloc(...)
+
+; CHECK: declare void @reallocf(...)
+declare void @reallocf(...)
+
+; CHECK: declare void @realpath(...)
+declare void @realpath(...)
+
+; CHECK: declare void @remove(...)
+declare void @remove(...)
+
+; CHECK: declare void @rename(...)
+declare void @rename(...)
+
+; CHECK: declare void @rewind(...)
+declare void @rewind(...)
+
+; CHECK: declare void @rint(...)
+declare void @rint(...)
+
+; CHECK: declare void @rintf(...)
+declare void @rintf(...)
+
+; CHECK: declare void @rintl(...)
+declare void @rintl(...)
+
+; CHECK: declare void @rmdir(...)
+declare void @rmdir(...)
+
+; CHECK: declare void @round(...)
+declare void @round(...)
+
+; CHECK: declare void @roundf(...)
+declare void @roundf(...)
+
+; CHECK: declare void @roundl(...)
+declare void @roundl(...)
+
+; CHECK: declare void @scanf(...)
+declare void @scanf(...)
+
+; CHECK: declare void @setbuf(...)
+declare void @setbuf(...)
+
+; CHECK: declare void @setitimer(...)
+declare void @setitimer(...)
+
+; CHECK: declare void @setvbuf(...)
+declare void @setvbuf(...)
+
+; CHECK: declare void @sin(...)
+declare void @sin(...)
+
+; CHECK: declare void @sinf(...)
+declare void @sinf(...)
+
+; CHECK: declare void @sinh(...)
+declare void @sinh(...)
+
+; CHECK: declare void @sinhf(...)
+declare void @sinhf(...)
+
+; CHECK: declare void @sinhl(...)
+declare void @sinhl(...)
+
+; CHECK: declare void @sinl(...)
+declare void @sinl(...)
+
+; CHECK: declare void @snprintf(...)
+declare void @snprintf(...)
+
+; CHECK: declare void @sprintf(...)
+declare void @sprintf(...)
+
+; CHECK: declare void @sqrt(...)
+declare void @sqrt(...)
+
+; CHECK: declare void @sqrtf(...)
+declare void @sqrtf(...)
+
+; CHECK: declare void @sqrtl(...)
+declare void @sqrtl(...)
+
+; CHECK: declare void @sscanf(...)
+declare void @sscanf(...)
+
+; CHECK: declare void @stat(...)
+declare void @stat(...)
+
+; CHECK: declare void @stat64(...)
+declare void @stat64(...)
+
+; CHECK: declare void @statvfs(...)
+declare void @statvfs(...)
+
+; CHECK: declare void @statvfs64(...)
+declare void @statvfs64(...)
+
+; CHECK: declare void @stpcpy(...)
+declare void @stpcpy(...)
+
+; CHECK: declare void @stpncpy(...)
+declare void @stpncpy(...)
+
+; CHECK: declare void @strcasecmp(...)
+declare void @strcasecmp(...)
+
+; CHECK: declare void @strcat(...)
+declare void @strcat(...)
+
+; CHECK: declare void @strchr(...)
+declare void @strchr(...)
+
+; CHECK: declare void @strcmp(...)
+declare void @strcmp(...)
+
+; CHECK: declare void @strcoll(...)
+declare void @strcoll(...)
+
+; CHECK: declare void @strcpy(...)
+declare void @strcpy(...)
+
+; CHECK: declare void @strcspn(...)
+declare void @strcspn(...)
+
+; CHECK: declare void @strdup(...)
+declare void @strdup(...)
+
+; CHECK: declare void @strlen(...)
+declare void @strlen(...)
+
+; CHECK: declare void @strncasecmp(...)
+declare void @strncasecmp(...)
+
+; CHECK: declare void @strncat(...)
+declare void @strncat(...)
+
+; CHECK: declare void @strncmp(...)
+declare void @strncmp(...)
+
+; CHECK: declare void @strncpy(...)
+declare void @strncpy(...)
+
+; CHECK: declare void @strndup(...)
+declare void @strndup(...)
+
+; CHECK: declare void @strnlen(...)
+declare void @strnlen(...)
+
+; CHECK: declare void @strpbrk(...)
+declare void @strpbrk(...)
+
+; CHECK: declare void @strrchr(...)
+declare void @strrchr(...)
+
+; CHECK: declare void @strspn(...)
+declare void @strspn(...)
+
+; CHECK: declare void @strstr(...)
+declare void @strstr(...)
+
+; CHECK: declare void @strtod(...)
+declare void @strtod(...)
+
+; CHECK: declare void @strtof(...)
+declare void @strtof(...)
+
+; CHECK: declare void @strtok(...)
+declare void @strtok(...)
+
+; CHECK: declare void @strtok_r(...)
+declare void @strtok_r(...)
+
+; CHECK: declare void @strtol(...)
+declare void @strtol(...)
+
+; CHECK: declare void @strtold(...)
+declare void @strtold(...)
+
+; CHECK: declare void @strtoll(...)
+declare void @strtoll(...)
+
+; CHECK: declare void @strtoul(...)
+declare void @strtoul(...)
+
+; CHECK: declare void @strtoull(...)
+declare void @strtoull(...)
+
+; CHECK: declare void @strxfrm(...)
+declare void @strxfrm(...)
+
+; CHECK: declare void @system(...)
+declare void @system(...)
+
+; CHECK: declare void @tan(...)
+declare void @tan(...)
+
+; CHECK: declare void @tanf(...)
+declare void @tanf(...)
+
+; CHECK: declare void @tanh(...)
+declare void @tanh(...)
+
+; CHECK: declare void @tanhf(...)
+declare void @tanhf(...)
+
+; CHECK: declare void @tanhl(...)
+declare void @tanhl(...)
+
+; CHECK: declare void @tanl(...)
+declare void @tanl(...)
+
+; CHECK: declare void @times(...)
+declare void @times(...)
+
+; CHECK: declare void @tmpfile(...)
+declare void @tmpfile(...)
+
+; CHECK: declare void @tmpfile64(...)
+declare void @tmpfile64(...)
+
+; CHECK: declare void @toascii(...)
+declare void @toascii(...)
+
+; CHECK: declare void @trunc(...)
+declare void @trunc(...)
+
+; CHECK: declare void @truncf(...)
+declare void @truncf(...)
+
+; CHECK: declare void @truncl(...)
+declare void @truncl(...)
+
+; CHECK: declare void @uname(...)
+declare void @uname(...)
+
+; CHECK: declare void @ungetc(...)
+declare void @ungetc(...)
+
+; CHECK: declare void @unlink(...)
+declare void @unlink(...)
+
+; CHECK: declare void @unsetenv(...)
+declare void @unsetenv(...)
+
+; CHECK: declare void @utime(...)
+declare void @utime(...)
+
+; CHECK: declare void @utimes(...)
+declare void @utimes(...)
+
+; CHECK: declare void @valloc(...)
+declare void @valloc(...)
+
+; CHECK: declare void @vfprintf(...)
+declare void @vfprintf(...)
+
+; CHECK: declare void @vfscanf(...)
+declare void @vfscanf(...)
+
+; CHECK: declare void @vprintf(...)
+declare void @vprintf(...)
+
+; CHECK: declare void @vscanf(...)
+declare void @vscanf(...)
+
+; CHECK: declare void @vsnprintf(...)
+declare void @vsnprintf(...)
+
+; CHECK: declare void @vsprintf(...)
+declare void @vsprintf(...)
+
+; CHECK: declare void @vsscanf(...)
+declare void @vsscanf(...)
+
+; CHECK: declare void @write(...)
+declare void @write(...)
diff --git a/test/Transforms/Inline/PR4909.ll b/test/Transforms/Inline/PR4909.ll
index 24545f9aa883..86b005c8a152 100644
--- a/test/Transforms/Inline/PR4909.ll
+++ b/test/Transforms/Inline/PR4909.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -partial-inliner -disable-output
+; RUN: opt < %s -passes=partial-inliner -disable-output
define i32 @f() {
entry:
diff --git a/test/Transforms/Inline/alloca-dbgdeclare-merge.ll b/test/Transforms/Inline/alloca-dbgdeclare-merge.ll
index 5314f0b8397d..35e02d6eb419 100644
--- a/test/Transforms/Inline/alloca-dbgdeclare-merge.ll
+++ b/test/Transforms/Inline/alloca-dbgdeclare-merge.ll
@@ -70,15 +70,14 @@ attributes #2 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fp
!llvm.module.flags = !{!9, !10}
!llvm.ident = !{!11}
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 248518) (llvm/trunk 248512)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 248518) (llvm/trunk 248512)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
!1 = !DIFile(filename: "../1.c", directory: "/code/llvm-git/build")
!2 = !{}
-!3 = !{!4, !7, !8}
-!4 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, variables: !2)
+!4 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, unit: !0, variables: !2)
!5 = !DISubroutineType(types: !6)
!6 = !{null}
-!7 = distinct !DISubprogram(name: "g", scope: !1, file: !1, line: 6, type: !5, isLocal: false, isDefinition: true, scopeLine: 6, isOptimized: false, variables: !2)
-!8 = distinct !DISubprogram(name: "h", scope: !1, file: !1, line: 11, type: !5, isLocal: false, isDefinition: true, scopeLine: 11, isOptimized: false, variables: !2)
+!7 = distinct !DISubprogram(name: "g", scope: !1, file: !1, line: 6, type: !5, isLocal: false, isDefinition: true, scopeLine: 6, isOptimized: false, unit: !0, variables: !2)
+!8 = distinct !DISubprogram(name: "h", scope: !1, file: !1, line: 11, type: !5, isLocal: false, isDefinition: true, scopeLine: 11, isOptimized: false, unit: !0, variables: !2)
!9 = !{i32 2, !"Dwarf Version", i32 4}
!10 = !{i32 2, !"Debug Info Version", i32 3}
!11 = !{!"clang version 3.8.0 (trunk 248518) (llvm/trunk 248512)"}
diff --git a/test/Transforms/Inline/alloca-dbgdeclare.ll b/test/Transforms/Inline/alloca-dbgdeclare.ll
index 39575311b4b4..d06a9299d92b 100644
--- a/test/Transforms/Inline/alloca-dbgdeclare.ll
+++ b/test/Transforms/Inline/alloca-dbgdeclare.ll
@@ -82,34 +82,33 @@ attributes #3 = { noreturn nounwind }
!llvm.module.flags = !{!28, !29}
!llvm.ident = !{!30}
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.7.0 (trunk 227480) (llvm/trunk 227517)", isOptimized: true, emissionKind: 1, file: !1, enums: !2, retainedTypes: !3, subprograms: !14, globals: !25, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.7.0 (trunk 227480) (llvm/trunk 227517)", isOptimized: true, emissionKind: FullDebug, file: !1, enums: !2, retainedTypes: !3, globals: !25, imports: !2)
!1 = !DIFile(filename: "<stdin>", directory: "")
!2 = !{}
!3 = !{!4}
!4 = !DICompositeType(tag: DW_TAG_structure_type, name: "A", line: 1, size: 192, align: 64, file: !5, elements: !6, identifier: "_ZTS1A")
!5 = !DIFile(filename: "test.cpp", directory: "")
!6 = !{!7, !9}
-!7 = !DIDerivedType(tag: DW_TAG_member, name: "arg0", line: 2, size: 32, align: 32, file: !5, scope: !"_ZTS1A", baseType: !8)
+!7 = !DIDerivedType(tag: DW_TAG_member, name: "arg0", line: 2, size: 32, align: 32, file: !5, scope: !4, baseType: !8)
!8 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!9 = !DIDerivedType(tag: DW_TAG_member, name: "arg1", line: 3, size: 128, align: 64, offset: 64, file: !5, scope: !"_ZTS1A", baseType: !10)
+!9 = !DIDerivedType(tag: DW_TAG_member, name: "arg1", line: 3, size: 128, align: 64, offset: 64, file: !5, scope: !4, baseType: !10)
!10 = !DICompositeType(tag: DW_TAG_array_type, size: 128, align: 64, baseType: !11, elements: !12)
!11 = !DIBasicType(tag: DW_TAG_base_type, name: "double", size: 64, align: 64, encoding: DW_ATE_float)
!12 = !{!13}
!13 = !DISubrange(count: 2)
-!14 = !{!15, !21, !24}
-!15 = distinct !DISubprogram(name: "fn3", linkageName: "_Z3fn31A", line: 6, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 6, file: !5, scope: !16, type: !17, variables: !19)
+!15 = distinct !DISubprogram(name: "fn3", linkageName: "_Z3fn31A", line: 6, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 6, file: !5, scope: !16, type: !17, variables: !19)
!16 = !DIFile(filename: "test.cpp", directory: "")
!17 = !DISubroutineType(types: !18)
-!18 = !{null, !"_ZTS1A"}
+!18 = !{null, !4}
!19 = !{!20}
-!20 = !DILocalVariable(name: "p1", line: 6, arg: 1, scope: !15, file: !16, type: !"_ZTS1A")
-!21 = distinct !DISubprogram(name: "fn4", linkageName: "_Z3fn4v", line: 11, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 11, file: !5, scope: !16, type: !22, variables: !2)
+!20 = !DILocalVariable(name: "p1", line: 6, arg: 1, scope: !15, file: !16, type: !4)
+!21 = distinct !DISubprogram(name: "fn4", linkageName: "_Z3fn4v", line: 11, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 11, file: !5, scope: !16, type: !22, variables: !2)
!22 = !DISubroutineType(types: !23)
!23 = !{null}
-!24 = distinct !DISubprogram(name: "fn5", linkageName: "_Z3fn5v", line: 13, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 13, file: !5, scope: !16, type: !22, variables: !2)
+!24 = distinct !DISubprogram(name: "fn5", linkageName: "_Z3fn5v", line: 13, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 13, file: !5, scope: !16, type: !22, variables: !2)
!25 = !{!26, !27}
-!26 = !DIGlobalVariable(name: "a", line: 4, isLocal: false, isDefinition: true, scope: null, file: !16, type: !"_ZTS1A", variable: %struct.A* @a)
-!27 = !DIGlobalVariable(name: "b", line: 4, isLocal: false, isDefinition: true, scope: null, file: !16, type: !"_ZTS1A", variable: %struct.A* @b)
+!26 = !DIGlobalVariable(name: "a", line: 4, isLocal: false, isDefinition: true, scope: null, file: !16, type: !4, variable: %struct.A* @a)
+!27 = !DIGlobalVariable(name: "b", line: 4, isLocal: false, isDefinition: true, scope: null, file: !16, type: !4, variable: %struct.A* @b)
!28 = !{i32 2, !"Dwarf Version", i32 4}
!29 = !{i32 2, !"Debug Info Version", i32 3}
!30 = !{!"clang version 3.7.0 (trunk 227480) (llvm/trunk 227517)"}
@@ -118,7 +117,7 @@ attributes #3 = { noreturn nounwind }
!33 = !DILocation(line: 7, scope: !34)
!34 = distinct !DILexicalBlock(line: 7, column: 0, file: !5, scope: !15)
!35 = !{!36, !37, i64 0}
-!36 = !{!"_ZTS1A", !37, i64 0, !38, i64 8}
+!36 = !{!4, !37, i64 0, !38, i64 8}
!37 = !{!"int", !38, i64 0}
!38 = !{!"omnipotent char", !39, i64 0}
!39 = !{!"Simple C/C++ TBAA"}
@@ -128,7 +127,7 @@ attributes #3 = { noreturn nounwind }
!43 = !{!37, !37, i64 0}
!44 = !{!38, !38, i64 0}
!45 = !DILocation(line: 9, scope: !15)
-!46 = !DILocalVariable(name: "p1", line: 6, arg: 1, scope: !15, file: !16, type: !"_ZTS1A")
+!46 = !DILocalVariable(name: "p1", line: 6, arg: 1, scope: !15, file: !16, type: !4)
!47 = distinct !DILocation(line: 11, scope: !21)
!48 = !DIExpression(DW_OP_bit_piece, 32, 160)
!49 = !DILocation(line: 6, scope: !15, inlinedAt: !47)
diff --git a/test/Transforms/Inline/alloca_test.ll b/test/Transforms/Inline/alloca_test.ll
index 8464259ce1f8..c9d8f11cd845 100644
--- a/test/Transforms/Inline/alloca_test.ll
+++ b/test/Transforms/Inline/alloca_test.ll
@@ -1,23 +1,55 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; This test ensures that alloca instructions in the entry block for an inlined
; function are moved to the top of the function they are inlined into.
;
; RUN: opt -S -inline < %s | FileCheck %s
define i32 @func(i32 %i) {
- %X = alloca i32 ; <i32*> [#uses=1]
- store i32 %i, i32* %X
- ret i32 %i
+ %X = alloca i32
+ store i32 %i, i32* %X
+ ret i32 %i
}
declare void @bar()
define i32 @main(i32 %argc) {
+; CHECK-LABEL: @main(
+; CHECK-NEXT: Entry:
+; CHECK-NEXT: [[X_I:%.*]] = alloca i32
+;
Entry:
-; CHECK: Entry
-; CHECK-NEXT: alloca
- call void @bar( )
- %X = call i32 @func( i32 7 ) ; <i32> [#uses=1]
- %Y = add i32 %X, %argc ; <i32> [#uses=1]
- ret i32 %Y
+ call void @bar( )
+ %X = call i32 @func( i32 7 )
+ %Y = add i32 %X, %argc
+ ret i32 %Y
+}
+
+; https://llvm.org/bugs/show_bug.cgi?id=27277
+; Don't assume that the size is a ConstantInt (an undef value is also a constant).
+
+define void @PR27277(i32 %p1) {
+; CHECK-LABEL: @PR27277(
+; CHECK-NEXT: [[VLA:%.*]] = alloca double, i32 %p1
+; CHECK-NEXT: call void @PR27277(i32 undef)
+; CHECK-NEXT: ret void
+;
+ %vla = alloca double, i32 %p1
+ call void @PR27277(i32 undef)
+ ret void
+}
+
+; Don't assume that the size is a ConstantInt (a ConstExpr is also a constant).
+
+@GV = common global i32* null
+
+define void @PR27277_part2(i32 %p1) {
+; CHECK-LABEL: @PR27277_part2(
+; CHECK-NEXT: [[VLA:%.*]] = alloca double, i32 %p1
+; CHECK-NEXT: call void @PR27277_part2(i32 ptrtoint (i32** @GV to i32))
+; CHECK-NEXT: ret void
+;
+ %vla = alloca double, i32 %p1
+ call void @PR27277_part2(i32 ptrtoint (i32** @GV to i32))
+ ret void
}
diff --git a/test/Transforms/Inline/array-alloca.ll b/test/Transforms/Inline/array-alloca.ll
new file mode 100644
index 000000000000..57aecebd8baf
--- /dev/null
+++ b/test/Transforms/Inline/array-alloca.ll
@@ -0,0 +1,36 @@
+; RUN: opt -inline -S < %s | FileCheck %s
+%struct.A = type { i32 }
+
+define void @callee1(i32 %M) {
+entry:
+ %vla = alloca i32, i32 %M, align 16
+ ret void
+}
+
+define void @callee2(i32 %M) {
+entry:
+ %vla = alloca %struct.A, i32 %M, align 16
+ ret void
+}
+
+define void @callee3(i128 %M) {
+entry:
+ %vla = alloca i32, i128 %M, align 16
+ ret void
+}
+
+; CHECK-LABEL: @caller
+define void @caller() #0 {
+entry:
+ call void @caller()
+; CHECK-NOT: call void @callee1
+ call void @callee1(i32 256)
+; CHECK: call void @callee2
+ call void @callee2(i32 4096)
+; CHECK: call void @callee3
+; This is to test that there is no overflow in computing allocated size
+; call void @callee3(i128 0x8000000000000000);
+ call void @callee3(i128 9223372036854775808);
+ ret void
+}
+
diff --git a/test/Transforms/Inline/attributes.ll b/test/Transforms/Inline/attributes.ll
index 0458fa23f795..c1c5e7c70c7d 100644
--- a/test/Transforms/Inline/attributes.ll
+++ b/test/Transforms/Inline/attributes.ll
@@ -17,6 +17,10 @@ define i32 @sanitize_memory_callee(i32 %i) sanitize_memory {
ret i32 %i
}
+define i32 @safestack_callee(i32 %i) safestack {
+ ret i32 %i
+}
+
define i32 @alwaysinline_callee(i32 %i) alwaysinline {
ret i32 %i
}
@@ -33,6 +37,10 @@ define i32 @alwaysinline_sanitize_memory_callee(i32 %i) alwaysinline sanitize_me
ret i32 %i
}
+define i32 @alwaysinline_safestack_callee(i32 %i) alwaysinline safestack {
+ ret i32 %i
+}
+
; Check that:
; * noattr callee is inlined into noattr caller,
@@ -111,6 +119,17 @@ define i32 @test_sanitize_thread(i32 %arg) sanitize_thread {
; CHECK-NEXT: ret i32
}
+define i32 @test_safestack(i32 %arg) safestack {
+ %x1 = call i32 @noattr_callee(i32 %arg)
+ %x2 = call i32 @safestack_callee(i32 %x1)
+ %x3 = call i32 @alwaysinline_callee(i32 %x2)
+ %x4 = call i32 @alwaysinline_safestack_callee(i32 %x3)
+ ret i32 %x4
+; CHECK-LABEL: @test_safestack(
+; CHECK-NEXT: @noattr_callee
+; CHECK-NEXT: ret i32
+}
+
; Check that a function doesn't get inlined if target-cpu strings don't match
; exactly.
define i32 @test_target_cpu_callee0(i32 %i) "target-cpu"="corei7" {
@@ -241,6 +260,49 @@ define i32 @test_no-implicit-float3(i32 %i) noimplicitfloat {
; CHECK-NEXT: ret i32
}
+; Check that no-jump-tables flag propagates from inlined callee to caller
+
+define i32 @no-use-jump-tables_callee0(i32 %i) {
+ ret i32 %i
+; CHECK: @no-use-jump-tables_callee0(i32 %i) {
+; CHECK-NEXT: ret i32
+}
+
+define i32 @no-use-jump-tables_callee1(i32 %i) "no-jump-tables"="true" {
+ ret i32 %i
+; CHECK: @no-use-jump-tables_callee1(i32 %i) [[NOUSEJUMPTABLES:#[0-9]+]] {
+; CHECK-NEXT: ret i32
+}
+
+define i32 @test_no-use-jump-tables0(i32 %i) {
+ %1 = call i32 @no-use-jump-tables_callee0(i32 %i)
+ ret i32 %1
+; CHECK: @test_no-use-jump-tables0(i32 %i) {
+; CHECK-NEXT: ret i32
+}
+
+define i32 @test_no-use-jump-tables1(i32 %i) {
+ %1 = call i32 @no-use-jump-tables_callee1(i32 %i)
+ ret i32 %1
+; CHECK: @test_no-use-jump-tables1(i32 %i) [[NOUSEJUMPTABLES]] {
+; CHECK-NEXT: ret i32
+}
+
+define i32 @test_no-use-jump-tables2(i32 %i) "no-jump-tables"="true" {
+ %1 = call i32 @no-use-jump-tables_callee0(i32 %i)
+ ret i32 %1
+; CHECK: @test_no-use-jump-tables2(i32 %i) [[NOUSEJUMPTABLES]] {
+; CHECK-NEXT: ret i32
+}
+
+define i32 @test_no-use-jump-tables3(i32 %i) "no-jump-tables"="true" {
+ %1 = call i32 @no-use-jump-tables_callee1(i32 %i)
+ ret i32 %1
+; CHECK: @test_no-use-jump-tables3(i32 %i) [[NOUSEJUMPTABLES]] {
+; CHECK-NEXT: ret i32
+}
+
; CHECK: attributes [[FPMAD_FALSE]] = { "less-precise-fpmad"="false" }
; CHECK: attributes [[FPMAD_TRUE]] = { "less-precise-fpmad"="true" }
; CHECK: attributes [[NOIMPLICITFLOAT]] = { noimplicitfloat }
+; CHECK: attributes [[NOUSEJUMPTABLES]] = { "no-jump-tables"="true" } \ No newline at end of file
diff --git a/test/Transforms/Inline/basictest.ll b/test/Transforms/Inline/basictest.ll
index 8f0b3eafaecb..5847e623831f 100644
--- a/test/Transforms/Inline/basictest.ll
+++ b/test/Transforms/Inline/basictest.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -inline -scalarrepl -S | FileCheck %s
+; RUN: opt < %s -inline -sroa -S | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
define i32 @test1f(i32 %i) {
diff --git a/test/Transforms/Inline/blockaddress.ll b/test/Transforms/Inline/blockaddress.ll
index 8eb307250330..22ad8821f069 100644
--- a/test/Transforms/Inline/blockaddress.ll
+++ b/test/Transforms/Inline/blockaddress.ll
@@ -26,3 +26,25 @@ entry:
call void @doit(i8** @ptr1, i32 %cond)
ret void
}
+
+; PR27233: We can inline @run into @init. Don't crash on it.
+;
+; CHECK-LABEL: define void @init
+; CHECK: store i8* blockaddress(@run, %bb)
+; CHECK-SAME: @run.bb
+define void @init() {
+entry:
+ call void @run()
+ ret void
+}
+
+define void @run() {
+entry:
+ store i8* blockaddress(@run, %bb), i8** getelementptr inbounds ([1 x i8*], [1 x i8*]* @run.bb, i64 0, i64 0), align 8
+ ret void
+
+bb:
+ unreachable
+}
+
+@run.bb = global [1 x i8*] zeroinitializer
diff --git a/test/Transforms/Inline/comdat-ipo.ll b/test/Transforms/Inline/comdat-ipo.ll
new file mode 100644
index 000000000000..8bdea0bdbe0d
--- /dev/null
+++ b/test/Transforms/Inline/comdat-ipo.ll
@@ -0,0 +1,19 @@
+; RUN: opt -inline -S < %s | FileCheck %s
+
+define i32 @caller() {
+; CHECK-LABEL: @caller(
+; CHECK-NEXT: %val2 = call i32 @linkonce_callee(i32 42)
+; CHECK-NEXT: ret i32 %val2
+
+ %val = call i32 @odr_callee()
+ %val2 = call i32 @linkonce_callee(i32 %val);
+ ret i32 %val2
+}
+
+define linkonce_odr i32 @odr_callee() {
+ ret i32 42
+}
+
+define linkonce i32 @linkonce_callee(i32 %val) {
+ ret i32 %val
+}
diff --git a/test/Transforms/Inline/crash2.ll b/test/Transforms/Inline/crash2.ll
index 4c0dfaea0364..e3a136010ee8 100644
--- a/test/Transforms/Inline/crash2.ll
+++ b/test/Transforms/Inline/crash2.ll
@@ -1,4 +1,4 @@
-; RUN: opt -inline -scalarrepl -max-cg-scc-iterations=1 -disable-output < %s
+; RUN: opt -inline -sroa -max-cg-scc-iterations=1 -disable-output < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.3"
diff --git a/test/Transforms/Inline/debug-info-duplicate-calls.ll b/test/Transforms/Inline/debug-info-duplicate-calls.ll
index 442ff325863c..c87e328f24e5 100644
--- a/test/Transforms/Inline/debug-info-duplicate-calls.ll
+++ b/test/Transforms/Inline/debug-info-duplicate-calls.ll
@@ -38,19 +38,19 @@
; CHECK-DAG: [[F3:![0-9]+]] = distinct !DISubprogram(name: "f3"
; CHECK-DAG: [[F4:![0-9]+]] = distinct !DISubprogram(name: "f4"
-; CHECK: [[fcs1_f4_f3cs1_f2]] = {{.*}}, scope: [[F2]], inlinedAt: [[fcs1_f4_f3cs1:![0-9]+]])
-; CHECK: [[fcs1_f4_f3cs1]] = {{.*}}, scope: [[F3]], inlinedAt: [[fcs1_f4:![0-9]+]])
-; CHECK: [[fcs1_f4]] = {{.*}}, scope: [[F4]], inlinedAt: [[fcs1:![0-9]+]])
-; CHECK: [[fcs1]] = {{.*}}, scope: [[F]])
-; CHECK: [[fcs1_f4_f3cs2_f2]] = {{.*}}, scope: [[F2]], inlinedAt: [[fcs1_f4_f3cs2:![0-9]+]])
-; CHECK: [[fcs1_f4_f3cs2]] = {{.*}}, scope: [[F3]], inlinedAt: [[fcs1_f4]])
-
-; CHECK: [[fcs2_f4_f3cs1_f2]] = {{.*}}, scope: [[F2]], inlinedAt: [[fcs2_f4_f3cs1:![0-9]+]])
-; CHECK: [[fcs2_f4_f3cs1]] = {{.*}}, scope: [[F3]], inlinedAt: [[fcs2_f4:![0-9]+]])
-; CHECK: [[fcs2_f4]] = {{.*}}, scope: [[F4]], inlinedAt: [[fcs2:![0-9]+]])
-; CHECK: [[fcs2]] = {{.*}}, scope: [[F]])
-; CHECK: [[fcs2_f4_f3cs2_f2]] = {{.*}}, scope: [[F2]], inlinedAt: [[fcs2_f4_f3cs2:![0-9]+]])
-; CHECK: [[fcs2_f4_f3cs2]] = {{.*}}, scope: [[F3]], inlinedAt: [[fcs2_f4]])
+; CHECK-DAG: [[fcs1_f4_f3cs1_f2]] = {{.*}}, scope: [[F2]], inlinedAt: [[fcs1_f4_f3cs1:![0-9]+]])
+; CHECK-DAG: [[fcs1_f4_f3cs1]] = {{.*}}, scope: [[F3]], inlinedAt: [[fcs1_f4:![0-9]+]])
+; CHECK-DAG: [[fcs1_f4]] = {{.*}}, scope: [[F4]], inlinedAt: [[fcs1:![0-9]+]])
+; CHECK-DAG: [[fcs1]] = {{.*}}, scope: [[F]])
+; CHECK-DAG: [[fcs1_f4_f3cs2_f2]] = {{.*}}, scope: [[F2]], inlinedAt: [[fcs1_f4_f3cs2:![0-9]+]])
+; CHECK-DAG: [[fcs1_f4_f3cs2]] = {{.*}}, scope: [[F3]], inlinedAt: [[fcs1_f4]])
+
+; CHECK-DAG: [[fcs2_f4_f3cs1_f2]] = {{.*}}, scope: [[F2]], inlinedAt: [[fcs2_f4_f3cs1:![0-9]+]])
+; CHECK-DAG: [[fcs2_f4_f3cs1]] = {{.*}}, scope: [[F3]], inlinedAt: [[fcs2_f4:![0-9]+]])
+; CHECK-DAG: [[fcs2_f4]] = {{.*}}, scope: [[F4]], inlinedAt: [[fcs2:![0-9]+]])
+; CHECK-DAG: [[fcs2]] = {{.*}}, scope: [[F]])
+; CHECK-DAG: [[fcs2_f4_f3cs2_f2]] = {{.*}}, scope: [[F2]], inlinedAt: [[fcs2_f4_f3cs2:![0-9]+]])
+; CHECK-DAG: [[fcs2_f4_f3cs2]] = {{.*}}, scope: [[F3]], inlinedAt: [[fcs2_f4]])
$_Z2f4v = comdat any
@@ -98,16 +98,15 @@ attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "n
!llvm.module.flags = !{!10, !11}
!llvm.ident = !{!12}
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.7.0 (trunk 226474) (llvm/trunk 226478)", isOptimized: false, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.7.0 (trunk 226474) (llvm/trunk 226478)", isOptimized: false, emissionKind: LineTablesOnly, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "debug-info-duplicate-calls.cpp", directory: "/tmp/dbginfo")
!2 = !{}
-!3 = !{!4, !7, !8, !9}
-!4 = distinct !DISubprogram(name: "f", line: 13, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 13, file: !1, scope: !5, type: !6, variables: !2)
+!4 = distinct !DISubprogram(name: "f", line: 13, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 13, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "debug-info-duplicate-calls.cpp", directory: "/tmp/dbginfo")
!6 = !DISubroutineType(types: !2)
-!7 = distinct !DISubprogram(name: "f4", line: 10, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 10, file: !1, scope: !5, type: !6, variables: !2)
-!8 = distinct !DISubprogram(name: "f3", line: 7, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !1, scope: !5, type: !6, variables: !2)
-!9 = distinct !DISubprogram(name: "f2", line: 4, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 4, file: !1, scope: !5, type: !6, variables: !2)
+!7 = distinct !DISubprogram(name: "f4", line: 10, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 10, file: !1, scope: !5, type: !6, variables: !2)
+!8 = distinct !DISubprogram(name: "f3", line: 7, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !5, type: !6, variables: !2)
+!9 = distinct !DISubprogram(name: "f2", line: 4, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 4, file: !1, scope: !5, type: !6, variables: !2)
!10 = !{i32 2, !"Dwarf Version", i32 4}
!11 = !{i32 2, !"Debug Info Version", i32 3}
!12 = !{!"clang version 3.7.0 (trunk 226474) (llvm/trunk 226478)"}
diff --git a/test/Transforms/Inline/debug-invoke.ll b/test/Transforms/Inline/debug-invoke.ll
index c547559d8c2a..a1c27b00ea59 100644
--- a/test/Transforms/Inline/debug-invoke.ll
+++ b/test/Transforms/Inline/debug-invoke.ll
@@ -31,7 +31,14 @@ lpad:
}
!llvm.module.flags = !{!1}
+!llvm.dbg.cu = !{!5}
+
!1 = !{i32 2, !"Debug Info Version", i32 3}
-!2 = distinct !DISubprogram()
+!2 = distinct !DISubprogram(unit: !5)
!3 = !DILocation(line: 1, scope: !2)
!4 = !DILocation(line: 2, scope: !2)
+!5 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang",
+ file: !6,
+ isOptimized: true, flags: "-O2",
+ splitDebugFilename: "abc.debug", emissionKind: 2)
+!6 = !DIFile(filename: "path/davidino", directory: "/path/to/dir")
diff --git a/test/Transforms/Inline/deoptimize-intrinsic-cconv.ll b/test/Transforms/Inline/deoptimize-intrinsic-cconv.ll
new file mode 100644
index 000000000000..4e2c3fe4786e
--- /dev/null
+++ b/test/Transforms/Inline/deoptimize-intrinsic-cconv.ll
@@ -0,0 +1,19 @@
+; RUN: opt -S -always-inline < %s | FileCheck %s
+
+declare cc42 i32 @llvm.experimental.deoptimize.i32(...)
+
+define i32 @callee_with_coldcc() alwaysinline {
+ %v0 = call cc42 i32(...) @llvm.experimental.deoptimize.i32(i32 1) [ "deopt"() ]
+ ret i32 %v0
+}
+
+define void @caller_with_coldcc() {
+; CHECK-LABEL: @caller_with_coldcc(
+; CHECK-NEXT: call cc42 void (...) @llvm.experimental.deoptimize.isVoid(i32 1) [ "deopt"() ]
+; CHECK-NEXT: ret void
+
+ %val = call i32 @callee_with_coldcc()
+ ret void
+}
+
+; CHECK: declare cc42 void @llvm.experimental.deoptimize.isVoid(...)
diff --git a/test/Transforms/Inline/deoptimize-intrinsic.ll b/test/Transforms/Inline/deoptimize-intrinsic.ll
new file mode 100644
index 000000000000..848a9db0542b
--- /dev/null
+++ b/test/Transforms/Inline/deoptimize-intrinsic.ll
@@ -0,0 +1,123 @@
+; RUN: opt -S -always-inline < %s | FileCheck %s
+
+declare i8 @llvm.experimental.deoptimize.i8(...)
+declare i32 @llvm.experimental.deoptimize.i32(...)
+
+define i8 @callee(i1* %c) alwaysinline {
+ %c0 = load volatile i1, i1* %c
+ br i1 %c0, label %left, label %right
+
+left:
+ %c1 = load volatile i1, i1* %c
+ br i1 %c1, label %lleft, label %lright
+
+lleft:
+ %v0 = call i8(...) @llvm.experimental.deoptimize.i8(i32 1) [ "deopt"(i32 1) ]
+ ret i8 %v0
+
+lright:
+ ret i8 10
+
+right:
+ %c2 = load volatile i1, i1* %c
+ br i1 %c2, label %rleft, label %rright
+
+rleft:
+ %v1 = call i8(...) @llvm.experimental.deoptimize.i8(i32 1, i32 300, float 500.0, <2 x i32*> undef) [ "deopt"(i32 1) ]
+ ret i8 %v1
+
+rright:
+ %v2 = call i8(...) @llvm.experimental.deoptimize.i8() [ "deopt"(i32 1) ]
+ ret i8 %v2
+}
+
+define void @caller_0(i1* %c, i8* %ptr) {
+; CHECK-LABEL: @caller_0(
+entry:
+ %v = call i8 @callee(i1* %c) [ "deopt"(i32 2) ]
+ store i8 %v, i8* %ptr
+ ret void
+
+; CHECK: lleft.i:
+; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid(i32 1) [ "deopt"(i32 2, i32 1) ]
+; CHECK-NEXT: ret void
+
+; CHECK: rleft.i:
+; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid(i32 1, i32 300, float 5.000000e+02, <2 x i32*> undef) [ "deopt"(i32 2, i32 1) ]
+; CHECK-NEXT: ret void
+
+; CHECK: rright.i:
+; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid() [ "deopt"(i32 2, i32 1) ]
+; CHECK-NEXT: ret void
+
+; CHECK: callee.exit:
+; CHECK-NEXT: store i8 10, i8* %ptr
+; CHECK-NEXT: ret void
+
+}
+
+define i32 @caller_1(i1* %c, i8* %ptr) personality i8 3 {
+; CHECK-LABEL: @caller_1(
+entry:
+ %v = invoke i8 @callee(i1* %c) [ "deopt"(i32 3) ] to label %normal
+ unwind label %unwind
+
+; CHECK: lleft.i:
+; CHECK-NEXT: %0 = call i32 (...) @llvm.experimental.deoptimize.i32(i32 1) [ "deopt"(i32 3, i32 1) ]
+; CHECK-NEXT: ret i32 %0
+
+; CHECK: rleft.i:
+; CHECK-NEXT: %1 = call i32 (...) @llvm.experimental.deoptimize.i32(i32 1, i32 300, float 5.000000e+02, <2 x i32*> undef) [ "deopt"(i32 3, i32 1) ]
+; CHECK-NEXT: ret i32 %1
+
+; CHECK: rright.i:
+; CHECK-NEXT: %2 = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 3, i32 1) ]
+; CHECK-NEXT: ret i32 %2
+
+; CHECK: callee.exit:
+; CHECK-NEXT: br label %normal
+
+; CHECK: normal:
+; CHECK-NEXT: store i8 10, i8* %ptr
+; CHECK-NEXT: ret i32 42
+
+unwind:
+ %lp = landingpad i32 cleanup
+ ret i32 43
+
+normal:
+ store i8 %v, i8* %ptr
+ ret i32 42
+}
+
+define i8 @callee_with_alloca() alwaysinline {
+ %t = alloca i8
+ %v0 = call i8(...) @llvm.experimental.deoptimize.i8(i32 1) [ "deopt"(i8* %t) ]
+ ret i8 %v0
+}
+
+define void @caller_with_lifetime() {
+; CHECK-LABLE: @caller_with_lifetime(
+; CHECK: call void (...) @llvm.experimental.deoptimize.isVoid(i32 1) [ "deopt"(i8* %t.i) ]
+; CHECK-NEXT: ret void
+
+entry:
+ call i8 @callee_with_alloca();
+ ret void
+}
+
+define i8 @callee_with_dynamic_alloca(i32 %n) alwaysinline {
+ %p = alloca i8, i32 %n
+ %v = call i8(...) @llvm.experimental.deoptimize.i8(i32 1) [ "deopt"(i8* %p) ]
+ ret i8 %v
+}
+
+define void @caller_with_stacksaverestore(i32 %n) {
+; CHECK-LABEL: void @caller_with_stacksaverestore(
+; CHECK: call void (...) @llvm.experimental.deoptimize.isVoid(i32 1) [ "deopt"(i8* %p.i) ]
+; CHECK-NEXT: ret void
+
+ %p = alloca i32, i32 %n
+ call i8 @callee_with_dynamic_alloca(i32 %n)
+ ret void
+}
diff --git a/test/Transforms/Inline/devirtualize-3.ll b/test/Transforms/Inline/devirtualize-3.ll
index 76c8150de01f..2a0a6d7f65ac 100644
--- a/test/Transforms/Inline/devirtualize-3.ll
+++ b/test/Transforms/Inline/devirtualize-3.ll
@@ -1,4 +1,4 @@
-; RUN: opt -basicaa -inline -S -scalarrepl -gvn -instcombine < %s | FileCheck %s
+; RUN: opt -basicaa -inline -S -sroa -gvn -instcombine < %s | FileCheck %s
; PR5009
; CHECK: define i32 @main()
diff --git a/test/Transforms/Inline/guard-intrinsic.ll b/test/Transforms/Inline/guard-intrinsic.ll
new file mode 100644
index 000000000000..76d683df6e9b
--- /dev/null
+++ b/test/Transforms/Inline/guard-intrinsic.ll
@@ -0,0 +1,39 @@
+; RUN: opt -S -always-inline < %s | FileCheck %s
+
+declare void @llvm.experimental.guard(i1, ...)
+
+define i8 @callee(i1* %c_ptr) alwaysinline {
+ %c = load volatile i1, i1* %c_ptr
+ call void(i1, ...) @llvm.experimental.guard(i1 %c, i32 1) [ "deopt"(i32 1) ]
+ ret i8 5
+}
+
+define void @caller_0(i1* %c, i8* %ptr) {
+; CHECK-LABEL: @caller_0(
+entry:
+; CHECK: [[COND:%[^ ]+]] = load volatile i1, i1* %c
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[COND]], i32 1) [ "deopt"(i32 2, i32 1) ]
+; CHECK-NEXT: store i8 5, i8* %ptr
+
+ %v = call i8 @callee(i1* %c) [ "deopt"(i32 2) ]
+ store i8 %v, i8* %ptr
+ ret void
+}
+
+define i32 @caller_1(i1* %c, i8* %ptr) personality i8 3 {
+; CHECK-LABEL: @caller_1(
+; CHECK: [[COND:%[^ ]+]] = load volatile i1, i1* %c
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 [[COND]], i32 1) [ "deopt"(i32 3, i32 1) ]
+; CHECK-NEXT: br label %normal
+entry:
+ %v = invoke i8 @callee(i1* %c) [ "deopt"(i32 3) ] to label %normal
+ unwind label %unwind
+
+unwind:
+ %lp = landingpad i32 cleanup
+ ret i32 43
+
+normal:
+ store i8 %v, i8* %ptr
+ ret i32 42
+}
diff --git a/test/Transforms/Inline/ignore-debug-info.ll b/test/Transforms/Inline/ignore-debug-info.ll
index f4f046846e82..1cce8eb92512 100644
--- a/test/Transforms/Inline/ignore-debug-info.ll
+++ b/test/Transforms/Inline/ignore-debug-info.ll
@@ -47,10 +47,10 @@ attributes #0 = { nounwind readnone }
!llvm.module.flags = !{!3, !4}
!llvm.ident = !{!5}
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !{!6}, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, isOptimized: false, emissionKind: FullDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "test.c", directory: "")
!2 = !{}
!3 = !{i32 2, !"Dwarf Version", i32 4}
!4 = !{i32 1, !"Debug Info Version", i32 3}
!5 = !{!""}
-!6 = distinct !DISubprogram()
+!6 = distinct !DISubprogram(unit: !0)
diff --git a/test/Transforms/Inline/inline-cold-callee.ll b/test/Transforms/Inline/inline-cold-callee.ll
index 1fd9f105db50..8e202e91ffab 100644
--- a/test/Transforms/Inline/inline-cold-callee.ll
+++ b/test/Transforms/Inline/inline-cold-callee.ll
@@ -5,7 +5,7 @@
; A callee with identical body does gets inlined because cost fits within the
; inline-threshold
-define i32 @callee1(i32 %x) !prof !1 {
+define i32 @callee1(i32 %x) !prof !21 {
%x1 = add i32 %x, 1
%x2 = add i32 %x1, 1
%x3 = add i32 %x2, 1
@@ -13,7 +13,7 @@ define i32 @callee1(i32 %x) !prof !1 {
ret i32 %x3
}
-define i32 @callee2(i32 %x) !prof !2 {
+define i32 @callee2(i32 %x) !prof !22 {
; CHECK-LABEL: @callee2(
%x1 = add i32 %x, 1
%x2 = add i32 %x1, 1
@@ -22,7 +22,7 @@ define i32 @callee2(i32 %x) !prof !2 {
ret i32 %x3
}
-define i32 @caller2(i32 %y1) !prof !2 {
+define i32 @caller2(i32 %y1) !prof !22 {
; CHECK-LABEL: @caller2(
; CHECK: call i32 @callee2
; CHECK-NOT: call i32 @callee1
@@ -32,8 +32,21 @@ define i32 @caller2(i32 %y1) !prof !2 {
ret i32 %y3
}
-!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"MaxFunctionCount", i32 1000}
-!1 = !{!"function_entry_count", i64 100}
-!2 = !{!"function_entry_count", i64 1}
-
+!llvm.module.flags = !{!1}
+!21 = !{!"function_entry_count", i64 100}
+!22 = !{!"function_entry_count", i64 1}
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 1000}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumCounts", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 100, i32 1}
+!13 = !{i32 999000, i64 100, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}
diff --git a/test/Transforms/Inline/inline-hot-callee.ll b/test/Transforms/Inline/inline-hot-callee.ll
index 93ea9d43c78d..7a04b517bf60 100644
--- a/test/Transforms/Inline/inline-hot-callee.ll
+++ b/test/Transforms/Inline/inline-hot-callee.ll
@@ -5,7 +5,7 @@
; A cold callee with identical body does not get inlined because cost exceeds the
; inline-threshold
-define i32 @callee1(i32 %x) !prof !1 {
+define i32 @callee1(i32 %x) !prof !21 {
%x1 = add i32 %x, 1
%x2 = add i32 %x1, 1
%x3 = add i32 %x2, 1
@@ -13,7 +13,7 @@ define i32 @callee1(i32 %x) !prof !1 {
ret i32 %x3
}
-define i32 @callee2(i32 %x) !prof !2 {
+define i32 @callee2(i32 %x) !prof !22 {
; CHECK-LABEL: @callee2(
%x1 = add i32 %x, 1
%x2 = add i32 %x1, 1
@@ -22,7 +22,7 @@ define i32 @callee2(i32 %x) !prof !2 {
ret i32 %x3
}
-define i32 @caller2(i32 %y1) !prof !2 {
+define i32 @caller2(i32 %y1) !prof !22 {
; CHECK-LABEL: @caller2(
; CHECK: call i32 @callee2
; CHECK-NOT: call i32 @callee1
@@ -32,8 +32,21 @@ define i32 @caller2(i32 %y1) !prof !2 {
ret i32 %y3
}
-!llvm.module.flags = !{!0}
-!0 = !{i32 1, !"MaxFunctionCount", i32 10}
-!1 = !{!"function_entry_count", i64 10}
-!2 = !{!"function_entry_count", i64 1}
-
+!llvm.module.flags = !{!1}
+!21 = !{!"function_entry_count", i64 300}
+!22 = !{!"function_entry_count", i64 1}
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 1000}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumCounts", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 100, i32 1}
+!13 = !{i32 999000, i64 100, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}
diff --git a/test/Transforms/Inline/inline-hot-callsite.ll b/test/Transforms/Inline/inline-hot-callsite.ll
new file mode 100644
index 000000000000..36d9407142e1
--- /dev/null
+++ b/test/Transforms/Inline/inline-hot-callsite.ll
@@ -0,0 +1,52 @@
+; RUN: opt < %s -inline -inline-threshold=0 -inlinehint-threshold=100 -S | FileCheck %s
+
+; This tests that a hot callsite gets the (higher) inlinehint-threshold even without
+; without inline hints and gets inlined because the cost is less than
+; inlinehint-threshold. A cold callee with identical body does not get inlined because
+; cost exceeds the inline-threshold
+
+define i32 @callee1(i32 %x) {
+ %x1 = add i32 %x, 1
+ %x2 = add i32 %x1, 1
+ %x3 = add i32 %x2, 1
+
+ ret i32 %x3
+}
+
+define i32 @callee2(i32 %x) {
+; CHECK-LABEL: @callee2(
+ %x1 = add i32 %x, 1
+ %x2 = add i32 %x1, 1
+ %x3 = add i32 %x2, 1
+
+ ret i32 %x3
+}
+
+define i32 @caller2(i32 %y1) {
+; CHECK-LABEL: @caller2(
+; CHECK: call i32 @callee2
+; CHECK-NOT: call i32 @callee1
+; CHECK: ret i32 %x3.i
+ %y2 = call i32 @callee2(i32 %y1), !prof !22
+ %y3 = call i32 @callee1(i32 %y2), !prof !21
+ ret i32 %y3
+}
+
+!llvm.module.flags = !{!1}
+!21 = !{!"branch_weights", i64 300}
+!22 = !{!"branch_weights", i64 1}
+
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 1000}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumCounts", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 100, i32 1}
+!13 = !{i32 999000, i64 100, i32 1}
+!14 = !{i32 999999, i64 1, i32 2}
diff --git a/test/Transforms/Inline/inline-invoke-tail.ll b/test/Transforms/Inline/inline-invoke-tail.ll
index f4b80653d014..5a9edaca6007 100644
--- a/test/Transforms/Inline/inline-invoke-tail.ll
+++ b/test/Transforms/Inline/inline-invoke-tail.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -inline -S | not grep "tail call void @llvm.memcpy.i32"
+; RUN: opt < %s -inline -S | not grep "tail call void @llvm.memcpy.p0i8.p0i8.i32"
; PR3550
define internal void @foo(i32* %p, i32* %q) {
@@ -8,8 +8,6 @@ define internal void @foo(i32* %p, i32* %q) {
ret void
}
-declare void @llvm.memcpy.i32(i8* nocapture, i8* nocapture, i32, i32) nounwind
-
define i32 @main() personality i32 (...)* @__gxx_personality_v0 {
%a = alloca i32 ; <i32*> [#uses=3]
%b = alloca i32 ; <i32*> [#uses=2]
diff --git a/test/Transforms/Inline/inline-optsize.ll b/test/Transforms/Inline/inline-optsize.ll
index b01a1f657f31..7e62245fd3f0 100644
--- a/test/Transforms/Inline/inline-optsize.ll
+++ b/test/Transforms/Inline/inline-optsize.ll
@@ -1,5 +1,6 @@
; RUN: opt -S -Oz < %s | FileCheck %s -check-prefix=OZ
; RUN: opt -S -O2 < %s | FileCheck %s -check-prefix=O2
+; RUN: opt -S -Os < %s | FileCheck %s -check-prefix=OS
; The inline threshold for a function with the optsize attribute is currently
; the same as the global inline threshold for -Os. Check that the optsize
@@ -24,10 +25,20 @@ define i32 @inner() {
ret i32 %x5
}
-; @inner() should be inlined for -O2 but not for -Oz.
+; @inner() should be inlined for -O2 and -Os but not for -Oz.
; OZ: call
; O2-NOT: call
+; OS-NOT: call
define i32 @outer() optsize {
%r = call i32 @inner()
ret i32 %r
}
+
+; @inner() should not be inlined for -O2, -Os and -Oz.
+; OZ: call
+; O2: call
+; OS: call
+define i32 @outer2() minsize {
+ %r = call i32 @inner()
+ ret i32 %r
+}
diff --git a/test/Transforms/Inline/inline-threshold.ll b/test/Transforms/Inline/inline-threshold.ll
new file mode 100644
index 000000000000..cb0c8e9fcc44
--- /dev/null
+++ b/test/Transforms/Inline/inline-threshold.ll
@@ -0,0 +1,89 @@
+; Test that -inline-threshold overrides thresholds derived from opt levels.
+; RUN: opt < %s -O2 -inline-threshold=500 -S | FileCheck %s
+; RUN: opt < %s -O3 -inline-threshold=500 -S | FileCheck %s
+; RUN: opt < %s -Os -inline-threshold=500 -S | FileCheck %s
+; RUN: opt < %s -Oz -inline-threshold=500 -S | FileCheck %s
+
+@a = global i32 4
+
+define i32 @simpleFunction(i32 %a) #0 {
+entry:
+ %a1 = load volatile i32, i32* @a
+ %x1 = add i32 %a1, %a1
+ %cmp = icmp eq i32 %a1, 0
+ br i1 %cmp, label %if.then, label %if.else
+if.then:
+ %a2 = load volatile i32, i32* @a
+ %x2_0 = add i32 %x1, %a2
+ br label %if.else
+if.else:
+ %x2 = phi i32 [ %x1, %entry ], [ %x2_0, %if.then ]
+ %a3 = load volatile i32, i32* @a
+ %x3 = add i32 %x2, %a3
+ %a4 = load volatile i32, i32* @a
+ %x4 = add i32 %x3, %a4
+ %a5 = load volatile i32, i32* @a
+ %x5 = add i32 %x4, %a5
+ %a6 = load volatile i32, i32* @a
+ %x6 = add i32 %x5, %a6
+ %a7 = load volatile i32, i32* @a
+ %x7 = add i32 %x6, %a7
+ %a8 = load volatile i32, i32* @a
+ %x8 = add i32 %x7, %a8
+ %a9 = load volatile i32, i32* @a
+ %x9 = add i32 %x8, %a9
+ %a10 = load volatile i32, i32* @a
+ %x10 = add i32 %x9, %a10
+ %a11 = load volatile i32, i32* @a
+ %x11 = add i32 %x10, %a11
+ %a12 = load volatile i32, i32* @a
+ %x12 = add i32 %x11, %a12
+ %a13 = load volatile i32, i32* @a
+ %x13 = add i32 %x12, %a13
+ %a14 = load volatile i32, i32* @a
+ %x14 = add i32 %x13, %a14
+ %a15 = load volatile i32, i32* @a
+ %x15 = add i32 %x14, %a15
+ %a16 = load volatile i32, i32* @a
+ %x16 = add i32 %x15, %a16
+ %a17 = load volatile i32, i32* @a
+ %x17 = add i32 %x16, %a17
+ %a18 = load volatile i32, i32* @a
+ %x18 = add i32 %x17, %a18
+ %a19 = load volatile i32, i32* @a
+ %x19 = add i32 %x18, %a19
+ %a20 = load volatile i32, i32* @a
+ %x20 = add i32 %x19, %a20
+ %a21 = load volatile i32, i32* @a
+ %x21 = add i32 %x20, %a21
+ %a22 = load volatile i32, i32* @a
+ %x22 = add i32 %x21, %a22
+ %a23 = load volatile i32, i32* @a
+ %x23 = add i32 %x22, %a23
+ %a24 = load volatile i32, i32* @a
+ %x24 = add i32 %x23, %a24
+ %a25 = load volatile i32, i32* @a
+ %x25 = add i32 %x24, %a25
+ %a26 = load volatile i32, i32* @a
+ %x26 = add i32 %x25, %a26
+ %a27 = load volatile i32, i32* @a
+ %x27 = add i32 %x26, %a27
+ %a28 = load volatile i32, i32* @a
+ %x28 = add i32 %x27, %a28
+ %a29 = load volatile i32, i32* @a
+ %x29 = add i32 %x28, %a29
+ %add = add i32 %x29, %a
+ ret i32 %add
+}
+
+; Function Attrs: nounwind readnone uwtable
+define i32 @bar(i32 %a) #0 {
+; CHECK-LABEL: @bar
+; CHECK-NOT: call i32 @simpleFunction(i32 6)
+; CHECK: ret
+entry:
+ %i = tail call i32 @simpleFunction(i32 6)
+ ret i32 %i
+}
+
+attributes #0 = { nounwind readnone uwtable }
diff --git a/test/Transforms/Inline/inline_dbg_declare.ll b/test/Transforms/Inline/inline_dbg_declare.ll
index 3c701c41459a..a2c127e44e0a 100644
--- a/test/Transforms/Inline/inline_dbg_declare.ll
+++ b/test/Transforms/Inline/inline_dbg_declare.ll
@@ -67,16 +67,15 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!13, !14}
!llvm.ident = !{!15}
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 (trunk)", isOptimized: false, emissionKind: 1, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0 (trunk)", isOptimized: false, emissionKind: FullDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "foo.c", directory: "")
!2 = !{}
-!3 = !{!4, !9}
-!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "foo.c", directory: "")
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !8}
!8 = !DIBasicType(tag: DW_TAG_base_type, name: "float", size: 32, align: 32, encoding: DW_ATE_float)
-!9 = distinct !DISubprogram(name: "bar", line: 6, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !1, scope: !5, type: !10, variables: !2)
+!9 = distinct !DISubprogram(name: "bar", line: 6, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !5, type: !10, variables: !2)
!10 = !DISubroutineType(types: !11)
!11 = !{null, !12}
!12 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 32, align: 32, baseType: !8)
@@ -93,7 +92,7 @@ attributes #1 = { nounwind readnone }
!23 = !DILocation(line: 9, column: 1, scope: !9)
; CHECK: [[FOO:![0-9]+]] = distinct !DISubprogram(name: "foo",
-; CHECK: [[BAR:![0-9]+]] = distinct !DISubprogram(name: "bar",
; CHECK: [[m23]] = !DILocalVariable(name: "x", arg: 1, scope: [[FOO]]
+; CHECK: [[BAR:![0-9]+]] = distinct !DISubprogram(name: "bar",
; CHECK: [[m24]] = !DILocation(line: 1, column: 17, scope: [[FOO]], inlinedAt: [[CALL_SITE:![0-9]+]])
; CHECK: [[CALL_SITE]] = distinct !DILocation(line: 8, column: 14, scope: [[BAR]])
diff --git a/test/Transforms/Inline/inline_unreachable-2.ll b/test/Transforms/Inline/inline_unreachable-2.ll
new file mode 100644
index 000000000000..57f090effc5b
--- /dev/null
+++ b/test/Transforms/Inline/inline_unreachable-2.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -inline -S | FileCheck %s
+
+; CHECK-LABEL: caller
+; CHECK: call void @callee
+define void @caller(i32 %a, i1 %b) #0 {
+ call void @callee(i32 %a, i1 %b)
+ unreachable
+}
+
+define void @callee(i32 %a, i1 %b) {
+ call void asm sideeffect "", ""()
+ br i1 %b, label %bb1, label %bb2
+bb1:
+ call void asm sideeffect "", ""()
+ ret void
+bb2:
+ call void asm sideeffect "", ""()
+ ret void
+}
diff --git a/test/Transforms/Inline/inline_unreachable.ll b/test/Transforms/Inline/inline_unreachable.ll
new file mode 100644
index 000000000000..dbf0119113a3
--- /dev/null
+++ b/test/Transforms/Inline/inline_unreachable.ll
@@ -0,0 +1,130 @@
+; RUN: opt < %s -inline -S | FileCheck %s
+
+@a = global i32 4
+@_ZTIi = external global i8*
+
+; CHECK-LABEL: callSimpleFunction
+; CHECK: call i32 @simpleFunction
+define i32 @callSimpleFunction(i32 %idx, i32 %limit) {
+entry:
+ %cmp = icmp sge i32 %idx, %limit
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ %s = call i32 @simpleFunction(i32 %idx)
+ store i32 %s, i32* @a
+ unreachable
+
+if.end:
+ ret i32 %idx
+}
+
+; CHECK-LABEL: callSmallFunction
+; CHECK-NOT: call i32 @smallFunction
+define i32 @callSmallFunction(i32 %idx, i32 %limit) {
+entry:
+ %cmp = icmp sge i32 %idx, %limit
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then:
+ %s = call i32 @smallFunction(i32 %idx)
+ store i32 %s, i32* @a
+ unreachable
+
+if.end:
+ ret i32 %idx
+}
+
+; CHECK-LABEL: throwSimpleException
+; CHECK: invoke i32 @simpleFunction
+define i32 @throwSimpleException(i32 %idx, i32 %limit) #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ %cmp = icmp sge i32 %idx, %limit
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %exception = call i8* @__cxa_allocate_exception(i64 1) #0
+ invoke i32 @simpleFunction(i32 %idx)
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %if.then
+ call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #1
+ unreachable
+
+lpad: ; preds = %if.then
+ %ll = landingpad { i8*, i32 }
+ cleanup
+ ret i32 %idx
+
+if.end: ; preds = %entry
+ ret i32 %idx
+}
+
+; CHECK-LABEL: throwSmallException
+; CHECK-NOT: invoke i32 @smallFunction
+define i32 @throwSmallException(i32 %idx, i32 %limit) #0 personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ %cmp = icmp sge i32 %idx, %limit
+ br i1 %cmp, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ %exception = call i8* @__cxa_allocate_exception(i64 1) #0
+ invoke i32 @smallFunction(i32 %idx)
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont: ; preds = %if.then
+ call void @__cxa_throw(i8* %exception, i8* bitcast (i8** @_ZTIi to i8*), i8* null) #1
+ unreachable
+
+lpad: ; preds = %if.then
+ %ll = landingpad { i8*, i32 }
+ cleanup
+ ret i32 %idx
+
+if.end: ; preds = %entry
+ ret i32 %idx
+}
+
+define i32 @simpleFunction(i32 %a) #0 {
+entry:
+ %a1 = load volatile i32, i32* @a
+ %x1 = add i32 %a1, %a1
+ %a2 = load volatile i32, i32* @a
+ %x2 = add i32 %x1, %a2
+ %a3 = load volatile i32, i32* @a
+ %x3 = add i32 %x2, %a3
+ %a4 = load volatile i32, i32* @a
+ %x4 = add i32 %x3, %a4
+ %a5 = load volatile i32, i32* @a
+ %x5 = add i32 %x4, %a5
+ %a6 = load volatile i32, i32* @a
+ %x6 = add i32 %x5, %a6
+ %a7 = load volatile i32, i32* @a
+ %x7 = add i32 %x6, %a6
+ %a8 = load volatile i32, i32* @a
+ %x8 = add i32 %x7, %a8
+ %a9 = load volatile i32, i32* @a
+ %x9 = add i32 %x8, %a9
+ %a10 = load volatile i32, i32* @a
+ %x10 = add i32 %x9, %a10
+ %a11 = load volatile i32, i32* @a
+ %x11 = add i32 %x10, %a11
+ %a12 = load volatile i32, i32* @a
+ %x12 = add i32 %x11, %a12
+ %add = add i32 %x12, %a
+ ret i32 %add
+}
+
+define i32 @smallFunction(i32 %a) {
+entry:
+ %r = load volatile i32, i32* @a
+ ret i32 %r
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { noreturn }
+
+declare i8* @__cxa_allocate_exception(i64)
+declare i32 @__gxx_personality_v0(...)
+declare void @__cxa_throw(i8*, i8*, i8*)
+
diff --git a/test/Transforms/Inline/local-as-metadata-undominated-use.ll b/test/Transforms/Inline/local-as-metadata-undominated-use.ll
new file mode 100644
index 000000000000..d170c651afe0
--- /dev/null
+++ b/test/Transforms/Inline/local-as-metadata-undominated-use.ll
@@ -0,0 +1,48 @@
+; RUN: opt -inline -S < %s | FileCheck %s
+
+; Make sure the inliner doesn't crash when a metadata-bridged SSA operand is an
+; undominated use.
+;
+; If we ever add a verifier check to prevent the scenario in this file, it's
+; fine to delete this testcase. However, we would need a bitcode upgrade since
+; such historical IR exists in practice.
+
+define i32 @foo(i32 %i) !dbg !4 {
+entry:
+ tail call void @llvm.dbg.value(metadata i32 %add, i64 0, metadata !8, metadata !10), !dbg !11
+ %add = add nsw i32 1, %i, !dbg !12
+ ret i32 %add, !dbg !13
+}
+
+; CHECK-LABEL: define i32 @caller(
+define i32 @caller(i32 %i) {
+; CHECK-NEXT: entry:
+entry:
+; Although the inliner shouldn't crash, it can't be expected to get the
+; "correct" SSA value since its assumptions have been violated.
+; CHECK-NEXT: tail call void @llvm.dbg.value(metadata ![[EMPTY:[0-9]+]],
+; CHECK-NEXT: %{{.*}} = add nsw
+ %call = tail call i32 @foo(i32 %i)
+ ret i32 %call
+}
+
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!9}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 (trunk 265634) (llvm/trunk 265637)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "t.c", directory: "/path/to/tests")
+
+; CHECK: ![[EMPTY]] = !{}
+!2 = !{}
+!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, unit: !0)
+!5 = !DISubroutineType(types: !6)
+!6 = !{!7, !7}
+!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!8 = !DILocalVariable(name: "add", arg: 1, scope: !4, file: !1, line: 2, type: !7)
+!9 = !{i32 2, !"Debug Info Version", i32 3}
+!10 = !DIExpression()
+!11 = !DILocation(line: 2, column: 13, scope: !4)
+!12 = !DILocation(line: 2, column: 27, scope: !4)
+!13 = !DILocation(line: 2, column: 18, scope: !4)
diff --git a/test/Transforms/Inline/parallel-loop-md.ll b/test/Transforms/Inline/parallel-loop-md.ll
new file mode 100644
index 000000000000..43a44feb247e
--- /dev/null
+++ b/test/Transforms/Inline/parallel-loop-md.ll
@@ -0,0 +1,57 @@
+; RUN: opt -S -inline < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: norecurse nounwind uwtable
+define void @Body(i32* nocapture %res, i32* nocapture readnone %c, i32* nocapture readonly %d, i32* nocapture readonly %p, i32 %i) #0 {
+entry:
+ %idxprom = sext i32 %i to i64
+ %arrayidx = getelementptr inbounds i32, i32* %p, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %cmp = icmp eq i32 %0, 0
+ %arrayidx2 = getelementptr inbounds i32, i32* %res, i64 %idxprom
+ %1 = load i32, i32* %arrayidx2, align 4
+ br i1 %cmp, label %cond.end, label %cond.false
+
+cond.false: ; preds = %entry
+ %arrayidx6 = getelementptr inbounds i32, i32* %d, i64 %idxprom
+ %2 = load i32, i32* %arrayidx6, align 4
+ %add = add nsw i32 %2, %1
+ br label %cond.end
+
+cond.end: ; preds = %entry, %cond.false
+ %cond = phi i32 [ %add, %cond.false ], [ %1, %entry ]
+ store i32 %cond, i32* %arrayidx2, align 4
+ ret void
+}
+
+; Function Attrs: nounwind uwtable
+define void @Test(i32* %res, i32* %c, i32* %d, i32* %p, i32 %n) #1 {
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.body, %entry
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %cmp = icmp slt i32 %i.0, 1600
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ call void @Body(i32* %res, i32* undef, i32* %d, i32* %p, i32 %i.0), !llvm.mem.parallel_loop_access !0
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond, !llvm.loop !0
+
+for.end: ; preds = %for.cond
+ ret void
+}
+
+; CHECK-LABEL: @Test
+; CHECK: load i32,{{.*}}, !llvm.mem.parallel_loop_access !0
+; CHECK: load i32,{{.*}}, !llvm.mem.parallel_loop_access !0
+; CHECK: load i32,{{.*}}, !llvm.mem.parallel_loop_access !0
+; CHECK: store i32{{.*}}, !llvm.mem.parallel_loop_access !0
+; CHECK: br label %for.cond, !llvm.loop !0
+
+attributes #0 = { norecurse nounwind uwtable }
+
+!0 = distinct !{!0}
+
diff --git a/test/Transforms/Inline/pr26698.ll b/test/Transforms/Inline/pr26698.ll
new file mode 100644
index 000000000000..1986db0bf3f4
--- /dev/null
+++ b/test/Transforms/Inline/pr26698.ll
@@ -0,0 +1,65 @@
+; RUN: opt -S -inline < %s | FileCheck %s
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686-pc-windows-msvc18.0.0"
+
+declare void @g(i32)
+
+define void @f() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ invoke void @g(i32 0)
+ to label %invoke.cont unwind label %cs.bb
+
+invoke.cont:
+ ret void
+
+cs.bb:
+ %cs = catchswitch within none [label %cp.bb] unwind label %cleanup.bb
+
+cp.bb:
+ %cpouter1 = catchpad within %cs [i8* null, i32 0, i8* null]
+ call void @dtor() #1 [ "funclet"(token %cpouter1) ]
+ catchret from %cpouter1 to label %invoke.cont
+
+cleanup.bb:
+ %cpouter2 = cleanuppad within none []
+ call void @g(i32 1) [ "funclet"(token %cpouter2) ]
+ cleanupret from %cpouter2 unwind to caller
+}
+
+declare i32 @__CxxFrameHandler3(...)
+
+; Function Attrs: nounwind
+define internal void @dtor() #1 personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ invoke void @g(i32 2)
+ to label %invoke.cont unwind label %ehcleanup1
+
+invoke.cont:
+ ret void
+
+ehcleanup1:
+ %cpinner1 = cleanuppad within none []
+ invoke void @g(i32 3) [ "funclet" (token %cpinner1) ]
+ to label %done unwind label %ehcleanup2
+done:
+ unreachable
+
+ehcleanup2:
+ %cpinner2 = cleanuppad within %cpinner1 []
+ call void @g(i32 4) [ "funclet" (token %cpinner2) ]
+ cleanupret from %cpinner2 unwind to caller
+}
+
+; CHECK-LABEL: define void @f(
+
+; CHECK: %[[cs:.*]] = catchswitch within none
+
+; CHECK: %[[cpouter1:.*]] = catchpad within %[[cs]]
+
+; CHECK: %[[cpinner1:.*]] = cleanuppad within %[[cpouter1]]
+
+; CHECK: %[[cpinner2:.*]] = cleanuppad within %[[cpinner1]]
+; CHECK-NEXT: call void @g(i32 4) #0 [ "funclet"(token %[[cpinner2]]) ]
+; CHECK-NEXT: unreachable
+
+attributes #1 = { nounwind }
diff --git a/test/Transforms/Inline/pr28298.ll b/test/Transforms/Inline/pr28298.ll
new file mode 100644
index 000000000000..0bb3f0a71159
--- /dev/null
+++ b/test/Transforms/Inline/pr28298.ll
@@ -0,0 +1,19 @@
+; RUN: opt -S -inline < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @test1() {
+entry:
+ call void @test2()
+ ret void
+}
+
+define internal void @test2() {
+entry:
+ call void undef()
+ ret void
+}
+
+; CHECK-LABEL: define void @test1(
+; CHECK: call void undef(
+; CHECK: ret void
diff --git a/test/Transforms/Inline/profile-meta.ll b/test/Transforms/Inline/profile-meta.ll
new file mode 100644
index 000000000000..66fe7413dd07
--- /dev/null
+++ b/test/Transforms/Inline/profile-meta.ll
@@ -0,0 +1,44 @@
+; RUN: opt < %s -S -inline | FileCheck %s
+
+; Make sure that profile and unpredictable metadata is preserved when cloning a select.
+
+define i32 @callee_with_select(i1 %c, i32 %a, i32 %b) {
+ %sel = select i1 %c, i32 %a, i32 %b, !prof !0, !unpredictable !1
+ ret i32 %sel
+}
+
+define i32 @caller_of_select(i1 %C, i32 %A, i32 %B) {
+ %ret = call i32 @callee_with_select(i1 %C, i32 %A, i32 %B)
+ ret i32 %ret
+
+; CHECK-LABEL: @caller_of_select(
+; CHECK-NEXT: [[SEL:%.*]] = select i1 %C, i32 %A, i32 %B, !prof !0, !unpredictable !1
+; CHECK-NEXT: ret i32 [[SEL]]
+}
+
+; Make sure that profile and unpredictable metadata is preserved when cloning a branch.
+
+define i32 @callee_with_branch(i1 %c) {
+ br i1 %c, label %if, label %else, !unpredictable !1, !prof !2
+if:
+ ret i32 1
+else:
+ ret i32 2
+}
+
+define i32 @caller_of_branch(i1 %C) {
+ %ret = call i32 @callee_with_branch(i1 %C)
+ ret i32 %ret
+
+; CHECK-LABEL: @caller_of_branch(
+; CHECK-NEXT: br i1 %C, label %{{.*}}, label %{{.*}}, !prof !2, !unpredictable !1
+}
+
+!0 = !{!"branch_weights", i32 1, i32 2}
+!1 = !{}
+!2 = !{!"branch_weights", i32 3, i32 4}
+
+; CHECK: !0 = !{!"branch_weights", i32 1, i32 2}
+; CHECK: !1 = !{}
+; CHECK: !2 = !{!"branch_weights", i32 3, i32 4}
+
diff --git a/test/Transforms/InstCombine/2002-03-11-InstCombineHang.ll b/test/Transforms/InstCombine/2002-03-11-InstCombineHang.ll
deleted file mode 100644
index 5d027a744920..000000000000
--- a/test/Transforms/InstCombine/2002-03-11-InstCombineHang.ll
+++ /dev/null
@@ -1,9 +0,0 @@
-; This testcase causes instcombine to hang.
-;
-; RUN: opt < %s -instcombine
-
-define void @test(i32 %X) {
- %reg117 = add i32 %X, 0 ; <i32> [#uses=0]
- ret void
-}
-
diff --git a/test/Transforms/InstCombine/2002-05-14-SubFailure.ll b/test/Transforms/InstCombine/2002-05-14-SubFailure.ll
deleted file mode 100644
index 854ec604d01a..000000000000
--- a/test/Transforms/InstCombine/2002-05-14-SubFailure.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; Instcombine was missing a test that caused it to make illegal transformations
-; sometimes. In this case, it transforms the sub into an add:
-; RUN: opt < %s -instcombine -S | FileCheck %s
-; CHECK: sub
-
-define i32 @test(i32 %i, i32 %j) {
- %A = mul i32 %i, %j
- %B = sub i32 2, %A
- ret i32 %B
-}
-
diff --git a/test/Transforms/InstCombine/2002-08-02-CastTest.ll b/test/Transforms/InstCombine/2002-08-02-CastTest.ll
deleted file mode 100644
index 363cb21e3958..000000000000
--- a/test/Transforms/InstCombine/2002-08-02-CastTest.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; This testcase is incorrectly getting completely eliminated. There should be
-; SOME instruction named %c here, even if it's a bitwise and.
-;
-; RUN: opt < %s -instcombine -S | grep %c
-;
-define i64 @test3(i64 %A) {
- %c1 = trunc i64 %A to i8 ; <i8> [#uses=1]
- %c2 = zext i8 %c1 to i64 ; <i64> [#uses=1]
- ret i64 %c2
-}
-
diff --git a/test/Transforms/InstCombine/2002-12-05-MissedConstProp.ll b/test/Transforms/InstCombine/2002-12-05-MissedConstProp.ll
deleted file mode 100644
index 49e55c620a49..000000000000
--- a/test/Transforms/InstCombine/2002-12-05-MissedConstProp.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-; CHECK-NOT: add
-
-define i32 @test(i32 %A) {
- %A.neg = sub i32 0, %A ; <i32> [#uses=1]
- %.neg = sub i32 0, 1 ; <i32> [#uses=1]
- %X = add i32 %.neg, 1 ; <i32> [#uses=1]
- %Y.neg.ra = add i32 %A, %X ; <i32> [#uses=1]
- %r = add i32 %A.neg, %Y.neg.ra ; <i32> [#uses=1]
- ret i32 %r
-}
-
diff --git a/test/Transforms/InstCombine/2004-11-27-SetCCForCastLargerAndConstant.ll b/test/Transforms/InstCombine/2004-11-27-SetCCForCastLargerAndConstant.ll
index 6672b6c6d4e5..6a95c82374d8 100644
--- a/test/Transforms/InstCombine/2004-11-27-SetCCForCastLargerAndConstant.ll
+++ b/test/Transforms/InstCombine/2004-11-27-SetCCForCastLargerAndConstant.ll
@@ -1,6 +1,7 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; This test case tests the InstructionCombining optimization that
; reduces things like:
-; %Y = sext i8 %X to i32
+; %Y = sext i8 %X to i32
; %C = icmp ult i32 %Y, 1024
; to
; %C = i1 true
@@ -10,183 +11,259 @@
; constant value and the range of the casted value.
;
; RUN: opt < %s -instcombine -S | FileCheck %s
-; END.
+
define i1 @lt_signed_to_large_unsigned(i8 %SB) {
- %Y = sext i8 %SB to i32 ; <i32> [#uses=1]
- %C = icmp ult i32 %Y, 1024 ; <i1> [#uses=1]
- ret i1 %C
-; CHECK: %C1 = icmp sgt i8 %SB, -1
-; CHECK: ret i1 %C1
+; CHECK-LABEL: @lt_signed_to_large_unsigned(
+; CHECK-NEXT: [[C1:%.*]] = icmp sgt i8 %SB, -1
+; CHECK-NEXT: ret i1 [[C1]]
+;
+ %Y = sext i8 %SB to i32
+ %C = icmp ult i32 %Y, 1024
+ ret i1 %C
+}
+
+; PR28011 - https://llvm.org/bugs/show_bug.cgi?id=28011
+; The above transform only applies to scalar integers; it shouldn't be attempted for constant expressions or vectors.
+
+@a = common global i32** null
+@b = common global [1 x i32] zeroinitializer
+
+define i1 @PR28011(i16 %a) {
+; CHECK-LABEL: @PR28011(
+; CHECK-NEXT: [[CONV:%.*]] = sext i16 %a to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[CONV]], or (i32 zext (i1 icmp ne (i32*** bitcast ([1 x i32]* @b to i32***), i32*** @a) to i32), i32 1)
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %conv = sext i16 %a to i32
+ %cmp = icmp ne i32 %conv, or (i32 zext (i1 icmp ne (i32*** bitcast ([1 x i32]* @b to i32***), i32*** @a) to i32), i32 1)
+ ret i1 %cmp
+}
+
+define <2 x i1> @lt_signed_to_large_unsigned_vec(<2 x i8> %SB) {
+; CHECK-LABEL: @lt_signed_to_large_unsigned_vec(
+; CHECK-NEXT: [[Y:%.*]] = sext <2 x i8> %SB to <2 x i32>
+; CHECK-NEXT: [[C:%.*]] = icmp ult <2 x i32> [[Y]], <i32 1024, i32 2>
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %Y = sext <2 x i8> %SB to <2 x i32>
+ %C = icmp ult <2 x i32> %Y, <i32 1024, i32 2>
+ ret <2 x i1> %C
}
define i1 @lt_signed_to_large_signed(i8 %SB) {
- %Y = sext i8 %SB to i32 ; <i32> [#uses=1]
- %C = icmp slt i32 %Y, 1024 ; <i1> [#uses=1]
- ret i1 %C
-; CHECK: ret i1 true
+; CHECK-LABEL: @lt_signed_to_large_signed(
+; CHECK-NEXT: ret i1 true
+;
+ %Y = sext i8 %SB to i32
+ %C = icmp slt i32 %Y, 1024
+ ret i1 %C
}
define i1 @lt_signed_to_large_negative(i8 %SB) {
- %Y = sext i8 %SB to i32 ; <i32> [#uses=1]
- %C = icmp slt i32 %Y, -1024 ; <i1> [#uses=1]
- ret i1 %C
-; CHECK: ret i1 false
+; CHECK-LABEL: @lt_signed_to_large_negative(
+; CHECK-NEXT: ret i1 false
+;
+ %Y = sext i8 %SB to i32
+ %C = icmp slt i32 %Y, -1024
+ ret i1 %C
}
define i1 @lt_signed_to_small_unsigned(i8 %SB) {
- %Y = sext i8 %SB to i32
- %C = icmp ult i32 %Y, 17
- ret i1 %C
-; CHECK: %C = icmp ult i8 %SB, 17
-; CHECK: ret i1 %C
+; CHECK-LABEL: @lt_signed_to_small_unsigned(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i8 %SB, 17
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %Y = sext i8 %SB to i32
+ %C = icmp ult i32 %Y, 17
+ ret i1 %C
}
define i1 @lt_signed_to_small_signed(i8 %SB) {
- %Y = sext i8 %SB to i32 ; <i32> [#uses=1]
- %C = icmp slt i32 %Y, 17 ; <i1> [#uses=1]
- ret i1 %C
-; CHECK: %C = icmp slt i8 %SB, 17
-; CHECK: ret i1 %C
+; CHECK-LABEL: @lt_signed_to_small_signed(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i8 %SB, 17
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %Y = sext i8 %SB to i32
+ %C = icmp slt i32 %Y, 17
+ ret i1 %C
}
define i1 @lt_signed_to_small_negative(i8 %SB) {
- %Y = sext i8 %SB to i32 ; <i32> [#uses=1]
- %C = icmp slt i32 %Y, -17 ; <i1> [#uses=1]
- ret i1 %C
-; CHECK: %C = icmp slt i8 %SB, -17
-; CHECK: ret i1 %C
+; CHECK-LABEL: @lt_signed_to_small_negative(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i8 %SB, -17
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %Y = sext i8 %SB to i32
+ %C = icmp slt i32 %Y, -17
+ ret i1 %C
}
define i1 @lt_unsigned_to_large_unsigned(i8 %SB) {
- %Y = zext i8 %SB to i32 ; <i32> [#uses=1]
- %C = icmp ult i32 %Y, 1024 ; <i1> [#uses=1]
- ret i1 %C
-; CHECK: ret i1 true
+; CHECK-LABEL: @lt_unsigned_to_large_unsigned(
+; CHECK-NEXT: ret i1 true
+;
+ %Y = zext i8 %SB to i32
+ %C = icmp ult i32 %Y, 1024
+ ret i1 %C
}
define i1 @lt_unsigned_to_large_signed(i8 %SB) {
- %Y = zext i8 %SB to i32 ; <i32> [#uses=1]
- %C = icmp slt i32 %Y, 1024 ; <i1> [#uses=1]
- ret i1 %C
-; CHECK: ret i1 true
+; CHECK-LABEL: @lt_unsigned_to_large_signed(
+; CHECK-NEXT: ret i1 true
+;
+ %Y = zext i8 %SB to i32
+ %C = icmp slt i32 %Y, 1024
+ ret i1 %C
}
define i1 @lt_unsigned_to_large_negative(i8 %SB) {
- %Y = zext i8 %SB to i32 ; <i32> [#uses=1]
- %C = icmp slt i32 %Y, -1024 ; <i1> [#uses=1]
- ret i1 %C
-; CHECK: ret i1 false
+; CHECK-LABEL: @lt_unsigned_to_large_negative(
+; CHECK-NEXT: ret i1 false
+;
+ %Y = zext i8 %SB to i32
+ %C = icmp slt i32 %Y, -1024
+ ret i1 %C
}
define i1 @lt_unsigned_to_small_unsigned(i8 %SB) {
- %Y = zext i8 %SB to i32 ; <i32> [#uses=1]
- %C = icmp ult i32 %Y, 17 ; <i1> [#uses=1]
- ret i1 %C
-; CHECK: %C = icmp ult i8 %SB, 17
-; CHECK: ret i1 %C
+; CHECK-LABEL: @lt_unsigned_to_small_unsigned(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i8 %SB, 17
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %Y = zext i8 %SB to i32
+ %C = icmp ult i32 %Y, 17
+ ret i1 %C
}
define i1 @lt_unsigned_to_small_signed(i8 %SB) {
- %Y = zext i8 %SB to i32
- %C = icmp slt i32 %Y, 17
- ret i1 %C
-; CHECK: %C = icmp ult i8 %SB, 17
-; CHECK: ret i1 %C
+; CHECK-LABEL: @lt_unsigned_to_small_signed(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i8 %SB, 17
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %Y = zext i8 %SB to i32
+ %C = icmp slt i32 %Y, 17
+ ret i1 %C
}
define i1 @lt_unsigned_to_small_negative(i8 %SB) {
- %Y = zext i8 %SB to i32 ; <i32> [#uses=1]
- %C = icmp slt i32 %Y, -17 ; <i1> [#uses=1]
- ret i1 %C
-; CHECK: ret i1 false
+; CHECK-LABEL: @lt_unsigned_to_small_negative(
+; CHECK-NEXT: ret i1 false
+;
+ %Y = zext i8 %SB to i32
+ %C = icmp slt i32 %Y, -17
+ ret i1 %C
}
define i1 @gt_signed_to_large_unsigned(i8 %SB) {
- %Y = sext i8 %SB to i32 ; <i32> [#uses=1]
- %C = icmp ugt i32 %Y, 1024 ; <i1> [#uses=1]
- ret i1 %C
-; CHECK: %C = icmp slt i8 %SB, 0
-; CHECK: ret i1 %C
+; CHECK-LABEL: @gt_signed_to_large_unsigned(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i8 %SB, 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %Y = sext i8 %SB to i32
+ %C = icmp ugt i32 %Y, 1024
+ ret i1 %C
}
define i1 @gt_signed_to_large_signed(i8 %SB) {
- %Y = sext i8 %SB to i32 ; <i32> [#uses=1]
- %C = icmp sgt i32 %Y, 1024 ; <i1> [#uses=1]
- ret i1 %C
-; CHECK: ret i1 false
+; CHECK-LABEL: @gt_signed_to_large_signed(
+; CHECK-NEXT: ret i1 false
+;
+ %Y = sext i8 %SB to i32
+ %C = icmp sgt i32 %Y, 1024
+ ret i1 %C
}
define i1 @gt_signed_to_large_negative(i8 %SB) {
- %Y = sext i8 %SB to i32 ; <i32> [#uses=1]
- %C = icmp sgt i32 %Y, -1024 ; <i1> [#uses=1]
- ret i1 %C
-; CHECK: ret i1 true
+; CHECK-LABEL: @gt_signed_to_large_negative(
+; CHECK-NEXT: ret i1 true
+;
+ %Y = sext i8 %SB to i32
+ %C = icmp sgt i32 %Y, -1024
+ ret i1 %C
}
define i1 @gt_signed_to_small_unsigned(i8 %SB) {
- %Y = sext i8 %SB to i32
- %C = icmp ugt i32 %Y, 17
- ret i1 %C
-; CHECK: %C = icmp ugt i8 %SB, 17
-; CHECK: ret i1 %C
+; CHECK-LABEL: @gt_signed_to_small_unsigned(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 %SB, 17
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %Y = sext i8 %SB to i32
+ %C = icmp ugt i32 %Y, 17
+ ret i1 %C
}
define i1 @gt_signed_to_small_signed(i8 %SB) {
- %Y = sext i8 %SB to i32 ; <i32> [#uses=1]
- %C = icmp sgt i32 %Y, 17 ; <i1> [#uses=1]
- ret i1 %C
-; CHECK: %C = icmp sgt i8 %SB, 17
-; CHECK: ret i1 %C
+; CHECK-LABEL: @gt_signed_to_small_signed(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i8 %SB, 17
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %Y = sext i8 %SB to i32
+ %C = icmp sgt i32 %Y, 17
+ ret i1 %C
}
define i1 @gt_signed_to_small_negative(i8 %SB) {
- %Y = sext i8 %SB to i32 ; <i32> [#uses=1]
- %C = icmp sgt i32 %Y, -17 ; <i1> [#uses=1]
- ret i1 %C
-; CHECK: %C = icmp sgt i8 %SB, -17
-; CHECK: ret i1 %C
+; CHECK-LABEL: @gt_signed_to_small_negative(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i8 %SB, -17
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %Y = sext i8 %SB to i32
+ %C = icmp sgt i32 %Y, -17
+ ret i1 %C
}
define i1 @gt_unsigned_to_large_unsigned(i8 %SB) {
- %Y = zext i8 %SB to i32 ; <i32> [#uses=1]
- %C = icmp ugt i32 %Y, 1024 ; <i1> [#uses=1]
- ret i1 %C
-; CHECK: ret i1 false
+; CHECK-LABEL: @gt_unsigned_to_large_unsigned(
+; CHECK-NEXT: ret i1 false
+;
+ %Y = zext i8 %SB to i32
+ %C = icmp ugt i32 %Y, 1024
+ ret i1 %C
}
define i1 @gt_unsigned_to_large_signed(i8 %SB) {
- %Y = zext i8 %SB to i32 ; <i32> [#uses=1]
- %C = icmp sgt i32 %Y, 1024 ; <i1> [#uses=1]
- ret i1 %C
-; CHECK: ret i1 false
+; CHECK-LABEL: @gt_unsigned_to_large_signed(
+; CHECK-NEXT: ret i1 false
+;
+ %Y = zext i8 %SB to i32
+ %C = icmp sgt i32 %Y, 1024
+ ret i1 %C
}
define i1 @gt_unsigned_to_large_negative(i8 %SB) {
- %Y = zext i8 %SB to i32 ; <i32> [#uses=1]
- %C = icmp sgt i32 %Y, -1024 ; <i1> [#uses=1]
- ret i1 %C
-; CHECK: ret i1 true
+; CHECK-LABEL: @gt_unsigned_to_large_negative(
+; CHECK-NEXT: ret i1 true
+;
+ %Y = zext i8 %SB to i32
+ %C = icmp sgt i32 %Y, -1024
+ ret i1 %C
}
define i1 @gt_unsigned_to_small_unsigned(i8 %SB) {
- %Y = zext i8 %SB to i32 ; <i32> [#uses=1]
- %C = icmp ugt i32 %Y, 17 ; <i1> [#uses=1]
- ret i1 %C
-; CHECK: %C = icmp ugt i8 %SB, 17
-; CHECK: ret i1 %C
+; CHECK-LABEL: @gt_unsigned_to_small_unsigned(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 %SB, 17
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %Y = zext i8 %SB to i32
+ %C = icmp ugt i32 %Y, 17
+ ret i1 %C
}
define i1 @gt_unsigned_to_small_signed(i8 %SB) {
- %Y = zext i8 %SB to i32
- %C = icmp sgt i32 %Y, 17
- ret i1 %C
-; CHECK: %C = icmp ugt i8 %SB, 17
-; CHECK: ret i1 %C
+; CHECK-LABEL: @gt_unsigned_to_small_signed(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 %SB, 17
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %Y = zext i8 %SB to i32
+ %C = icmp sgt i32 %Y, 17
+ ret i1 %C
}
define i1 @gt_unsigned_to_small_negative(i8 %SB) {
- %Y = zext i8 %SB to i32 ; <i32> [#uses=1]
- %C = icmp sgt i32 %Y, -17 ; <i1> [#uses=1]
- ret i1 %C
-; CHECK: ret i1 true
+; CHECK-LABEL: @gt_unsigned_to_small_negative(
+; CHECK-NEXT: ret i1 true
+;
+ %Y = zext i8 %SB to i32
+ %C = icmp sgt i32 %Y, -17
+ ret i1 %C
}
diff --git a/test/Transforms/InstCombine/2006-12-01-BadFPVectorXform.ll b/test/Transforms/InstCombine/2006-12-01-BadFPVectorXform.ll
index 7adeb9fd9603..b4285ab82099 100644
--- a/test/Transforms/InstCombine/2006-12-01-BadFPVectorXform.ll
+++ b/test/Transforms/InstCombine/2006-12-01-BadFPVectorXform.ll
@@ -1,9 +1,14 @@
-; RUN: opt < %s -instcombine -S | grep sub
-; RUN: opt < %s -instcombine -S | grep add
+; NOTE: Assertions have been autogenerated by update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
define <4 x float> @test(<4 x float> %tmp26, <4 x float> %tmp53) {
; (X+Y)-Y != X for fp vectors.
- %tmp64 = fadd <4 x float> %tmp26, %tmp53 ; <<4 x float>> [#uses=1]
- %tmp75 = fsub <4 x float> %tmp64, %tmp53 ; <<4 x float>> [#uses=1]
- ret <4 x float> %tmp75
+; CHECK-LABEL: @test(
+; CHECK-NEXT: [[TMP64:%.*]] = fadd <4 x float> %tmp26, %tmp53
+; CHECK-NEXT: [[TMP75:%.*]] = fsub <4 x float> [[TMP64]], %tmp53
+; CHECK-NEXT: ret <4 x float> [[TMP75]]
+;
+ %tmp64 = fadd <4 x float> %tmp26, %tmp53
+ %tmp75 = fsub <4 x float> %tmp64, %tmp53
+ ret <4 x float> %tmp75
}
diff --git a/test/Transforms/InstCombine/2007-01-27-AndICmp.ll b/test/Transforms/InstCombine/2007-01-27-AndICmp.ll
deleted file mode 100644
index 6298a0723338..000000000000
--- a/test/Transforms/InstCombine/2007-01-27-AndICmp.ll
+++ /dev/null
@@ -1,8 +0,0 @@
-; RUN: opt < %s -instcombine -S | grep "ugt.*, 1"
-
-define i1 @test(i32 %tmp1030) {
- %tmp1037 = icmp ne i32 %tmp1030, 39 ; <i1> [#uses=1]
- %tmp1039 = icmp ne i32 %tmp1030, 40 ; <i1> [#uses=1]
- %tmp1042 = and i1 %tmp1037, %tmp1039 ; <i1> [#uses=1]
- ret i1 %tmp1042
-}
diff --git a/test/Transforms/InstCombine/2007-11-22-IcmpCrash.ll b/test/Transforms/InstCombine/2007-11-22-IcmpCrash.ll
deleted file mode 100644
index f71b99ce1a4b..000000000000
--- a/test/Transforms/InstCombine/2007-11-22-IcmpCrash.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: opt < %s -instcombine -disable-output
-; PR1817
-
-define i1 @test1(i32 %X) {
- %A = icmp slt i32 %X, 10
- %B = icmp ult i32 %X, 10
- %C = and i1 %A, %B
- ret i1 %C
-}
-
-define i1 @test2(i32 %X) {
- %A = icmp slt i32 %X, 10
- %B = icmp ult i32 %X, 10
- %C = or i1 %A, %B
- ret i1 %C
-}
diff --git a/test/Transforms/InstCombine/2008-07-08-AndICmp.ll b/test/Transforms/InstCombine/2008-07-08-AndICmp.ll
deleted file mode 100644
index a12f4bdf1084..000000000000
--- a/test/Transforms/InstCombine/2008-07-08-AndICmp.ll
+++ /dev/null
@@ -1,10 +0,0 @@
-; RUN: opt < %s -instcombine -S | grep icmp | count 1
-; PR2330
-
-define i1 @foo(i32 %a, i32 %b) nounwind {
-entry:
- icmp ult i32 %a, 8 ; <i1>:0 [#uses=1]
- icmp ult i32 %b, 8 ; <i1>:1 [#uses=1]
- and i1 %1, %0 ; <i1>:2 [#uses=1]
- ret i1 %2
-}
diff --git a/test/Transforms/InstCombine/2008-07-08-ShiftOneAndOne.ll b/test/Transforms/InstCombine/2008-07-08-ShiftOneAndOne.ll
index cfca72adf87d..b0a17467455a 100644
--- a/test/Transforms/InstCombine/2008-07-08-ShiftOneAndOne.ll
+++ b/test/Transforms/InstCombine/2008-07-08-ShiftOneAndOne.ll
@@ -1,10 +1,14 @@
-; RUN: opt < %s -instcombine -S | grep "icmp ne i32 \%a"
-; PR2330
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
-define i1 @foo(i32 %a) nounwind {
-entry:
- %tmp15 = shl i32 1, %a ; <i32> [#uses=1]
- %tmp237 = and i32 %tmp15, 1 ; <i32> [#uses=1]
- %toBool = icmp eq i32 %tmp237, 0 ; <i1> [#uses=1]
- ret i1 %toBool
+define i1 @PR2330(i32 %a) {
+; CHECK-LABEL: @PR2330(
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 %a, 0
+; CHECK-NEXT: ret i1 [[TOBOOL]]
+;
+ %tmp15 = shl i32 1, %a
+ %tmp237 = and i32 %tmp15, 1
+ %toBool = icmp eq i32 %tmp237, 0
+ ret i1 %toBool
}
+
diff --git a/test/Transforms/InstCombine/2008-07-10-CastSextBool.ll b/test/Transforms/InstCombine/2008-07-10-CastSextBool.ll
index 786f0c55bbe2..a9fa53d39992 100644
--- a/test/Transforms/InstCombine/2008-07-10-CastSextBool.ll
+++ b/test/Transforms/InstCombine/2008-07-10-CastSextBool.ll
@@ -1,17 +1,22 @@
-; RUN: opt < %s -instcombine -S | grep "%C = xor i1 %A, true"
-; RUN: opt < %s -instcombine -S | grep "ret i1 false"
-; PR2539
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
-define i1 @test1(i1 %A) {
- %B = zext i1 %A to i32
- %C = icmp slt i32 %B, 1
- ret i1 %C
+define i1 @PR2539_A(i1 %A) {
+; CHECK-LABEL: @PR2539_A(
+; CHECK-NEXT: [[C:%.*]] = xor i1 %A, true
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = zext i1 %A to i32
+ %C = icmp slt i32 %B, 1
+ ret i1 %C
}
-define i1 @test2(i1 zeroext %b) {
-entry:
- %cmptmp = icmp slt i1 %b, true ; <i1> [#uses=1]
- ret i1 %cmptmp
+define i1 @PR2539_B(i1 zeroext %b) {
+; CHECK-LABEL: @PR2539_B(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = icmp slt i1 %b, true
+ ret i1 %cmp
}
diff --git a/test/Transforms/InstCombine/2008-07-16-sse2_storel_dq.ll b/test/Transforms/InstCombine/2008-07-16-sse2_storel_dq.ll
deleted file mode 100644
index b469887ba25c..000000000000
--- a/test/Transforms/InstCombine/2008-07-16-sse2_storel_dq.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: opt < %s -instcombine -S | not grep "store "
-; PR2296
-
-@G = common global double 0.000000e+00, align 16
-
-define void @x(<2 x i64> %y) nounwind {
-entry:
- bitcast <2 x i64> %y to <4 x i32>
- call void @llvm.x86.sse2.storel.dq( i8* bitcast (double* @G to i8*), <4 x i32> %0 ) nounwind
- ret void
-}
-
-declare void @llvm.x86.sse2.storel.dq(i8*, <4 x i32>) nounwind
diff --git a/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll b/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll
index 0f8b38c8e9c1..35cd3affec6d 100644
--- a/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll
+++ b/test/Transforms/InstCombine/2009-02-20-InstCombine-SROA.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -instcombine -scalarrepl -S | not grep " = alloca"
+; RUN: opt < %s -instcombine -sroa -S | not grep " = alloca"
; rdar://6417724
; Instcombine shouldn't do anything to this function that prevents promoting the allocas inside it.
diff --git a/test/Transforms/InstCombine/2012-05-27-Negative-Shift-Crash.ll b/test/Transforms/InstCombine/2012-05-27-Negative-Shift-Crash.ll
deleted file mode 100644
index ba57baf23711..000000000000
--- a/test/Transforms/InstCombine/2012-05-27-Negative-Shift-Crash.ll
+++ /dev/null
@@ -1,57 +0,0 @@
-; RUN: opt -inline -instcombine -S < %s
-; PR12967
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.7.0"
-
-@d = common global i32 0, align 4
-@c = common global i32 0, align 4
-@e = common global i32 0, align 4
-@f = common global i32 0, align 4
-@a = common global i32 0, align 4
-@b = common global i32 0, align 4
-
-define signext i8 @fn1(i32 %p1) nounwind uwtable readnone ssp {
-entry:
- %shr = lshr i32 1, %p1
- %conv = trunc i32 %shr to i8
- ret i8 %conv
-}
-
-define void @fn4() nounwind uwtable ssp {
-entry:
- %0 = load i32, i32* @d, align 4
- %cmp = icmp eq i32 %0, 0
- %conv = zext i1 %cmp to i32
- store i32 %conv, i32* @c, align 4
- tail call void @fn3(i32 %conv) nounwind
- ret void
-}
-
-define void @fn3(i32 %p1) nounwind uwtable ssp {
-entry:
- %and = and i32 %p1, 8
- store i32 %and, i32* @e, align 4
- %sub = add nsw i32 %and, -1
- store i32 %sub, i32* @f, align 4
- %0 = load i32, i32* @a, align 4
- %tobool = icmp eq i32 %0, 0
- br i1 %tobool, label %if.else, label %if.then
-
-if.then: ; preds = %entry
- %1 = load i32, i32* @b, align 4
- %.lobit = lshr i32 %1, 31
- %2 = trunc i32 %.lobit to i8
- %.not = xor i8 %2, 1
- br label %if.end
-
-if.else: ; preds = %entry
- %call = tail call signext i8 @fn1(i32 %sub) nounwind
- br label %if.end
-
-if.end: ; preds = %if.else, %if.then
- %storemerge.in = phi i8 [ %call, %if.else ], [ %.not, %if.then ]
- %storemerge = sext i8 %storemerge.in to i32
- store i32 %storemerge, i32* @b, align 4
- ret void
-}
diff --git a/test/Transforms/InstCombine/addrspacecast.ll b/test/Transforms/InstCombine/addrspacecast.ll
index 27f6b720d667..e375a7aa34b5 100644
--- a/test/Transforms/InstCombine/addrspacecast.ll
+++ b/test/Transforms/InstCombine/addrspacecast.ll
@@ -145,3 +145,42 @@ end:
ret i32 %sum.inc
}
+; CHECK-LABEL: @constant_fold_null(
+; CHECK: i32 addrspace(3)* null to i32 addrspace(4)*
+define void @constant_fold_null() #0 {
+ %cast = addrspacecast i32 addrspace(3)* null to i32 addrspace(4)*
+ store i32 7, i32 addrspace(4)* %cast
+ ret void
+}
+
+; CHECK-LABEL: @constant_fold_undef(
+; CHECK: ret i32 addrspace(4)* undef
+define i32 addrspace(4)* @constant_fold_undef() #0 {
+ %cast = addrspacecast i32 addrspace(3)* undef to i32 addrspace(4)*
+ ret i32 addrspace(4)* %cast
+}
+
+; CHECK-LABEL: @constant_fold_null_vector(
+; CHECK: addrspacecast (<4 x i32 addrspace(3)*> zeroinitializer to <4 x i32 addrspace(4)*>)
+define <4 x i32 addrspace(4)*> @constant_fold_null_vector() #0 {
+ %cast = addrspacecast <4 x i32 addrspace(3)*> zeroinitializer to <4 x i32 addrspace(4)*>
+ ret <4 x i32 addrspace(4)*> %cast
+}
+
+; CHECK-LABEL: @constant_fold_inttoptr(
+; CHECK: addrspacecast (i32 addrspace(3)* inttoptr (i32 -1 to i32 addrspace(3)*) to i32 addrspace(4)*)
+define void @constant_fold_inttoptr() #0 {
+ %cast = addrspacecast i32 addrspace(3)* inttoptr (i32 -1 to i32 addrspace(3)*) to i32 addrspace(4)*
+ store i32 7, i32 addrspace(4)* %cast
+ ret void
+}
+
+; CHECK-LABEL: @constant_fold_gep_inttoptr(
+; CHECK: addrspacecast (i32 addrspace(3)* inttoptr (i64 1274 to i32 addrspace(3)*) to i32 addrspace(4)*)
+define void @constant_fold_gep_inttoptr() #0 {
+ %k = inttoptr i32 1234 to i32 addrspace(3)*
+ %gep = getelementptr i32, i32 addrspace(3)* %k, i32 10
+ %cast = addrspacecast i32 addrspace(3)* %gep to i32 addrspace(4)*
+ store i32 7, i32 addrspace(4)* %cast
+ ret void
+}
diff --git a/test/Transforms/InstCombine/align-attr.ll b/test/Transforms/InstCombine/align-attr.ll
index 99a17db13c4c..75a3766b7d1f 100644
--- a/test/Transforms/InstCombine/align-attr.ll
+++ b/test/Transforms/InstCombine/align-attr.ll
@@ -13,3 +13,16 @@ entry:
; CHECK: ret i32
}
+define i32 @foo2(i32* align 32 %a) #0 {
+entry:
+ %v = call i32* @func1(i32* %a)
+ %0 = load i32, i32* %v, align 4
+ ret i32 %0
+
+; CHECK-LABEL: @foo2
+; CHECK-DAG: load i32, i32* %v, align 32
+; CHECK: ret i32
+}
+
+declare i32* @func1(i32* returned) nounwind
+
diff --git a/test/Transforms/InstCombine/all-bits-shift.ll b/test/Transforms/InstCombine/all-bits-shift.ll
index b9eb19cf2ad1..a035f53d1aa2 100644
--- a/test/Transforms/InstCombine/all-bits-shift.ll
+++ b/test/Transforms/InstCombine/all-bits-shift.ll
@@ -1,5 +1,4 @@
-; RUN: opt -S -instcombine < %s | FileCheck %s
-; RUN: opt -S -instsimplify < %s | FileCheck %s
+; RUN: opt -S -instcombine -expensive-combines < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
diff --git a/test/Transforms/InstCombine/allocsize-32.ll b/test/Transforms/InstCombine/allocsize-32.ll
new file mode 100644
index 000000000000..a732f64e43db
--- /dev/null
+++ b/test/Transforms/InstCombine/allocsize-32.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+;
+; The idea is that we want to have sane semantics (e.g. not assertion failures)
+; when given an allocsize function that takes a 64-bit argument in the face of
+; 32-bit pointers.
+
+target datalayout="e-p:32:32:32"
+
+declare i8* @my_malloc(i8*, i64) allocsize(1)
+
+define void @test_malloc(i8** %p, i32* %r) {
+ %1 = call i8* @my_malloc(i8* null, i64 100)
+ store i8* %1, i8** %p, align 8 ; To ensure objectsize isn't killed
+
+ %2 = call i32 @llvm.objectsize.i32.p0i8(i8* %1, i1 false)
+ ; CHECK: store i32 100
+ store i32 %2, i32* %r, align 8
+
+ ; Big number is 5 billion.
+ %3 = call i8* @my_malloc(i8* null, i64 5000000000)
+ store i8* %3, i8** %p, align 8 ; To ensure objectsize isn't killed
+
+ ; CHECK: call i32 @llvm.objectsize
+ %4 = call i32 @llvm.objectsize.i32.p0i8(i8* %3, i1 false)
+ store i32 %4, i32* %r, align 8
+ ret void
+}
+
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1)
diff --git a/test/Transforms/InstCombine/allocsize.ll b/test/Transforms/InstCombine/allocsize.ll
new file mode 100644
index 000000000000..928c8a50249c
--- /dev/null
+++ b/test/Transforms/InstCombine/allocsize.ll
@@ -0,0 +1,141 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+;
+; Test that instcombine folds allocsize function calls properly.
+; Dummy arguments are inserted to verify that allocsize is picking the right
+; args, and to prove that arbitrary unfoldable values don't interfere with
+; allocsize if they're not used by allocsize.
+
+declare i8* @my_malloc(i8*, i32) allocsize(1)
+declare i8* @my_calloc(i8*, i8*, i32, i32) allocsize(2, 3)
+
+; CHECK-LABEL: define void @test_malloc
+define void @test_malloc(i8** %p, i64* %r) {
+ %1 = call i8* @my_malloc(i8* null, i32 100)
+ store i8* %1, i8** %p, align 8 ; To ensure objectsize isn't killed
+
+ %2 = call i64 @llvm.objectsize.i64.p0i8(i8* %1, i1 false)
+ ; CHECK: store i64 100
+ store i64 %2, i64* %r, align 8
+ ret void
+}
+
+; CHECK-LABEL: define void @test_calloc
+define void @test_calloc(i8** %p, i64* %r) {
+ %1 = call i8* @my_calloc(i8* null, i8* null, i32 100, i32 5)
+ store i8* %1, i8** %p, align 8 ; To ensure objectsize isn't killed
+
+ %2 = call i64 @llvm.objectsize.i64.p0i8(i8* %1, i1 false)
+ ; CHECK: store i64 500
+ store i64 %2, i64* %r, align 8
+ ret void
+}
+
+; Failure cases with non-constant values...
+; CHECK-LABEL: define void @test_malloc_fails
+define void @test_malloc_fails(i8** %p, i64* %r, i32 %n) {
+ %1 = call i8* @my_malloc(i8* null, i32 %n)
+ store i8* %1, i8** %p, align 8 ; To ensure objectsize isn't killed
+
+ ; CHECK: @llvm.objectsize.i64.p0i8
+ %2 = call i64 @llvm.objectsize.i64.p0i8(i8* %1, i1 false)
+ store i64 %2, i64* %r, align 8
+ ret void
+}
+
+; CHECK-LABEL: define void @test_calloc_fails
+define void @test_calloc_fails(i8** %p, i64* %r, i32 %n) {
+ %1 = call i8* @my_calloc(i8* null, i8* null, i32 %n, i32 5)
+ store i8* %1, i8** %p, align 8 ; To ensure objectsize isn't killed
+
+ ; CHECK: @llvm.objectsize.i64.p0i8
+ %2 = call i64 @llvm.objectsize.i64.p0i8(i8* %1, i1 false)
+ store i64 %2, i64* %r, align 8
+
+
+ %3 = call i8* @my_calloc(i8* null, i8* null, i32 100, i32 %n)
+ store i8* %3, i8** %p, align 8 ; To ensure objectsize isn't killed
+
+ ; CHECK: @llvm.objectsize.i64.p0i8
+ %4 = call i64 @llvm.objectsize.i64.p0i8(i8* %3, i1 false)
+ store i64 %4, i64* %r, align 8
+ ret void
+}
+
+declare i8* @my_malloc_outofline(i8*, i32) #0
+declare i8* @my_calloc_outofline(i8*, i8*, i32, i32) #1
+
+; Verifying that out of line allocsize is parsed correctly
+; CHECK-LABEL: define void @test_outofline
+define void @test_outofline(i8** %p, i64* %r) {
+ %1 = call i8* @my_malloc_outofline(i8* null, i32 100)
+ store i8* %1, i8** %p, align 8 ; To ensure objectsize isn't killed
+
+ %2 = call i64 @llvm.objectsize.i64.p0i8(i8* %1, i1 false)
+ ; CHECK: store i64 100
+ store i64 %2, i64* %r, align 8
+
+
+ %3 = call i8* @my_calloc_outofline(i8* null, i8* null, i32 100, i32 5)
+ store i8* %3, i8** %p, align 8 ; To ensure objectsize isn't killed
+
+ %4 = call i64 @llvm.objectsize.i64.p0i8(i8* %3, i1 false)
+ ; CHECK: store i64 500
+ store i64 %4, i64* %r, align 8
+ ret void
+}
+
+declare i8* @my_malloc_i64(i8*, i64) #0
+declare i8* @my_tiny_calloc(i8*, i8*, i8, i8) #1
+declare i8* @my_varied_calloc(i8*, i8*, i32, i8) #1
+
+; CHECK-LABEL: define void @test_overflow
+define void @test_overflow(i8** %p, i32* %r) {
+ %r64 = bitcast i32* %r to i64*
+
+ ; (2**31 + 1) * 2 > 2**31. So overflow. Yay.
+ %big_malloc = call i8* @my_calloc(i8* null, i8* null, i32 2147483649, i32 2)
+ store i8* %big_malloc, i8** %p, align 8
+
+ ; CHECK: @llvm.objectsize
+ %1 = call i32 @llvm.objectsize.i32.p0i8(i8* %big_malloc, i1 false)
+ store i32 %1, i32* %r, align 4
+
+
+ %big_little_malloc = call i8* @my_tiny_calloc(i8* null, i8* null, i8 127, i8 4)
+ store i8* %big_little_malloc, i8** %p, align 8
+
+ ; CHECK: store i32 508
+ %2 = call i32 @llvm.objectsize.i32.p0i8(i8* %big_little_malloc, i1 false)
+ store i32 %2, i32* %r, align 4
+
+
+ ; malloc(2**33)
+ %big_malloc_i64 = call i8* @my_malloc_i64(i8* null, i64 8589934592)
+ store i8* %big_malloc_i64, i8** %p, align 8
+
+ ; CHECK: @llvm.objectsize
+ %3 = call i32 @llvm.objectsize.i32.p0i8(i8* %big_malloc_i64, i1 false)
+ store i32 %3, i32* %r, align 4
+
+
+ %4 = call i64 @llvm.objectsize.i64.p0i8(i8* %big_malloc_i64, i1 false)
+ ; CHECK: store i64 8589934592
+ store i64 %4, i64* %r64, align 8
+
+
+ ; Just intended to ensure that we properly handle args of different types...
+ %varied_calloc = call i8* @my_varied_calloc(i8* null, i8* null, i32 1000, i8 5)
+ store i8* %varied_calloc, i8** %p, align 8
+
+ ; CHECK: store i32 5000
+ %5 = call i32 @llvm.objectsize.i32.p0i8(i8* %varied_calloc, i1 false)
+ store i32 %5, i32* %r, align 4
+
+ ret void
+}
+
+attributes #0 = { allocsize(1) }
+attributes #1 = { allocsize(2, 3) }
+
+declare i32 @llvm.objectsize.i32.p0i8(i8*, i1)
+declare i64 @llvm.objectsize.i64.p0i8(i8*, i1)
diff --git a/test/Transforms/InstCombine/amdgcn-intrinsics.ll b/test/Transforms/InstCombine/amdgcn-intrinsics.ll
new file mode 100644
index 000000000000..a734924f1705
--- /dev/null
+++ b/test/Transforms/InstCombine/amdgcn-intrinsics.ll
@@ -0,0 +1,364 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.rcp
+; --------------------------------------------------------------------
+
+declare float @llvm.amdgcn.rcp.f32(float) nounwind readnone
+declare double @llvm.amdgcn.rcp.f64(double) nounwind readnone
+
+
+; CHECK-LABEL: @test_constant_fold_rcp_f32_1
+; CHECK-NEXT: ret float 1.000000e+00
+define float @test_constant_fold_rcp_f32_1() nounwind {
+ %val = call float @llvm.amdgcn.rcp.f32(float 1.0) nounwind readnone
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_rcp_f64_1
+; CHECK-NEXT: ret double 1.000000e+00
+define double @test_constant_fold_rcp_f64_1() nounwind {
+ %val = call double @llvm.amdgcn.rcp.f64(double 1.0) nounwind readnone
+ ret double %val
+}
+
+; CHECK-LABEL: @test_constant_fold_rcp_f32_half
+; CHECK-NEXT: ret float 2.000000e+00
+define float @test_constant_fold_rcp_f32_half() nounwind {
+ %val = call float @llvm.amdgcn.rcp.f32(float 0.5) nounwind readnone
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_rcp_f64_half
+; CHECK-NEXT: ret double 2.000000e+00
+define double @test_constant_fold_rcp_f64_half() nounwind {
+ %val = call double @llvm.amdgcn.rcp.f64(double 0.5) nounwind readnone
+ ret double %val
+}
+
+; CHECK-LABEL: @test_constant_fold_rcp_f32_43
+; CHECK-NEXT: call float @llvm.amdgcn.rcp.f32(float 4.300000e+01)
+define float @test_constant_fold_rcp_f32_43() nounwind {
+ %val = call float @llvm.amdgcn.rcp.f32(float 4.300000e+01) nounwind readnone
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_rcp_f64_43
+; CHECK-NEXT: call double @llvm.amdgcn.rcp.f64(double 4.300000e+01)
+define double @test_constant_fold_rcp_f64_43() nounwind {
+ %val = call double @llvm.amdgcn.rcp.f64(double 4.300000e+01) nounwind readnone
+ ret double %val
+}
+
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.frexp.mant
+; --------------------------------------------------------------------
+
+declare float @llvm.amdgcn.frexp.mant.f32(float) nounwind readnone
+declare double @llvm.amdgcn.frexp.mant.f64(double) nounwind readnone
+
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_undef(
+; CHECK-NEXT: ret float undef
+define float @test_constant_fold_frexp_mant_f32_undef() nounwind {
+ %val = call float @llvm.amdgcn.frexp.mant.f32(float undef)
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_undef(
+; CHECK-NEXT: ret double undef
+define double @test_constant_fold_frexp_mant_f64_undef() nounwind {
+ %val = call double @llvm.amdgcn.frexp.mant.f64(double undef)
+ ret double %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_0(
+; CHECK-NEXT: ret float 0.000000e+00
+define float @test_constant_fold_frexp_mant_f32_0() nounwind {
+ %val = call float @llvm.amdgcn.frexp.mant.f32(float 0.0)
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_0(
+; CHECK-NEXT: ret double 0.000000e+00
+define double @test_constant_fold_frexp_mant_f64_0() nounwind {
+ %val = call double @llvm.amdgcn.frexp.mant.f64(double 0.0)
+ ret double %val
+}
+
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_n0(
+; CHECK-NEXT: ret float -0.000000e+00
+define float @test_constant_fold_frexp_mant_f32_n0() nounwind {
+ %val = call float @llvm.amdgcn.frexp.mant.f32(float -0.0)
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_n0(
+; CHECK-NEXT: ret double -0.000000e+00
+define double @test_constant_fold_frexp_mant_f64_n0() nounwind {
+ %val = call double @llvm.amdgcn.frexp.mant.f64(double -0.0)
+ ret double %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_1(
+; CHECK-NEXT: ret float 5.000000e-01
+define float @test_constant_fold_frexp_mant_f32_1() nounwind {
+ %val = call float @llvm.amdgcn.frexp.mant.f32(float 1.0)
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_1(
+; CHECK-NEXT: ret double 5.000000e-01
+define double @test_constant_fold_frexp_mant_f64_1() nounwind {
+ %val = call double @llvm.amdgcn.frexp.mant.f64(double 1.0)
+ ret double %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_n1(
+; CHECK-NEXT: ret float -5.000000e-01
+define float @test_constant_fold_frexp_mant_f32_n1() nounwind {
+ %val = call float @llvm.amdgcn.frexp.mant.f32(float -1.0)
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_n1(
+; CHECK-NEXT: ret double -5.000000e-01
+define double @test_constant_fold_frexp_mant_f64_n1() nounwind {
+ %val = call double @llvm.amdgcn.frexp.mant.f64(double -1.0)
+ ret double %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_nan(
+; CHECK-NEXT: ret float 0x7FF8000000000000
+define float @test_constant_fold_frexp_mant_f32_nan() nounwind {
+ %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x7FF8000000000000)
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_nan(
+; CHECK-NEXT: ret double 0x7FF8000000000000
+define double @test_constant_fold_frexp_mant_f64_nan() nounwind {
+ %val = call double @llvm.amdgcn.frexp.mant.f64(double 0x7FF8000000000000)
+ ret double %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_inf(
+; CHECK-NEXT: ret float 0x7FF0000000000000
+define float @test_constant_fold_frexp_mant_f32_inf() nounwind {
+ %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x7FF0000000000000)
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_inf(
+; CHECK-NEXT: ret double 0x7FF0000000000000
+define double @test_constant_fold_frexp_mant_f64_inf() nounwind {
+ %val = call double @llvm.amdgcn.frexp.mant.f64(double 0x7FF0000000000000)
+ ret double %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_ninf(
+; CHECK-NEXT: ret float 0xFFF0000000000000
+define float @test_constant_fold_frexp_mant_f32_ninf() nounwind {
+ %val = call float @llvm.amdgcn.frexp.mant.f32(float 0xFFF0000000000000)
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_ninf(
+; CHECK-NEXT: ret double 0xFFF0000000000000
+define double @test_constant_fold_frexp_mant_f64_ninf() nounwind {
+ %val = call double @llvm.amdgcn.frexp.mant.f64(double 0xFFF0000000000000)
+ ret double %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_max_num(
+; CHECK-NEXT: ret float 0x3FEFFFFFE0000000
+define float @test_constant_fold_frexp_mant_f32_max_num() nounwind {
+ %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x47EFFFFFE0000000)
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_max_num(
+; CHECK-NEXT: ret double 0x3FEFFFFFFFFFFFFF
+define double @test_constant_fold_frexp_mant_f64_max_num() nounwind {
+ %val = call double @llvm.amdgcn.frexp.mant.f64(double 0x7FEFFFFFFFFFFFFF)
+ ret double %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f32_min_num(
+; CHECK-NEXT: ret float 5.000000e-01
+define float @test_constant_fold_frexp_mant_f32_min_num() nounwind {
+ %val = call float @llvm.amdgcn.frexp.mant.f32(float 0x36A0000000000000)
+ ret float %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_mant_f64_min_num(
+; CHECK-NEXT: ret double 5.000000e-01
+define double @test_constant_fold_frexp_mant_f64_min_num() nounwind {
+ %val = call double @llvm.amdgcn.frexp.mant.f64(double 4.940656e-324)
+ ret double %val
+}
+
+
+; --------------------------------------------------------------------
+; llvm.amdgcn.frexp.exp
+; --------------------------------------------------------------------
+
+declare i32 @llvm.amdgcn.frexp.exp.f32(float) nounwind readnone
+declare i32 @llvm.amdgcn.frexp.exp.f64(double) nounwind readnone
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_undef(
+; CHECK-NEXT: ret i32 undef
+define i32 @test_constant_fold_frexp_exp_f32_undef() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f32(float undef)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_undef(
+; CHECK-NEXT: ret i32 undef
+define i32 @test_constant_fold_frexp_exp_f64_undef() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f64(double undef)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_0(
+; CHECK-NEXT: ret i32 0
+define i32 @test_constant_fold_frexp_exp_f32_0() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0.0)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_0(
+; CHECK-NEXT: ret i32 0
+define i32 @test_constant_fold_frexp_exp_f64_0() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0.0)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_n0(
+; CHECK-NEXT: ret i32 0
+define i32 @test_constant_fold_frexp_exp_f32_n0() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f32(float -0.0)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_n0(
+; CHECK-NEXT: ret i32 0
+define i32 @test_constant_fold_frexp_exp_f64_n0() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f64(double -0.0)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_1024(
+; CHECK-NEXT: ret i32 11
+define i32 @test_constant_fold_frexp_exp_f32_1024() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 1024.0)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_1024(
+; CHECK-NEXT: ret i32 11
+define i32 @test_constant_fold_frexp_exp_f64_1024() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 1024.0)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_n1024(
+; CHECK-NEXT: ret i32 11
+define i32 @test_constant_fold_frexp_exp_f32_n1024() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f32(float -1024.0)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_n1024(
+; CHECK-NEXT: ret i32 11
+define i32 @test_constant_fold_frexp_exp_f64_n1024() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f64(double -1024.0)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_1_1024(
+; CHECK-NEXT: ret i32 -9
+define i32 @test_constant_fold_frexp_exp_f32_1_1024() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0.0009765625)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_1_1024(
+; CHECK-NEXT: ret i32 -9
+define i32 @test_constant_fold_frexp_exp_f64_1_1024() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0.0009765625)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_nan(
+; CHECK-NEXT: ret i32 0
+define i32 @test_constant_fold_frexp_exp_f32_nan() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x7FF8000000000000)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_nan(
+; CHECK-NEXT: ret i32 0
+define i32 @test_constant_fold_frexp_exp_f64_nan() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0x7FF8000000000000)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_inf(
+; CHECK-NEXT: ret i32 0
+define i32 @test_constant_fold_frexp_exp_f32_inf() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x7FF0000000000000)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_inf(
+; CHECK-NEXT: ret i32 0
+define i32 @test_constant_fold_frexp_exp_f64_inf() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0x7FF0000000000000)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_ninf(
+; CHECK-NEXT: ret i32 0
+define i32 @test_constant_fold_frexp_exp_f32_ninf() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0xFFF0000000000000)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_ninf(
+; CHECK-NEXT: ret i32 0
+define i32 @test_constant_fold_frexp_exp_f64_ninf() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0xFFF0000000000000)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_max_num(
+; CHECK-NEXT: ret i32 128
+define i32 @test_constant_fold_frexp_exp_f32_max_num() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x47EFFFFFE0000000)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_max_num(
+; CHECK-NEXT: ret i32 1024
+define i32 @test_constant_fold_frexp_exp_f64_max_num() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 0x7FEFFFFFFFFFFFFF)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f32_min_num(
+; CHECK-NEXT: ret i32 -148
+define i32 @test_constant_fold_frexp_exp_f32_min_num() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f32(float 0x36A0000000000000)
+ ret i32 %val
+}
+
+; CHECK-LABEL: @test_constant_fold_frexp_exp_f64_min_num(
+; CHECK-NEXT: ret i32 -1073
+define i32 @test_constant_fold_frexp_exp_f64_min_num() nounwind {
+ %val = call i32 @llvm.amdgcn.frexp.exp.f64(double 4.940656e-324)
+ ret i32 %val
+}
+
diff --git a/test/Transforms/InstCombine/and-compare.ll b/test/Transforms/InstCombine/and-compare.ll
index 53ea81d1c0d4..e97d74c5a03b 100644
--- a/test/Transforms/InstCombine/and-compare.ll
+++ b/test/Transforms/InstCombine/and-compare.ll
@@ -1,5 +1,4 @@
-; RUN: opt < %s -instcombine -S | \
-; RUN: FileCheck %s
+; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/InstCombine/and-fcmp.ll b/test/Transforms/InstCombine/and-fcmp.ll
index a398307f869e..553533392c11 100644
--- a/test/Transforms/InstCombine/and-fcmp.ll
+++ b/test/Transforms/InstCombine/and-fcmp.ll
@@ -1,100 +1,1483 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
-define zeroext i8 @t1(float %x, float %y) nounwind {
- %a = fcmp ueq float %x, %y
- %b = fcmp ord float %x, %y
- %c = and i1 %a, %b
- %retval = zext i1 %c to i8
- ret i8 %retval
-; CHECK: t1
-; CHECK: fcmp oeq float %x, %y
-; CHECK-NOT: fcmp ueq float %x, %y
-; CHECK-NOT: fcmp ord float %x, %y
-; CHECK-NOT: and
-}
-
-define zeroext i8 @t2(float %x, float %y) nounwind {
- %a = fcmp olt float %x, %y
- %b = fcmp ord float %x, %y
- %c = and i1 %a, %b
- %retval = zext i1 %c to i8
- ret i8 %retval
-; CHECK: t2
-; CHECK: fcmp olt float %x, %y
-; CHECK-NOT: fcmp ord float %x, %y
-; CHECK-NOT: and
-}
-
-define zeroext i8 @t3(float %x, float %y) nounwind {
- %a = fcmp oge float %x, %y
- %b = fcmp uno float %x, %y
- %c = and i1 %a, %b
- %retval = zext i1 %c to i8
- ret i8 %retval
-; CHECK: t3
-; CHECK: ret i8 0
-}
-
-define zeroext i8 @t4(float %x, float %y) nounwind {
- %a = fcmp one float %y, %x
- %b = fcmp ord float %x, %y
- %c = and i1 %a, %b
- %retval = zext i1 %c to i8
- ret i8 %retval
-; CHECK: t4
-; CHECK: fcmp one float %y, %x
-; CHECK-NOT: fcmp ord float %x, %y
-; CHECK-NOT: and
-}
-
-define zeroext i8 @t5(float %x, float %y) nounwind {
- %a = fcmp ord float %x, %y
- %b = fcmp uno float %x, %y
- %c = and i1 %a, %b
- %retval = zext i1 %c to i8
- ret i8 %retval
-; CHECK: t5
-; CHECK: ret i8 0
-}
-
-define zeroext i8 @t6(float %x, float %y) nounwind {
- %a = fcmp uno float %x, %y
- %b = fcmp ord float %x, %y
- %c = and i1 %a, %b
- %retval = zext i1 %c to i8
- ret i8 %retval
-; CHECK: t6
-; CHECK: ret i8 0
-}
-
-define zeroext i8 @t7(float %x, float %y) nounwind {
- %a = fcmp uno float %x, %y
- %b = fcmp ult float %x, %y
- %c = and i1 %a, %b
- %retval = zext i1 %c to i8
- ret i8 %retval
-; CHECK: t7
-; CHECK: fcmp uno
-; CHECK-NOT: fcmp ult
-}
-
; PR15737
define i1 @t8(float %a, double %b) {
+; CHECK-LABEL: @t8(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ord float %a, 0.000000e+00
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ord double %b, 0.000000e+00
+; CHECK-NEXT: [[AND:%.*]] = and i1 [[CMP]], [[CMP1]]
+; CHECK-NEXT: ret i1 [[AND]]
+;
%cmp = fcmp ord float %a, 0.000000e+00
%cmp1 = fcmp ord double %b, 0.000000e+00
%and = and i1 %cmp, %cmp1
ret i1 %and
-; CHECK: t8
-; CHECK: fcmp ord
-; CHECK: fcmp ord
}
define <2 x i1> @t9(<2 x float> %a, <2 x double> %b) {
+; CHECK-LABEL: @t9(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ord <2 x float> %a, zeroinitializer
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ord <2 x double> %b, zeroinitializer
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i1> [[CMP]], [[CMP1]]
+; CHECK-NEXT: ret <2 x i1> [[AND]]
+;
%cmp = fcmp ord <2 x float> %a, zeroinitializer
%cmp1 = fcmp ord <2 x double> %b, zeroinitializer
%and = and <2 x i1> %cmp, %cmp1
ret <2 x i1> %and
-; CHECK: t9
-; CHECK: fcmp ord
-; CHECK: fcmp ord
+}
+
+define i1 @auto_gen_0(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_0(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp false double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_1(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_1(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp oeq double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_2(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_2(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp oeq double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_3(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_3(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ogt double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_4(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_4(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ogt double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_5(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_5(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ogt double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_6(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_6(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp oge double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_7(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_7(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp oge double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_8(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_8(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp oge double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_9(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_9(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oge double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp oge double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_10(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_10(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp olt double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_11(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_11(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp olt double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_12(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_12(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp olt double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_13(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_13(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp olt double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_14(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_14(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp olt double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_15(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_15(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ole double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_16(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_16(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ole double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_17(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_17(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ole double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_18(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_18(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ole double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_19(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_19(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ole double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_20(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_20(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ole double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ole double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_21(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_21(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp one double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_22(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_22(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp one double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_23(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_23(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp one double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_24(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_24(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp one double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_25(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_25(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp one double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_26(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_26(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp one double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_27(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_27(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp one double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_28(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_28(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ord double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_29(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_29(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ord double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_30(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_30(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ord double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_31(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_31(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oge double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ord double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_32(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_32(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ord double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_33(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_33(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ole double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ord double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_34(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_34(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ord double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_35(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_35(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ord double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_36(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_36(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_37(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_37(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_38(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_38(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_39(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_39(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_40(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_40(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_41(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_41(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_42(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_42(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_43(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_43(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_44(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_44(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp ueq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_45(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_45(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_46(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_46(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_47(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_47(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_48(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_48(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_49(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_49(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_50(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_50(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_51(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_51(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_52(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_52(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_53(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_53(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp ueq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_54(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_54(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ugt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp ugt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_55(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_55(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_56(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_56(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_57(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_57(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_58(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_58(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oge double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_59(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_59(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_60(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_60(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_61(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_61(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_62(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_62(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oge double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_63(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_63(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp ueq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_64(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_64(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ugt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp ugt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_65(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_65(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp uge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_66(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_66(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_67(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_67(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_68(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_68(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_69(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_69(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_70(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_70(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_71(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_71(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_72(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_72(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_73(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_73(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_74(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_74(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp ueq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_75(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_75(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp ugt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_76(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_76(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp uge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_77(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_77(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp ult double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_78(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_78(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_79(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_79(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_80(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_80(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_81(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_81(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_82(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_82(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_83(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_83(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ole double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_84(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_84(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_85(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_85(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ole double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_86(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_86(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp ueq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_87(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_87(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp ugt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_88(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_88(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp uge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_89(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_89(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp ult double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_90(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_90(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp ule double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_91(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_91(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_92(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_92(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_93(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_93(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_94(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_94(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_95(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_95(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_96(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_96(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_97(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_97(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_98(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_98(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_99(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_99(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp ueq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_100(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_100(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ugt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp ugt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_101(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_101(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ugt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp uge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_102(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_102(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp ult double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_103(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_103(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp ule double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_104(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_104(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp une double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_105(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_105(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_106(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_106(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_107(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_107(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_108(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_108(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_109(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_109(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_110(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_110(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_111(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_111(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_112(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_112(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_113(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_113(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp ueq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_114(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_114(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp ugt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_115(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_115(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp uge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_116(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_116(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp ult double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_117(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_117(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp ule double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_118(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_118(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp une double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_119(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_119(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp uno double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_120(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_120(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_121(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_121(
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp oeq double %a, %b
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_122(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_122(
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ogt double %a, %b
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_123(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_123(
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp oge double %a, %b
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_124(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_124(
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp olt double %a, %b
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_125(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_125(
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ole double %a, %b
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_126(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_126(
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp one double %a, %b
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_127(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_127(
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ord double %a, %b
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_128(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_128(
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ueq double %a, %b
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp ueq double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_129(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_129(
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ugt double %a, %b
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp ugt double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_130(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_130(
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp uge double %a, %b
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp uge double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_131(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_131(
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ult double %a, %b
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp ult double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_132(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_132(
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp ule double %a, %b
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp ule double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_133(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_133(
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp une double %a, %b
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp une double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_134(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_134(
+; CHECK-NEXT: [[CMP1:%.*]] = fcmp uno double %a, %b
+; CHECK-NEXT: ret i1 [[CMP1]]
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp uno double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_135(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_135(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp true double %a, %b
+ %retval = and i1 %cmp, %cmp1
+ ret i1 %retval
}
diff --git a/test/Transforms/InstCombine/and-or-icmps.ll b/test/Transforms/InstCombine/and-or-icmps.ll
new file mode 100644
index 000000000000..3903472e9119
--- /dev/null
+++ b/test/Transforms/InstCombine/and-or-icmps.ll
@@ -0,0 +1,53 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i1 @PR1817_1(i32 %X) {
+; CHECK-LABEL: @PR1817_1(
+; CHECK-NEXT: [[A:%.*]] = icmp slt i32 %X, 10
+; CHECK-NEXT: [[B:%.*]] = icmp ult i32 %X, 10
+; CHECK-NEXT: [[C:%.*]] = and i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %A = icmp slt i32 %X, 10
+ %B = icmp ult i32 %X, 10
+ %C = and i1 %A, %B
+ ret i1 %C
+}
+
+define i1 @PR1817_2(i32 %X) {
+; CHECK-LABEL: @PR1817_2(
+; CHECK-NEXT: [[A:%.*]] = icmp slt i32 %X, 10
+; CHECK-NEXT: [[B:%.*]] = icmp ult i32 %X, 10
+; CHECK-NEXT: [[C:%.*]] = or i1 [[A]], [[B]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %A = icmp slt i32 %X, 10
+ %B = icmp ult i32 %X, 10
+ %C = or i1 %A, %B
+ ret i1 %C
+}
+
+define i1 @PR2330(i32 %a, i32 %b) {
+; CHECK-LABEL: @PR2330(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 %b, %a
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 8
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %cmp1 = icmp ult i32 %a, 8
+ %cmp2 = icmp ult i32 %b, 8
+ %and = and i1 %cmp2, %cmp1
+ ret i1 %and
+}
+
+define i1 @test(i32 %tmp1030) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: [[TMP1030_OFF:%.*]] = add i32 %tmp1030, -39
+; CHECK-NEXT: [[TMP1030_CMP:%.*]] = icmp ugt i32 [[TMP1030_OFF]], 1
+; CHECK-NEXT: ret i1 [[TMP1030_CMP]]
+;
+ %tmp1037 = icmp ne i32 %tmp1030, 39
+ %tmp1039 = icmp ne i32 %tmp1030, 40
+ %tmp1042 = and i1 %tmp1037, %tmp1039
+ ret i1 %tmp1042
+}
+
diff --git a/test/Transforms/InstCombine/and-or-not.ll b/test/Transforms/InstCombine/and-or-not.ll
index cc661d50c896..144e42e74868 100644
--- a/test/Transforms/InstCombine/and-or-not.ll
+++ b/test/Transforms/InstCombine/and-or-not.ll
@@ -1,46 +1,56 @@
-; RUN: opt < %s -instcombine -S | grep xor | count 4
-; RUN: opt < %s -instcombine -S | not grep and
-; RUN: opt < %s -instcombine -S | not grep " or"
+; RUN: opt < %s -instcombine -S | FileCheck %s
; PR1510
; These are all equivalent to A^B
define i32 @test1(i32 %a, i32 %b) {
-entry:
%tmp3 = or i32 %b, %a ; <i32> [#uses=1]
%tmp3not = xor i32 %tmp3, -1 ; <i32> [#uses=1]
%tmp6 = and i32 %b, %a ; <i32> [#uses=1]
%tmp7 = or i32 %tmp6, %tmp3not ; <i32> [#uses=1]
%tmp7not = xor i32 %tmp7, -1 ; <i32> [#uses=1]
ret i32 %tmp7not
+
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[TMP7NOT:%.*]] = xor i32 %b, %a
+; CHECK-NEXT: ret i32 [[TMP7NOT]]
}
define i32 @test2(i32 %a, i32 %b) {
-entry:
%tmp3 = or i32 %b, %a ; <i32> [#uses=1]
%tmp6 = and i32 %b, %a ; <i32> [#uses=1]
%tmp6not = xor i32 %tmp6, -1 ; <i32> [#uses=1]
%tmp7 = and i32 %tmp3, %tmp6not ; <i32> [#uses=1]
ret i32 %tmp7
+
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[TMP7:%.*]] = xor i32 %b, %a
+; CHECK-NEXT: ret i32 [[TMP7]]
}
define <4 x i32> @test3(<4 x i32> %a, <4 x i32> %b) {
-entry:
%tmp3 = or <4 x i32> %a, %b ; <<4 x i32>> [#uses=1]
%tmp3not = xor <4 x i32> %tmp3, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1]
%tmp6 = and <4 x i32> %a, %b ; <<4 x i32>> [#uses=1]
%tmp7 = or <4 x i32> %tmp6, %tmp3not ; <<4 x i32>> [#uses=1]
%tmp7not = xor <4 x i32> %tmp7, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1]
ret <4 x i32> %tmp7not
+
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[TMP7NOT:%.*]] = xor <4 x i32> %a, %b
+; CHECK-NEXT: ret <4 x i32> [[TMP7NOT]]
}
define <4 x i32> @test4(<4 x i32> %a, <4 x i32> %b) {
-entry:
%tmp3 = or <4 x i32> %a, %b ; <<4 x i32>> [#uses=1]
%tmp6 = and <4 x i32> %a, %b ; <<4 x i32>> [#uses=1]
%tmp6not = xor <4 x i32> %tmp6, < i32 -1, i32 -1, i32 -1, i32 -1 > ; <<4 x i32>> [#uses=1]
%tmp7 = and <4 x i32> %tmp3, %tmp6not ; <<4 x i32>> [#uses=1]
ret <4 x i32> %tmp7
+
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[TMP7:%.*]] = xor <4 x i32> %a, %b
+; CHECK-NEXT: ret <4 x i32> [[TMP7]]
}
diff --git a/test/Transforms/InstCombine/and-or.ll b/test/Transforms/InstCombine/and-or.ll
index 0ae12a36c2b8..2ad90ad98e7c 100644
--- a/test/Transforms/InstCombine/and-or.ll
+++ b/test/Transforms/InstCombine/and-or.ll
@@ -1,39 +1,55 @@
-; RUN: opt < %s -instcombine -S | grep "and i32 %a, 1" | count 4
-; RUN: opt < %s -instcombine -S | grep "or i32 %0, %b" | count 4
+; NOTE: Assertions have been autogenerated by update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
-
-define i32 @func1(i32 %a, i32 %b) nounwind readnone {
-entry:
- %0 = or i32 %b, %a ; <i32> [#uses=1]
- %1 = and i32 %0, 1 ; <i32> [#uses=1]
- %2 = and i32 %b, -2 ; <i32> [#uses=1]
- %3 = or i32 %1, %2 ; <i32> [#uses=1]
- ret i32 %3
+define i32 @func1(i32 %a, i32 %b) {
+; CHECK-LABEL: @func1(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %a, 1
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP1]], %b
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %tmp = or i32 %b, %a
+ %tmp1 = and i32 %tmp, 1
+ %tmp2 = and i32 %b, -2
+ %tmp3 = or i32 %tmp1, %tmp2
+ ret i32 %tmp3
}
-define i32 @func2(i32 %a, i32 %b) nounwind readnone {
-entry:
- %0 = or i32 %a, %b ; <i32> [#uses=1]
- %1 = and i32 1, %0 ; <i32> [#uses=1]
- %2 = and i32 -2, %b ; <i32> [#uses=1]
- %3 = or i32 %1, %2 ; <i32> [#uses=1]
- ret i32 %3
+define i32 @func2(i32 %a, i32 %b) {
+; CHECK-LABEL: @func2(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %a, 1
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP1]], %b
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %tmp = or i32 %a, %b
+ %tmp1 = and i32 1, %tmp
+ %tmp2 = and i32 -2, %b
+ %tmp3 = or i32 %tmp1, %tmp2
+ ret i32 %tmp3
}
-define i32 @func3(i32 %a, i32 %b) nounwind readnone {
-entry:
- %0 = or i32 %b, %a ; <i32> [#uses=1]
- %1 = and i32 %0, 1 ; <i32> [#uses=1]
- %2 = and i32 %b, -2 ; <i32> [#uses=1]
- %3 = or i32 %2, %1 ; <i32> [#uses=1]
- ret i32 %3
+define i32 @func3(i32 %a, i32 %b) {
+; CHECK-LABEL: @func3(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %a, 1
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP1]], %b
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %tmp = or i32 %b, %a
+ %tmp1 = and i32 %tmp, 1
+ %tmp2 = and i32 %b, -2
+ %tmp3 = or i32 %tmp2, %tmp1
+ ret i32 %tmp3
}
-define i32 @func4(i32 %a, i32 %b) nounwind readnone {
-entry:
- %0 = or i32 %a, %b ; <i32> [#uses=1]
- %1 = and i32 1, %0 ; <i32> [#uses=1]
- %2 = and i32 -2, %b ; <i32> [#uses=1]
- %3 = or i32 %2, %1 ; <i32> [#uses=1]
- ret i32 %3
+define i32 @func4(i32 %a, i32 %b) {
+; CHECK-LABEL: @func4(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %a, 1
+; CHECK-NEXT: [[TMP3:%.*]] = or i32 [[TMP1]], %b
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %tmp = or i32 %a, %b
+ %tmp1 = and i32 1, %tmp
+ %tmp2 = and i32 -2, %b
+ %tmp3 = or i32 %tmp2, %tmp1
+ ret i32 %tmp3
}
+
diff --git a/test/Transforms/InstCombine/and.ll b/test/Transforms/InstCombine/and.ll
index 3d36bfb404d1..43b80e3f91cf 100644
--- a/test/Transforms/InstCombine/and.ll
+++ b/test/Transforms/InstCombine/and.ll
@@ -1,255 +1,370 @@
-; This test makes sure that these instructions are properly eliminated.
-;
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
-; RUN: opt < %s -instcombine -S | not grep and
+; There should be no 'and' instructions left in any test.
define i32 @test1(i32 %A) {
- ; zero result
- %B = and i32 %A, 0 ; <i32> [#uses=1]
- ret i32 %B
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: ret i32 0
+;
+ %B = and i32 %A, 0
+ ret i32 %B
}
define i32 @test2(i32 %A) {
- ; noop
- %B = and i32 %A, -1 ; <i32> [#uses=1]
- ret i32 %B
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: ret i32 %A
+;
+ %B = and i32 %A, -1
+ ret i32 %B
}
define i1 @test3(i1 %A) {
- ; always = false
- %B = and i1 %A, false ; <i1> [#uses=1]
- ret i1 %B
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: ret i1 false
+;
+ %B = and i1 %A, false
+ ret i1 %B
}
define i1 @test4(i1 %A) {
- ; noop
- %B = and i1 %A, true ; <i1> [#uses=1]
- ret i1 %B
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: ret i1 %A
+;
+ %B = and i1 %A, true
+ ret i1 %B
}
define i32 @test5(i32 %A) {
- %B = and i32 %A, %A ; <i32> [#uses=1]
- ret i32 %B
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: ret i32 %A
+;
+ %B = and i32 %A, %A
+ ret i32 %B
}
define i1 @test6(i1 %A) {
- %B = and i1 %A, %A ; <i1> [#uses=1]
- ret i1 %B
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: ret i1 %A
+;
+ %B = and i1 %A, %A
+ ret i1 %B
}
; A & ~A == 0
define i32 @test7(i32 %A) {
- %NotA = xor i32 %A, -1 ; <i32> [#uses=1]
- %B = and i32 %A, %NotA ; <i32> [#uses=1]
- ret i32 %B
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: ret i32 0
+;
+ %NotA = xor i32 %A, -1
+ %B = and i32 %A, %NotA
+ ret i32 %B
}
; AND associates
define i8 @test8(i8 %A) {
- %B = and i8 %A, 3 ; <i8> [#uses=1]
- %C = and i8 %B, 4 ; <i8> [#uses=1]
- ret i8 %C
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: ret i8 0
+;
+ %B = and i8 %A, 3
+ %C = and i8 %B, 4
+ ret i8 %C
}
+; Test of sign bit, convert to setle %A, 0
define i1 @test9(i32 %A) {
- ; Test of sign bit, convert to setle %A, 0
- %B = and i32 %A, -2147483648 ; <i32> [#uses=1]
- %C = icmp ne i32 %B, 0 ; <i1> [#uses=1]
- ret i1 %C
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i32 %A, 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = and i32 %A, -2147483648
+ %C = icmp ne i32 %B, 0
+ ret i1 %C
}
+; Test of sign bit, convert to setle %A, 0
define i1 @test9a(i32 %A) {
- ; Test of sign bit, convert to setle %A, 0
- %B = and i32 %A, -2147483648 ; <i32> [#uses=1]
- %C = icmp ne i32 %B, 0 ; <i1> [#uses=1]
- ret i1 %C
+; CHECK-LABEL: @test9a(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i32 %A, 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = and i32 %A, -2147483648
+ %C = icmp ne i32 %B, 0
+ ret i1 %C
}
define i32 @test10(i32 %A) {
- %B = and i32 %A, 12 ; <i32> [#uses=1]
- %C = xor i32 %B, 15 ; <i32> [#uses=1]
- ; (X ^ C1) & C2 --> (X & C2) ^ (C1&C2)
- %D = and i32 %C, 1 ; <i32> [#uses=1]
- ret i32 %D
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: ret i32 1
+;
+ %B = and i32 %A, 12
+ %C = xor i32 %B, 15
+ ; (X ^ C1) & C2 --> (X & C2) ^ (C1&C2)
+ %D = and i32 %C, 1
+ ret i32 %D
}
define i32 @test11(i32 %A, i32* %P) {
- %B = or i32 %A, 3 ; <i32> [#uses=1]
- %C = xor i32 %B, 12 ; <i32> [#uses=2]
- ; additional use of C
- store i32 %C, i32* %P
- ; %C = and uint %B, 3 --> 3
- %D = and i32 %C, 3 ; <i32> [#uses=1]
- ret i32 %D
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: [[B:%.*]] = or i32 %A, 3
+; CHECK-NEXT: [[C:%.*]] = xor i32 [[B]], 12
+; CHECK-NEXT: store i32 [[C]], i32* %P, align 4
+; CHECK-NEXT: ret i32 3
+;
+ %B = or i32 %A, 3
+ %C = xor i32 %B, 12
+ ; additional use of C
+ store i32 %C, i32* %P
+ ; %C = and uint %B, 3 --> 3
+ %D = and i32 %C, 3
+ ret i32 %D
}
define i1 @test12(i32 %A, i32 %B) {
- %C1 = icmp ult i32 %A, %B ; <i1> [#uses=1]
- %C2 = icmp ule i32 %A, %B ; <i1> [#uses=1]
- ; (A < B) & (A <= B) === (A < B)
- %D = and i1 %C1, %C2 ; <i1> [#uses=1]
- ret i1 %D
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 %A, %B
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %C1 = icmp ult i32 %A, %B
+ %C2 = icmp ule i32 %A, %B
+ ; (A < B) & (A <= B) === (A < B)
+ %D = and i1 %C1, %C2
+ ret i1 %D
}
define i1 @test13(i32 %A, i32 %B) {
- %C1 = icmp ult i32 %A, %B ; <i1> [#uses=1]
- %C2 = icmp ugt i32 %A, %B ; <i1> [#uses=1]
- ; (A < B) & (A > B) === false
- %D = and i1 %C1, %C2 ; <i1> [#uses=1]
- ret i1 %D
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: ret i1 false
+;
+ %C1 = icmp ult i32 %A, %B
+ %C2 = icmp ugt i32 %A, %B
+ ; (A < B) & (A > B) === false
+ %D = and i1 %C1, %C2
+ ret i1 %D
}
define i1 @test14(i8 %A) {
- %B = and i8 %A, -128 ; <i8> [#uses=1]
- %C = icmp ne i8 %B, 0 ; <i1> [#uses=1]
- ret i1 %C
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i8 %A, 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = and i8 %A, -128
+ %C = icmp ne i8 %B, 0
+ ret i1 %C
}
define i8 @test15(i8 %A) {
- %B = lshr i8 %A, 7 ; <i8> [#uses=1]
- ; Always equals zero
- %C = and i8 %B, 2 ; <i8> [#uses=1]
- ret i8 %C
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: ret i8 0
+;
+ %B = lshr i8 %A, 7
+ ; Always equals zero
+ %C = and i8 %B, 2
+ ret i8 %C
}
define i8 @test16(i8 %A) {
- %B = shl i8 %A, 2 ; <i8> [#uses=1]
- %C = and i8 %B, 3 ; <i8> [#uses=1]
- ret i8 %C
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: ret i8 0
+;
+ %B = shl i8 %A, 2
+ %C = and i8 %B, 3
+ ret i8 %C
}
;; ~(~X & Y) --> (X | ~Y)
define i8 @test17(i8 %X, i8 %Y) {
- %B = xor i8 %X, -1 ; <i8> [#uses=1]
- %C = and i8 %B, %Y ; <i8> [#uses=1]
- %D = xor i8 %C, -1 ; <i8> [#uses=1]
- ret i8 %D
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: [[Y_NOT:%.*]] = xor i8 %Y, -1
+; CHECK-NEXT: [[D:%.*]] = or i8 %X, [[Y_NOT]]
+; CHECK-NEXT: ret i8 [[D]]
+;
+ %B = xor i8 %X, -1
+ %C = and i8 %B, %Y
+ %D = xor i8 %C, -1
+ ret i8 %D
}
define i1 @test18(i32 %A) {
- %B = and i32 %A, -128 ; <i32> [#uses=1]
- ;; C >= 128
- %C = icmp ne i32 %B, 0 ; <i1> [#uses=1]
- ret i1 %C
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 %A, 127
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = and i32 %A, -128
+ ;; C >= 128
+ %C = icmp ne i32 %B, 0
+ ret i1 %C
}
define i1 @test18a(i8 %A) {
- %B = and i8 %A, -2 ; <i8> [#uses=1]
- %C = icmp eq i8 %B, 0 ; <i1> [#uses=1]
- ret i1 %C
+; CHECK-LABEL: @test18a(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i8 %A, 2
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = and i8 %A, -2
+ %C = icmp eq i8 %B, 0
+ ret i1 %C
}
define i32 @test19(i32 %A) {
- %B = shl i32 %A, 3 ; <i32> [#uses=1]
- ;; Clearing a zero bit
- %C = and i32 %B, -2 ; <i32> [#uses=1]
- ret i32 %C
+; CHECK-LABEL: @test19(
+; CHECK-NEXT: [[B:%.*]] = shl i32 %A, 3
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %B = shl i32 %A, 3
+ ;; Clearing a zero bit
+ %C = and i32 %B, -2
+ ret i32 %C
}
define i8 @test20(i8 %A) {
- %C = lshr i8 %A, 7 ; <i8> [#uses=1]
- ;; Unneeded
- %D = and i8 %C, 1 ; <i8> [#uses=1]
- ret i8 %D
-}
-
-define i1 @test22(i32 %A) {
- %B = icmp eq i32 %A, 1 ; <i1> [#uses=1]
- %C = icmp sge i32 %A, 3 ; <i1> [#uses=1]
- ;; false
- %D = and i1 %B, %C ; <i1> [#uses=1]
- ret i1 %D
+; CHECK-LABEL: @test20(
+; CHECK-NEXT: [[C:%.*]] = lshr i8 %A, 7
+; CHECK-NEXT: ret i8 [[C]]
+;
+ %C = lshr i8 %A, 7
+ ;; Unneeded
+ %D = and i8 %C, 1
+ ret i8 %D
}
define i1 @test23(i32 %A) {
- %B = icmp sgt i32 %A, 1 ; <i1> [#uses=1]
- %C = icmp sle i32 %A, 2 ; <i1> [#uses=1]
- ;; A == 2
- %D = and i1 %B, %C ; <i1> [#uses=1]
- ret i1 %D
+; CHECK-LABEL: @test23(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 %A, 2
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %B = icmp sgt i32 %A, 1
+ %C = icmp sle i32 %A, 2
+ ;; A == 2
+ %D = and i1 %B, %C
+ ret i1 %D
}
define i1 @test24(i32 %A) {
- %B = icmp sgt i32 %A, 1 ; <i1> [#uses=1]
- %C = icmp ne i32 %A, 2 ; <i1> [#uses=1]
- ;; A > 2
- %D = and i1 %B, %C ; <i1> [#uses=1]
- ret i1 %D
+; CHECK-LABEL: @test24(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 %A, 2
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %B = icmp sgt i32 %A, 1
+ %C = icmp ne i32 %A, 2
+ ;; A > 2
+ %D = and i1 %B, %C
+ ret i1 %D
}
define i1 @test25(i32 %A) {
- %B = icmp sge i32 %A, 50 ; <i1> [#uses=1]
- %C = icmp slt i32 %A, 100 ; <i1> [#uses=1]
- ;; (A-50) <u 50
- %D = and i1 %B, %C ; <i1> [#uses=1]
- ret i1 %D
+; CHECK-LABEL: @test25(
+; CHECK-NEXT: [[A_OFF:%.*]] = add i32 %A, -50
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A_OFF]], 50
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %B = icmp sge i32 %A, 50
+ %C = icmp slt i32 %A, 100
+ ;; (A-50) <u 50
+ %D = and i1 %B, %C
+ ret i1 %D
}
define i1 @test26(i32 %A) {
- %B = icmp ne i32 %A, 49 ; <i1> [#uses=1]
- %C = icmp ne i32 %A, 50 ; <i1> [#uses=1]
- ;; (A-49) > 1
- %D = and i1 %B, %C ; <i1> [#uses=1]
- ret i1 %D
+; CHECK-LABEL: @test26(
+; CHECK-NEXT: [[A_OFF:%.*]] = add i32 %A, -49
+; CHECK-NEXT: [[A_CMP:%.*]] = icmp ugt i32 [[A_OFF]], 1
+; CHECK-NEXT: ret i1 [[A_CMP]]
+;
+ %B = icmp ne i32 %A, 49
+ %C = icmp ne i32 %A, 50
+ ;; (A-49) > 1
+ %D = and i1 %B, %C
+ ret i1 %D
}
define i8 @test27(i8 %A) {
- %B = and i8 %A, 4 ; <i8> [#uses=1]
- %C = sub i8 %B, 16 ; <i8> [#uses=1]
- ;; 0xF0
- %D = and i8 %C, -16 ; <i8> [#uses=1]
- %E = add i8 %D, 16 ; <i8> [#uses=1]
- ret i8 %E
+; CHECK-LABEL: @test27(
+; CHECK-NEXT: ret i8 0
+;
+ %B = and i8 %A, 4
+ %C = sub i8 %B, 16
+ ;; 0xF0
+ %D = and i8 %C, -16
+ %E = add i8 %D, 16
+ ret i8 %E
}
;; This is juse a zero extending shr.
define i32 @test28(i32 %X) {
- ;; Sign extend
- %Y = ashr i32 %X, 24 ; <i32> [#uses=1]
- ;; Mask out sign bits
- %Z = and i32 %Y, 255 ; <i32> [#uses=1]
- ret i32 %Z
+; CHECK-LABEL: @test28(
+; CHECK-NEXT: [[Y1:%.*]] = lshr i32 %X, 24
+; CHECK-NEXT: ret i32 [[Y1]]
+;
+ ;; Sign extend
+ %Y = ashr i32 %X, 24
+ ;; Mask out sign bits
+ %Z = and i32 %Y, 255
+ ret i32 %Z
}
define i32 @test29(i8 %X) {
- %Y = zext i8 %X to i32 ; <i32> [#uses=1]
- ;; Zero extend makes this unneeded.
- %Z = and i32 %Y, 255 ; <i32> [#uses=1]
- ret i32 %Z
+; CHECK-LABEL: @test29(
+; CHECK-NEXT: [[Y:%.*]] = zext i8 %X to i32
+; CHECK-NEXT: ret i32 [[Y]]
+;
+ %Y = zext i8 %X to i32
+ ;; Zero extend makes this unneeded.
+ %Z = and i32 %Y, 255
+ ret i32 %Z
}
define i32 @test30(i1 %X) {
- %Y = zext i1 %X to i32 ; <i32> [#uses=1]
- %Z = and i32 %Y, 1 ; <i32> [#uses=1]
- ret i32 %Z
+; CHECK-LABEL: @test30(
+; CHECK-NEXT: [[Y:%.*]] = zext i1 %X to i32
+; CHECK-NEXT: ret i32 [[Y]]
+;
+ %Y = zext i1 %X to i32
+ %Z = and i32 %Y, 1
+ ret i32 %Z
}
define i32 @test31(i1 %X) {
- %Y = zext i1 %X to i32 ; <i32> [#uses=1]
- %Z = shl i32 %Y, 4 ; <i32> [#uses=1]
- %A = and i32 %Z, 16 ; <i32> [#uses=1]
- ret i32 %A
+; CHECK-LABEL: @test31(
+; CHECK-NEXT: [[Y:%.*]] = zext i1 %X to i32
+; CHECK-NEXT: [[Z:%.*]] = shl nuw nsw i32 [[Y]], 4
+; CHECK-NEXT: ret i32 [[Z]]
+;
+ %Y = zext i1 %X to i32
+ %Z = shl i32 %Y, 4
+ %A = and i32 %Z, 16
+ ret i32 %A
}
define i32 @test32(i32 %In) {
- %Y = and i32 %In, 16 ; <i32> [#uses=1]
- %Z = lshr i32 %Y, 2 ; <i32> [#uses=1]
- %A = and i32 %Z, 1 ; <i32> [#uses=1]
- ret i32 %A
+; CHECK-LABEL: @test32(
+; CHECK-NEXT: ret i32 0
+;
+ %Y = and i32 %In, 16
+ %Z = lshr i32 %Y, 2
+ %A = and i32 %Z, 1
+ ret i32 %A
}
;; Code corresponding to one-bit bitfield ^1.
define i32 @test33(i32 %b) {
- %tmp.4.mask = and i32 %b, 1 ; <i32> [#uses=1]
- %tmp.10 = xor i32 %tmp.4.mask, 1 ; <i32> [#uses=1]
- %tmp.12 = and i32 %b, -2 ; <i32> [#uses=1]
- %tmp.13 = or i32 %tmp.12, %tmp.10 ; <i32> [#uses=1]
- ret i32 %tmp.13
+; CHECK-LABEL: @test33(
+; CHECK-NEXT: [[TMP_13:%.*]] = xor i32 %b, 1
+; CHECK-NEXT: ret i32 [[TMP_13]]
+;
+ %tmp.4.mask = and i32 %b, 1
+ %tmp.10 = xor i32 %tmp.4.mask, 1
+ %tmp.12 = and i32 %b, -2
+ %tmp.13 = or i32 %tmp.12, %tmp.10
+ ret i32 %tmp.13
}
define i32 @test34(i32 %A, i32 %B) {
- %tmp.2 = or i32 %B, %A ; <i32> [#uses=1]
- %tmp.4 = and i32 %tmp.2, %B ; <i32> [#uses=1]
- ret i32 %tmp.4
+; CHECK-LABEL: @test34(
+; CHECK-NEXT: ret i32 %B
+;
+ %tmp.2 = or i32 %B, %A
+ %tmp.4 = and i32 %tmp.2, %B
+ ret i32 %tmp.4
}
diff --git a/test/Transforms/InstCombine/and2.ll b/test/Transforms/InstCombine/and2.ll
index 326bfda38553..49ce38421374 100644
--- a/test/Transforms/InstCombine/and2.ll
+++ b/test/Transforms/InstCombine/and2.ll
@@ -1,43 +1,45 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
; PR1738
define i1 @test1(double %X, double %Y) {
- %tmp9 = fcmp ord double %X, 0.000000e+00
- %tmp13 = fcmp ord double %Y, 0.000000e+00
- %bothcond = and i1 %tmp13, %tmp9
- ret i1 %bothcond
-; CHECK: fcmp ord double %Y, %X
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double %Y, %X
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp9 = fcmp ord double %X, 0.000000e+00
+ %tmp13 = fcmp ord double %Y, 0.000000e+00
+ %bothcond = and i1 %tmp13, %tmp9
+ ret i1 %bothcond
}
define i1 @test2(i1 %X, i1 %Y) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[B:%.*]] = and i1 %X, %Y
+; CHECK-NEXT: ret i1 [[B]]
+;
%a = and i1 %X, %Y
%b = and i1 %a, %X
ret i1 %b
-; CHECK-LABEL: @test2(
-; CHECK-NEXT: and i1 %X, %Y
-; CHECK-NEXT: ret
}
define i32 @test3(i32 %X, i32 %Y) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[B:%.*]] = and i32 %X, %Y
+; CHECK-NEXT: ret i32 [[B]]
+;
%a = and i32 %X, %Y
%b = and i32 %Y, %a
ret i32 %b
-; CHECK-LABEL: @test3(
-; CHECK-NEXT: and i32 %X, %Y
-; CHECK-NEXT: ret
-}
-
-define i1 @test4(i32 %X) {
- %a = icmp ult i32 %X, 31
- %b = icmp slt i32 %X, 0
- %c = and i1 %a, %b
- ret i1 %c
-; CHECK-LABEL: @test4(
-; CHECK-NEXT: ret i1 false
}
; Make sure we don't go into an infinite loop with this test
define <4 x i32> @test5(<4 x i32> %A) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i32> %A, <i32 1, i32 2, i32 3, i32 4>
+; CHECK-NEXT: [[TMP2:%.*]] = and <4 x i32> [[TMP1]], <i32 1, i32 2, i32 3, i32 4>
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
%1 = xor <4 x i32> %A, <i32 1, i32 2, i32 3, i32 4>
%2 = and <4 x i32> <i32 1, i32 2, i32 3, i32 4>, %1
ret <4 x i32> %2
@@ -46,8 +48,11 @@ define <4 x i32> @test5(<4 x i32> %A) {
; Check that we combine "if x!=0 && x!=-1" into "if x+1u>1"
define i32 @test6(i64 %x) nounwind {
; CHECK-LABEL: @test6(
-; CHECK-NEXT: add i64 %x, 1
-; CHECK-NEXT: icmp ugt i64 %x.off, 1
+; CHECK-NEXT: [[X_OFF:%.*]] = add i64 %x, 1
+; CHECK-NEXT: [[X_CMP:%.*]] = icmp ugt i64 [[X_OFF]], 1
+; CHECK-NEXT: [[LAND_EXT:%.*]] = zext i1 [[X_CMP]] to i32
+; CHECK-NEXT: ret i32 [[LAND_EXT]]
+;
%cmp1 = icmp ne i64 %x, -1
%not.cmp = icmp ne i64 %x, 0
%.cmp1 = and i1 %cmp1, %not.cmp
@@ -57,9 +62,10 @@ define i32 @test6(i64 %x) nounwind {
define i1 @test7(i32 %i, i1 %b) {
; CHECK-LABEL: @test7(
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 %i, 0
-; CHECK-NEXT: [[AND:%.*]] = and i1 [[CMP]], %b
-; CHECK-NEXT: ret i1 [[AND]]
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 %i, 0
+; CHECK-NEXT: [[TMP2:%.*]] = and i1 [[TMP1]], %b
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
%cmp1 = icmp slt i32 %i, 1
%cmp2 = icmp sgt i32 %i, -1
%and1 = and i1 %cmp1, %b
@@ -69,9 +75,10 @@ define i1 @test7(i32 %i, i1 %b) {
define i1 @test8(i32 %i) {
; CHECK-LABEL: @test8(
-; CHECK-NEXT: [[DEC:%.*]] = add i32 %i, -1
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 [[DEC]], 13
-; CHECK-NEXT: ret i1 [[CMP]]
+; CHECK-NEXT: [[I_OFF:%.*]] = add i32 %i, -1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[I_OFF]], 13
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
%cmp1 = icmp ne i32 %i, 0
%cmp2 = icmp ult i32 %i, 14
%cond = and i1 %cmp1, %cmp2
@@ -81,10 +88,9 @@ define i1 @test8(i32 %i) {
; combine -x & 1 into x & 1
define i64 @test9(i64 %x) {
; CHECK-LABEL: @test9(
-; CHECK-NOT: %sub = sub nsw i64 0, %x
-; CHECK-NOT: %and = and i64 %sub, 1
-; CHECK-NEXT: %and = and i64 %x, 1
-; CHECK-NEXT: ret i64 %and
+; CHECK-NEXT: [[AND:%.*]] = and i64 %x, 1
+; CHECK-NEXT: ret i64 [[AND]]
+;
%sub = sub nsw i64 0, %x
%and = and i64 %sub, 1
ret i64 %and
@@ -92,56 +98,13 @@ define i64 @test9(i64 %x) {
define i64 @test10(i64 %x) {
; CHECK-LABEL: @test10(
-; CHECK-NOT: %sub = sub nsw i64 0, %x
-; CHECK-NEXT: %and = and i64 %x, 1
-; CHECK-NOT: %add = add i64 %sub, %and
-; CHECK-NEXT: %add = sub i64 %and, %x
-; CHECK-NEXT: ret i64 %add
+; CHECK-NEXT: [[AND:%.*]] = and i64 %x, 1
+; CHECK-NEXT: [[ADD:%.*]] = sub i64 [[AND]], %x
+; CHECK-NEXT: ret i64 [[ADD]]
+;
%sub = sub nsw i64 0, %x
%and = and i64 %sub, 1
%add = add i64 %sub, %and
ret i64 %add
}
-define i64 @fabs_double(double %x) {
-; CHECK-LABEL: @fabs_double(
-; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
-; CHECK-NEXT: %and = bitcast double %fabs to i64
-; CHECK-NEXT: ret i64 %and
- %bc = bitcast double %x to i64
- %and = and i64 %bc, 9223372036854775807
- ret i64 %and
-}
-
-define i64 @fabs_double_swap(double %x) {
-; CHECK-LABEL: @fabs_double_swap(
-; CHECK-NEXT: %fabs = call double @llvm.fabs.f64(double %x)
-; CHECK-NEXT: %and = bitcast double %fabs to i64
-; CHECK-NEXT: ret i64 %and
- %bc = bitcast double %x to i64
- %and = and i64 9223372036854775807, %bc
- ret i64 %and
-}
-
-define i32 @fabs_float(float %x) {
-; CHECK-LABEL: @fabs_float(
-; CHECK-NEXT: %fabs = call float @llvm.fabs.f32(float %x)
-; CHECK-NEXT: %and = bitcast float %fabs to i32
-; CHECK-NEXT: ret i32 %and
- %bc = bitcast float %x to i32
- %and = and i32 %bc, 2147483647
- ret i32 %and
-}
-
-; Make sure that only a bitcast is transformed.
-
-define i64 @fabs_double_not_bitcast(double %x) {
-; CHECK-LABEL: @fabs_double_not_bitcast(
-; CHECK-NEXT: %bc = fptoui double %x to i64
-; CHECK-NEXT: %and = and i64 %bc, 9223372036854775807
-; CHECK-NEXT: ret i64 %and
- %bc = fptoui double %x to i64
- %and = and i64 %bc, 9223372036854775807
- ret i64 %and
-}
-
diff --git a/test/Transforms/InstCombine/apint-add.ll b/test/Transforms/InstCombine/apint-add.ll
new file mode 100644
index 000000000000..6740ae66aef1
--- /dev/null
+++ b/test/Transforms/InstCombine/apint-add.ll
@@ -0,0 +1,116 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Tests for Integer BitWidth <= 64 && BitWidth % 8 != 0.
+
+;; Flip sign bit then add INT_MIN -> nop.
+define i1 @test1(i1 %x) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: ret i1 %x
+;
+ %tmp.2 = xor i1 %x, 1
+ %tmp.4 = add i1 %tmp.2, 1
+ ret i1 %tmp.4
+}
+
+;; Flip sign bit then add INT_MIN -> nop.
+define i47 @test2(i47 %x) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: ret i47 %x
+;
+ %tmp.2 = xor i47 %x, 70368744177664
+ %tmp.4 = add i47 %tmp.2, 70368744177664
+ ret i47 %tmp.4
+}
+
+;; Flip sign bit then add INT_MIN -> nop.
+define i15 @test3(i15 %x) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: ret i15 %x
+;
+ %tmp.2 = xor i15 %x, 16384
+ %tmp.4 = add i15 %tmp.2, 16384
+ ret i15 %tmp.4
+}
+
+; X + signbit --> X ^ signbit
+define <2 x i5> @test3vec(<2 x i5> %x) {
+; CHECK-LABEL: @test3vec(
+; CHECK-NEXT: [[Y:%.*]] = xor <2 x i5> %x, <i5 -16, i5 -16>
+; CHECK-NEXT: ret <2 x i5> [[Y]]
+;
+ %y = add <2 x i5> %x, <i5 16, i5 16>
+ ret <2 x i5> %y
+}
+
+;; (x & 0b1111..0) + 1 -> x | 1
+define i49 @test4(i49 %x) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[TMP_4:%.*]] = or i49 %x, 1
+; CHECK-NEXT: ret i49 [[TMP_4]]
+;
+ %tmp.2 = and i49 %x, 562949953421310
+ %tmp.4 = add i49 %tmp.2, 1
+ ret i49 %tmp.4
+}
+
+; Tests for Integer BitWidth > 64 && BitWidth <= 1024.
+
+;; Flip sign bit then add INT_MIN -> nop.
+define i111 @test5(i111 %x) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: ret i111 %x
+;
+ %tmp.2 = shl i111 1, 110
+ %tmp.4 = xor i111 %x, %tmp.2
+ %tmp.6 = add i111 %tmp.4, %tmp.2
+ ret i111 %tmp.6
+}
+
+;; Flip sign bit then add INT_MIN -> nop.
+define i65 @test6(i65 %x) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: ret i65 %x
+;
+ %tmp.0 = shl i65 1, 64
+ %tmp.2 = xor i65 %x, %tmp.0
+ %tmp.4 = add i65 %tmp.2, %tmp.0
+ ret i65 %tmp.4
+}
+
+;; Flip sign bit then add INT_MIN -> nop.
+define i1024 @test7(i1024 %x) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: ret i1024 %x
+;
+ %tmp.0 = shl i1024 1, 1023
+ %tmp.2 = xor i1024 %x, %tmp.0
+ %tmp.4 = add i1024 %tmp.2, %tmp.0
+ ret i1024 %tmp.4
+}
+
+;; If we have add(xor(X, 0xF..F80..), 0x80..), it's an xor.
+define i128 @test8(i128 %x) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[TMP_4:%.*]] = xor i128 %x, 170141183460469231731687303715884105600
+; CHECK-NEXT: ret i128 [[TMP_4]]
+;
+ %tmp.5 = shl i128 1, 127
+ %tmp.1 = ashr i128 %tmp.5, 120
+ %tmp.2 = xor i128 %x, %tmp.1
+ %tmp.4 = add i128 %tmp.2, %tmp.5
+ ret i128 %tmp.4
+}
+
+;; (x & 254)+1 -> (x & 254)|1
+define i77 @test9(i77 %x) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: [[TMP_2:%.*]] = and i77 %x, 562949953421310
+; CHECK-NEXT: [[TMP_4:%.*]] = or i77 [[TMP_2]], 1
+; CHECK-NEXT: ret i77 [[TMP_4]]
+;
+ %tmp.2 = and i77 %x, 562949953421310
+ %tmp.4 = add i77 %tmp.2, 1
+ ret i77 %tmp.4
+}
+
diff --git a/test/Transforms/InstCombine/apint-add1.ll b/test/Transforms/InstCombine/apint-add1.ll
deleted file mode 100644
index 02f1baf53996..000000000000
--- a/test/Transforms/InstCombine/apint-add1.ll
+++ /dev/null
@@ -1,34 +0,0 @@
-; This test makes sure that add instructions are properly eliminated.
-; This test is for Integer BitWidth <= 64 && BitWidth % 8 != 0.
-
-; RUN: opt < %s -instcombine -S | \
-; RUN: grep -v OK | not grep add
-
-
-define i1 @test1(i1 %x) {
- %tmp.2 = xor i1 %x, 1
- ;; Add of sign bit -> xor of sign bit.
- %tmp.4 = add i1 %tmp.2, 1
- ret i1 %tmp.4
-}
-
-define i47 @test2(i47 %x) {
- %tmp.2 = xor i47 %x, 70368744177664
- ;; Add of sign bit -> xor of sign bit.
- %tmp.4 = add i47 %tmp.2, 70368744177664
- ret i47 %tmp.4
-}
-
-define i15 @test3(i15 %x) {
- %tmp.2 = xor i15 %x, 16384
- ;; Add of sign bit -> xor of sign bit.
- %tmp.4 = add i15 %tmp.2, 16384
- ret i15 %tmp.4
-}
-
-define i49 @test6(i49 %x) {
- ;; (x & 254)+1 -> (x & 254)|1
- %tmp.2 = and i49 %x, 562949953421310
- %tmp.4 = add i49 %tmp.2, 1
- ret i49 %tmp.4
-}
diff --git a/test/Transforms/InstCombine/apint-add2.ll b/test/Transforms/InstCombine/apint-add2.ll
deleted file mode 100644
index 913a70f1b458..000000000000
--- a/test/Transforms/InstCombine/apint-add2.ll
+++ /dev/null
@@ -1,46 +0,0 @@
-; This test makes sure that add instructions are properly eliminated.
-; This test is for Integer BitWidth > 64 && BitWidth <= 1024.
-
-; RUN: opt < %s -instcombine -S | \
-; RUN: grep -v OK | not grep add
-; END.
-
-define i111 @test1(i111 %x) {
- %tmp.2 = shl i111 1, 110
- %tmp.4 = xor i111 %x, %tmp.2
- ;; Add of sign bit -> xor of sign bit.
- %tmp.6 = add i111 %tmp.4, %tmp.2
- ret i111 %tmp.6
-}
-
-define i65 @test2(i65 %x) {
- %tmp.0 = shl i65 1, 64
- %tmp.2 = xor i65 %x, %tmp.0
- ;; Add of sign bit -> xor of sign bit.
- %tmp.4 = add i65 %tmp.2, %tmp.0
- ret i65 %tmp.4
-}
-
-define i1024 @test3(i1024 %x) {
- %tmp.0 = shl i1024 1, 1023
- %tmp.2 = xor i1024 %x, %tmp.0
- ;; Add of sign bit -> xor of sign bit.
- %tmp.4 = add i1024 %tmp.2, %tmp.0
- ret i1024 %tmp.4
-}
-
-define i128 @test4(i128 %x) {
- ;; If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext.
- %tmp.5 = shl i128 1, 127
- %tmp.1 = ashr i128 %tmp.5, 120
- %tmp.2 = xor i128 %x, %tmp.1
- %tmp.4 = add i128 %tmp.2, %tmp.5
- ret i128 %tmp.4
-}
-
-define i77 @test6(i77 %x) {
- ;; (x & 254)+1 -> (x & 254)|1
- %tmp.2 = and i77 %x, 562949953421310
- %tmp.4 = add i77 %tmp.2, 1
- ret i77 %tmp.4
-}
diff --git a/test/Transforms/InstCombine/apint-and-xor-merge.ll b/test/Transforms/InstCombine/apint-and-xor-merge.ll
index 8adffde36273..52633125048a 100644
--- a/test/Transforms/InstCombine/apint-and-xor-merge.ll
+++ b/test/Transforms/InstCombine/apint-and-xor-merge.ll
@@ -1,22 +1,31 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
; This test case checks that the merge of and/xor can work on arbitrary
; precision integers.
-; RUN: opt < %s -instcombine -S | grep and | count 1
-; RUN: opt < %s -instcombine -S | grep xor | count 2
+; RUN: opt < %s -instcombine -S | FileCheck %s
; (x &z ) ^ (y & z) -> (x ^ y) & z
define i57 @test1(i57 %x, i57 %y, i57 %z) {
- %tmp3 = and i57 %z, %x
- %tmp6 = and i57 %z, %y
- %tmp7 = xor i57 %tmp3, %tmp6
- ret i57 %tmp7
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[TMP61:%.*]] = xor i57 %x, %y
+; CHECK-NEXT: [[TMP7:%.*]] = and i57 [[TMP61]], %z
+; CHECK-NEXT: ret i57 [[TMP7]]
+;
+ %tmp3 = and i57 %z, %x
+ %tmp6 = and i57 %z, %y
+ %tmp7 = xor i57 %tmp3, %tmp6
+ ret i57 %tmp7
}
; (x & y) ^ (x | y) -> x ^ y
define i23 @test2(i23 %x, i23 %y, i23 %z) {
- %tmp3 = and i23 %y, %x
- %tmp6 = or i23 %y, %x
- %tmp7 = xor i23 %tmp3, %tmp6
- ret i23 %tmp7
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[TMP7:%.*]] = xor i23 %y, %x
+; CHECK-NEXT: ret i23 [[TMP7]]
+;
+ %tmp3 = and i23 %y, %x
+ %tmp6 = or i23 %y, %x
+ %tmp7 = xor i23 %tmp3, %tmp6
+ ret i23 %tmp7
}
diff --git a/test/Transforms/InstCombine/apint-select.ll b/test/Transforms/InstCombine/apint-select.ll
index cf24a44d6288..2aae232f1b0c 100644
--- a/test/Transforms/InstCombine/apint-select.ll
+++ b/test/Transforms/InstCombine/apint-select.ll
@@ -1,45 +1,144 @@
-; This test makes sure that these instructions are properly eliminated.
-
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
-; CHECK-NOT: select
+; No selects should remain.
+
+define i41 @zext(i1 %C) {
+; CHECK-LABEL: @zext(
+; CHECK-NEXT: [[V:%.*]] = zext i1 %C to i41
+; CHECK-NEXT: ret i41 [[V]]
+;
+ %V = select i1 %C, i41 1, i41 0
+ ret i41 %V
+}
+
+define i41 @sext(i1 %C) {
+; CHECK-LABEL: @sext(
+; CHECK-NEXT: [[V:%.*]] = sext i1 %C to i41
+; CHECK-NEXT: ret i41 [[V]]
+;
+ %V = select i1 %C, i41 -1, i41 0
+ ret i41 %V
+}
+
+define i999 @not_zext(i1 %C) {
+; CHECK-LABEL: @not_zext(
+; CHECK-NEXT: [[TMP1:%.*]] = zext i1 %C to i999
+; CHECK-NEXT: [[V:%.*]] = xor i999 [[TMP1]], 1
+; CHECK-NEXT: ret i999 [[V]]
+;
+ %V = select i1 %C, i999 0, i999 1
+ ret i999 %V
+}
+
+define i999 @not_sext(i1 %C) {
+; CHECK-LABEL: @not_sext(
+; CHECK-NEXT: [[NOT_C:%.*]] = xor i1 %C, true
+; CHECK-NEXT: [[V:%.*]] = sext i1 [[NOT_C]] to i999
+; CHECK-NEXT: ret i999 [[V]]
+;
+ %V = select i1 %C, i999 0, i999 -1
+ ret i999 %V
+}
+
+; Vector selects of vector splat constants match APInt too.
+
+define <2 x i41> @zext_vec(<2 x i1> %C) {
+; CHECK-LABEL: @zext_vec(
+; CHECK-NEXT: [[V:%.*]] = zext <2 x i1> %C to <2 x i41>
+; CHECK-NEXT: ret <2 x i41> [[V]]
+;
+ %V = select <2 x i1> %C, <2 x i41> <i41 1, i41 1>, <2 x i41> <i41 0, i41 0>
+ ret <2 x i41> %V
+}
+
+define <2 x i32> @sext_vec(<2 x i1> %C) {
+; CHECK-LABEL: @sext_vec(
+; CHECK-NEXT: [[V:%.*]] = sext <2 x i1> %C to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[V]]
+;
+ %V = select <2 x i1> %C, <2 x i32> <i32 -1, i32 -1>, <2 x i32> <i32 0, i32 0>
+ ret <2 x i32> %V
+}
-define i41 @test1(i1 %C) {
- %V = select i1 %C, i41 1, i41 0 ; V = C
- ret i41 %V
+define <2 x i999> @not_zext_vec(<2 x i1> %C) {
+; CHECK-LABEL: @not_zext_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i1> %C to <2 x i999>
+; CHECK-NEXT: [[V:%.*]] = xor <2 x i999> [[TMP1]], <i999 1, i999 1>
+; CHECK-NEXT: ret <2 x i999> [[V]]
+;
+ %V = select <2 x i1> %C, <2 x i999> <i999 0, i999 0>, <2 x i999> <i999 1, i999 1>
+ ret <2 x i999> %V
}
-define i999 @test2(i1 %C) {
- %V = select i1 %C, i999 0, i999 1 ; V = C
- ret i999 %V
+define <2 x i64> @not_sext_vec(<2 x i1> %C) {
+; CHECK-LABEL: @not_sext_vec(
+; CHECK-NEXT: [[NOT_C:%.*]] = xor <2 x i1> %C, <i1 true, i1 true>
+; CHECK-NEXT: [[V:%.*]] = sext <2 x i1> [[NOT_C]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[V]]
+;
+ %V = select <2 x i1> %C, <2 x i64> <i64 0, i64 0>, <2 x i64> <i64 -1, i64 -1>
+ ret <2 x i64> %V
+}
+
+; But don't touch this - we would need 3 instructions to extend and splat the scalar select condition.
+
+define <2 x i32> @scalar_select_of_vectors(i1 %c) {
+; CHECK-LABEL: @scalar_select_of_vectors(
+; CHECK-NEXT: [[V:%.*]] = select i1 %c, <2 x i32> <i32 1, i32 1>, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x i32> [[V]]
+;
+ %V = select i1 %c, <2 x i32> <i32 1, i32 1>, <2 x i32> zeroinitializer
+ ret <2 x i32> %V
}
+;; (x <s 0) ? -1 : 0 -> ashr x, 31
+
define i41 @test3(i41 %X) {
- ;; (x <s 0) ? -1 : 0 -> ashr x, 31
- %t = icmp slt i41 %X, 0
- %V = select i1 %t, i41 -1, i41 0
- ret i41 %V
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr i41 %X, 40
+; CHECK-NEXT: ret i41 [[X_LOBIT]]
+;
+ %t = icmp slt i41 %X, 0
+ %V = select i1 %t, i41 -1, i41 0
+ ret i41 %V
}
+;; (x <s 0) ? -1 : 0 -> ashr x, 31
+
define i1023 @test4(i1023 %X) {
- ;; (x <s 0) ? -1 : 0 -> ashr x, 31
- %t = icmp slt i1023 %X, 0
- %V = select i1 %t, i1023 -1, i1023 0
- ret i1023 %V
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr i1023 %X, 1022
+; CHECK-NEXT: ret i1023 [[X_LOBIT]]
+;
+ %t = icmp slt i1023 %X, 0
+ %V = select i1 %t, i1023 -1, i1023 0
+ ret i1023 %V
}
+;; ((X & 27) ? 27 : 0)
+
define i41 @test5(i41 %X) {
- ;; ((X & 27) ? 27 : 0)
- %Y = and i41 %X, 32
- %t = icmp ne i41 %Y, 0
- %V = select i1 %t, i41 32, i41 0
- ret i41 %V
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[Y:%.*]] = and i41 %X, 32
+; CHECK-NEXT: ret i41 [[Y]]
+;
+ %Y = and i41 %X, 32
+ %t = icmp ne i41 %Y, 0
+ %V = select i1 %t, i41 32, i41 0
+ ret i41 %V
}
+;; ((X & 27) ? 27 : 0)
+
define i1023 @test6(i1023 %X) {
- ;; ((X & 27) ? 27 : 0)
- %Y = and i1023 %X, 64
- %t = icmp ne i1023 %Y, 0
- %V = select i1 %t, i1023 64, i1023 0
- ret i1023 %V
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[Y:%.*]] = and i1023 %X, 64
+; CHECK-NEXT: ret i1023 [[Y]]
+;
+ %Y = and i1023 %X, 64
+ %t = icmp ne i1023 %Y, 0
+ %V = select i1 %t, i1023 64, i1023 0
+ ret i1023 %V
}
+
diff --git a/test/Transforms/InstCombine/assoc-cast-assoc.ll b/test/Transforms/InstCombine/assoc-cast-assoc.ll
new file mode 100644
index 000000000000..e410fde93e29
--- /dev/null
+++ b/test/Transforms/InstCombine/assoc-cast-assoc.ll
@@ -0,0 +1,77 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define i5 @XorZextXor(i3 %a) {
+; CHECK-LABEL: @XorZextXor(
+; CHECK-NEXT: [[CAST:%.*]] = zext i3 %a to i5
+; CHECK-NEXT: [[OP2:%.*]] = xor i5 [[CAST]], 15
+; CHECK-NEXT: ret i5 [[OP2]]
+;
+ %op1 = xor i3 %a, 3
+ %cast = zext i3 %op1 to i5
+ %op2 = xor i5 %cast, 12
+ ret i5 %op2
+}
+
+define <2 x i32> @XorZextXorVec(<2 x i1> %a) {
+; CHECK-LABEL: @XorZextXorVec(
+; CHECK-NEXT: [[CAST:%.*]] = zext <2 x i1> %a to <2 x i32>
+; CHECK-NEXT: [[OP2:%.*]] = xor <2 x i32> [[CAST]], <i32 2, i32 1>
+; CHECK-NEXT: ret <2 x i32> [[OP2]]
+;
+ %op1 = xor <2 x i1> %a, <i1 true, i1 false>
+ %cast = zext <2 x i1> %op1 to <2 x i32>
+ %op2 = xor <2 x i32> %cast, <i32 3, i32 1>
+ ret <2 x i32> %op2
+}
+
+define i5 @OrZextOr(i3 %a) {
+; CHECK-LABEL: @OrZextOr(
+; CHECK-NEXT: [[CAST:%.*]] = zext i3 %a to i5
+; CHECK-NEXT: [[OP2:%.*]] = or i5 [[CAST]], 11
+; CHECK-NEXT: ret i5 [[OP2]]
+;
+ %op1 = or i3 %a, 3
+ %cast = zext i3 %op1 to i5
+ %op2 = or i5 %cast, 8
+ ret i5 %op2
+}
+
+define <2 x i32> @OrZextOrVec(<2 x i2> %a) {
+; CHECK-LABEL: @OrZextOrVec(
+; CHECK-NEXT: [[CAST:%.*]] = zext <2 x i2> %a to <2 x i32>
+; CHECK-NEXT: [[OP2:%.*]] = or <2 x i32> [[CAST]], <i32 3, i32 5>
+; CHECK-NEXT: ret <2 x i32> [[OP2]]
+;
+ %op1 = or <2 x i2> %a, <i2 2, i2 0>
+ %cast = zext <2 x i2> %op1 to <2 x i32>
+ %op2 = or <2 x i32> %cast, <i32 1, i32 5>
+ ret <2 x i32> %op2
+}
+
+; Unlike the rest, this case is handled by SimplifyDemandedBits / ShrinkDemandedConstant.
+
+define i5 @AndZextAnd(i3 %a) {
+; CHECK-LABEL: @AndZextAnd(
+; CHECK-NEXT: [[CAST:%.*]] = zext i3 %a to i5
+; CHECK-NEXT: [[OP2:%.*]] = and i5 [[CAST]], 2
+; CHECK-NEXT: ret i5 [[OP2]]
+;
+ %op1 = and i3 %a, 3
+ %cast = zext i3 %op1 to i5
+ %op2 = and i5 %cast, 14
+ ret i5 %op2
+}
+
+define <2 x i32> @AndZextAndVec(<2 x i8> %a) {
+; CHECK-LABEL: @AndZextAndVec(
+; CHECK-NEXT: [[CAST:%.*]] = zext <2 x i8> %a to <2 x i32>
+; CHECK-NEXT: [[OP2:%.*]] = and <2 x i32> [[CAST]], <i32 5, i32 0>
+; CHECK-NEXT: ret <2 x i32> [[OP2]]
+;
+ %op1 = and <2 x i8> %a, <i8 7, i8 0>
+ %cast = zext <2 x i8> %op1 to <2 x i32>
+ %op2 = and <2 x i32> %cast, <i32 261, i32 1>
+ ret <2 x i32> %op2
+}
+
diff --git a/test/Transforms/InstCombine/atomic.ll b/test/Transforms/InstCombine/atomic.ll
index 5754a5a4ba56..15c1659de969 100644
--- a/test/Transforms/InstCombine/atomic.ll
+++ b/test/Transforms/InstCombine/atomic.ll
@@ -5,12 +5,265 @@ target triple = "x86_64-apple-macosx10.7.0"
; Check transforms involving atomic operations
+define i32 @test1(i32* %p) {
+; CHECK-LABEL: define i32 @test1(
+; CHECK: %x = load atomic i32, i32* %p seq_cst, align 4
+; CHECK: shl i32 %x, 1
+ %x = load atomic i32, i32* %p seq_cst, align 4
+ %y = load i32, i32* %p, align 4
+ %z = add i32 %x, %y
+ ret i32 %z
+}
+
define i32 @test2(i32* %p) {
; CHECK-LABEL: define i32 @test2(
+; CHECK: %x = load volatile i32, i32* %p, align 4
+; CHECK: %y = load volatile i32, i32* %p, align 4
+ %x = load volatile i32, i32* %p, align 4
+ %y = load volatile i32, i32* %p, align 4
+ %z = add i32 %x, %y
+ ret i32 %z
+}
+
+; The exact semantics of mixing volatile and non-volatile on the same
+; memory location are a bit unclear, but conservatively, we know we don't
+; want to remove the volatile.
+define i32 @test3(i32* %p) {
+; CHECK-LABEL: define i32 @test3(
+; CHECK: %x = load volatile i32, i32* %p, align 4
+ %x = load volatile i32, i32* %p, align 4
+ %y = load i32, i32* %p, align 4
+ %z = add i32 %x, %y
+ ret i32 %z
+}
+
+; Forwarding from a stronger ordered atomic is fine
+define i32 @test4(i32* %p) {
+; CHECK-LABEL: define i32 @test4(
; CHECK: %x = load atomic i32, i32* %p seq_cst, align 4
; CHECK: shl i32 %x, 1
%x = load atomic i32, i32* %p seq_cst, align 4
+ %y = load atomic i32, i32* %p unordered, align 4
+ %z = add i32 %x, %y
+ ret i32 %z
+}
+
+; Forwarding from a non-atomic is not. (The earlier load
+; could in priciple be promoted to atomic and then forwarded,
+; but we can't just drop the atomic from the load.)
+define i32 @test5(i32* %p) {
+; CHECK-LABEL: define i32 @test5(
+; CHECK: %x = load atomic i32, i32* %p unordered, align 4
+ %x = load atomic i32, i32* %p unordered, align 4
%y = load i32, i32* %p, align 4
%z = add i32 %x, %y
ret i32 %z
}
+
+; Forwarding atomic to atomic is fine
+define i32 @test6(i32* %p) {
+; CHECK-LABEL: define i32 @test6(
+; CHECK: %x = load atomic i32, i32* %p unordered, align 4
+; CHECK: shl i32 %x, 1
+ %x = load atomic i32, i32* %p unordered, align 4
+ %y = load atomic i32, i32* %p unordered, align 4
+ %z = add i32 %x, %y
+ ret i32 %z
+}
+
+; FIXME: we currently don't do anything for monotonic
+define i32 @test7(i32* %p) {
+; CHECK-LABEL: define i32 @test7(
+; CHECK: %x = load atomic i32, i32* %p seq_cst, align 4
+; CHECK: %y = load atomic i32, i32* %p monotonic, align 4
+ %x = load atomic i32, i32* %p seq_cst, align 4
+ %y = load atomic i32, i32* %p monotonic, align 4
+ %z = add i32 %x, %y
+ ret i32 %z
+}
+
+; FIXME: We could forward in racy code
+define i32 @test8(i32* %p) {
+; CHECK-LABEL: define i32 @test8(
+; CHECK: %x = load atomic i32, i32* %p seq_cst, align 4
+; CHECK: %y = load atomic i32, i32* %p acquire, align 4
+ %x = load atomic i32, i32* %p seq_cst, align 4
+ %y = load atomic i32, i32* %p acquire, align 4
+ %z = add i32 %x, %y
+ ret i32 %z
+}
+
+; An unordered access to null is still unreachable. There's no
+; ordering imposed.
+define i32 @test9() {
+; CHECK-LABEL: define i32 @test9(
+; CHECK: store i32 undef, i32* null
+ %x = load atomic i32, i32* null unordered, align 4
+ ret i32 %x
+}
+
+; FIXME: Could also fold
+define i32 @test10() {
+; CHECK-LABEL: define i32 @test10(
+; CHECK: load atomic i32, i32* null monotonic
+ %x = load atomic i32, i32* null monotonic, align 4
+ ret i32 %x
+}
+
+; Would this be legal to fold? Probably?
+define i32 @test11() {
+; CHECK-LABEL: define i32 @test11(
+; CHECK: load atomic i32, i32* null seq_cst
+ %x = load atomic i32, i32* null seq_cst, align 4
+ ret i32 %x
+}
+
+; An unordered access to null is still unreachable. There's no
+; ordering imposed.
+define i32 @test12() {
+; CHECK-LABEL: define i32 @test12(
+; CHECK: store atomic i32 undef, i32* null
+ store atomic i32 0, i32* null unordered, align 4
+ ret i32 0
+}
+
+; FIXME: Could also fold
+define i32 @test13() {
+; CHECK-LABEL: define i32 @test13(
+; CHECK: store atomic i32 0, i32* null monotonic
+ store atomic i32 0, i32* null monotonic, align 4
+ ret i32 0
+}
+
+; Would this be legal to fold? Probably?
+define i32 @test14() {
+; CHECK-LABEL: define i32 @test14(
+; CHECK: store atomic i32 0, i32* null seq_cst
+ store atomic i32 0, i32* null seq_cst, align 4
+ ret i32 0
+}
+
+@a = external global i32
+@b = external global i32
+
+define i32 @test15(i1 %cnd) {
+; CHECK-LABEL: define i32 @test15(
+; CHECK: load atomic i32, i32* @a unordered, align 4
+; CHECK: load atomic i32, i32* @b unordered, align 4
+ %addr = select i1 %cnd, i32* @a, i32* @b
+ %x = load atomic i32, i32* %addr unordered, align 4
+ ret i32 %x
+}
+
+; FIXME: This would be legal to transform
+define i32 @test16(i1 %cnd) {
+; CHECK-LABEL: define i32 @test16(
+; CHECK: load atomic i32, i32* %addr monotonic, align 4
+ %addr = select i1 %cnd, i32* @a, i32* @b
+ %x = load atomic i32, i32* %addr monotonic, align 4
+ ret i32 %x
+}
+
+; FIXME: This would be legal to transform
+define i32 @test17(i1 %cnd) {
+; CHECK-LABEL: define i32 @test17(
+; CHECK: load atomic i32, i32* %addr seq_cst, align 4
+ %addr = select i1 %cnd, i32* @a, i32* @b
+ %x = load atomic i32, i32* %addr seq_cst, align 4
+ ret i32 %x
+}
+
+define i32 @test22(i1 %cnd) {
+; CHECK-LABEL: define i32 @test22(
+; CHECK: [[PHI:%.*]] = phi i32
+; CHECK: store atomic i32 [[PHI]], i32* @a unordered, align 4
+ br i1 %cnd, label %block1, label %block2
+
+block1:
+ store atomic i32 1, i32* @a unordered, align 4
+ br label %merge
+block2:
+ store atomic i32 2, i32* @a unordered, align 4
+ br label %merge
+
+merge:
+ ret i32 0
+}
+
+; TODO: probably also legal here
+define i32 @test23(i1 %cnd) {
+; CHECK-LABEL: define i32 @test23(
+; CHECK: br i1 %cnd, label %block1, label %block2
+ br i1 %cnd, label %block1, label %block2
+
+block1:
+ store atomic i32 1, i32* @a monotonic, align 4
+ br label %merge
+block2:
+ store atomic i32 2, i32* @a monotonic, align 4
+ br label %merge
+
+merge:
+ ret i32 0
+}
+
+declare void @clobber()
+
+define i32 @test18(float* %p) {
+; CHECK-LABEL: define i32 @test18(
+; CHECK: load atomic i32, i32* [[A:%.*]] unordered, align 4
+; CHECK: store atomic i32 [[B:%.*]], i32* [[C:%.*]] unordered, align 4
+ %x = load atomic float, float* %p unordered, align 4
+ call void @clobber() ;; keep the load around
+ store atomic float %x, float* %p unordered, align 4
+ ret i32 0
+}
+
+; TODO: probably also legal in this case
+define i32 @test19(float* %p) {
+; CHECK-LABEL: define i32 @test19(
+; CHECK: load atomic float, float* %p seq_cst, align 4
+; CHECK: store atomic float %x, float* %p seq_cst, align 4
+ %x = load atomic float, float* %p seq_cst, align 4
+ call void @clobber() ;; keep the load around
+ store atomic float %x, float* %p seq_cst, align 4
+ ret i32 0
+}
+
+define i32 @test20(i32** %p, i8* %v) {
+; CHECK-LABEL: define i32 @test20(
+; CHECK: store atomic i8* %v, i8** [[D:%.*]] unordered, align 4
+ %cast = bitcast i8* %v to i32*
+ store atomic i32* %cast, i32** %p unordered, align 4
+ ret i32 0
+}
+
+define i32 @test21(i32** %p, i8* %v) {
+; CHECK-LABEL: define i32 @test21(
+; CHECK: store atomic i32* %cast, i32** %p monotonic, align 4
+ %cast = bitcast i8* %v to i32*
+ store atomic i32* %cast, i32** %p monotonic, align 4
+ ret i32 0
+}
+
+define void @pr27490a(i8** %p1, i8** %p2) {
+; CHECK-LABEL: define void @pr27490
+; CHECK: %1 = bitcast i8** %p1 to i64*
+; CHECK: %l1 = load i64, i64* %1, align 8
+; CHECK: %2 = bitcast i8** %p2 to i64*
+; CHECK: store volatile i64 %l1, i64* %2, align 8
+ %l = load i8*, i8** %p1
+ store volatile i8* %l, i8** %p2
+ ret void
+}
+
+define void @pr27490b(i8** %p1, i8** %p2) {
+; CHECK-LABEL: define void @pr27490
+; CHECK: %1 = bitcast i8** %p1 to i64*
+; CHECK: %l1 = load i64, i64* %1, align 8
+; CHECK: %2 = bitcast i8** %p2 to i64*
+; CHECK: store atomic i64 %l1, i64* %2 seq_cst, align 8
+ %l = load i8*, i8** %p1
+ store atomic i8* %l, i8** %p2 seq_cst, align 8
+ ret void
+}
diff --git a/test/Transforms/InstCombine/bitcast-bigendian.ll b/test/Transforms/InstCombine/bitcast-bigendian.ll
index ed812e15f385..f558ecc0b605 100644
--- a/test/Transforms/InstCombine/bitcast-bigendian.ll
+++ b/test/Transforms/InstCombine/bitcast-bigendian.ll
@@ -89,3 +89,43 @@ define <2 x float> @test6(float %A){
; CHECK-NEXT: insertelement <2 x float> {{.*}}, float 4.200000e+01, i32 1
; CHECK: ret
}
+
+; Verify that 'xor' of vector and constant is done as a vector bitwise op before the bitcast.
+
+define <2 x i32> @xor_bitcast_vec_to_vec(<1 x i64> %a) {
+ %t1 = bitcast <1 x i64> %a to <2 x i32>
+ %t2 = xor <2 x i32> <i32 1, i32 2>, %t1
+ ret <2 x i32> %t2
+
+; CHECK-LABEL: @xor_bitcast_vec_to_vec(
+; CHECK-NEXT: %t21 = xor <1 x i64> %a, <i64 4294967298>
+; CHECK-NEXT: %t2 = bitcast <1 x i64> %t21 to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> %t2
+}
+
+; Verify that 'and' of integer and constant is done as a vector bitwise op before the bitcast.
+
+define i64 @and_bitcast_vec_to_int(<2 x i32> %a) {
+ %t1 = bitcast <2 x i32> %a to i64
+ %t2 = and i64 %t1, 3
+ ret i64 %t2
+
+; CHECK-LABEL: @and_bitcast_vec_to_int(
+; CHECK-NEXT: %t21 = and <2 x i32> %a, <i32 0, i32 3>
+; CHECK-NEXT: %t2 = bitcast <2 x i32> %t21 to i64
+; CHECK-NEXT: ret i64 %t2
+}
+
+; Verify that 'or' of vector and constant is done as an integer bitwise op before the bitcast.
+
+define <2 x i32> @or_bitcast_int_to_vec(i64 %a) {
+ %t1 = bitcast i64 %a to <2 x i32>
+ %t2 = or <2 x i32> %t1, <i32 1, i32 2>
+ ret <2 x i32> %t2
+
+; CHECK-LABEL: @or_bitcast_int_to_vec(
+; CHECK-NEXT: %t21 = or i64 %a, 4294967298
+; CHECK-NEXT: %t2 = bitcast i64 %t21 to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> %t2
+}
+
diff --git a/test/Transforms/InstCombine/bitcast.ll b/test/Transforms/InstCombine/bitcast.ll
index bccd19cc32ea..74958596f23f 100644
--- a/test/Transforms/InstCombine/bitcast.ll
+++ b/test/Transforms/InstCombine/bitcast.ll
@@ -16,6 +16,59 @@ define i32 @test1(i64 %a) {
; CHECK: ret i32 0
}
+; Perform the bitwise logic in the source type of the operands to eliminate bitcasts.
+
+define <2 x i32> @xor_two_vector_bitcasts(<1 x i64> %a, <1 x i64> %b) {
+ %t1 = bitcast <1 x i64> %a to <2 x i32>
+ %t2 = bitcast <1 x i64> %b to <2 x i32>
+ %t3 = xor <2 x i32> %t1, %t2
+ ret <2 x i32> %t3
+
+; CHECK-LABEL: @xor_two_vector_bitcasts(
+; CHECK-NEXT: %t31 = xor <1 x i64> %a, %b
+; CHECK-NEXT: %t3 = bitcast <1 x i64> %t31 to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> %t3
+}
+
+; Verify that 'xor' of vector and constant is done as a vector bitwise op before the bitcast.
+
+define <2 x i32> @xor_bitcast_vec_to_vec(<1 x i64> %a) {
+ %t1 = bitcast <1 x i64> %a to <2 x i32>
+ %t2 = xor <2 x i32> <i32 1, i32 2>, %t1
+ ret <2 x i32> %t2
+
+; CHECK-LABEL: @xor_bitcast_vec_to_vec(
+; CHECK-NEXT: %t21 = xor <1 x i64> %a, <i64 8589934593>
+; CHECK-NEXT: %t2 = bitcast <1 x i64> %t21 to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> %t2
+}
+
+; Verify that 'and' of integer and constant is done as a vector bitwise op before the bitcast.
+
+define i64 @and_bitcast_vec_to_int(<2 x i32> %a) {
+ %t1 = bitcast <2 x i32> %a to i64
+ %t2 = and i64 %t1, 3
+ ret i64 %t2
+
+; CHECK-LABEL: @and_bitcast_vec_to_int(
+; CHECK-NEXT: %t21 = and <2 x i32> %a, <i32 3, i32 0>
+; CHECK-NEXT: %t2 = bitcast <2 x i32> %t21 to i64
+; CHECK-NEXT: ret i64 %t2
+}
+
+; Verify that 'or' of vector and constant is done as an integer bitwise op before the bitcast.
+
+define <2 x i32> @or_bitcast_int_to_vec(i64 %a) {
+ %t1 = bitcast i64 %a to <2 x i32>
+ %t2 = or <2 x i32> %t1, <i32 1, i32 2>
+ ret <2 x i32> %t2
+
+; CHECK-LABEL: @or_bitcast_int_to_vec(
+; CHECK-NEXT: %t21 = or i64 %a, 8589934593
+; CHECK-NEXT: %t2 = bitcast i64 %t21 to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> %t2
+}
+
; Optimize bitcasts that are extracting low element of vector. This happens
; because of SRoA.
; rdar://7892780
@@ -209,3 +262,10 @@ define <2 x i64> @test7(<2 x i8*>* %arg) nounwind {
; CHECK: bitcast
; CHECK: load
}
+
+define i8 @test8() {
+ %res = bitcast <8 x i1> <i1 true, i1 true, i1 false, i1 true, i1 false, i1 true, i1 false, i1 true> to i8
+ ret i8 %res
+; CHECK: @test8
+; CHECK: ret i8 -85
+}
diff --git a/test/Transforms/InstCombine/bitreverse-fold.ll b/test/Transforms/InstCombine/bitreverse-fold.ll
index ad7fc3a74644..ecdfbc8cb5f9 100644
--- a/test/Transforms/InstCombine/bitreverse-fold.ll
+++ b/test/Transforms/InstCombine/bitreverse-fold.ll
@@ -1,11 +1,96 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
-define i32 @test1(i32 %p) {
-; CHECK-LABEL: @test1
+define i32 @identity_bitreverse_i32(i32 %p) {
+; CHECK-LABEL: @identity_bitreverse_i32(
; CHECK-NEXT: ret i32 %p
%a = call i32 @llvm.bitreverse.i32(i32 %p)
%b = call i32 @llvm.bitreverse.i32(i32 %a)
ret i32 %b
}
+; CHECK-LABEL: @identity_bitreverse_v2i32(
+; CHECK-NEXT: ret <2 x i32> %p
+define <2 x i32> @identity_bitreverse_v2i32(<2 x i32> %p) {
+ %a = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %p)
+ %b = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> %a)
+ ret <2 x i32> %b
+}
+
+; CHECK-LABEL: @reverse_0_i32(
+; CHECK-NEXT: ret i32 0
+define i32 @reverse_0_i32() {
+ %x = call i32 @llvm.bitreverse.i32(i32 0)
+ ret i32 %x
+}
+
+; CHECK-LABEL: @reverse_1_i32(
+; CHECK-NEXT: ret i32 -2147483648
+define i32 @reverse_1_i32() {
+ %x = call i32 @llvm.bitreverse.i32(i32 1)
+ ret i32 %x
+}
+
+; CHECK-LABEL: @reverse_neg1_i32(
+; CHECK-NEXT: ret i32 -1
+define i32 @reverse_neg1_i32() {
+ %x = call i32 @llvm.bitreverse.i32(i32 -1)
+ ret i32 %x
+}
+
+; CHECK-LABEL: @reverse_false_i1(
+; CHECK-NEXT: ret i1 false
+define i1 @reverse_false_i1() {
+ %x = call i1 @llvm.bitreverse.i1(i1 false)
+ ret i1 %x
+}
+
+; CHECK-LABEL: @reverse_true_i1(
+; CHECK-NEXT: ret i1 true
+define i1 @reverse_true_i1() {
+ %x = call i1 @llvm.bitreverse.i1(i1 true)
+ ret i1 %x
+}
+
+; CHECK-LABEL: @reverse_false_v2i1(
+; CHECK-NEXT: ret <2 x i1> zeroinitializer
+define <2 x i1> @reverse_false_v2i1() {
+ %x = call <2 x i1> @llvm.bitreverse.v2i1(<2 x i1> zeroinitializer)
+ ret <2 x i1> %x
+}
+
+; CHECK-LABEL: @reverse_true_v2i1(
+; CHECK-NEXT: ret <2 x i1> <i1 true, i1 true>
+define <2 x i1> @reverse_true_v2i1() {
+ %x = call <2 x i1> @llvm.bitreverse.v2i1(<2 x i1> <i1 true, i1 true>)
+ ret <2 x i1> %x
+}
+
+; CHECK-LABEL: @bitreverse_920_1234_v2i32(
+; CHECK-NEXT: ret <2 x i32> <i32 432013312, i32 1260388352>
+define <2 x i32> @bitreverse_920_1234_v2i32() {
+ %x = call <2 x i32> @llvm.bitreverse.v2i32(<2 x i32> <i32 920, i32 1234>)
+ ret <2 x i32> %x
+}
+
+; CHECK-LABEL: @reverse_100_i3(
+; CHECK-NEXT: ret i3 1
+define i3 @reverse_100_i3() {
+ %x = call i3 @llvm.bitreverse.i3(i3 100)
+ ret i3 %x
+}
+
+; CHECK-LABEL: @reverse_6_3_v2i3(
+; CHECK-NEXT: ret <2 x i3> <i3 3, i3 -2>
+define <2 x i3> @reverse_6_3_v2i3() {
+ %x = call <2 x i3> @llvm.bitreverse.v2i3(<2 x i3> <i3 6, i3 3>)
+ ret <2 x i3> %x
+}
+
+declare i1 @llvm.bitreverse.i1(i1) readnone
+declare <2 x i1> @llvm.bitreverse.v2i1(<2 x i1>) readnone
+
+declare i3 @llvm.bitreverse.i3(i3) readnone
+declare <2 x i3> @llvm.bitreverse.v2i3(<2 x i3>) readnone
+
declare i32 @llvm.bitreverse.i32(i32) readnone
+declare <2 x i32> @llvm.bitreverse.v2i32(<2 x i32>) readnone
diff --git a/test/Transforms/InstCombine/bswap.ll b/test/Transforms/InstCombine/bswap.ll
index b48b2a57c8ce..39102bb31719 100644
--- a/test/Transforms/InstCombine/bswap.ll
+++ b/test/Transforms/InstCombine/bswap.ll
@@ -1,86 +1,137 @@
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
-; RUN: opt < %s -instcombine -S | \
-; RUN: grep "call.*llvm.bswap" | count 7
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; CHECK-LABEL: @test1
+; CHECK: call i32 @llvm.bswap.i32(i32 %i)
define i32 @test1(i32 %i) {
- %tmp1 = lshr i32 %i, 24 ; <i32> [#uses=1]
- %tmp3 = lshr i32 %i, 8 ; <i32> [#uses=1]
- %tmp4 = and i32 %tmp3, 65280 ; <i32> [#uses=1]
- %tmp5 = or i32 %tmp1, %tmp4 ; <i32> [#uses=1]
- %tmp7 = shl i32 %i, 8 ; <i32> [#uses=1]
- %tmp8 = and i32 %tmp7, 16711680 ; <i32> [#uses=1]
- %tmp9 = or i32 %tmp5, %tmp8 ; <i32> [#uses=1]
- %tmp11 = shl i32 %i, 24 ; <i32> [#uses=1]
- %tmp12 = or i32 %tmp9, %tmp11 ; <i32> [#uses=1]
- ret i32 %tmp12
+ %tmp1 = lshr i32 %i, 24
+ %tmp3 = lshr i32 %i, 8
+ %tmp4 = and i32 %tmp3, 65280
+ %tmp5 = or i32 %tmp1, %tmp4
+ %tmp7 = shl i32 %i, 8
+ %tmp8 = and i32 %tmp7, 16711680
+ %tmp9 = or i32 %tmp5, %tmp8
+ %tmp11 = shl i32 %i, 24
+ %tmp12 = or i32 %tmp9, %tmp11
+ ret i32 %tmp12
}
+; CHECK-LABEL: @test2
+; CHECK: call i32 @llvm.bswap.i32(i32 %arg)
define i32 @test2(i32 %arg) {
- %tmp2 = shl i32 %arg, 24 ; <i32> [#uses=1]
- %tmp4 = shl i32 %arg, 8 ; <i32> [#uses=1]
- %tmp5 = and i32 %tmp4, 16711680 ; <i32> [#uses=1]
- %tmp6 = or i32 %tmp2, %tmp5 ; <i32> [#uses=1]
- %tmp8 = lshr i32 %arg, 8 ; <i32> [#uses=1]
- %tmp9 = and i32 %tmp8, 65280 ; <i32> [#uses=1]
- %tmp10 = or i32 %tmp6, %tmp9 ; <i32> [#uses=1]
- %tmp12 = lshr i32 %arg, 24 ; <i32> [#uses=1]
- %tmp14 = or i32 %tmp10, %tmp12 ; <i32> [#uses=1]
- ret i32 %tmp14
+ %tmp2 = shl i32 %arg, 24
+ %tmp4 = shl i32 %arg, 8
+ %tmp5 = and i32 %tmp4, 16711680
+ %tmp6 = or i32 %tmp2, %tmp5
+ %tmp8 = lshr i32 %arg, 8
+ %tmp9 = and i32 %tmp8, 65280
+ %tmp10 = or i32 %tmp6, %tmp9
+ %tmp12 = lshr i32 %arg, 24
+ %tmp14 = or i32 %tmp10, %tmp12
+ ret i32 %tmp14
}
+; CHECK-LABEL: @test3
+; CHECK: call i16 @llvm.bswap.i16(i16 %s)
define i16 @test3(i16 %s) {
- %tmp2 = lshr i16 %s, 8 ; <i16> [#uses=1]
- %tmp4 = shl i16 %s, 8 ; <i16> [#uses=1]
- %tmp5 = or i16 %tmp2, %tmp4 ; <i16> [#uses=1]
- ret i16 %tmp5
+ %tmp2 = lshr i16 %s, 8
+ %tmp4 = shl i16 %s, 8
+ %tmp5 = or i16 %tmp2, %tmp4
+ ret i16 %tmp5
}
+; CHECK-LABEL: @test4
+; CHECK: call i16 @llvm.bswap.i16(i16 %s)
define i16 @test4(i16 %s) {
- %tmp2 = lshr i16 %s, 8 ; <i16> [#uses=1]
- %tmp4 = shl i16 %s, 8 ; <i16> [#uses=1]
- %tmp5 = or i16 %tmp4, %tmp2 ; <i16> [#uses=1]
- ret i16 %tmp5
+ %tmp2 = lshr i16 %s, 8
+ %tmp4 = shl i16 %s, 8
+ %tmp5 = or i16 %tmp4, %tmp2
+ ret i16 %tmp5
}
+; CHECK-LABEL: @test5
+; CHECK: call i16 @llvm.bswap.i16(i16 %a)
define i16 @test5(i16 %a) {
- %tmp = zext i16 %a to i32 ; <i32> [#uses=2]
- %tmp1 = and i32 %tmp, 65280 ; <i32> [#uses=1]
- %tmp2 = ashr i32 %tmp1, 8 ; <i32> [#uses=1]
- %tmp2.upgrd.1 = trunc i32 %tmp2 to i16 ; <i16> [#uses=1]
- %tmp4 = and i32 %tmp, 255 ; <i32> [#uses=1]
- %tmp5 = shl i32 %tmp4, 8 ; <i32> [#uses=1]
- %tmp5.upgrd.2 = trunc i32 %tmp5 to i16 ; <i16> [#uses=1]
- %tmp.upgrd.3 = or i16 %tmp2.upgrd.1, %tmp5.upgrd.2 ; <i16> [#uses=1]
- %tmp6 = bitcast i16 %tmp.upgrd.3 to i16 ; <i16> [#uses=1]
- %tmp6.upgrd.4 = zext i16 %tmp6 to i32 ; <i32> [#uses=1]
- %retval = trunc i32 %tmp6.upgrd.4 to i16 ; <i16> [#uses=1]
- ret i16 %retval
+ %tmp = zext i16 %a to i32
+ %tmp1 = and i32 %tmp, 65280
+ %tmp2 = ashr i32 %tmp1, 8
+ %tmp2.upgrd.1 = trunc i32 %tmp2 to i16
+ %tmp4 = and i32 %tmp, 255
+ %tmp5 = shl i32 %tmp4, 8
+ %tmp5.upgrd.2 = trunc i32 %tmp5 to i16
+ %tmp.upgrd.3 = or i16 %tmp2.upgrd.1, %tmp5.upgrd.2
+ %tmp6 = bitcast i16 %tmp.upgrd.3 to i16
+ %tmp6.upgrd.4 = zext i16 %tmp6 to i32
+ %retval = trunc i32 %tmp6.upgrd.4 to i16
+ ret i16 %retval
}
; PR2842
+; CHECK-LABEL: @test6
+; CHECK: call i32 @llvm.bswap.i32(i32 %x)
define i32 @test6(i32 %x) nounwind readnone {
- %tmp = shl i32 %x, 16 ; <i32> [#uses=1]
- %x.mask = and i32 %x, 65280 ; <i32> [#uses=1]
- %tmp1 = lshr i32 %x, 16 ; <i32> [#uses=1]
- %tmp2 = and i32 %tmp1, 255 ; <i32> [#uses=1]
- %tmp3 = or i32 %x.mask, %tmp ; <i32> [#uses=1]
- %tmp4 = or i32 %tmp3, %tmp2 ; <i32> [#uses=1]
- %tmp5 = shl i32 %tmp4, 8 ; <i32> [#uses=1]
- %tmp6 = lshr i32 %x, 24 ; <i32> [#uses=1]
- %tmp7 = or i32 %tmp5, %tmp6 ; <i32> [#uses=1]
- ret i32 %tmp7
+ %tmp = shl i32 %x, 16
+ %x.mask = and i32 %x, 65280
+ %tmp1 = lshr i32 %x, 16
+ %tmp2 = and i32 %tmp1, 255
+ %tmp3 = or i32 %x.mask, %tmp
+ %tmp4 = or i32 %tmp3, %tmp2
+ %tmp5 = shl i32 %tmp4, 8
+ %tmp6 = lshr i32 %x, 24
+ %tmp7 = or i32 %tmp5, %tmp6
+ ret i32 %tmp7
}
; PR23863
+; CHECK-LABEL: @test7
+; CHECK: call i32 @llvm.bswap.i32(i32 %x)
define i32 @test7(i32 %x) {
- %shl = shl i32 %x, 16
- %shr = lshr i32 %x, 16
- %or = or i32 %shl, %shr
- %and2 = shl i32 %or, 8
- %shl3 = and i32 %and2, -16711936
- %and4 = lshr i32 %or, 8
- %shr5 = and i32 %and4, 16711935
- %or6 = or i32 %shl3, %shr5
- ret i32 %or6
+ %shl = shl i32 %x, 16
+ %shr = lshr i32 %x, 16
+ %or = or i32 %shl, %shr
+ %and2 = shl i32 %or, 8
+ %shl3 = and i32 %and2, -16711936
+ %and4 = lshr i32 %or, 8
+ %shr5 = and i32 %and4, 16711935
+ %or6 = or i32 %shl3, %shr5
+ ret i32 %or6
+}
+
+; CHECK-LABEL: @test8
+; CHECK: call i16 @llvm.bswap.i16(i16 %a)
+define i16 @test8(i16 %a) {
+entry:
+ %conv = zext i16 %a to i32
+ %shr = lshr i16 %a, 8
+ %shl = shl i32 %conv, 8
+ %conv1 = zext i16 %shr to i32
+ %or = or i32 %conv1, %shl
+ %conv2 = trunc i32 %or to i16
+ ret i16 %conv2
+}
+
+; CHECK-LABEL: @test9
+; CHECK: call i16 @llvm.bswap.i16(i16 %a)
+define i16 @test9(i16 %a) {
+entry:
+ %conv = zext i16 %a to i32
+ %shr = lshr i32 %conv, 8
+ %shl = shl i32 %conv, 8
+ %or = or i32 %shr, %shl
+ %conv2 = trunc i32 %or to i16
+ ret i16 %conv2
+}
+
+; CHECK-LABEL: @test10
+; CHECK: trunc i32 %a to i16
+; CHECK: call i16 @llvm.bswap.i16(i16 %trunc)
+define i16 @test10(i32 %a) {
+ %shr1 = lshr i32 %a, 8
+ %and1 = and i32 %shr1, 255
+ %and2 = shl i32 %a, 8
+ %shl1 = and i32 %and2, 65280
+ %or = or i32 %and1, %shl1
+ %conv = trunc i32 %or to i16
+ ret i16 %conv
}
diff --git a/test/Transforms/InstCombine/builtin-object-size-offset.ll b/test/Transforms/InstCombine/builtin-object-size-offset.ll
new file mode 100644
index 000000000000..7ab24a9acd94
--- /dev/null
+++ b/test/Transforms/InstCombine/builtin-object-size-offset.ll
@@ -0,0 +1,58 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; #include <stdlib.h>
+; #include <stdio.h>
+;
+; int foo1(int N) {
+; char Big[20];
+; char Small[10];
+; char *Ptr = N ? Big + 10 : Small;
+; return __builtin_object_size(Ptr, 0);
+; }
+;
+; void foo() {
+; size_t ret;
+; ret = foo1(0);
+; printf("\n %d", ret);
+; }
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@.str = private unnamed_addr constant [5 x i8] c"\0A %d\00", align 1
+
+define i32 @foo1(i32 %N) {
+entry:
+ %Big = alloca [20 x i8], align 16
+ %Small = alloca [10 x i8], align 1
+ %0 = getelementptr inbounds [20 x i8], [20 x i8]* %Big, i64 0, i64 0
+ call void @llvm.lifetime.start(i64 20, i8* %0)
+ %1 = getelementptr inbounds [10 x i8], [10 x i8]* %Small, i64 0, i64 0
+ call void @llvm.lifetime.start(i64 10, i8* %1)
+ %tobool = icmp ne i32 %N, 0
+ %add.ptr = getelementptr inbounds [20 x i8], [20 x i8]* %Big, i64 0, i64 10
+ %cond = select i1 %tobool, i8* %add.ptr, i8* %1
+ %2 = call i64 @llvm.objectsize.i64.p0i8(i8* %cond, i1 false)
+ %conv = trunc i64 %2 to i32
+ call void @llvm.lifetime.end(i64 10, i8* %1)
+ call void @llvm.lifetime.end(i64 20, i8* %0)
+ ret i32 %conv
+; CHECK: ret i32 10
+}
+
+declare void @llvm.lifetime.start(i64, i8* nocapture)
+
+declare i64 @llvm.objectsize.i64.p0i8(i8*, i1)
+
+declare void @llvm.lifetime.end(i64, i8* nocapture)
+
+define void @foo() {
+entry:
+ %call = tail call i32 @foo1(i32 0)
+ %conv = sext i32 %call to i64
+ %call1 = tail call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([5 x i8], [5 x i8]* @.str, i64 0, i64 0), i64 %conv)
+ ret void
+}
+
+declare i32 @printf(i8* nocapture readonly, ...)
+
diff --git a/test/Transforms/InstCombine/builtin-object-size-ptr.ll b/test/Transforms/InstCombine/builtin-object-size-ptr.ll
new file mode 100644
index 000000000000..b38513999dc1
--- /dev/null
+++ b/test/Transforms/InstCombine/builtin-object-size-ptr.ll
@@ -0,0 +1,34 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; int foo() {
+; struct V { char buf1[10];
+; int b;
+; char buf2[10];
+; } var;
+;
+; char *p = &var.buf1[1];
+; return __builtin_object_size (p, 0);
+; }
+
+%struct.V = type { [10 x i8], i32, [10 x i8] }
+
+define i32 @foo() #0 {
+entry:
+ %var = alloca %struct.V, align 4
+ %0 = bitcast %struct.V* %var to i8*
+ call void @llvm.lifetime.start(i64 28, i8* %0) #3
+ %buf1 = getelementptr inbounds %struct.V, %struct.V* %var, i32 0, i32 0
+ %arrayidx = getelementptr inbounds [10 x i8], [10 x i8]* %buf1, i64 0, i64 1
+ %1 = call i64 @llvm.objectsize.i64.p0i8(i8* %arrayidx, i1 false)
+ %conv = trunc i64 %1 to i32
+ call void @llvm.lifetime.end(i64 28, i8* %0) #3
+ ret i32 %conv
+; CHECK: ret i32 27
+; CHECK-NOT: ret i32 -1
+}
+
+declare void @llvm.lifetime.start(i64, i8* nocapture) #1
+
+declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) #2
+
+declare void @llvm.lifetime.end(i64, i8* nocapture) #1
diff --git a/test/Transforms/InstCombine/cast-set.ll b/test/Transforms/InstCombine/cast-set.ll
index 8f19bdcdfde3..6da6dc3236c1 100644
--- a/test/Transforms/InstCombine/cast-set.ll
+++ b/test/Transforms/InstCombine/cast-set.ll
@@ -1,72 +1,77 @@
-; This tests for various complex cast elimination cases instcombine should
-; handle.
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128"
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
define i1 @test1(i32 %X) {
- %A = bitcast i32 %X to i32 ; <i32> [#uses=1]
- ; Convert to setne int %X, 12
- %c = icmp ne i32 %A, 12 ; <i1> [#uses=1]
- ret i1 %c
; CHECK-LABEL: @test1(
-; CHECK: %c = icmp ne i32 %X, 12
-; CHECK: ret i1 %c
+; CHECK-NEXT: [[C:%.*]] = icmp ne i32 %X, 12
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %A = bitcast i32 %X to i32
+ ; Convert to setne int %X, 12
+ %c = icmp ne i32 %A, 12
+ ret i1 %c
}
define i1 @test2(i32 %X, i32 %Y) {
- %A = bitcast i32 %X to i32 ; <i32> [#uses=1]
- %B = bitcast i32 %Y to i32 ; <i32> [#uses=1]
- ; Convert to setne int %X, %Y
- %c = icmp ne i32 %A, %B ; <i1> [#uses=1]
- ret i1 %c
; CHECK-LABEL: @test2(
-; CHECK: %c = icmp ne i32 %X, %Y
-; CHECK: ret i1 %c
+; CHECK-NEXT: [[C:%.*]] = icmp ne i32 %X, %Y
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %A = bitcast i32 %X to i32
+ %B = bitcast i32 %Y to i32
+ ; Convert to setne int %X, %Y
+ %c = icmp ne i32 %A, %B
+ ret i1 %c
}
define i32 @test4(i32 %A) {
- %B = bitcast i32 %A to i32 ; <i32> [#uses=1]
- %C = shl i32 %B, 2 ; <i32> [#uses=1]
- %D = bitcast i32 %C to i32 ; <i32> [#uses=1]
- ret i32 %D
; CHECK-LABEL: @test4(
-; CHECK: %C = shl i32 %A, 2
-; CHECK: ret i32 %C
+; CHECK-NEXT: [[C:%.*]] = shl i32 %A, 2
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = bitcast i32 %A to i32
+ %C = shl i32 %B, 2
+ %D = bitcast i32 %C to i32
+ ret i32 %D
}
define i16 @test5(i16 %A) {
- %B = sext i16 %A to i32 ; <i32> [#uses=1]
- %C = and i32 %B, 15 ; <i32> [#uses=1]
- %D = trunc i32 %C to i16 ; <i16> [#uses=1]
- ret i16 %D
; CHECK-LABEL: @test5(
-; CHECK: %C = and i16 %A, 15
-; CHECK: ret i16 %C
+; CHECK-NEXT: [[C:%.*]] = and i16 %A, 15
+; CHECK-NEXT: ret i16 [[C]]
+;
+ %B = sext i16 %A to i32
+ %C = and i32 %B, 15
+ %D = trunc i32 %C to i16
+ ret i16 %D
}
define i1 @test6(i1 %A) {
- %B = zext i1 %A to i32 ; <i32> [#uses=1]
- %C = icmp ne i32 %B, 0 ; <i1> [#uses=1]
- ret i1 %C
; CHECK-LABEL: @test6(
-; CHECK: ret i1 %A
+; CHECK-NEXT: ret i1 %A
+;
+ %B = zext i1 %A to i32
+ %C = icmp ne i32 %B, 0
+ ret i1 %C
}
define i1 @test6a(i1 %A) {
- %B = zext i1 %A to i32 ; <i32> [#uses=1]
- %C = icmp ne i32 %B, -1 ; <i1> [#uses=1]
- ret i1 %C
; CHECK-LABEL: @test6a(
-; CHECK: ret i1 true
+; CHECK-NEXT: ret i1 true
+;
+ %B = zext i1 %A to i32
+ %C = icmp ne i32 %B, -1
+ ret i1 %C
}
define i1 @test7(i8* %A) {
- %B = bitcast i8* %A to i32* ; <i32*> [#uses=1]
- %C = icmp eq i32* %B, null ; <i1> [#uses=1]
- ret i1 %C
; CHECK-LABEL: @test7(
-; CHECK: %C = icmp eq i8* %A, null
-; CHECK: ret i1 %C
+; CHECK-NEXT: [[C:%.*]] = icmp eq i8* %A, null
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = bitcast i8* %A to i32*
+ %C = icmp eq i32* %B, null
+ ret i1 %C
}
diff --git a/test/Transforms/InstCombine/cast.ll b/test/Transforms/InstCombine/cast.ll
index 016b6aa64558..3bc79cef735e 100644
--- a/test/Transforms/InstCombine/cast.ll
+++ b/test/Transforms/InstCombine/cast.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; Tests to make sure elimination of casts is working correctly
; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "E-p:64:64:64-p1:32:32:32-p2:64:64:64-p3:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128-n8:16:32:64"
@@ -5,133 +6,165 @@ target datalayout = "E-p:64:64:64-p1:32:32:32-p2:64:64:64-p3:64:64:64-a0:0:8-f32
@inbuf = external global [32832 x i8] ; <[32832 x i8]*> [#uses=1]
define i32 @test1(i32 %A) {
- %c1 = bitcast i32 %A to i32 ; <i32> [#uses=1]
- %c2 = bitcast i32 %c1 to i32 ; <i32> [#uses=1]
- ret i32 %c2
-; CHECK: ret i32 %A
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: ret i32 %A
+;
+ %c1 = bitcast i32 %A to i32 ; <i32> [#uses=1]
+ %c2 = bitcast i32 %c1 to i32 ; <i32> [#uses=1]
+ ret i32 %c2
}
define i64 @test2(i8 %A) {
- %c1 = zext i8 %A to i16 ; <i16> [#uses=1]
- %c2 = zext i16 %c1 to i32 ; <i32> [#uses=1]
- %Ret = zext i32 %c2 to i64 ; <i64> [#uses=1]
- ret i64 %Ret
-; CHECK: %Ret = zext i8 %A to i64
-; CHECK: ret i64 %Ret
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[RET:%.*]] = zext i8 %A to i64
+; CHECK-NEXT: ret i64 [[RET]]
+;
+ %c1 = zext i8 %A to i16 ; <i16> [#uses=1]
+ %c2 = zext i16 %c1 to i32 ; <i32> [#uses=1]
+ %Ret = zext i32 %c2 to i64 ; <i64> [#uses=1]
+ ret i64 %Ret
}
; This function should just use bitwise AND
define i64 @test3(i64 %A) {
- %c1 = trunc i64 %A to i8 ; <i8> [#uses=1]
- %c2 = zext i8 %c1 to i64 ; <i64> [#uses=1]
- ret i64 %c2
-; CHECK: %c2 = and i64 %A, 255
-; CHECK: ret i64 %c2
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[C2:%.*]] = and i64 %A, 255
+; CHECK-NEXT: ret i64 [[C2]]
+;
+ %c1 = trunc i64 %A to i8 ; <i8> [#uses=1]
+ %c2 = zext i8 %c1 to i64 ; <i64> [#uses=1]
+ ret i64 %c2
}
define i32 @test4(i32 %A, i32 %B) {
- %COND = icmp slt i32 %A, %B ; <i1> [#uses=1]
- ; Booleans are unsigned integrals
- %c = zext i1 %COND to i8 ; <i8> [#uses=1]
- ; for the cast elim purpose
- %result = zext i8 %c to i32 ; <i32> [#uses=1]
- ret i32 %result
-; CHECK: %COND = icmp slt i32 %A, %B
-; CHECK: %result = zext i1 %COND to i32
-; CHECK: ret i32 %result
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[COND:%.*]] = icmp slt i32 %A, %B
+; CHECK-NEXT: [[RESULT:%.*]] = zext i1 [[COND]] to i32
+; CHECK-NEXT: ret i32 [[RESULT]]
+;
+ %COND = icmp slt i32 %A, %B ; <i1> [#uses=1]
+ ; Booleans are unsigned integrals
+ %c = zext i1 %COND to i8 ; <i8> [#uses=1]
+ ; for the cast elim purpose
+ %result = zext i8 %c to i32 ; <i32> [#uses=1]
+ ret i32 %result
}
define i32 @test5(i1 %B) {
; This cast should get folded into
- %c = zext i1 %B to i8 ; <i8> [#uses=1]
- ; this cast
- %result = zext i8 %c to i32 ; <i32> [#uses=1]
- ret i32 %result
-; CHECK: %result = zext i1 %B to i32
-; CHECK: ret i32 %result
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[RESULT:%.*]] = zext i1 %B to i32
+; CHECK-NEXT: ret i32 [[RESULT]]
+;
+ %c = zext i1 %B to i8 ; <i8> [#uses=1]
+ ; this cast
+ %result = zext i8 %c to i32 ; <i32> [#uses=1]
+ ret i32 %result
}
define i32 @test6(i64 %A) {
- %c1 = trunc i64 %A to i32 ; <i32> [#uses=1]
- %res = bitcast i32 %c1 to i32 ; <i32> [#uses=1]
- ret i32 %res
-; CHECK: trunc i64 %A to i32
-; CHECK-NEXT: ret i32
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[C1:%.*]] = trunc i64 %A to i32
+; CHECK-NEXT: ret i32 [[C1]]
+;
+ %c1 = trunc i64 %A to i32 ; <i32> [#uses=1]
+ %res = bitcast i32 %c1 to i32 ; <i32> [#uses=1]
+ ret i32 %res
}
define i64 @test7(i1 %A) {
- %c1 = zext i1 %A to i32 ; <i32> [#uses=1]
- %res = sext i32 %c1 to i64 ; <i64> [#uses=1]
- ret i64 %res
-; CHECK: %res = zext i1 %A to i64
-; CHECK: ret i64 %res
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[RES:%.*]] = zext i1 %A to i64
+; CHECK-NEXT: ret i64 [[RES]]
+;
+ %c1 = zext i1 %A to i32 ; <i32> [#uses=1]
+ %res = sext i32 %c1 to i64 ; <i64> [#uses=1]
+ ret i64 %res
}
define i64 @test8(i8 %A) {
- %c1 = sext i8 %A to i64 ; <i64> [#uses=1]
- %res = bitcast i64 %c1 to i64 ; <i64> [#uses=1]
- ret i64 %res
-; CHECK: = sext i8 %A to i64
-; CHECK-NEXT: ret i64
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[C1:%.*]] = sext i8 %A to i64
+; CHECK-NEXT: ret i64 [[C1]]
+;
+ %c1 = sext i8 %A to i64 ; <i64> [#uses=1]
+ %res = bitcast i64 %c1 to i64 ; <i64> [#uses=1]
+ ret i64 %res
}
define i16 @test9(i16 %A) {
- %c1 = sext i16 %A to i32 ; <i32> [#uses=1]
- %c2 = trunc i32 %c1 to i16 ; <i16> [#uses=1]
- ret i16 %c2
-; CHECK: ret i16 %A
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: ret i16 %A
+;
+ %c1 = sext i16 %A to i32 ; <i32> [#uses=1]
+ %c2 = trunc i32 %c1 to i16 ; <i16> [#uses=1]
+ ret i16 %c2
}
define i16 @test10(i16 %A) {
- %c1 = sext i16 %A to i32 ; <i32> [#uses=1]
- %c2 = trunc i32 %c1 to i16 ; <i16> [#uses=1]
- ret i16 %c2
-; CHECK: ret i16 %A
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: ret i16 %A
+;
+ %c1 = sext i16 %A to i32 ; <i32> [#uses=1]
+ %c2 = trunc i32 %c1 to i16 ; <i16> [#uses=1]
+ ret i16 %c2
}
declare void @varargs(i32, ...)
define void @test11(i32* %P) {
- %c = bitcast i32* %P to i16* ; <i16*> [#uses=1]
- call void (i32, ...) @varargs( i32 5, i16* %c )
- ret void
-; CHECK: call void (i32, ...) @varargs(i32 5, i32* %P)
-; CHECK: ret void
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: call void (i32, ...) @varargs(i32 5, i32* %P)
+; CHECK-NEXT: ret void
+;
+ %c = bitcast i32* %P to i16* ; <i16*> [#uses=1]
+ call void (i32, ...) @varargs( i32 5, i16* %c )
+ ret void
}
declare i32 @__gxx_personality_v0(...)
define void @test_invoke_vararg_cast(i32* %a, i32* %b) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+; CHECK-LABEL: @test_invoke_vararg_cast(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: invoke void (i32, ...) @varargs(i32 1, i32* %b, i32* %a)
+; CHECK-NEXT: to label %invoke.cont unwind label %lpad
+; CHECK: invoke.cont:
+; CHECK-NEXT: ret void
+; CHECK: lpad:
+; CHECK-NEXT: [[TMP0:%.*]] = landingpad { i8*, i32
+;
entry:
%0 = bitcast i32* %b to i8*
%1 = bitcast i32* %a to i64*
invoke void (i32, ...) @varargs(i32 1, i8* %0, i64* %1)
- to label %invoke.cont unwind label %lpad
+ to label %invoke.cont unwind label %lpad
invoke.cont: ; preds = %entry
ret void
lpad: ; preds = %entry
%2 = landingpad { i8*, i32 }
- cleanup
+ cleanup
ret void
-; CHECK-LABEL: test_invoke_vararg_cast
-; CHECK-LABEL: entry:
-; CHECK: invoke void (i32, ...) @varargs(i32 1, i32* %b, i32* %a)
}
define i8* @test13(i64 %A) {
- %c = getelementptr [0 x i8], [0 x i8]* bitcast ([32832 x i8]* @inbuf to [0 x i8]*), i64 0, i64 %A ; <i8*> [#uses=1]
- ret i8* %c
-; CHECK: %c = getelementptr [32832 x i8], [32832 x i8]* @inbuf, i64 0, i64 %A
-; CHECK: ret i8* %c
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: [[C:%.*]] = getelementptr [32832 x i8], [32832 x i8]* @inbuf, i64 0, i64 %A
+; CHECK-NEXT: ret i8* [[C]]
+;
+ %c = getelementptr [0 x i8], [0 x i8]* bitcast ([32832 x i8]* @inbuf to [0 x i8]*), i64 0, i64 %A ; <i8*> [#uses=1]
+ ret i8* %c
}
define i1 @test14(i8 %A) {
- %c = bitcast i8 %A to i8 ; <i8> [#uses=1]
- %X = icmp ult i8 %c, -128 ; <i1> [#uses=1]
- ret i1 %X
-; CHECK: %X = icmp sgt i8 %A, -1
-; CHECK: ret i1 %X
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: [[X:%.*]] = icmp sgt i8 %A, -1
+; CHECK-NEXT: ret i1 [[X]]
+;
+ %c = bitcast i8 %A to i8 ; <i8> [#uses=1]
+ %X = icmp ult i8 %c, -128 ; <i1> [#uses=1]
+ ret i1 %X
}
@@ -143,463 +176,557 @@ define i1 @test14(i8 %A) {
;}
define i1 @test16(i32* %P) {
- %c = icmp ne i32* %P, null ; <i1> [#uses=1]
- ret i1 %c
-; CHECK: %c = icmp ne i32* %P, null
-; CHECK: ret i1 %c
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: [[C:%.*]] = icmp ne i32* %P, null
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %c = icmp ne i32* %P, null ; <i1> [#uses=1]
+ ret i1 %c
}
define i16 @test17(i1 %tmp3) {
- %c = zext i1 %tmp3 to i32 ; <i32> [#uses=1]
- %t86 = trunc i32 %c to i16 ; <i16> [#uses=1]
- ret i16 %t86
-; CHECK: %t86 = zext i1 %tmp3 to i16
-; CHECK: ret i16 %t86
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: [[T86:%.*]] = zext i1 %tmp3 to i16
+; CHECK-NEXT: ret i16 [[T86]]
+;
+ %c = zext i1 %tmp3 to i32 ; <i32> [#uses=1]
+ %t86 = trunc i32 %c to i16 ; <i16> [#uses=1]
+ ret i16 %t86
}
define i16 @test18(i8 %tmp3) {
- %c = sext i8 %tmp3 to i32 ; <i32> [#uses=1]
- %t86 = trunc i32 %c to i16 ; <i16> [#uses=1]
- ret i16 %t86
-; CHECK: %t86 = sext i8 %tmp3 to i16
-; CHECK: ret i16 %t86
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: [[T86:%.*]] = sext i8 %tmp3 to i16
+; CHECK-NEXT: ret i16 [[T86]]
+;
+ %c = sext i8 %tmp3 to i32 ; <i32> [#uses=1]
+ %t86 = trunc i32 %c to i16 ; <i16> [#uses=1]
+ ret i16 %t86
}
define i1 @test19(i32 %X) {
- %c = sext i32 %X to i64 ; <i64> [#uses=1]
- %Z = icmp slt i64 %c, 12345 ; <i1> [#uses=1]
- ret i1 %Z
-; CHECK: %Z = icmp slt i32 %X, 12345
-; CHECK: ret i1 %Z
+; CHECK-LABEL: @test19(
+; CHECK-NEXT: [[Z:%.*]] = icmp slt i32 %X, 12345
+; CHECK-NEXT: ret i1 [[Z]]
+;
+ %c = sext i32 %X to i64
+ %Z = icmp slt i64 %c, 12345
+ ret i1 %Z
+}
+
+define <2 x i1> @test19vec(<2 x i32> %X) {
+; CHECK-LABEL: @test19vec(
+; CHECK-NEXT: [[Z:%.*]] = icmp slt <2 x i32> %X, <i32 12345, i32 2147483647>
+; CHECK-NEXT: ret <2 x i1> [[Z]]
+;
+ %c = sext <2 x i32> %X to <2 x i64>
+ %Z = icmp slt <2 x i64> %c, <i64 12345, i64 2147483647>
+ ret <2 x i1> %Z
+}
+
+define <3 x i1> @test19vec2(<3 x i1> %X) {
+; CHECK-LABEL: @test19vec2(
+; CHECK-NEXT: [[CMPEQ:%.*]] = xor <3 x i1> %X, <i1 true, i1 true, i1 true>
+; CHECK-NEXT: ret <3 x i1> [[CMPEQ]]
+;
+ %sext = sext <3 x i1> %X to <3 x i32>
+ %cmpeq = icmp eq <3 x i32> %sext, zeroinitializer
+ ret <3 x i1> %cmpeq
}
define i1 @test20(i1 %B) {
- %c = zext i1 %B to i32 ; <i32> [#uses=1]
- %D = icmp slt i32 %c, -1 ; <i1> [#uses=1]
- ;; false
- ret i1 %D
-; CHECK: ret i1 false
+; CHECK-LABEL: @test20(
+; CHECK-NEXT: ret i1 false
+;
+ %c = zext i1 %B to i32 ; <i32> [#uses=1]
+ %D = icmp slt i32 %c, -1 ; <i1> [#uses=1]
+ ;; false
+ ret i1 %D
}
define i32 @test21(i32 %X) {
- %c1 = trunc i32 %X to i8 ; <i8> [#uses=1]
- ;; sext -> zext -> and -> nop
- %c2 = sext i8 %c1 to i32 ; <i32> [#uses=1]
- %RV = and i32 %c2, 255 ; <i32> [#uses=1]
- ret i32 %RV
-; CHECK: %c21 = and i32 %X, 255
-; CHECK: ret i32 %c21
+; CHECK-LABEL: @test21(
+; CHECK-NEXT: [[C21:%.*]] = and i32 %X, 255
+; CHECK-NEXT: ret i32 [[C21]]
+;
+ %c1 = trunc i32 %X to i8 ; <i8> [#uses=1]
+ ;; sext -> zext -> and -> nop
+ %c2 = sext i8 %c1 to i32 ; <i32> [#uses=1]
+ %RV = and i32 %c2, 255 ; <i32> [#uses=1]
+ ret i32 %RV
}
define i32 @test22(i32 %X) {
- %c1 = trunc i32 %X to i8 ; <i8> [#uses=1]
- ;; sext -> zext -> and -> nop
- %c2 = sext i8 %c1 to i32 ; <i32> [#uses=1]
- %RV = shl i32 %c2, 24 ; <i32> [#uses=1]
- ret i32 %RV
-; CHECK: shl i32 %X, 24
-; CHECK-NEXT: ret i32
+; CHECK-LABEL: @test22(
+; CHECK-NEXT: [[SEXT:%.*]] = shl i32 %X, 24
+; CHECK-NEXT: ret i32 [[SEXT]]
+;
+ %c1 = trunc i32 %X to i8 ; <i8> [#uses=1]
+ ;; sext -> zext -> and -> nop
+ %c2 = sext i8 %c1 to i32 ; <i32> [#uses=1]
+ %RV = shl i32 %c2, 24 ; <i32> [#uses=1]
+ ret i32 %RV
}
define i32 @test23(i32 %X) {
;; Turn into an AND even though X
- %c1 = trunc i32 %X to i16 ; <i16> [#uses=1]
- ;; and Z are signed.
- %c2 = zext i16 %c1 to i32 ; <i32> [#uses=1]
- ret i32 %c2
-; CHECK: %c2 = and i32 %X, 65535
-; CHECK: ret i32 %c2
+; CHECK-LABEL: @test23(
+; CHECK-NEXT: [[C2:%.*]] = and i32 %X, 65535
+; CHECK-NEXT: ret i32 [[C2]]
+;
+ %c1 = trunc i32 %X to i16 ; <i16> [#uses=1]
+ ;; and Z are signed.
+ %c2 = zext i16 %c1 to i32 ; <i32> [#uses=1]
+ ret i32 %c2
}
define i1 @test24(i1 %C) {
- %X = select i1 %C, i32 14, i32 1234 ; <i32> [#uses=1]
- ;; Fold cast into select
- %c = icmp ne i32 %X, 0 ; <i1> [#uses=1]
- ret i1 %c
-; CHECK: ret i1 true
+; CHECK-LABEL: @test24(
+; CHECK-NEXT: ret i1 true
+;
+ %X = select i1 %C, i32 14, i32 1234 ; <i32> [#uses=1]
+ ;; Fold cast into select
+ %c = icmp ne i32 %X, 0 ; <i1> [#uses=1]
+ ret i1 %c
}
define i32 @test26(float %F) {
;; no need to cast from float->double.
- %c = fpext float %F to double ; <double> [#uses=1]
- %D = fptosi double %c to i32 ; <i32> [#uses=1]
- ret i32 %D
-; CHECK: %D = fptosi float %F to i32
-; CHECK: ret i32 %D
+; CHECK-LABEL: @test26(
+; CHECK-NEXT: [[D:%.*]] = fptosi float %F to i32
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %c = fpext float %F to double ; <double> [#uses=1]
+ %D = fptosi double %c to i32 ; <i32> [#uses=1]
+ ret i32 %D
}
define [4 x float]* @test27([9 x [4 x float]]* %A) {
- %c = bitcast [9 x [4 x float]]* %A to [4 x float]* ; <[4 x float]*> [#uses=1]
- ret [4 x float]* %c
-; CHECK: %c = getelementptr inbounds [9 x [4 x float]], [9 x [4 x float]]* %A, i64 0, i64 0
-; CHECK: ret [4 x float]* %c
+; CHECK-LABEL: @test27(
+; CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [9 x [4 x float]], [9 x [4 x float]]* %A, i64 0, i64 0
+; CHECK-NEXT: ret [4 x float]* [[C]]
+;
+ %c = bitcast [9 x [4 x float]]* %A to [4 x float]* ; <[4 x float]*> [#uses=1]
+ ret [4 x float]* %c
}
define float* @test28([4 x float]* %A) {
- %c = bitcast [4 x float]* %A to float* ; <float*> [#uses=1]
- ret float* %c
-; CHECK: %c = getelementptr inbounds [4 x float], [4 x float]* %A, i64 0, i64 0
-; CHECK: ret float* %c
+; CHECK-LABEL: @test28(
+; CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [4 x float], [4 x float]* %A, i64 0, i64 0
+; CHECK-NEXT: ret float* [[C]]
+;
+ %c = bitcast [4 x float]* %A to float* ; <float*> [#uses=1]
+ ret float* %c
}
define i32 @test29(i32 %c1, i32 %c2) {
- %tmp1 = trunc i32 %c1 to i8 ; <i8> [#uses=1]
- %tmp4.mask = trunc i32 %c2 to i8 ; <i8> [#uses=1]
- %tmp = or i8 %tmp4.mask, %tmp1 ; <i8> [#uses=1]
- %tmp10 = zext i8 %tmp to i32 ; <i32> [#uses=1]
- ret i32 %tmp10
-; CHECK: %tmp2 = or i32 %c2, %c1
-; CHECK: %tmp10 = and i32 %tmp2, 255
-; CHECK: ret i32 %tmp10
+; CHECK-LABEL: @test29(
+; CHECK-NEXT: [[TMP2:%.*]] = or i32 %c2, %c1
+; CHECK-NEXT: [[TMP10:%.*]] = and i32 [[TMP2]], 255
+; CHECK-NEXT: ret i32 [[TMP10]]
+;
+ %tmp1 = trunc i32 %c1 to i8 ; <i8> [#uses=1]
+ %tmp4.mask = trunc i32 %c2 to i8 ; <i8> [#uses=1]
+ %tmp = or i8 %tmp4.mask, %tmp1 ; <i8> [#uses=1]
+ %tmp10 = zext i8 %tmp to i32 ; <i32> [#uses=1]
+ ret i32 %tmp10
}
define i32 @test30(i32 %c1) {
- %c2 = trunc i32 %c1 to i8 ; <i8> [#uses=1]
- %c3 = xor i8 %c2, 1 ; <i8> [#uses=1]
- %c4 = zext i8 %c3 to i32 ; <i32> [#uses=1]
- ret i32 %c4
-; CHECK: %c3 = and i32 %c1, 255
-; CHECK: %c4 = xor i32 %c3, 1
-; CHECK: ret i32 %c4
+; CHECK-LABEL: @test30(
+; CHECK-NEXT: [[C3:%.*]] = and i32 %c1, 255
+; CHECK-NEXT: [[C4:%.*]] = xor i32 [[C3]], 1
+; CHECK-NEXT: ret i32 [[C4]]
+;
+ %c2 = trunc i32 %c1 to i8 ; <i8> [#uses=1]
+ %c3 = xor i8 %c2, 1 ; <i8> [#uses=1]
+ %c4 = zext i8 %c3 to i32 ; <i32> [#uses=1]
+ ret i32 %c4
}
define i1 @test31(i64 %A) {
- %B = trunc i64 %A to i32 ; <i32> [#uses=1]
- %C = and i32 %B, 42 ; <i32> [#uses=1]
- %D = icmp eq i32 %C, 10 ; <i1> [#uses=1]
- ret i1 %D
-; CHECK: %C = and i64 %A, 42
-; CHECK: %D = icmp eq i64 %C, 10
-; CHECK: ret i1 %D
+; CHECK-LABEL: @test31(
+; CHECK-NEXT: [[C:%.*]] = and i64 %A, 42
+; CHECK-NEXT: [[D:%.*]] = icmp eq i64 [[C]], 10
+; CHECK-NEXT: ret i1 [[D]]
+;
+ %B = trunc i64 %A to i32 ; <i32> [#uses=1]
+ %C = and i32 %B, 42 ; <i32> [#uses=1]
+ %D = icmp eq i32 %C, 10 ; <i1> [#uses=1]
+ ret i1 %D
}
define i32 @test33(i32 %c1) {
- %x = bitcast i32 %c1 to float ; <float> [#uses=1]
- %y = bitcast float %x to i32 ; <i32> [#uses=1]
- ret i32 %y
-; CHECK: ret i32 %c1
+; CHECK-LABEL: @test33(
+; CHECK-NEXT: ret i32 %c1
+;
+ %x = bitcast i32 %c1 to float ; <float> [#uses=1]
+ %y = bitcast float %x to i32 ; <i32> [#uses=1]
+ ret i32 %y
}
define i16 @test34(i16 %a) {
- %c1 = zext i16 %a to i32 ; <i32> [#uses=1]
- %tmp21 = lshr i32 %c1, 8 ; <i32> [#uses=1]
- %c2 = trunc i32 %tmp21 to i16 ; <i16> [#uses=1]
- ret i16 %c2
-; CHECK: %tmp21 = lshr i16 %a, 8
-; CHECK: ret i16 %tmp21
+; CHECK-LABEL: @test34(
+; CHECK-NEXT: [[TMP21:%.*]] = lshr i16 %a, 8
+; CHECK-NEXT: ret i16 [[TMP21]]
+;
+ %c1 = zext i16 %a to i32 ; <i32> [#uses=1]
+ %tmp21 = lshr i32 %c1, 8 ; <i32> [#uses=1]
+ %c2 = trunc i32 %tmp21 to i16 ; <i16> [#uses=1]
+ ret i16 %c2
}
define i16 @test35(i16 %a) {
- %c1 = bitcast i16 %a to i16 ; <i16> [#uses=1]
- %tmp2 = lshr i16 %c1, 8 ; <i16> [#uses=1]
- %c2 = bitcast i16 %tmp2 to i16 ; <i16> [#uses=1]
- ret i16 %c2
-; CHECK: %tmp2 = lshr i16 %a, 8
-; CHECK: ret i16 %tmp2
+; CHECK-LABEL: @test35(
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i16 %a, 8
+; CHECK-NEXT: ret i16 [[TMP2]]
+;
+ %c1 = bitcast i16 %a to i16 ; <i16> [#uses=1]
+ %tmp2 = lshr i16 %c1, 8 ; <i16> [#uses=1]
+ %c2 = bitcast i16 %tmp2 to i16 ; <i16> [#uses=1]
+ ret i16 %c2
}
; icmp sgt i32 %a, -1
; rdar://6480391
define i1 @test36(i32 %a) {
- %b = lshr i32 %a, 31
- %c = trunc i32 %b to i8
- %d = icmp eq i8 %c, 0
- ret i1 %d
-; CHECK: %d = icmp sgt i32 %a, -1
-; CHECK: ret i1 %d
+; CHECK-LABEL: @test36(
+; CHECK-NEXT: [[D:%.*]] = icmp sgt i32 %a, -1
+; CHECK-NEXT: ret i1 [[D]]
+;
+ %b = lshr i32 %a, 31
+ %c = trunc i32 %b to i8
+ %d = icmp eq i8 %c, 0
+ ret i1 %d
}
; ret i1 false
define i1 @test37(i32 %a) {
- %b = lshr i32 %a, 31
- %c = or i32 %b, 512
- %d = trunc i32 %c to i8
- %e = icmp eq i8 %d, 11
- ret i1 %e
-; CHECK: ret i1 false
+; CHECK-LABEL: @test37(
+; CHECK-NEXT: ret i1 false
+;
+ %b = lshr i32 %a, 31
+ %c = or i32 %b, 512
+ %d = trunc i32 %c to i8
+ %e = icmp eq i8 %d, 11
+ ret i1 %e
}
define i64 @test38(i32 %a) {
- %1 = icmp eq i32 %a, -2
- %2 = zext i1 %1 to i8
- %3 = xor i8 %2, 1
- %4 = zext i8 %3 to i64
- ret i64 %4
-; CHECK: %1 = icmp ne i32 %a, -2
-; CHECK: %2 = zext i1 %1 to i64
-; CHECK: ret i64 %2
+; CHECK-LABEL: @test38(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 %a, -2
+; CHECK-NEXT: [[TMP2:%.*]] = zext i1 [[TMP1]] to i64
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
+ %1 = icmp eq i32 %a, -2
+ %2 = zext i1 %1 to i8
+ %3 = xor i8 %2, 1
+ %4 = zext i8 %3 to i64
+ ret i64 %4
}
define i16 @test39(i16 %a) {
- %tmp = zext i16 %a to i32
- %tmp21 = lshr i32 %tmp, 8
- %tmp5 = shl i32 %tmp, 8
- %tmp.upgrd.32 = or i32 %tmp21, %tmp5
- %tmp.upgrd.3 = trunc i32 %tmp.upgrd.32 to i16
- ret i16 %tmp.upgrd.3
; CHECK-LABEL: @test39(
-; CHECK: %tmp.upgrd.32 = call i16 @llvm.bswap.i16(i16 %a)
-; CHECK: ret i16 %tmp.upgrd.32
+; CHECK-NEXT: [[TMP_UPGRD_32:%.*]] = call i16 @llvm.bswap.i16(i16 %a)
+; CHECK-NEXT: ret i16 [[TMP_UPGRD_32]]
+;
+ %tmp = zext i16 %a to i32
+ %tmp21 = lshr i32 %tmp, 8
+ %tmp5 = shl i32 %tmp, 8
+ %tmp.upgrd.32 = or i32 %tmp21, %tmp5
+ %tmp.upgrd.3 = trunc i32 %tmp.upgrd.32 to i16
+ ret i16 %tmp.upgrd.3
}
define i16 @test40(i16 %a) {
- %tmp = zext i16 %a to i32
- %tmp21 = lshr i32 %tmp, 9
- %tmp5 = shl i32 %tmp, 8
- %tmp.upgrd.32 = or i32 %tmp21, %tmp5
- %tmp.upgrd.3 = trunc i32 %tmp.upgrd.32 to i16
- ret i16 %tmp.upgrd.3
; CHECK-LABEL: @test40(
-; CHECK: %tmp21 = lshr i16 %a, 9
-; CHECK: %tmp5 = shl i16 %a, 8
-; CHECK: %tmp.upgrd.32 = or i16 %tmp21, %tmp5
-; CHECK: ret i16 %tmp.upgrd.32
+; CHECK-NEXT: [[TMP21:%.*]] = lshr i16 %a, 9
+; CHECK-NEXT: [[TMP5:%.*]] = shl i16 %a, 8
+; CHECK-NEXT: [[TMP_UPGRD_32:%.*]] = or i16 [[TMP21]], [[TMP5]]
+; CHECK-NEXT: ret i16 [[TMP_UPGRD_32]]
+;
+ %tmp = zext i16 %a to i32
+ %tmp21 = lshr i32 %tmp, 9
+ %tmp5 = shl i32 %tmp, 8
+ %tmp.upgrd.32 = or i32 %tmp21, %tmp5
+ %tmp.upgrd.3 = trunc i32 %tmp.upgrd.32 to i16
+ ret i16 %tmp.upgrd.3
}
; PR1263
define i32* @test41(i32* %tmp1) {
- %tmp64 = bitcast i32* %tmp1 to { i32 }*
- %tmp65 = getelementptr { i32 }, { i32 }* %tmp64, i32 0, i32 0
- ret i32* %tmp65
; CHECK-LABEL: @test41(
-; CHECK: ret i32* %tmp1
+; CHECK-NEXT: ret i32* %tmp1
+;
+ %tmp64 = bitcast i32* %tmp1 to { i32 }*
+ %tmp65 = getelementptr { i32 }, { i32 }* %tmp64, i32 0, i32 0
+ ret i32* %tmp65
}
define i32 addrspace(1)* @test41_addrspacecast_smaller(i32* %tmp1) {
+; CHECK-LABEL: @test41_addrspacecast_smaller(
+; CHECK-NEXT: [[TMP65:%.*]] = addrspacecast i32* %tmp1 to i32 addrspace(1)*
+; CHECK-NEXT: ret i32 addrspace(1)* [[TMP65]]
+;
%tmp64 = addrspacecast i32* %tmp1 to { i32 } addrspace(1)*
%tmp65 = getelementptr { i32 }, { i32 } addrspace(1)* %tmp64, i32 0, i32 0
ret i32 addrspace(1)* %tmp65
-; CHECK-LABEL: @test41_addrspacecast_smaller(
-; CHECK: addrspacecast i32* %tmp1 to i32 addrspace(1)*
-; CHECK-NEXT: ret i32 addrspace(1)*
}
define i32* @test41_addrspacecast_larger(i32 addrspace(1)* %tmp1) {
+; CHECK-LABEL: @test41_addrspacecast_larger(
+; CHECK-NEXT: [[TMP65:%.*]] = addrspacecast i32 addrspace(1)* %tmp1 to i32*
+; CHECK-NEXT: ret i32* [[TMP65]]
+;
%tmp64 = addrspacecast i32 addrspace(1)* %tmp1 to { i32 }*
%tmp65 = getelementptr { i32 }, { i32 }* %tmp64, i32 0, i32 0
ret i32* %tmp65
-; CHECK-LABEL: @test41_addrspacecast_larger(
-; CHECK: addrspacecast i32 addrspace(1)* %tmp1 to i32*
-; CHECK-NEXT: ret i32*
}
define i32 @test42(i32 %X) {
- %Y = trunc i32 %X to i8 ; <i8> [#uses=1]
- %Z = zext i8 %Y to i32 ; <i32> [#uses=1]
- ret i32 %Z
; CHECK-LABEL: @test42(
-; CHECK: %Z = and i32 %X, 255
+; CHECK-NEXT: [[Z:%.*]] = and i32 %X, 255
+; CHECK-NEXT: ret i32 [[Z]]
+;
+ %Y = trunc i32 %X to i8 ; <i8> [#uses=1]
+ %Z = zext i8 %Y to i32 ; <i32> [#uses=1]
+ ret i32 %Z
}
; rdar://6598839
define zeroext i64 @test43(i8 zeroext %on_off) nounwind readonly {
- %A = zext i8 %on_off to i32
- %B = add i32 %A, -1
- %C = sext i32 %B to i64
- ret i64 %C ;; Should be (add (zext i8 -> i64), -1)
; CHECK-LABEL: @test43(
-; CHECK-NEXT: %A = zext i8 %on_off to i64
-; CHECK-NEXT: %B = add nsw i64 %A, -1
-; CHECK-NEXT: ret i64 %B
+; CHECK-NEXT: [[A:%.*]] = zext i8 %on_off to i64
+; CHECK-NEXT: [[B:%.*]] = add nsw i64 [[A]], -1
+; CHECK-NEXT: ret i64 [[B]]
+;
+ %A = zext i8 %on_off to i32
+ %B = add i32 %A, -1
+ %C = sext i32 %B to i64
+ ret i64 %C ;; Should be (add (zext i8 -> i64), -1)
}
define i64 @test44(i8 %T) {
- %A = zext i8 %T to i16
- %B = or i16 %A, 1234
- %C = zext i16 %B to i64
- ret i64 %C
; CHECK-LABEL: @test44(
-; CHECK-NEXT: %A = zext i8 %T to i64
-; CHECK-NEXT: %B = or i64 %A, 1234
-; CHECK-NEXT: ret i64 %B
+; CHECK-NEXT: [[A:%.*]] = zext i8 %T to i64
+; CHECK-NEXT: [[B:%.*]] = or i64 [[A]], 1234
+; CHECK-NEXT: ret i64 [[B]]
+;
+ %A = zext i8 %T to i16
+ %B = or i16 %A, 1234
+ %C = zext i16 %B to i64
+ ret i64 %C
}
define i64 @test45(i8 %A, i64 %Q) {
- %D = trunc i64 %Q to i32 ;; should be removed
- %B = sext i8 %A to i32
- %C = or i32 %B, %D
- %E = zext i32 %C to i64
- ret i64 %E
; CHECK-LABEL: @test45(
-; CHECK-NEXT: %B = sext i8 %A to i64
-; CHECK-NEXT: %C = or i64 %B, %Q
-; CHECK-NEXT: %E = and i64 %C, 4294967295
-; CHECK-NEXT: ret i64 %E
+; CHECK-NEXT: [[B:%.*]] = sext i8 %A to i64
+; CHECK-NEXT: [[C:%.*]] = or i64 [[B]], %Q
+; CHECK-NEXT: [[E:%.*]] = and i64 [[C]], 4294967295
+; CHECK-NEXT: ret i64 [[E]]
+;
+ %D = trunc i64 %Q to i32 ;; should be removed
+ %B = sext i8 %A to i32
+ %C = or i32 %B, %D
+ %E = zext i32 %C to i64
+ ret i64 %E
}
define i64 @test46(i64 %A) {
- %B = trunc i64 %A to i32
- %C = and i32 %B, 42
- %D = shl i32 %C, 8
- %E = zext i32 %D to i64
- ret i64 %E
; CHECK-LABEL: @test46(
-; CHECK-NEXT: %C = shl i64 %A, 8
-; CHECK-NEXT: %D = and i64 %C, 10752
-; CHECK-NEXT: ret i64 %D
+; CHECK-NEXT: [[C:%.*]] = shl i64 %A, 8
+; CHECK-NEXT: [[D:%.*]] = and i64 [[C]], 10752
+; CHECK-NEXT: ret i64 [[D]]
+;
+ %B = trunc i64 %A to i32
+ %C = and i32 %B, 42
+ %D = shl i32 %C, 8
+ %E = zext i32 %D to i64
+ ret i64 %E
}
define i64 @test47(i8 %A) {
- %B = sext i8 %A to i32
- %C = or i32 %B, 42
- %E = zext i32 %C to i64
- ret i64 %E
; CHECK-LABEL: @test47(
-; CHECK-NEXT: %B = sext i8 %A to i64
-; CHECK-NEXT: %C = and i64 %B, 4294967253
-; CHECK-NEXT: %E = or i64 %C, 42
-; CHECK-NEXT: ret i64 %E
+; CHECK-NEXT: [[B:%.*]] = sext i8 %A to i64
+; CHECK-NEXT: [[C:%.*]] = and i64 [[B]], 4294967253
+; CHECK-NEXT: [[E:%.*]] = or i64 [[C]], 42
+; CHECK-NEXT: ret i64 [[E]]
+;
+ %B = sext i8 %A to i32
+ %C = or i32 %B, 42
+ %E = zext i32 %C to i64
+ ret i64 %E
}
-define i64 @test48(i8 %A, i8 %a) {
- %b = zext i8 %a to i32
- %B = zext i8 %A to i32
- %C = shl i32 %B, 8
- %D = or i32 %C, %b
+define i64 @test48(i8 %A1, i8 %a2) {
+; CHECK-LABEL: @test48(
+; CHECK-NEXT: [[Z2:%.*]] = zext i8 %A1 to i32
+; CHECK-NEXT: [[C:%.*]] = shl nuw nsw i32 [[Z2]], 8
+; CHECK-NEXT: [[D:%.*]] = or i32 [[C]], [[Z2]]
+; CHECK-NEXT: [[E:%.*]] = zext i32 [[D]] to i64
+; CHECK-NEXT: ret i64 [[E]]
+;
+ %Z1 = zext i8 %a2 to i32
+ %Z2 = zext i8 %A1 to i32
+ %C = shl i32 %Z2, 8
+ %D = or i32 %C, %Z2
%E = zext i32 %D to i64
ret i64 %E
-; CHECK-LABEL: @test48(
-; CHECK-NEXT: %b = zext i8 %a to i64
-; CHECK-NEXT: %B = zext i8 %A to i64
-; CHECK-NEXT: %C = shl nuw nsw i64 %B, 8
-; CHECK-NEXT: %D = or i64 %C, %b
-; CHECK-NEXT: ret i64 %D
}
define i64 @test49(i64 %A) {
- %B = trunc i64 %A to i32
- %C = or i32 %B, 1
- %D = sext i32 %C to i64
- ret i64 %D
; CHECK-LABEL: @test49(
-; CHECK-NEXT: %C = shl i64 %A, 32
-; CHECK-NEXT: ashr exact i64 %C, 32
-; CHECK-NEXT: %D = or i64 {{.*}}, 1
-; CHECK-NEXT: ret i64 %D
+; CHECK-NEXT: [[C:%.*]] = shl i64 %A, 32
+; CHECK-NEXT: [[SEXT:%.*]] = ashr exact i64 [[C]], 32
+; CHECK-NEXT: [[D:%.*]] = or i64 [[SEXT]], 1
+; CHECK-NEXT: ret i64 [[D]]
+;
+ %B = trunc i64 %A to i32
+ %C = or i32 %B, 1
+ %D = sext i32 %C to i64
+ ret i64 %D
}
define i64 @test50(i64 %A) {
+; CHECK-LABEL: @test50(
+; CHECK-NEXT: [[A:%.*]] = lshr i64 %A, 2
+; CHECK-NEXT: [[D:%.*]] = shl i64 [[A]], 32
+; CHECK-NEXT: [[SEXT:%.*]] = add i64 [[D]], -4294967296
+; CHECK-NEXT: [[E:%.*]] = ashr exact i64 [[SEXT]], 32
+; CHECK-NEXT: ret i64 [[E]]
+;
%a = lshr i64 %A, 2
%B = trunc i64 %a to i32
%D = add i32 %B, -1
%E = sext i32 %D to i64
ret i64 %E
-; CHECK-LABEL: @test50(
; lshr+shl will be handled by DAGCombine.
-; CHECK-NEXT: lshr i64 %A, 2
-; CHECK-NEXT: shl i64 %a, 32
-; CHECK-NEXT: add i64 {{.*}}, -4294967296
-; CHECK-NEXT: %E = ashr exact i64 {{.*}}, 32
-; CHECK-NEXT: ret i64 %E
}
define i64 @test51(i64 %A, i1 %cond) {
+; CHECK-LABEL: @test51(
+; CHECK-NEXT: [[C:%.*]] = and i64 %A, 4294967294
+; CHECK-NEXT: [[D:%.*]] = or i64 %A, 1
+; CHECK-NEXT: [[E:%.*]] = select i1 %cond, i64 [[C]], i64 [[D]]
+; CHECK-NEXT: [[SEXT:%.*]] = shl i64 [[E]], 32
+; CHECK-NEXT: [[F:%.*]] = ashr exact i64 [[SEXT]], 32
+; CHECK-NEXT: ret i64 [[F]]
+;
%B = trunc i64 %A to i32
%C = and i32 %B, -2
%D = or i32 %B, 1
%E = select i1 %cond, i32 %C, i32 %D
%F = sext i32 %E to i64
ret i64 %F
-; CHECK-LABEL: @test51(
-; CHECK-NEXT: %C = and i64 %A, 4294967294
-; CHECK-NEXT: %D = or i64 %A, 1
-; CHECK-NEXT: %E = select i1 %cond, i64 %C, i64 %D
-; CHECK-NEXT: %sext = shl i64 %E, 32
-; CHECK-NEXT: %F = ashr exact i64 %sext, 32
-; CHECK-NEXT: ret i64 %F
}
define i32 @test52(i64 %A) {
+; CHECK-LABEL: @test52(
+; CHECK-NEXT: [[B:%.*]] = trunc i64 %A to i32
+; CHECK-NEXT: [[C:%.*]] = and i32 [[B]], 7224
+; CHECK-NEXT: [[D:%.*]] = or i32 [[C]], 32962
+; CHECK-NEXT: ret i32 [[D]]
+;
%B = trunc i64 %A to i16
%C = or i16 %B, -32574
%D = and i16 %C, -25350
%E = zext i16 %D to i32
ret i32 %E
-; CHECK-LABEL: @test52(
-; CHECK-NEXT: %B = trunc i64 %A to i32
-; CHECK-NEXT: %C = and i32 %B, 7224
-; CHECK-NEXT: %D = or i32 %C, 32962
-; CHECK-NEXT: ret i32 %D
}
define i64 @test53(i32 %A) {
+; CHECK-LABEL: @test53(
+; CHECK-NEXT: [[B:%.*]] = zext i32 %A to i64
+; CHECK-NEXT: [[C:%.*]] = and i64 [[B]], 7224
+; CHECK-NEXT: [[D:%.*]] = or i64 [[C]], 32962
+; CHECK-NEXT: ret i64 [[D]]
+;
%B = trunc i32 %A to i16
%C = or i16 %B, -32574
%D = and i16 %C, -25350
%E = zext i16 %D to i64
ret i64 %E
-; CHECK-LABEL: @test53(
-; CHECK-NEXT: %B = zext i32 %A to i64
-; CHECK-NEXT: %C = and i64 %B, 7224
-; CHECK-NEXT: %D = or i64 %C, 32962
-; CHECK-NEXT: ret i64 %D
}
define i32 @test54(i64 %A) {
+; CHECK-LABEL: @test54(
+; CHECK-NEXT: [[B:%.*]] = trunc i64 %A to i32
+; CHECK-NEXT: [[C:%.*]] = and i32 [[B]], 7224
+; CHECK-NEXT: [[D:%.*]] = or i32 [[C]], -32574
+; CHECK-NEXT: ret i32 [[D]]
+;
%B = trunc i64 %A to i16
%C = or i16 %B, -32574
%D = and i16 %C, -25350
%E = sext i16 %D to i32
ret i32 %E
-; CHECK-LABEL: @test54(
-; CHECK-NEXT: %B = trunc i64 %A to i32
-; CHECK-NEXT: %C = and i32 %B, 7224
-; CHECK-NEXT: %D = or i32 %C, -32574
-; CHECK-NEXT: ret i32 %D
}
define i64 @test55(i32 %A) {
+; CHECK-LABEL: @test55(
+; CHECK-NEXT: [[B:%.*]] = zext i32 %A to i64
+; CHECK-NEXT: [[C:%.*]] = and i64 [[B]], 7224
+; CHECK-NEXT: [[D:%.*]] = or i64 [[C]], -32574
+; CHECK-NEXT: ret i64 [[D]]
+;
%B = trunc i32 %A to i16
%C = or i16 %B, -32574
%D = and i16 %C, -25350
%E = sext i16 %D to i64
ret i64 %E
-; CHECK-LABEL: @test55(
-; CHECK-NEXT: %B = zext i32 %A to i64
-; CHECK-NEXT: %C = and i64 %B, 7224
-; CHECK-NEXT: %D = or i64 %C, -32574
-; CHECK-NEXT: ret i64 %D
}
define i64 @test56(i16 %A) nounwind {
+; CHECK-LABEL: @test56(
+; CHECK-NEXT: [[TMP353:%.*]] = sext i16 %A to i64
+; CHECK-NEXT: [[TMP354:%.*]] = lshr i64 [[TMP353]], 5
+; CHECK-NEXT: [[TMP355:%.*]] = and i64 [[TMP354]], 134217727
+; CHECK-NEXT: ret i64 [[TMP355]]
+;
%tmp353 = sext i16 %A to i32
%tmp354 = lshr i32 %tmp353, 5
%tmp355 = zext i32 %tmp354 to i64
ret i64 %tmp355
-; CHECK-LABEL: @test56(
-; CHECK-NEXT: %tmp353 = sext i16 %A to i64
-; CHECK-NEXT: %tmp354 = lshr i64 %tmp353, 5
-; CHECK-NEXT: %tmp355 = and i64 %tmp354, 134217727
-; CHECK-NEXT: ret i64 %tmp355
}
define i64 @test57(i64 %A) nounwind {
- %B = trunc i64 %A to i32
- %C = lshr i32 %B, 8
- %E = zext i32 %C to i64
- ret i64 %E
; CHECK-LABEL: @test57(
-; CHECK-NEXT: %C = lshr i64 %A, 8
-; CHECK-NEXT: %E = and i64 %C, 16777215
-; CHECK-NEXT: ret i64 %E
+; CHECK-NEXT: [[C:%.*]] = lshr i64 %A, 8
+; CHECK-NEXT: [[E:%.*]] = and i64 [[C]], 16777215
+; CHECK-NEXT: ret i64 [[E]]
+;
+ %B = trunc i64 %A to i32
+ %C = lshr i32 %B, 8
+ %E = zext i32 %C to i64
+ ret i64 %E
}
define i64 @test58(i64 %A) nounwind {
- %B = trunc i64 %A to i32
- %C = lshr i32 %B, 8
- %D = or i32 %C, 128
- %E = zext i32 %D to i64
- ret i64 %E
-
; CHECK-LABEL: @test58(
-; CHECK-NEXT: %C = lshr i64 %A, 8
-; CHECK-NEXT: %D = and i64 %C, 16777087
-; CHECK-NEXT: %E = or i64 %D, 128
-; CHECK-NEXT: ret i64 %E
+; CHECK-NEXT: [[C:%.*]] = lshr i64 %A, 8
+; CHECK-NEXT: [[D:%.*]] = and i64 [[C]], 16777087
+; CHECK-NEXT: [[E:%.*]] = or i64 [[D]], 128
+; CHECK-NEXT: ret i64 [[E]]
+;
+ %B = trunc i64 %A to i32
+ %C = lshr i32 %B, 8
+ %D = or i32 %C, 128
+ %E = zext i32 %D to i64
+ ret i64 %E
+
}
define i64 @test59(i8 %A, i8 %B) nounwind {
+; CHECK-LABEL: @test59(
+; CHECK-NEXT: [[C:%.*]] = zext i8 %A to i64
+; CHECK-NEXT: [[D:%.*]] = shl nuw nsw i64 [[C]], 4
+; CHECK-NEXT: [[E:%.*]] = and i64 [[D]], 48
+; CHECK-NEXT: [[F:%.*]] = zext i8 %B to i64
+; CHECK-NEXT: [[G:%.*]] = lshr i64 [[F]], 4
+; CHECK-NEXT: [[H:%.*]] = or i64 [[G]], [[E]]
+; CHECK-NEXT: ret i64 [[H]]
+;
%C = zext i8 %A to i32
%D = shl i32 %C, 4
%E = and i32 %D, 48
@@ -608,107 +735,117 @@ define i64 @test59(i8 %A, i8 %B) nounwind {
%H = or i32 %G, %E
%I = zext i32 %H to i64
ret i64 %I
-; CHECK-LABEL: @test59(
-; CHECK-NEXT: %C = zext i8 %A to i64
-; CHECK-NOT: i32
-; CHECK: %F = zext i8 %B to i64
-; CHECK-NOT: i32
-; CHECK: ret i64 %H
}
define <3 x i32> @test60(<4 x i32> %call4) nounwind {
+; CHECK-LABEL: @test60(
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <4 x i32> %call4, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
+; CHECK-NEXT: ret <3 x i32> [[TMP10]]
+;
%tmp11 = bitcast <4 x i32> %call4 to i128
%tmp9 = trunc i128 %tmp11 to i96
%tmp10 = bitcast i96 %tmp9 to <3 x i32>
ret <3 x i32> %tmp10
-
-; CHECK-LABEL: @test60(
-; CHECK-NEXT: shufflevector
-; CHECK-NEXT: ret
+
}
define <4 x i32> @test61(<3 x i32> %call4) nounwind {
+; CHECK-LABEL: @test61(
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <3 x i32> %call4, <3 x i32> <i32 0, i32 undef, i32 undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x i32> [[TMP10]]
+;
%tmp11 = bitcast <3 x i32> %call4 to i96
%tmp9 = zext i96 %tmp11 to i128
%tmp10 = bitcast i128 %tmp9 to <4 x i32>
ret <4 x i32> %tmp10
-; CHECK-LABEL: @test61(
-; CHECK-NEXT: shufflevector
-; CHECK-NEXT: ret
}
define <4 x i32> @test62(<3 x float> %call4) nounwind {
+; CHECK-LABEL: @test62(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <3 x float> %call4 to <3 x i32>
+; CHECK-NEXT: [[TMP10:%.*]] = shufflevector <3 x i32> [[TMP1]], <3 x i32> <i32 0, i32 undef, i32 undef>, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x i32> [[TMP10]]
+;
%tmp11 = bitcast <3 x float> %call4 to i96
%tmp9 = zext i96 %tmp11 to i128
%tmp10 = bitcast i128 %tmp9 to <4 x i32>
ret <4 x i32> %tmp10
-; CHECK-LABEL: @test62(
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: shufflevector
-; CHECK-NEXT: ret
}
; PR7311 - Don't create invalid IR on scalar->vector cast.
define <2 x float> @test63(i64 %tmp8) nounwind {
+; CHECK-LABEL: @test63(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[A:%.*]] = bitcast i64 %tmp8 to <2 x i32>
+; CHECK-NEXT: [[VCVT_I:%.*]] = uitofp <2 x i32> [[A]] to <2 x float>
+; CHECK-NEXT: ret <2 x float> [[VCVT_I]]
+;
entry:
- %a = bitcast i64 %tmp8 to <2 x i32>
- %vcvt.i = uitofp <2 x i32> %a to <2 x float>
+ %a = bitcast i64 %tmp8 to <2 x i32>
+ %vcvt.i = uitofp <2 x i32> %a to <2 x float>
ret <2 x float> %vcvt.i
-; CHECK-LABEL: @test63(
-; CHECK: bitcast
-; CHECK: uitofp
}
define <4 x float> @test64(<4 x float> %c) nounwind {
+; CHECK-LABEL: @test64(
+; CHECK-NEXT: ret <4 x float> %c
+;
%t0 = bitcast <4 x float> %c to <4 x i32>
%t1 = bitcast <4 x i32> %t0 to <4 x float>
ret <4 x float> %t1
-; CHECK-LABEL: @test64(
-; CHECK-NEXT: ret <4 x float> %c
}
define <4 x float> @test65(<4 x float> %c) nounwind {
+; CHECK-LABEL: @test65(
+; CHECK-NEXT: ret <4 x float> %c
+;
%t0 = bitcast <4 x float> %c to <2 x double>
%t1 = bitcast <2 x double> %t0 to <4 x float>
ret <4 x float> %t1
-; CHECK-LABEL: @test65(
-; CHECK-NEXT: ret <4 x float> %c
}
define <2 x float> @test66(<2 x float> %c) nounwind {
+; CHECK-LABEL: @test66(
+; CHECK-NEXT: ret <2 x float> %c
+;
%t0 = bitcast <2 x float> %c to double
%t1 = bitcast double %t0 to <2 x float>
ret <2 x float> %t1
-; CHECK-LABEL: @test66(
-; CHECK-NEXT: ret <2 x float> %c
}
define float @test2c() {
- ret float extractelement (<2 x float> bitcast (double bitcast (<2 x float> <float -1.000000e+00, float -1.000000e+00> to double) to <2 x float>), i32 0)
; CHECK-LABEL: @test2c(
-; CHECK-NOT: extractelement
+; CHECK-NEXT: ret float -1.000000e+00
+;
+ ret float extractelement (<2 x float> bitcast (double bitcast (<2 x float> <float -1.000000e+00, float -1.000000e+00> to double) to <2 x float>), i32 0)
}
define i64 @test_mmx(<2 x i32> %c) nounwind {
+; CHECK-LABEL: @test_mmx(
+; CHECK-NEXT: [[C:%.*]] = bitcast <2 x i32> %c to i64
+; CHECK-NEXT: ret i64 [[C]]
+;
%A = bitcast <2 x i32> %c to x86_mmx
%B = bitcast x86_mmx %A to <2 x i32>
%C = bitcast <2 x i32> %B to i64
ret i64 %C
-; CHECK-LABEL: @test_mmx(
-; CHECK-NOT: x86_mmx
}
define i64 @test_mmx_const(<2 x i32> %c) nounwind {
+; CHECK-LABEL: @test_mmx_const(
+; CHECK-NEXT: ret i64 0
+;
%A = bitcast <2 x i32> zeroinitializer to x86_mmx
%B = bitcast x86_mmx %A to <2 x i32>
%C = bitcast <2 x i32> %B to i64
ret i64 %C
-; CHECK-LABEL: @test_mmx_const(
-; CHECK-NOT: x86_mmx
}
; PR12514
define i1 @test67(i1 %a, i32 %b) {
+; CHECK-LABEL: @test67(
+; CHECK-NEXT: ret i1 false
+;
%tmp2 = zext i1 %a to i32
%conv6 = xor i32 %tmp2, 1
%and = and i32 %b, %conv6
@@ -718,31 +855,31 @@ define i1 @test67(i1 %a, i32 %b) {
%trunc = trunc i32 %conv.i.i to i8
%tobool.i = icmp eq i8 %trunc, 0
ret i1 %tobool.i
-; CHECK-LABEL: @test67(
-; CHECK: ret i1 false
}
%s = type { i32, i32, i16 }
define %s @test68(%s *%p, i64 %i) {
; CHECK-LABEL: @test68(
+; CHECK-NEXT: [[PP1:%.*]] = getelementptr %s, %s* %p, i64 %i
+; CHECK-NEXT: [[L:%.*]] = load %s, %s* [[PP1]], align 4
+; CHECK-NEXT: ret %s [[L]]
+;
%o = mul i64 %i, 12
%q = bitcast %s* %p to i8*
%pp = getelementptr inbounds i8, i8* %q, i64 %o
-; CHECK-NEXT: getelementptr %s, %s*
%r = bitcast i8* %pp to %s*
%l = load %s, %s* %r
-; CHECK-NEXT: load %s, %s*
ret %s %l
-; CHECK-NEXT: ret %s
}
; addrspacecasts should be eliminated.
define %s @test68_addrspacecast(%s* %p, i64 %i) {
; CHECK-LABEL: @test68_addrspacecast(
-; CHECK-NEXT: getelementptr %s, %s*
-; CHECK-NEXT: load %s, %s*
-; CHECK-NEXT: ret %s
+; CHECK-NEXT: [[PP1:%.*]] = getelementptr %s, %s* %p, i64 %i
+; CHECK-NEXT: [[L:%.*]] = load %s, %s* [[PP1]], align 4
+; CHECK-NEXT: ret %s [[L]]
+;
%o = mul i64 %i, 12
%q = addrspacecast %s* %p to i8 addrspace(2)*
%pp = getelementptr inbounds i8, i8 addrspace(2)* %q, i64 %o
@@ -753,10 +890,11 @@ define %s @test68_addrspacecast(%s* %p, i64 %i) {
define %s @test68_addrspacecast_2(%s* %p, i64 %i) {
; CHECK-LABEL: @test68_addrspacecast_2(
-; CHECK-NEXT: getelementptr %s, %s* %p
-; CHECK-NEXT: addrspacecast
-; CHECK-NEXT: load %s, %s addrspace(1)*
-; CHECK-NEXT: ret %s
+; CHECK-NEXT: [[PP1:%.*]] = getelementptr %s, %s* %p, i64 %i
+; CHECK-NEXT: [[R:%.*]] = addrspacecast %s* [[PP1]] to %s addrspace(1)*
+; CHECK-NEXT: [[L:%.*]] = load %s, %s addrspace(1)* [[R]], align 4
+; CHECK-NEXT: ret %s [[L]]
+;
%o = mul i64 %i, 12
%q = addrspacecast %s* %p to i8 addrspace(2)*
%pp = getelementptr inbounds i8, i8 addrspace(2)* %q, i64 %o
@@ -767,106 +905,118 @@ define %s @test68_addrspacecast_2(%s* %p, i64 %i) {
define %s @test68_as1(%s addrspace(1)* %p, i32 %i) {
; CHECK-LABEL: @test68_as1(
+; CHECK-NEXT: [[PP1:%.*]] = getelementptr %s, %s addrspace(1)* %p, i32 %i
+; CHECK-NEXT: [[L:%.*]] = load %s, %s addrspace(1)* [[PP1]], align 4
+; CHECK-NEXT: ret %s [[L]]
+;
%o = mul i32 %i, 12
%q = bitcast %s addrspace(1)* %p to i8 addrspace(1)*
%pp = getelementptr inbounds i8, i8 addrspace(1)* %q, i32 %o
-; CHECK-NEXT: getelementptr %s, %s addrspace(1)*
%r = bitcast i8 addrspace(1)* %pp to %s addrspace(1)*
%l = load %s, %s addrspace(1)* %r
-; CHECK-NEXT: load %s, %s addrspace(1)*
ret %s %l
-; CHECK-NEXT: ret %s
}
define double @test69(double *%p, i64 %i) {
; CHECK-LABEL: @test69(
+; CHECK-NEXT: [[PP1:%.*]] = getelementptr inbounds double, double* %p, i64 %i
+; CHECK-NEXT: [[L:%.*]] = load double, double* [[PP1]], align 8
+; CHECK-NEXT: ret double [[L]]
+;
%o = shl nsw i64 %i, 3
%q = bitcast double* %p to i8*
%pp = getelementptr inbounds i8, i8* %q, i64 %o
-; CHECK-NEXT: getelementptr inbounds double, double*
%r = bitcast i8* %pp to double*
%l = load double, double* %r
-; CHECK-NEXT: load double, double*
ret double %l
-; CHECK-NEXT: ret double
}
define %s @test70(%s *%p, i64 %i) {
; CHECK-LABEL: @test70(
+; CHECK-NEXT: [[O:%.*]] = mul nsw i64 %i, 3
+; CHECK-NEXT: [[PP1:%.*]] = getelementptr inbounds %s, %s* %p, i64 [[O]]
+; CHECK-NEXT: [[L:%.*]] = load %s, %s* [[PP1]], align 4
+; CHECK-NEXT: ret %s [[L]]
+;
%o = mul nsw i64 %i, 36
-; CHECK-NEXT: mul nsw i64 %i, 3
%q = bitcast %s* %p to i8*
%pp = getelementptr inbounds i8, i8* %q, i64 %o
-; CHECK-NEXT: getelementptr inbounds %s, %s*
%r = bitcast i8* %pp to %s*
%l = load %s, %s* %r
-; CHECK-NEXT: load %s, %s*
ret %s %l
-; CHECK-NEXT: ret %s
}
define double @test71(double *%p, i64 %i) {
; CHECK-LABEL: @test71(
+; CHECK-NEXT: [[O:%.*]] = shl i64 %i, 2
+; CHECK-NEXT: [[PP1:%.*]] = getelementptr double, double* %p, i64 [[O]]
+; CHECK-NEXT: [[L:%.*]] = load double, double* [[PP1]], align 8
+; CHECK-NEXT: ret double [[L]]
+;
%o = shl i64 %i, 5
-; CHECK-NEXT: shl i64 %i, 2
%q = bitcast double* %p to i8*
%pp = getelementptr i8, i8* %q, i64 %o
-; CHECK-NEXT: getelementptr double, double*
%r = bitcast i8* %pp to double*
%l = load double, double* %r
-; CHECK-NEXT: load double, double*
ret double %l
-; CHECK-NEXT: ret double
}
define double @test72(double *%p, i32 %i) {
; CHECK-LABEL: @test72(
+; CHECK-NEXT: [[O:%.*]] = sext i32 %i to i64
+; CHECK-NEXT: [[PP1:%.*]] = getelementptr inbounds double, double* %p, i64 [[O]]
+; CHECK-NEXT: [[L:%.*]] = load double, double* [[PP1]], align 8
+; CHECK-NEXT: ret double [[L]]
+;
%so = shl nsw i32 %i, 3
%o = sext i32 %so to i64
-; CHECK-NEXT: sext i32 %i to i64
%q = bitcast double* %p to i8*
%pp = getelementptr inbounds i8, i8* %q, i64 %o
-; CHECK-NEXT: getelementptr inbounds double, double*
%r = bitcast i8* %pp to double*
%l = load double, double* %r
-; CHECK-NEXT: load double, double*
ret double %l
-; CHECK-NEXT: ret double
}
define double @test73(double *%p, i128 %i) {
; CHECK-LABEL: @test73(
+; CHECK-NEXT: [[O:%.*]] = trunc i128 %i to i64
+; CHECK-NEXT: [[PP1:%.*]] = getelementptr double, double* %p, i64 [[O]]
+; CHECK-NEXT: [[L:%.*]] = load double, double* [[PP1]], align 8
+; CHECK-NEXT: ret double [[L]]
+;
%lo = shl nsw i128 %i, 3
%o = trunc i128 %lo to i64
-; CHECK-NEXT: trunc i128 %i to i64
%q = bitcast double* %p to i8*
%pp = getelementptr inbounds i8, i8* %q, i64 %o
-; CHECK-NEXT: getelementptr double, double*
%r = bitcast i8* %pp to double*
%l = load double, double* %r
-; CHECK-NEXT: load double, double*
ret double %l
-; CHECK-NEXT: ret double
}
define double @test74(double *%p, i64 %i) {
; CHECK-LABEL: @test74(
+; CHECK-NEXT: [[PP1:%.*]] = getelementptr inbounds double, double* %p, i64 %i
+; CHECK-NEXT: [[L:%.*]] = load double, double* [[PP1]], align 8
+; CHECK-NEXT: ret double [[L]]
+;
%q = bitcast double* %p to i64*
%pp = getelementptr inbounds i64, i64* %q, i64 %i
-; CHECK-NEXT: getelementptr inbounds double, double*
%r = bitcast i64* %pp to double*
%l = load double, double* %r
-; CHECK-NEXT: load double, double*
ret double %l
-; CHECK-NEXT: ret double
}
define i32* @test75(i32* %p, i32 %x) {
; CHECK-LABEL: @test75(
+; CHECK-NEXT: [[Y:%.*]] = shl i32 %x, 3
+; CHECK-NEXT: [[Z:%.*]] = sext i32 [[Y]] to i64
+; CHECK-NEXT: [[Q:%.*]] = bitcast i32* %p to i8*
+; CHECK-NEXT: [[R:%.*]] = getelementptr i8, i8* [[Q]], i64 [[Z]]
+; CHECK-NEXT: [[S:%.*]] = bitcast i8* [[R]] to i32*
+; CHECK-NEXT: ret i32* [[S]]
+;
%y = shl i32 %x, 3
-; CHECK-NEXT: shl i32 %x, 3
%z = sext i32 %y to i64
-; CHECK-NEXT: sext i32 %y to i64
%q = bitcast i32* %p to i8*
%r = getelementptr i8, i8* %q, i64 %z
%s = bitcast i8* %r to i32*
@@ -875,71 +1025,82 @@ define i32* @test75(i32* %p, i32 %x) {
define %s @test76(%s *%p, i64 %i, i64 %j) {
; CHECK-LABEL: @test76(
+; CHECK-NEXT: [[O2:%.*]] = mul i64 %i, %j
+; CHECK-NEXT: [[PP1:%.*]] = getelementptr %s, %s* %p, i64 [[O2]]
+; CHECK-NEXT: [[L:%.*]] = load %s, %s* [[PP1]], align 4
+; CHECK-NEXT: ret %s [[L]]
+;
%o = mul i64 %i, 12
%o2 = mul nsw i64 %o, %j
-; CHECK-NEXT: %o2 = mul i64 %i, %j
%q = bitcast %s* %p to i8*
%pp = getelementptr inbounds i8, i8* %q, i64 %o2
-; CHECK-NEXT: getelementptr %s, %s* %p, i64 %o2
%r = bitcast i8* %pp to %s*
%l = load %s, %s* %r
-; CHECK-NEXT: load %s, %s*
ret %s %l
-; CHECK-NEXT: ret %s
}
define %s @test77(%s *%p, i64 %i, i64 %j) {
; CHECK-LABEL: @test77(
+; CHECK-NEXT: [[O:%.*]] = mul nsw i64 %i, 3
+; CHECK-NEXT: [[O2:%.*]] = mul nsw i64 [[O]], %j
+; CHECK-NEXT: [[PP1:%.*]] = getelementptr inbounds %s, %s* %p, i64 [[O2]]
+; CHECK-NEXT: [[L:%.*]] = load %s, %s* [[PP1]], align 4
+; CHECK-NEXT: ret %s [[L]]
+;
%o = mul nsw i64 %i, 36
%o2 = mul nsw i64 %o, %j
-; CHECK-NEXT: %o = mul nsw i64 %i, 3
-; CHECK-NEXT: %o2 = mul nsw i64 %o, %j
%q = bitcast %s* %p to i8*
%pp = getelementptr inbounds i8, i8* %q, i64 %o2
-; CHECK-NEXT: getelementptr inbounds %s, %s* %p, i64 %o2
%r = bitcast i8* %pp to %s*
%l = load %s, %s* %r
-; CHECK-NEXT: load %s, %s*
ret %s %l
-; CHECK-NEXT: ret %s
}
define %s @test78(%s *%p, i64 %i, i64 %j, i32 %k, i32 %l, i128 %m, i128 %n) {
; CHECK-LABEL: @test78(
+; CHECK-NEXT: [[A:%.*]] = mul nsw i32 %k, 3
+; CHECK-NEXT: [[B:%.*]] = mul nsw i32 [[A]], %l
+; CHECK-NEXT: [[C:%.*]] = sext i32 [[B]] to i128
+; CHECK-NEXT: [[D:%.*]] = mul nsw i128 [[C]], %m
+; CHECK-NEXT: [[E:%.*]] = mul i128 [[D]], %n
+; CHECK-NEXT: [[F:%.*]] = trunc i128 [[E]] to i64
+; CHECK-NEXT: [[G:%.*]] = mul i64 [[F]], %i
+; CHECK-NEXT: [[H:%.*]] = mul i64 [[G]], %j
+; CHECK-NEXT: [[PP1:%.*]] = getelementptr %s, %s* %p, i64 [[H]]
+; CHECK-NEXT: [[LOAD:%.*]] = load %s, %s* [[PP1]], align 4
+; CHECK-NEXT: ret %s [[LOAD]]
+;
%a = mul nsw i32 %k, 36
-; CHECK-NEXT: mul nsw i32 %k, 3
%b = mul nsw i32 %a, %l
-; CHECK-NEXT: mul nsw i32 %a, %l
%c = sext i32 %b to i128
-; CHECK-NEXT: sext i32 %b to i128
%d = mul nsw i128 %c, %m
-; CHECK-NEXT: mul nsw i128 %c, %m
%e = mul i128 %d, %n
-; CHECK-NEXT: mul i128 %d, %n
%f = trunc i128 %e to i64
-; CHECK-NEXT: trunc i128 %e to i64
%g = mul nsw i64 %f, %i
-; CHECK-NEXT: mul i64 %f, %i
%h = mul nsw i64 %g, %j
-; CHECK-NEXT: mul i64 %g, %j
%q = bitcast %s* %p to i8*
%pp = getelementptr inbounds i8, i8* %q, i64 %h
-; CHECK-NEXT: getelementptr %s, %s* %p, i64 %h
%r = bitcast i8* %pp to %s*
%load = load %s, %s* %r
-; CHECK-NEXT: load %s, %s*
ret %s %load
-; CHECK-NEXT: ret %s
}
define %s @test79(%s *%p, i64 %i, i32 %j) {
; CHECK-LABEL: @test79(
+; CHECK-NEXT: [[A:%.*]] = mul nsw i64 %i, 36
+; CHECK-NEXT: [[B:%.*]] = trunc i64 [[A]] to i32
+; CHECK-NEXT: [[C:%.*]] = mul i32 [[B]], %j
+; CHECK-NEXT: [[Q:%.*]] = bitcast %s* %p to i8*
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 [[C]] to i64
+; CHECK-NEXT: [[PP:%.*]] = getelementptr inbounds i8, i8* [[Q]], i64 [[TMP1]]
+; CHECK-NEXT: [[R:%.*]] = bitcast i8* [[PP]] to %s*
+; CHECK-NEXT: [[L:%.*]] = load %s, %s* [[R]], align 4
+; CHECK-NEXT: ret %s [[L]]
+;
%a = mul nsw i64 %i, 36
-; CHECK: mul nsw i64 %i, 36
%b = trunc i64 %a to i32
%c = mul i32 %b, %j
%q = bitcast %s* %p to i8*
-; CHECK: bitcast
%pp = getelementptr inbounds i8, i8* %q, i32 %c
%r = bitcast i8* %pp to %s*
%l = load %s, %s* %r
@@ -948,23 +1109,25 @@ define %s @test79(%s *%p, i64 %i, i32 %j) {
define double @test80([100 x double]* %p, i32 %i) {
; CHECK-LABEL: @test80(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i32 %i to i64
+; CHECK-NEXT: [[PP1:%.*]] = getelementptr [100 x double], [100 x double]* %p, i64 0, i64 [[TMP1]]
+; CHECK-NEXT: [[L:%.*]] = load double, double* [[PP1]], align 8
+; CHECK-NEXT: ret double [[L]]
+;
%tmp = shl nsw i32 %i, 3
-; CHECK-NEXT: sext i32 %i to i64
%q = bitcast [100 x double]* %p to i8*
%pp = getelementptr i8, i8* %q, i32 %tmp
-; CHECK-NEXT: getelementptr [100 x double], [100 x double]*
%r = bitcast i8* %pp to double*
%l = load double, double* %r
-; CHECK-NEXT: load double, double*
ret double %l
-; CHECK-NEXT: ret double
}
define double @test80_addrspacecast([100 x double] addrspace(1)* %p, i32 %i) {
; CHECK-LABEL: @test80_addrspacecast(
-; CHECK-NEXT: getelementptr [100 x double], [100 x double] addrspace(1)* %p
-; CHECK-NEXT: load double, double addrspace(1)*
-; CHECK-NEXT: ret double
+; CHECK-NEXT: [[PP1:%.*]] = getelementptr [100 x double], [100 x double] addrspace(1)* %p, i32 0, i32 %i
+; CHECK-NEXT: [[L:%.*]] = load double, double addrspace(1)* [[PP1]], align 8
+; CHECK-NEXT: ret double [[L]]
+;
%tmp = shl nsw i32 %i, 3
%q = addrspacecast [100 x double] addrspace(1)* %p to i8 addrspace(2)*
%pp = getelementptr i8, i8 addrspace(2)* %q, i32 %tmp
@@ -975,10 +1138,11 @@ define double @test80_addrspacecast([100 x double] addrspace(1)* %p, i32 %i) {
define double @test80_addrspacecast_2([100 x double] addrspace(1)* %p, i32 %i) {
; CHECK-LABEL: @test80_addrspacecast_2(
-; CHECK-NEXT: getelementptr [100 x double], [100 x double] addrspace(1)*
-; CHECK-NEXT: addrspacecast double addrspace(1)*
-; CHECK-NEXT: load double, double addrspace(3)*
-; CHECK-NEXT: ret double
+; CHECK-NEXT: [[PP1:%.*]] = getelementptr [100 x double], [100 x double] addrspace(1)* %p, i32 0, i32 %i
+; CHECK-NEXT: [[R:%.*]] = addrspacecast double addrspace(1)* [[PP1]] to double addrspace(3)*
+; CHECK-NEXT: [[L:%.*]] = load double, double addrspace(3)* [[R]], align 8
+; CHECK-NEXT: ret double [[L]]
+;
%tmp = shl nsw i32 %i, 3
%q = addrspacecast [100 x double] addrspace(1)* %p to i8 addrspace(2)*
%pp = getelementptr i8, i8 addrspace(2)* %q, i32 %tmp
@@ -989,19 +1153,28 @@ define double @test80_addrspacecast_2([100 x double] addrspace(1)* %p, i32 %i) {
define double @test80_as1([100 x double] addrspace(1)* %p, i16 %i) {
; CHECK-LABEL: @test80_as1(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i16 %i to i32
+; CHECK-NEXT: [[PP1:%.*]] = getelementptr [100 x double], [100 x double] addrspace(1)* %p, i32 0, i32 [[TMP1]]
+; CHECK-NEXT: [[L:%.*]] = load double, double addrspace(1)* [[PP1]], align 8
+; CHECK-NEXT: ret double [[L]]
+;
%tmp = shl nsw i16 %i, 3
-; CHECK-NEXT: sext i16 %i to i32
%q = bitcast [100 x double] addrspace(1)* %p to i8 addrspace(1)*
%pp = getelementptr i8, i8 addrspace(1)* %q, i16 %tmp
-; CHECK-NEXT: getelementptr [100 x double], [100 x double] addrspace(1)*
%r = bitcast i8 addrspace(1)* %pp to double addrspace(1)*
%l = load double, double addrspace(1)* %r
-; CHECK-NEXT: load double, double addrspace(1)*
ret double %l
-; CHECK-NEXT: ret double
}
define double @test81(double *%p, float %f) {
+; CHECK-LABEL: @test81(
+; CHECK-NEXT: [[I:%.*]] = fptosi float %f to i64
+; CHECK-NEXT: [[Q:%.*]] = bitcast double* %p to i8*
+; CHECK-NEXT: [[PP:%.*]] = getelementptr i8, i8* [[Q]], i64 [[I]]
+; CHECK-NEXT: [[R:%.*]] = bitcast i8* [[PP]] to double*
+; CHECK-NEXT: [[L:%.*]] = load double, double* [[R]], align 8
+; CHECK-NEXT: ret double [[L]]
+;
%i = fptosi float %f to i64
%q = bitcast double* %p to i8*
%pp = getelementptr i8, i8* %q, i64 %i
@@ -1011,141 +1184,152 @@ define double @test81(double *%p, float %f) {
}
define i64 @test82(i64 %A) nounwind {
+; CHECK-LABEL: @test82(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i64 %A, 1
+; CHECK-NEXT: [[E:%.*]] = and i64 [[TMP1]], 4294966784
+; CHECK-NEXT: ret i64 [[E]]
+;
%B = trunc i64 %A to i32
%C = lshr i32 %B, 8
%D = shl i32 %C, 9
%E = zext i32 %D to i64
ret i64 %E
-
-; CHECK-LABEL: @test82(
-; CHECK-NEXT: [[REG:%[0-9]*]] = shl i64 %A, 1
-; CHECK-NEXT: %E = and i64 [[REG]], 4294966784
-; CHECK-NEXT: ret i64 %E
}
; PR15959
define i64 @test83(i16 %a, i64 %k) {
+; CHECK-LABEL: @test83(
+; CHECK-NEXT: [[CONV:%.*]] = sext i16 %a to i32
+; CHECK-NEXT: [[SUB:%.*]] = add i64 %k, 4294967295
+; CHECK-NEXT: [[SH_PROM:%.*]] = trunc i64 [[SUB]] to i32
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[CONV]], [[SH_PROM]]
+; CHECK-NEXT: [[SH_PROM1:%.*]] = zext i32 [[SHL]] to i64
+; CHECK-NEXT: ret i64 [[SH_PROM1]]
+;
%conv = sext i16 %a to i32
%sub = add nsw i64 %k, -1
%sh_prom = trunc i64 %sub to i32
%shl = shl i32 %conv, %sh_prom
%sh_prom1 = zext i32 %shl to i64
ret i64 %sh_prom1
-
-; CHECK-LABEL: @test83(
-; CHECK: %sub = add i64 %k, 4294967295
-; CHECK: %sh_prom = trunc i64 %sub to i32
-; CHECK: %shl = shl i32 %conv, %sh_prom
}
define i8 @test84(i32 %a) {
+; CHECK-LABEL: @test84(
+; CHECK-NEXT: [[ADD:%.*]] = add i32 %a, 2130706432
+; CHECK-NEXT: [[SHR:%.*]] = lshr exact i32 [[ADD]], 23
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SHR]] to i8
+; CHECK-NEXT: ret i8 [[TRUNC]]
+;
%add = add nsw i32 %a, -16777216
%shr = lshr exact i32 %add, 23
%trunc = trunc i32 %shr to i8
ret i8 %trunc
-
-; CHECK-LABEL: @test84(
-; CHECK: [[ADD:%.*]] = add i32 %a, 2130706432
-; CHECK: [[SHR:%.*]] = lshr exact i32 [[ADD]], 23
-; CHECK: [[CST:%.*]] = trunc i32 [[SHR]] to i8
}
define i8 @test85(i32 %a) {
+; CHECK-LABEL: @test85(
+; CHECK-NEXT: [[ADD:%.*]] = add i32 %a, 2130706432
+; CHECK-NEXT: [[SHR:%.*]] = lshr exact i32 [[ADD]], 23
+; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[SHR]] to i8
+; CHECK-NEXT: ret i8 [[TRUNC]]
+;
%add = add nuw i32 %a, -16777216
%shr = lshr exact i32 %add, 23
%trunc = trunc i32 %shr to i8
ret i8 %trunc
-
-; CHECK-LABEL: @test85(
-; CHECK: [[ADD:%.*]] = add i32 %a, 2130706432
-; CHECK: [[SHR:%.*]] = lshr exact i32 [[ADD]], 23
-; CHECK: [[CST:%.*]] = trunc i32 [[SHR]] to i8
}
define i16 @test86(i16 %v) {
+; CHECK-LABEL: @test86(
+; CHECK-NEXT: [[S1:%.*]] = ashr i16 %v, 4
+; CHECK-NEXT: ret i16 [[S1]]
+;
%a = sext i16 %v to i32
%s = ashr i32 %a, 4
%t = trunc i32 %s to i16
ret i16 %t
-
-; CHECK-LABEL: @test86(
-; CHECK: [[ASHR:%.*]] = ashr i16 %v, 4
-; CHECK-NEXT: ret i16 [[ASHR]]
}
define i16 @test87(i16 %v) {
+; CHECK-LABEL: @test87(
+; CHECK-NEXT: [[A1:%.*]] = ashr i16 %v, 12
+; CHECK-NEXT: ret i16 [[A1]]
+;
%c = sext i16 %v to i32
%m = mul nsw i32 %c, 16
%a = ashr i32 %m, 16
%t = trunc i32 %a to i16
ret i16 %t
-
-; CHECK-LABEL: @test87(
-; CHECK: [[ASHR:%.*]] = ashr i16 %v, 12
-; CHECK-NEXT: ret i16 [[ASHR]]
}
+; Do not optimize to ashr i16 (shift by 18)
define i16 @test88(i16 %v) {
+; CHECK-LABEL: @test88(
+; CHECK-NEXT: [[A:%.*]] = sext i16 %v to i32
+; CHECK-NEXT: [[S:%.*]] = ashr i32 [[A]], 18
+; CHECK-NEXT: [[T:%.*]] = trunc i32 [[S]] to i16
+; CHECK-NEXT: ret i16 [[T]]
+;
%a = sext i16 %v to i32
%s = ashr i32 %a, 18
%t = trunc i32 %s to i16
ret i16 %t
-
-; Do not optimize to ashr i16 (shift by 18)
-; CHECK-LABEL: @test88(
-; CHECK: [[SEXT:%.*]] = sext i16 %v to i32
-; CHECK-NEXT: [[ASHR:%.*]] = ashr i32 [[SEXT]], 18
-; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 [[ASHR]] to i16
-; CHECK-NEXT: ret i16 [[TRUNC]]
}
; Overflow on a float to int or int to float conversion is undefined (PR21130).
define i8 @overflow_fptosi() {
+; CHECK-LABEL: @overflow_fptosi(
+; CHECK-NEXT: ret i8 undef
+;
%i = fptosi double 1.56e+02 to i8
ret i8 %i
-; CHECK-LABEL: @overflow_fptosi(
-; CHECK-NEXT: ret i8 undef
}
define i8 @overflow_fptoui() {
+; CHECK-LABEL: @overflow_fptoui(
+; CHECK-NEXT: ret i8 undef
+;
%i = fptoui double 2.56e+02 to i8
ret i8 %i
-; CHECK-LABEL: @overflow_fptoui(
-; CHECK-NEXT: ret i8 undef
}
-; The maximum float is approximately 2 ** 128 which is 3.4E38.
+; The maximum float is approximately 2 ** 128 which is 3.4E38.
; The constant below is 4E38. Use a 130 bit integer to hold that
; number; 129-bits for the value + 1 bit for the sign.
define float @overflow_uitofp() {
+; CHECK-LABEL: @overflow_uitofp(
+; CHECK-NEXT: ret float undef
+;
%i = uitofp i130 400000000000000000000000000000000000000 to float
ret float %i
-; CHECK-LABEL: @overflow_uitofp(
-; CHECK-NEXT: ret float undef
}
define float @overflow_sitofp() {
+; CHECK-LABEL: @overflow_sitofp(
+; CHECK-NEXT: ret float undef
+;
%i = sitofp i130 400000000000000000000000000000000000000 to float
ret float %i
-; CHECK-LABEL: @overflow_sitofp(
-; CHECK-NEXT: ret float undef
}
define i32 @PR21388(i32* %v) {
+; CHECK-LABEL: @PR21388(
+; CHECK-NEXT: [[ICMP:%.*]] = icmp slt i32* %v, null
+; CHECK-NEXT: [[SEXT:%.*]] = sext i1 [[ICMP]] to i32
+; CHECK-NEXT: ret i32 [[SEXT]]
+;
%icmp = icmp slt i32* %v, null
%sext = sext i1 %icmp to i32
ret i32 %sext
-; CHECK-LABEL: @PR21388(
-; CHECK-NEXT: %[[icmp:.*]] = icmp slt i32* %v, null
-; CHECK-NEXT: %[[sext:.*]] = sext i1 %[[icmp]] to i32
-; CHECK-NEXT: ret i32 %[[sext]]
}
define float @sitofp_zext(i16 %a) {
; CHECK-LABEL: @sitofp_zext(
-; CHECK-NEXT: %[[sitofp:.*]] = uitofp i16 %a to float
-; CHECK-NEXT: ret float %[[sitofp]]
+; CHECK-NEXT: [[SITOFP:%.*]] = uitofp i16 %a to float
+; CHECK-NEXT: ret float [[SITOFP]]
+;
%zext = zext i16 %a to i32
%sitofp = sitofp i32 %zext to float
ret float %sitofp
@@ -1153,10 +1337,11 @@ define float @sitofp_zext(i16 %a) {
define i1 @PR23309(i32 %A, i32 %B) {
; CHECK-LABEL: @PR23309(
-; CHECK-NEXT: %[[sub:.*]] = sub i32 %A, %B
-; CHECK-NEXT: %[[and:.*]] = and i32 %[[sub]], 1
-; CHECK-NEXT: %[[cmp:.*]] = icmp ne i32 %[[and]], 0
-; CHECK-NEXT: ret i1 %[[cmp]]
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 %A, %B
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[SUB]], 1
+; CHECK-NEXT: [[TRUNC:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[TRUNC]]
+;
%add = add i32 %A, -4
%sub = sub nsw i32 %add, %B
%trunc = trunc i32 %sub to i1
@@ -1165,10 +1350,11 @@ define i1 @PR23309(i32 %A, i32 %B) {
define i1 @PR23309v2(i32 %A, i32 %B) {
; CHECK-LABEL: @PR23309v2(
-; CHECK-NEXT: %[[sub:.*]] = add i32 %A, %B
-; CHECK-NEXT: %[[and:.*]] = and i32 %[[sub]], 1
-; CHECK-NEXT: %[[cmp:.*]] = icmp ne i32 %[[and]], 0
-; CHECK-NEXT: ret i1 %[[cmp]]
+; CHECK-NEXT: [[SUB:%.*]] = add i32 %A, %B
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[SUB]], 1
+; CHECK-NEXT: [[TRUNC:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[TRUNC]]
+;
%add = add i32 %A, -4
%sub = add nuw i32 %add, %B
%trunc = trunc i32 %sub to i1
@@ -1177,9 +1363,10 @@ define i1 @PR23309v2(i32 %A, i32 %B) {
define i16 @PR24763(i8 %V) {
; CHECK-LABEL: @PR24763(
-; CHECK-NEXT: %[[sh:.*]] = ashr i8
-; CHECK-NEXT: %[[ext:.*]] = sext i8 %[[sh]] to i16
-; CHECK-NEXT: ret i16 %[[ext]]
+; CHECK-NEXT: [[L:%.*]] = ashr i8 %V, 1
+; CHECK-NEXT: [[T:%.*]] = sext i8 [[L]] to i16
+; CHECK-NEXT: ret i16 [[T]]
+;
%conv = sext i8 %V to i32
%l = lshr i32 %conv, 1
%t = trunc i32 %l to i16
diff --git a/test/Transforms/InstCombine/compare-signs.ll b/test/Transforms/InstCombine/compare-signs.ll
index 0ed0ac7d8d9c..08bf61a4400d 100644
--- a/test/Transforms/InstCombine/compare-signs.ll
+++ b/test/Transforms/InstCombine/compare-signs.ll
@@ -1,67 +1,64 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -instcombine -S < %s | FileCheck %s
; PR5438
; TODO: This should also optimize down.
;define i32 @test1(i32 %a, i32 %b) nounwind readnone {
;entry:
-; %0 = icmp sgt i32 %a, -1 ; <i1> [#uses=1]
-; %1 = icmp slt i32 %b, 0 ; <i1> [#uses=1]
-; %2 = xor i1 %1, %0 ; <i1> [#uses=1]
-; %3 = zext i1 %2 to i32 ; <i32> [#uses=1]
+; %0 = icmp sgt i32 %a, -1
+; %1 = icmp slt i32 %b, 0
+; %2 = xor i1 %1, %0
+; %3 = zext i1 %2 to i32
; ret i32 %3
;}
; TODO: This optimizes partially but not all the way.
;define i32 @test2(i32 %a, i32 %b) nounwind readnone {
;entry:
-; %0 = and i32 %a, 8 ;<i32> [#uses=1]
-; %1 = and i32 %b, 8 ;<i32> [#uses=1]
-; %2 = icmp eq i32 %0, %1 ;<i1> [#uses=1]
-; %3 = zext i1 %2 to i32 ;<i32> [#uses=1]
+; %0 = and i32 %a, 8
+; %1 = and i32 %b, 8
+; %2 = icmp eq i32 %0, %1
+; %3 = zext i1 %2 to i32
; ret i32 %3
;}
define i32 @test3(i32 %a, i32 %b) nounwind readnone {
; CHECK-LABEL: @test3(
-entry:
-; CHECK: [[XOR1:%.*]] = xor i32 %a, %b
-; CHECK: [[SHIFT:%.*]] = lshr i32 [[XOR1]], 31
-; CHECK: [[XOR2:%.*]] = xor i32 [[SHIFT]], 1
- %0 = lshr i32 %a, 31 ; <i32> [#uses=1]
- %1 = lshr i32 %b, 31 ; <i32> [#uses=1]
- %2 = icmp eq i32 %0, %1 ; <i1> [#uses=1]
- %3 = zext i1 %2 to i32 ; <i32> [#uses=1]
- ret i32 %3
-; CHECK-NOT: icmp
-; CHECK-NOT: zext
-; CHECK: ret i32 [[XOR2]]
+; CHECK-NEXT: [[T2_UNSHIFTED:%.*]] = xor i32 %a, %b
+; CHECK-NEXT: [[T2_UNSHIFTED_LOBIT:%.*]] = lshr i32 [[T2_UNSHIFTED]], 31
+; CHECK-NEXT: [[T2_UNSHIFTED_LOBIT_NOT:%.*]] = xor i32 [[T2_UNSHIFTED_LOBIT]], 1
+; CHECK-NEXT: ret i32 [[T2_UNSHIFTED_LOBIT_NOT]]
+;
+ %t0 = lshr i32 %a, 31
+ %t1 = lshr i32 %b, 31
+ %t2 = icmp eq i32 %t0, %t1
+ %t3 = zext i1 %t2 to i32
+ ret i32 %t3
}
; Variation on @test3: checking the 2nd bit in a situation where the 5th bit
; is one, not zero.
define i32 @test3i(i32 %a, i32 %b) nounwind readnone {
; CHECK-LABEL: @test3i(
-entry:
-; CHECK: xor i32 %a, %b
-; CHECK: lshr i32 %0, 31
-; CHECK: xor i32 %1, 1
- %0 = lshr i32 %a, 29 ; <i32> [#uses=1]
- %1 = lshr i32 %b, 29 ; <i32> [#uses=1]
- %2 = or i32 %0, 35
- %3 = or i32 %1, 35
- %4 = icmp eq i32 %2, %3 ; <i1> [#uses=1]
- %5 = zext i1 %4 to i32 ; <i32> [#uses=1]
- ret i32 %5
-; CHECK-NOT: icmp
-; CHECK-NOT: zext
-; CHECK: ret i32 %2
+; CHECK-NEXT: [[T01:%.*]] = xor i32 %a, %b
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[T01]], 31
+; CHECK-NEXT: [[T4:%.*]] = xor i32 [[TMP1]], 1
+; CHECK-NEXT: ret i32 [[T4]]
+;
+ %t0 = lshr i32 %a, 29
+ %t1 = lshr i32 %b, 29
+ %t2 = or i32 %t0, 35
+ %t3 = or i32 %t1, 35
+ %t4 = icmp eq i32 %t2, %t3
+ %t5 = zext i1 %t4 to i32
+ ret i32 %t5
}
define i1 @test4a(i32 %a) {
; CHECK-LABEL: @test4a(
- entry:
-; CHECK: %c = icmp slt i32 %a, 1
-; CHECK-NEXT: ret i1 %c
+; CHECK-NEXT: [[C:%.*]] = icmp slt i32 %a, 1
+; CHECK-NEXT: ret i1 [[C]]
+;
%l = ashr i32 %a, 31
%na = sub i32 0, %a
%r = lshr i32 %na, 31
@@ -72,9 +69,9 @@ define i1 @test4a(i32 %a) {
define i1 @test4b(i64 %a) {
; CHECK-LABEL: @test4b(
- entry:
-; CHECK: %c = icmp slt i64 %a, 1
-; CHECK-NEXT: ret i1 %c
+; CHECK-NEXT: [[C:%.*]] = icmp slt i64 %a, 1
+; CHECK-NEXT: ret i1 [[C]]
+;
%l = ashr i64 %a, 63
%na = sub i64 0, %a
%r = lshr i64 %na, 63
@@ -85,9 +82,9 @@ define i1 @test4b(i64 %a) {
define i1 @test4c(i64 %a) {
; CHECK-LABEL: @test4c(
- entry:
-; CHECK: %c = icmp slt i64 %a, 1
-; CHECK-NEXT: ret i1 %c
+; CHECK-NEXT: [[C:%.*]] = icmp slt i64 %a, 1
+; CHECK-NEXT: ret i1 [[C]]
+;
%l = ashr i64 %a, 63
%na = sub i64 0, %a
%r = lshr i64 %na, 63
diff --git a/test/Transforms/InstCombine/compare-udiv.ll b/test/Transforms/InstCombine/compare-udiv.ll
new file mode 100644
index 000000000000..140f9b52130c
--- /dev/null
+++ b/test/Transforms/InstCombine/compare-udiv.ll
@@ -0,0 +1,132 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; CHECK-LABEL: @test1
+; CHECK: %cmp1 = icmp ugt i32 %d, %n
+define i1 @test1(i32 %n, i32 %d) {
+ %div = udiv i32 %n, %d
+ %cmp1 = icmp eq i32 %div, 0
+ ret i1 %cmp1
+}
+
+; CHECK-LABEL: @test2
+; CHECK: %cmp1 = icmp ugt i32 %d, 64
+define i1 @test2(i32 %d) {
+ %div = udiv i32 64, %d
+ %cmp1 = icmp eq i32 %div, 0
+ ret i1 %cmp1
+}
+
+; CHECK-LABEL: @test3
+; CHECK: %cmp1 = icmp ule i32 %d, %n
+define i1 @test3(i32 %n, i32 %d) {
+ %div = udiv i32 %n, %d
+ %cmp1 = icmp ne i32 %div, 0
+ ret i1 %cmp1
+}
+
+; CHECK-LABEL: @test4
+; CHECK: %cmp1 = icmp ult i32 %d, 65
+define i1 @test4(i32 %d) {
+ %div = udiv i32 64, %d
+ %cmp1 = icmp ne i32 %div, 0
+ ret i1 %cmp1
+}
+
+; CHECK-LABEL: @test5
+; CHECK: ret i1 true
+define i1 @test5(i32 %d) {
+ %div = udiv i32 -1, %d
+ %cmp1 = icmp ne i32 %div, 0
+ ret i1 %cmp1
+}
+
+; CHECK-LABEL: @test6
+; CHECK: %cmp1 = icmp ult i32 %d, 6
+define i1 @test6(i32 %d) {
+ %div = udiv i32 5, %d
+ %cmp1 = icmp ugt i32 %div, 0
+ ret i1 %cmp1
+}
+
+; (icmp ugt (udiv C1, X), C1) -> false.
+; CHECK-LABEL: @test7
+; CHECK: ret i1 false
+define i1 @test7(i32 %d) {
+ %div = udiv i32 8, %d
+ %cmp1 = icmp ugt i32 %div, 8
+ ret i1 %cmp1
+}
+
+; CHECK-LABEL: @test8
+; CHECK: %cmp1 = icmp ult i32 %d, 2
+define i1 @test8(i32 %d) {
+ %div = udiv i32 4, %d
+ %cmp1 = icmp ugt i32 %div, 3
+ ret i1 %cmp1
+}
+
+; CHECK-LABEL: @test9
+; CHECK: %cmp1 = icmp ult i32 %d, 2
+define i1 @test9(i32 %d) {
+ %div = udiv i32 4, %d
+ %cmp1 = icmp ugt i32 %div, 2
+ ret i1 %cmp1
+}
+
+; CHECK-LABEL: @test10
+; CHECK: %cmp1 = icmp ult i32 %d, 3
+define i1 @test10(i32 %d) {
+ %div = udiv i32 4, %d
+ %cmp1 = icmp ugt i32 %div, 1
+ ret i1 %cmp1
+}
+
+; CHECK-LABEL: @test11
+; CHECK: %cmp1 = icmp ugt i32 %d, 4
+define i1 @test11(i32 %d) {
+ %div = udiv i32 4, %d
+ %cmp1 = icmp ult i32 %div, 1
+ ret i1 %cmp1
+}
+
+; CHECK-LABEL: @test12
+; CHECK: %cmp1 = icmp ugt i32 %d, 2
+define i1 @test12(i32 %d) {
+ %div = udiv i32 4, %d
+ %cmp1 = icmp ult i32 %div, 2
+ ret i1 %cmp1
+}
+
+; CHECK-LABEL: @test13
+; CHECK: %cmp1 = icmp ugt i32 %d, 1
+define i1 @test13(i32 %d) {
+ %div = udiv i32 4, %d
+ %cmp1 = icmp ult i32 %div, 3
+ ret i1 %cmp1
+}
+
+; CHECK-LABEL: @test14
+; CHECK: %cmp1 = icmp ugt i32 %d, 1
+define i1 @test14(i32 %d) {
+ %div = udiv i32 4, %d
+ %cmp1 = icmp ult i32 %div, 4
+ ret i1 %cmp1
+}
+
+; icmp ugt X, UINT_MAX -> false.
+; CHECK-LABEL: @test15
+; CHECK: ret i1 false
+define i1 @test15(i32 %d) {
+ %div = udiv i32 4, %d
+ %cmp1 = icmp ugt i32 %div, -1
+ ret i1 %cmp1
+}
+
+; icmp ult X, UINT_MAX -> true.
+; CHECK-LABEL: @test16
+; CHECK: ret i1 true
+define i1 @test16(i32 %d) {
+ %div = udiv i32 4, %d
+ %cmp1 = icmp ult i32 %div, -1
+ ret i1 %cmp1
+}
diff --git a/test/Transforms/InstCombine/compare-unescaped.ll b/test/Transforms/InstCombine/compare-unescaped.ll
new file mode 100644
index 000000000000..0e512aa28911
--- /dev/null
+++ b/test/Transforms/InstCombine/compare-unescaped.ll
@@ -0,0 +1,164 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+@gp = global i32* null, align 8
+
+declare i8* @malloc(i64) #1
+
+define i1 @compare_global_trivialeq() {
+ %m = call i8* @malloc(i64 4)
+ %bc = bitcast i8* %m to i32*
+ %lgp = load i32*, i32** @gp, align 8
+ %cmp = icmp eq i32* %bc, %lgp
+ ret i1 %cmp
+; CHECK-LABEL: compare_global_trivialeq
+; CHECK: ret i1 false
+}
+
+define i1 @compare_global_trivialne() {
+ %m = call i8* @malloc(i64 4)
+ %bc = bitcast i8* %m to i32*
+ %lgp = load i32*, i32** @gp, align 8
+ %cmp = icmp ne i32* %bc, %lgp
+ ret i1 %cmp
+; CHECK-LABEL: compare_global_trivialne
+; CHECK: ret i1 true
+}
+
+
+; Although the %m is marked nocapture in the deopt operand in call to function f,
+; we cannot remove the alloc site: call to malloc
+; The comparison should fold to false irrespective of whether the call to malloc can be elided or not
+declare void @f()
+define i1 @compare_and_call_with_deopt() {
+; CHECK-LABEL: compare_and_call_with_deopt
+ %m = call i8* @malloc(i64 24)
+ %bc = bitcast i8* %m to i32*
+ %lgp = load i32*, i32** @gp, align 8, !nonnull !0
+ %cmp = icmp eq i32* %lgp, %bc
+ tail call void @f() [ "deopt"(i8* %m) ]
+ ret i1 %cmp
+; CHECK: ret i1 false
+}
+
+; Same functon as above with deopt operand in function f, but comparison is NE
+define i1 @compare_ne_and_call_with_deopt() {
+; CHECK-LABEL: compare_ne_and_call_with_deopt
+ %m = call i8* @malloc(i64 24)
+ %bc = bitcast i8* %m to i32*
+ %lgp = load i32*, i32** @gp, align 8, !nonnull !0
+ %cmp = icmp ne i32* %lgp, %bc
+ tail call void @f() [ "deopt"(i8* %m) ]
+ ret i1 %cmp
+; CHECK: ret i1 true
+}
+
+; Same function as above, but global not marked nonnull, and we cannot fold the comparison
+define i1 @compare_ne_global_maybe_null() {
+; CHECK-LABEL: compare_ne_global_maybe_null
+ %m = call i8* @malloc(i64 24)
+ %bc = bitcast i8* %m to i32*
+ %lgp = load i32*, i32** @gp
+ %cmp = icmp ne i32* %lgp, %bc
+ tail call void @f() [ "deopt"(i8* %m) ]
+ ret i1 %cmp
+; CHECK: ret i1 %cmp
+}
+
+; FIXME: The comparison should fold to false since %m escapes (call to function escape)
+; after the comparison.
+declare void @escape(i8*)
+define i1 @compare_and_call_after() {
+; CHECK-LABEL: compare_and_call_after
+ %m = call i8* @malloc(i64 24)
+ %bc = bitcast i8* %m to i32*
+ %lgp = load i32*, i32** @gp, align 8, !nonnull !0
+ %cmp = icmp eq i32* %bc, %lgp
+ br i1 %cmp, label %escape_call, label %just_return
+
+escape_call:
+ call void @escape(i8* %m)
+ ret i1 true
+
+just_return:
+ ret i1 %cmp
+}
+
+define i1 @compare_distinct_mallocs() {
+ %m = call i8* @malloc(i64 4)
+ %n = call i8* @malloc(i64 4)
+ %cmp = icmp eq i8* %m, %n
+ ret i1 %cmp
+ ; CHECK-LABEL: compare_distinct_mallocs
+ ; CHECK: ret i1 false
+}
+
+; the compare is folded to true since the folding compare looks through bitcasts.
+; call to malloc and the bitcast instructions are elided after that since there are no uses of the malloc
+define i1 @compare_samepointer_under_bitcast() {
+ %m = call i8* @malloc(i64 4)
+ %bc = bitcast i8* %m to i32*
+ %bcback = bitcast i32* %bc to i8*
+ %cmp = icmp eq i8* %m, %bcback
+ ret i1 %cmp
+; CHECK-LABEL: compare_samepointer_under_bitcast
+; CHECK: ret i1 true
+}
+
+; the compare is folded to true since the folding compare looks through bitcasts.
+; The malloc call for %m cannot be elided since it is used in the call to function f.
+define i1 @compare_samepointer_escaped() {
+ %m = call i8* @malloc(i64 4)
+ %bc = bitcast i8* %m to i32*
+ %bcback = bitcast i32* %bc to i8*
+ %cmp = icmp eq i8* %m, %bcback
+ call void @f() [ "deopt"(i8* %m) ]
+ ret i1 %cmp
+; CHECK-LABEL: compare_samepointer_escaped
+; CHECK-NEXT: %m = call i8* @malloc(i64 4)
+; CHECK-NEXT: call void @f() [ "deopt"(i8* %m) ]
+; CHECK: ret i1 true
+}
+
+; Technically, we can fold the %cmp2 comparison, even though %m escapes through
+; the ret statement since `ret` terminates the function and we cannot reach from
+; the ret to cmp.
+; FIXME: Folding this %cmp2 when %m escapes through ret could be an issue with
+; cross-threading data dependencies since we do not make the distinction between
+; atomic and non-atomic loads in capture tracking.
+define i8* @compare_ret_escape(i8* %c) {
+ %m = call i8* @malloc(i64 4)
+ %n = call i8* @malloc(i64 4)
+ %cmp = icmp eq i8* %n, %c
+ br i1 %cmp, label %retst, label %chk
+
+retst:
+ ret i8* %m
+
+chk:
+ %bc = bitcast i8* %m to i32*
+ %lgp = load i32*, i32** @gp, align 8, !nonnull !0
+ %cmp2 = icmp eq i32* %bc, %lgp
+ br i1 %cmp2, label %retst, label %chk2
+
+chk2:
+ ret i8* %n
+; CHECK-LABEL: compare_ret_escape
+; CHECK: %cmp = icmp eq i8* %n, %c
+; CHECK: %cmp2 = icmp eq i32* %bc, %lgp
+}
+
+; The malloc call for %m cannot be elided since it is used in the call to function f.
+; However, the cmp can be folded to true as %n doesnt escape and %m, %n are distinct allocations
+define i1 @compare_distinct_pointer_escape() {
+ %m = call i8* @malloc(i64 4)
+ %n = call i8* @malloc(i64 4)
+ tail call void @f() [ "deopt"(i8* %m) ]
+ %cmp = icmp ne i8* %m, %n
+ ret i1 %cmp
+; CHECK-LABEL: compare_distinct_pointer_escape
+; CHECK-NEXT: %m = call i8* @malloc(i64 4)
+; CHECK-NEXT: tail call void @f() [ "deopt"(i8* %m) ]
+; CHECK-NEXT: ret i1 true
+}
+
+!0 = !{}
diff --git a/test/Transforms/InstCombine/convergent.ll b/test/Transforms/InstCombine/convergent.ll
new file mode 100644
index 000000000000..d4484cf4567e
--- /dev/null
+++ b/test/Transforms/InstCombine/convergent.ll
@@ -0,0 +1,44 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+declare i32 @k() convergent
+declare i32 @f()
+
+declare i64 @llvm.read_register.i64(metadata) nounwind
+
+define i32 @extern() {
+ ; Convergent attr shouldn't be removed here; k is convergent.
+ ; CHECK: call i32 @k() [[CONVERGENT_ATTR:#[0-9]+]]
+ %a = call i32 @k() convergent
+ ret i32 %a
+}
+
+define i32 @extern_no_attr() {
+ ; Convergent attr shouldn't be added here, even though k is convergent.
+ ; CHECK: call i32 @k(){{$}}
+ %a = call i32 @k()
+ ret i32 %a
+}
+
+define i32 @no_extern() {
+ ; Convergent should be removed here, as the target is convergent.
+ ; CHECK: call i32 @f(){{$}}
+ %a = call i32 @f() convergent
+ ret i32 %a
+}
+
+define i32 @indirect_call(i32 ()* %f) {
+ ; CHECK call i32 %f() [[CONVERGENT_ATTR]]
+ %a = call i32 %f() convergent
+ ret i32 %a
+}
+
+; do not remove from convergent intrinsic call sites
+; CHECK-LABEL: @convergent_intrinsic_call(
+; CHECK: call i64 @llvm.read_register.i64(metadata !0) [[CONVERGENT_ATTR]]
+define i64 @convergent_intrinsic_call() {
+ %val = call i64 @llvm.read_register.i64(metadata !0) convergent
+ ret i64 %val
+}
+
+; CHECK: [[CONVERGENT_ATTR]] = { convergent }
+!0 = !{!"foo"}
diff --git a/test/Transforms/InstCombine/cos-intrinsic.ll b/test/Transforms/InstCombine/cos-intrinsic.ll
new file mode 100644
index 000000000000..b4d07cf8047b
--- /dev/null
+++ b/test/Transforms/InstCombine/cos-intrinsic.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+; This test makes sure that the undef is propagated for the cos instrinsic
+
+declare double @llvm.cos.f64(double %Val)
+declare float @llvm.cos.f32(float %Val)
+
+; Function Attrs: nounwind readnone
+define double @test1() {
+; CHECK-LABEL: define double @test1(
+; CHECK-NEXT: ret double 0.000000e+00
+ %1 = call double @llvm.cos.f64(double undef)
+ ret double %1
+}
+
+
+; Function Attrs: nounwind readnone
+define float @test2(float %d) {
+; CHECK-LABEL: define float @test2(
+; CHECK-NEXT: %cosval = call float @llvm.cos.f32(float %d)
+ %cosval = call float @llvm.cos.f32(float %d)
+ %cosval2 = call float @llvm.cos.f32(float undef)
+ %fsum = fadd float %cosval2, %cosval
+ ret float %fsum
+; CHECK-NEXT: %fsum
+; CHECK: ret float %fsum
+}
diff --git a/test/Transforms/InstCombine/debug-line.ll b/test/Transforms/InstCombine/debug-line.ll
index 4b1db9db353b..61ff5da7e06d 100644
--- a/test/Transforms/InstCombine/debug-line.ll
+++ b/test/Transforms/InstCombine/debug-line.ll
@@ -13,16 +13,14 @@ declare i32 @printf(i8*, ...)
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!10}
-!llvm.dbg.sp = !{!0}
-!0 = distinct !DISubprogram(name: "foo", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !8, scope: !1, type: !3)
+!0 = distinct !DISubprogram(name: "foo", line: 4, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, unit: !2, file: !8, scope: !1, type: !3)
!1 = !DIFile(filename: "m.c", directory: "/private/tmp")
-!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang", isOptimized: true, emissionKind: 0, file: !8, enums: !{}, retainedTypes: !{}, subprograms: !9)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang", isOptimized: true, emissionKind: FullDebug, file: !8, enums: !{}, retainedTypes: !{})
!3 = !DISubroutineType(types: !4)
!4 = !{null}
!5 = !DILocation(line: 5, column: 2, scope: !6)
!6 = distinct !DILexicalBlock(line: 4, column: 12, file: !8, scope: !0)
!7 = !DILocation(line: 6, column: 1, scope: !6)
!8 = !DIFile(filename: "m.c", directory: "/private/tmp")
-!9 = !{!0}
!10 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/Transforms/InstCombine/debuginfo.ll b/test/Transforms/InstCombine/debuginfo.ll
index 9c8b2a8e4154..c89a3400c88c 100644
--- a/test/Transforms/InstCombine/debuginfo.ll
+++ b/test/Transforms/InstCombine/debuginfo.ll
@@ -32,9 +32,9 @@ entry:
!llvm.module.flags = !{!30}
!0 = !DILocalVariable(name: "__dest", line: 78, arg: 1, scope: !1, file: !2, type: !6)
-!1 = distinct !DISubprogram(name: "foobar", line: 79, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 79, file: !27, scope: !2, type: !4, variables: !25)
+!1 = distinct !DISubprogram(name: "foobar", line: 79, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !3, scopeLine: 79, file: !27, scope: !2, type: !4, variables: !25)
!2 = !DIFile(filename: "string.h", directory: "Game")
-!3 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 127710)", isOptimized: true, emissionKind: 0, file: !28, enums: !29, retainedTypes: !29, subprograms: !24)
+!3 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 127710)", isOptimized: true, emissionKind: FullDebug, file: !28, enums: !29, retainedTypes: !29)
!4 = !DISubroutineType(types: !5)
!5 = !{!6}
!6 = !DIDerivedType(tag: DW_TAG_pointer_type, size: 64, align: 64, scope: !3, baseType: null)
@@ -50,7 +50,6 @@ entry:
!21 = !DILocation(line: 80, column: 3, scope: !22)
!22 = distinct !DILexicalBlock(line: 80, column: 3, file: !27, scope: !23)
!23 = distinct !DILexicalBlock(line: 79, column: 1, file: !27, scope: !1)
-!24 = !{!1}
!25 = !{!0, !7, !9}
!26 = !DIFile(filename: "bits.c", directory: "Game")
!27 = !DIFile(filename: "string.h", directory: "Game")
diff --git a/test/Transforms/InstCombine/demorgan-zext.ll b/test/Transforms/InstCombine/demorgan-zext.ll
index da41fac3e350..045c15c05d64 100644
--- a/test/Transforms/InstCombine/demorgan-zext.ll
+++ b/test/Transforms/InstCombine/demorgan-zext.ll
@@ -1,34 +1,90 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
; PR22723: Recognize De Morgan's Laws when obfuscated by zexts.
define i32 @demorgan_or(i1 %X, i1 %Y) {
+; CHECK-LABEL: @demorgan_or(
+; CHECK-NEXT: [[OR_DEMORGAN:%.*]] = and i1 %X, %Y
+; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[OR_DEMORGAN]] to i32
+; CHECK-NEXT: [[OR:%.*]] = xor i32 [[TMP1]], 1
+; CHECK-NEXT: ret i32 [[OR]]
+;
%zextX = zext i1 %X to i32
%zextY = zext i1 %Y to i32
%notX = xor i32 %zextX, 1
%notY = xor i32 %zextY, 1
%or = or i32 %notX, %notY
ret i32 %or
-
-; CHECK-LABEL: demorgan_or(
-; CHECK-NEXT: %[[AND:.*]] = and i1 %X, %Y
-; CHECK-NEXT: %[[ZEXT:.*]] = zext i1 %[[AND]] to i32
-; CHECK-NEXT: %[[XOR:.*]] = xor i32 %[[ZEXT]], 1
-; CHECK-NEXT: ret i32 %[[XOR]]
}
define i32 @demorgan_and(i1 %X, i1 %Y) {
+; CHECK-LABEL: @demorgan_and(
+; CHECK-NEXT: [[AND_DEMORGAN:%.*]] = or i1 %X, %Y
+; CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[AND_DEMORGAN]] to i32
+; CHECK-NEXT: [[AND:%.*]] = xor i32 [[TMP1]], 1
+; CHECK-NEXT: ret i32 [[AND]]
+;
%zextX = zext i1 %X to i32
%zextY = zext i1 %Y to i32
%notX = xor i32 %zextX, 1
%notY = xor i32 %zextY, 1
%and = and i32 %notX, %notY
ret i32 %and
+}
+
+; FIXME: Vectors should get the same transform.
+
+define <2 x i32> @demorgan_or_vec(<2 x i1> %X, <2 x i1> %Y) {
+; CHECK-LABEL: @demorgan_or_vec(
+; CHECK-NEXT: [[ZEXTX:%.*]] = zext <2 x i1> %X to <2 x i32>
+; CHECK-NEXT: [[ZEXTY:%.*]] = zext <2 x i1> %Y to <2 x i32>
+; CHECK-NEXT: [[NOTX:%.*]] = xor <2 x i32> [[ZEXTX]], <i32 1, i32 1>
+; CHECK-NEXT: [[NOTY:%.*]] = xor <2 x i32> [[ZEXTY]], <i32 1, i32 1>
+; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> [[NOTX]], [[NOTY]]
+; CHECK-NEXT: ret <2 x i32> [[OR]]
+;
+ %zextX = zext <2 x i1> %X to <2 x i32>
+ %zextY = zext <2 x i1> %Y to <2 x i32>
+ %notX = xor <2 x i32> %zextX, <i32 1, i32 1>
+ %notY = xor <2 x i32> %zextY, <i32 1, i32 1>
+ %or = or <2 x i32> %notX, %notY
+ ret <2 x i32> %or
+}
+
+define <2 x i32> @demorgan_and_vec(<2 x i1> %X, <2 x i1> %Y) {
+; CHECK-LABEL: @demorgan_and_vec(
+; CHECK-NEXT: [[ZEXTX:%.*]] = zext <2 x i1> %X to <2 x i32>
+; CHECK-NEXT: [[ZEXTY:%.*]] = zext <2 x i1> %Y to <2 x i32>
+; CHECK-NEXT: [[NOTX:%.*]] = xor <2 x i32> [[ZEXTX]], <i32 1, i32 1>
+; CHECK-NEXT: [[NOTY:%.*]] = xor <2 x i32> [[ZEXTY]], <i32 1, i32 1>
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> [[NOTX]], [[NOTY]]
+; CHECK-NEXT: ret <2 x i32> [[AND]]
+;
+ %zextX = zext <2 x i1> %X to <2 x i32>
+ %zextY = zext <2 x i1> %Y to <2 x i32>
+ %notX = xor <2 x i32> %zextX, <i32 1, i32 1>
+ %notY = xor <2 x i32> %zextY, <i32 1, i32 1>
+ %and = and <2 x i32> %notX, %notY
+ ret <2 x i32> %and
+}
+
+; FIXME: If the xor was canonicalized to a 'not', then this would simplify.
-; CHECK-LABEL: demorgan_and(
-; CHECK-NEXT: %[[OR:.*]] = or i1 %X, %Y
-; CHECK-NEXT: %[[ZEXT:.*]] = zext i1 %[[OR]] to i32
-; CHECK-NEXT: %[[XOR:.*]] = xor i32 %[[ZEXT]], 1
-; CHECK-NEXT: ret i32 %[[XOR]]
+define i32 @PR28476(i32 %x, i32 %y) {
+; CHECK-LABEL: @PR28476(
+; CHECK-NEXT: [[CMP0:%.*]] = icmp ne i32 %x, 0
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ne i32 %y, 0
+; CHECK-NEXT: [[AND:%.*]] = and i1 [[CMP0]], [[CMP1]]
+; CHECK-NEXT: [[ZEXT:%.*]] = zext i1 [[AND]] to i32
+; CHECK-NEXT: [[COND:%.*]] = xor i32 [[ZEXT]], 1
+; CHECK-NEXT: ret i32 [[COND]]
+;
+ %cmp0 = icmp ne i32 %x, 0
+ %cmp1 = icmp ne i32 %y, 0
+ %and = and i1 %cmp0, %cmp1
+ %zext = zext i1 %and to i32
+ %cond = xor i32 %zext, 1
+ ret i32 %cond
}
diff --git a/test/Transforms/InstCombine/div-shift.ll b/test/Transforms/InstCombine/div-shift.ll
index 3350f1940554..053210650d10 100644
--- a/test/Transforms/InstCombine/div-shift.ll
+++ b/test/Transforms/InstCombine/div-shift.ll
@@ -1,10 +1,15 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
-define i32 @t1(i16 zeroext %x, i32 %y) nounwind {
+define i32 @t1(i16 zeroext %x, i32 %y) {
+; CHECK-LABEL: @t1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CONV:%.*]] = zext i16 %x to i32
+; CHECK-NEXT: [[TMP0:%.*]] = add i32 %y, 1
+; CHECK-NEXT: [[D:%.*]] = lshr i32 [[CONV]], [[TMP0]]
+; CHECK-NEXT: ret i32 [[D]]
+;
entry:
-; CHECK: t1
-; CHECK-NOT: sdiv
-; CHECK: lshr i32 %conv
%conv = zext i16 %x to i32
%s = shl i32 2, %y
%d = sdiv i32 %conv, %s
@@ -12,10 +17,12 @@ entry:
}
; rdar://11721329
-define i64 @t2(i64 %x, i32 %y) nounwind {
-; CHECK: t2
-; CHECK-NOT: udiv
-; CHECK: lshr i64 %x
+define i64 @t2(i64 %x, i32 %y) {
+; CHECK-LABEL: @t2(
+; CHECK-NEXT: [[TMP1:%.*]] = zext i32 %y to i64
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i64 %x, [[TMP1]]
+; CHECK-NEXT: ret i64 [[TMP2]]
+;
%1 = shl i32 1, %y
%2 = zext i32 %1 to i64
%3 = udiv i64 %x, %2
@@ -23,26 +30,26 @@ define i64 @t2(i64 %x, i32 %y) nounwind {
}
; PR13250
-define i64 @t3(i64 %x, i32 %y) nounwind {
-; CHECK: t3
-; CHECK-NOT: udiv
-; CHECK-NEXT: %1 = add i32 %y, 2
-; CHECK-NEXT: %2 = zext i32 %1 to i64
-; CHECK-NEXT: %3 = lshr i64 %x, %2
-; CHECK-NEXT: ret i64 %3
+define i64 @t3(i64 %x, i32 %y) {
+; CHECK-LABEL: @t3(
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 %y, 2
+; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
+; CHECK-NEXT: [[TMP3:%.*]] = lshr i64 %x, [[TMP2]]
+; CHECK-NEXT: ret i64 [[TMP3]]
+;
%1 = shl i32 4, %y
%2 = zext i32 %1 to i64
%3 = udiv i64 %x, %2
ret i64 %3
}
-define i32 @t4(i32 %x, i32 %y) nounwind {
-; CHECK: t4
-; CHECK-NOT: udiv
-; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 %y, 5
-; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP]], i32 5, i32 %y
-; CHECK-NEXT: [[SHR:%.*]] = lshr i32 %x, [[SEL]]
-; CHECK-NEXT: ret i32 [[SHR]]
+define i32 @t4(i32 %x, i32 %y) {
+; CHECK-LABEL: @t4(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 %y, 5
+; CHECK-NEXT: [[DOTV:%.*]] = select i1 [[TMP1]], i32 5, i32 %y
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 %x, [[DOTV]]
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
%1 = shl i32 1, %y
%2 = icmp ult i32 %1, 32
%3 = select i1 %2, i32 32, i32 %1
@@ -50,13 +57,13 @@ define i32 @t4(i32 %x, i32 %y) nounwind {
ret i32 %4
}
-define i32 @t5(i1 %x, i1 %y, i32 %V) nounwind {
-; CHECK: t5
-; CHECK-NOT: udiv
-; CHECK-NEXT: [[SEL1:%.*]] = select i1 %x, i32 5, i32 6
-; CHECK-NEXT: [[LSHR:%.*]] = lshr i32 %V, [[SEL1]]
-; CHECK-NEXT: [[SEL2:%.*]] = select i1 %y, i32 [[LSHR]], i32 0
-; CHECK-NEXT: ret i32 [[SEL2]]
+define i32 @t5(i1 %x, i1 %y, i32 %V) {
+; CHECK-LABEL: @t5(
+; CHECK-NEXT: [[DOTV:%.*]] = select i1 %x, i32 5, i32 6
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 %V, [[DOTV]]
+; CHECK-NEXT: [[TMP2:%.*]] = select i1 %y, i32 [[TMP1]], i32 0
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
%1 = shl i32 1, %V
%2 = select i1 %x, i32 32, i32 64
%3 = select i1 %y, i32 %2, i32 %1
@@ -64,10 +71,13 @@ define i32 @t5(i1 %x, i1 %y, i32 %V) nounwind {
ret i32 %4
}
-define i32 @t6(i32 %x, i32 %z) nounwind{
-; CHECK: t6
-; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 %x, 0
-; CHECK-NOT: udiv i32 %z, %x
+define i32 @t6(i32 %x, i32 %z) {
+; CHECK-LABEL: @t6(
+; CHECK-NEXT: [[X_IS_ZERO:%.*]] = icmp eq i32 %x, 0
+; CHECK-NEXT: [[DIVISOR:%.*]] = select i1 [[X_IS_ZERO]], i32 1, i32 %x
+; CHECK-NEXT: [[Y:%.*]] = udiv i32 %z, [[DIVISOR]]
+; CHECK-NEXT: ret i32 [[Y]]
+;
%x_is_zero = icmp eq i32 %x, 0
%divisor = select i1 %x_is_zero, i32 1, i32 %x
%y = udiv i32 %z, %divisor
diff --git a/test/Transforms/InstCombine/div.ll b/test/Transforms/InstCombine/div.ll
index 27a316113e52..a19cdefa31fe 100644
--- a/test/Transforms/InstCombine/div.ll
+++ b/test/Transforms/InstCombine/div.ll
@@ -1,330 +1,399 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; This test makes sure that div instructions are properly eliminated.
; RUN: opt < %s -instcombine -S | FileCheck %s
define i32 @test1(i32 %A) {
- %B = sdiv i32 %A, 1 ; <i32> [#uses=1]
- ret i32 %B
; CHECK-LABEL: @test1(
-; CHECK-NEXT: ret i32 %A
+; CHECK-NEXT: ret i32 %A
+;
+ %B = sdiv i32 %A, 1 ; <i32> [#uses=1]
+ ret i32 %B
}
define i32 @test2(i32 %A) {
; => Shift
- %B = udiv i32 %A, 8 ; <i32> [#uses=1]
- ret i32 %B
; CHECK-LABEL: @test2(
-; CHECK-NEXT: lshr i32 %A, 3
+; CHECK-NEXT: [[B:%.*]] = lshr i32 %A, 3
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %B = udiv i32 %A, 8 ; <i32> [#uses=1]
+ ret i32 %B
}
define i32 @test3(i32 %A) {
; => 0, don't need to keep traps
- %B = sdiv i32 0, %A ; <i32> [#uses=1]
- ret i32 %B
; CHECK-LABEL: @test3(
-; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: ret i32 0
+;
+ %B = sdiv i32 0, %A ; <i32> [#uses=1]
+ ret i32 %B
}
define i32 @test4(i32 %A) {
; 0-A
- %B = sdiv i32 %A, -1 ; <i32> [#uses=1]
- ret i32 %B
; CHECK-LABEL: @test4(
-; CHECK-NEXT: sub i32 0, %A
+; CHECK-NEXT: [[B:%.*]] = sub i32 0, %A
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %B = sdiv i32 %A, -1 ; <i32> [#uses=1]
+ ret i32 %B
}
define i32 @test5(i32 %A) {
- %B = udiv i32 %A, -16 ; <i32> [#uses=1]
- %C = udiv i32 %B, -4 ; <i32> [#uses=1]
- ret i32 %C
; CHECK-LABEL: @test5(
-; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: ret i32 0
+;
+ %B = udiv i32 %A, -16 ; <i32> [#uses=1]
+ %C = udiv i32 %B, -4 ; <i32> [#uses=1]
+ ret i32 %C
}
define i1 @test6(i32 %A) {
- %B = udiv i32 %A, 123 ; <i32> [#uses=1]
- ; A < 123
- %C = icmp eq i32 %B, 0 ; <i1> [#uses=1]
- ret i1 %C
; CHECK-LABEL: @test6(
-; CHECK-NEXT: icmp ult i32 %A, 123
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 %A, 123
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %B = udiv i32 %A, 123 ; <i32> [#uses=1]
+ ; A < 123
+ %C = icmp eq i32 %B, 0 ; <i1> [#uses=1]
+ ret i1 %C
}
define i1 @test7(i32 %A) {
- %B = udiv i32 %A, 10 ; <i32> [#uses=1]
- ; A >= 20 && A < 30
- %C = icmp eq i32 %B, 2 ; <i1> [#uses=1]
- ret i1 %C
; CHECK-LABEL: @test7(
-; CHECK-NEXT: add i32 %A, -20
-; CHECK-NEXT: icmp ult i32
+; CHECK-NEXT: [[A_OFF:%.*]] = add i32 %A, -20
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[A_OFF]], 10
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %B = udiv i32 %A, 10 ; <i32> [#uses=1]
+ ; A >= 20 && A < 30
+ %C = icmp eq i32 %B, 2 ; <i1> [#uses=1]
+ ret i1 %C
}
define i1 @test8(i8 %A) {
- %B = udiv i8 %A, 123 ; <i8> [#uses=1]
- ; A >= 246
- %C = icmp eq i8 %B, 2 ; <i1> [#uses=1]
- ret i1 %C
; CHECK-LABEL: @test8(
-; CHECK-NEXT: icmp ugt i8 %A, -11
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i8 %A, -11
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = udiv i8 %A, 123 ; <i8> [#uses=1]
+ ; A >= 246
+ %C = icmp eq i8 %B, 2 ; <i1> [#uses=1]
+ ret i1 %C
}
define i1 @test9(i8 %A) {
- %B = udiv i8 %A, 123 ; <i8> [#uses=1]
- ; A < 246
- %C = icmp ne i8 %B, 2 ; <i1> [#uses=1]
- ret i1 %C
; CHECK-LABEL: @test9(
-; CHECK-NEXT: icmp ult i8 %A, -10
+; CHECK-NEXT: [[C:%.*]] = icmp ult i8 %A, -10
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = udiv i8 %A, 123 ; <i8> [#uses=1]
+ ; A < 246
+ %C = icmp ne i8 %B, 2 ; <i1> [#uses=1]
+ ret i1 %C
}
define i32 @test10(i32 %X, i1 %C) {
- %V = select i1 %C, i32 64, i32 8 ; <i32> [#uses=1]
- %R = udiv i32 %X, %V ; <i32> [#uses=1]
- ret i32 %R
; CHECK-LABEL: @test10(
-; CHECK-NEXT: select i1 %C, i32 6, i32 3
-; CHECK-NEXT: lshr i32 %X
+; CHECK-NEXT: [[R_V:%.*]] = select i1 %C, i32 6, i32 3
+; CHECK-NEXT: [[R:%.*]] = lshr i32 %X, [[R:%.*]].v
+; CHECK-NEXT: ret i32 [[R]]
+;
+ %V = select i1 %C, i32 64, i32 8 ; <i32> [#uses=1]
+ %R = udiv i32 %X, %V ; <i32> [#uses=1]
+ ret i32 %R
}
define i32 @test11(i32 %X, i1 %C) {
- %A = select i1 %C, i32 1024, i32 32 ; <i32> [#uses=1]
- %B = udiv i32 %X, %A ; <i32> [#uses=1]
- ret i32 %B
; CHECK-LABEL: @test11(
-; CHECK-NEXT: select i1 %C, i32 10, i32 5
-; CHECK-NEXT: lshr i32 %X
+; CHECK-NEXT: [[B_V:%.*]] = select i1 %C, i32 10, i32 5
+; CHECK-NEXT: [[B:%.*]] = lshr i32 %X, [[B:%.*]].v
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %A = select i1 %C, i32 1024, i32 32 ; <i32> [#uses=1]
+ %B = udiv i32 %X, %A ; <i32> [#uses=1]
+ ret i32 %B
}
; PR2328
define i32 @test12(i32 %x) nounwind {
- %tmp3 = udiv i32 %x, %x ; 1
- ret i32 %tmp3
; CHECK-LABEL: @test12(
-; CHECK-NEXT: ret i32 1
+; CHECK-NEXT: ret i32 1
+;
+ %tmp3 = udiv i32 %x, %x ; 1
+ ret i32 %tmp3
}
define i32 @test13(i32 %x) nounwind {
- %tmp3 = sdiv i32 %x, %x ; 1
- ret i32 %tmp3
; CHECK-LABEL: @test13(
-; CHECK-NEXT: ret i32 1
+; CHECK-NEXT: ret i32 1
+;
+ %tmp3 = sdiv i32 %x, %x ; 1
+ ret i32 %tmp3
}
define i32 @test14(i8 %x) nounwind {
- %zext = zext i8 %x to i32
- %div = udiv i32 %zext, 257 ; 0
- ret i32 %div
; CHECK-LABEL: @test14(
-; CHECK-NEXT: ret i32 0
+; CHECK-NEXT: ret i32 0
+;
+ %zext = zext i8 %x to i32
+ %div = udiv i32 %zext, 257 ; 0
+ ret i32 %div
}
; PR9814
define i32 @test15(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 %b, -2
+; CHECK-NEXT: [[DIV2:%.*]] = lshr i32 %a, [[TMP1]]
+; CHECK-NEXT: ret i32 [[DIV2]]
+;
%shl = shl i32 1, %b
%div = lshr i32 %shl, 2
%div2 = udiv i32 %a, %div
ret i32 %div2
-; CHECK-LABEL: @test15(
-; CHECK-NEXT: add i32 %b, -2
-; CHECK-NEXT: lshr i32 %a,
-; CHECK-NEXT: ret i32
}
define <2 x i64> @test16(<2 x i64> %x) nounwind {
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: [[DIV:%.*]] = udiv <2 x i64> %x, <i64 192, i64 192>
+; CHECK-NEXT: ret <2 x i64> [[DIV]]
+;
%shr = lshr <2 x i64> %x, <i64 5, i64 5>
%div = udiv <2 x i64> %shr, <i64 6, i64 6>
ret <2 x i64> %div
-; CHECK-LABEL: @test16(
-; CHECK-NEXT: udiv <2 x i64> %x, <i64 192, i64 192>
-; CHECK-NEXT: ret <2 x i64>
}
define <2 x i64> @test17(<2 x i64> %x) nounwind {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: [[DIV:%.*]] = sdiv <2 x i64> %x, <i64 -3, i64 -4>
+; CHECK-NEXT: ret <2 x i64> [[DIV]]
+;
%neg = sub nsw <2 x i64> zeroinitializer, %x
%div = sdiv <2 x i64> %neg, <i64 3, i64 4>
ret <2 x i64> %div
-; CHECK-LABEL: @test17(
-; CHECK-NEXT: sdiv <2 x i64> %x, <i64 -3, i64 -4>
-; CHECK-NEXT: ret <2 x i64>
}
define <2 x i64> @test18(<2 x i64> %x) nounwind {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: [[DIV:%.*]] = sub <2 x i64> zeroinitializer, %x
+; CHECK-NEXT: ret <2 x i64> [[DIV]]
+;
%div = sdiv <2 x i64> %x, <i64 -1, i64 -1>
ret <2 x i64> %div
-; CHECK-LABEL: @test18(
-; CHECK-NEXT: sub <2 x i64> zeroinitializer, %x
-; CHECK-NEXT: ret <2 x i64>
}
define i32 @test19(i32 %x) {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 %x, 1
+; CHECK-NEXT: [[A:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[A]]
+;
%A = udiv i32 1, %x
ret i32 %A
-; CHECK-LABEL: @test19(
-; CHECK-NEXT: icmp eq i32 %x, 1
-; CHECK-NEXT: zext i1 %{{.*}} to i32
-; CHECK-NEXT: ret i32
}
define i32 @test20(i32 %x) {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 %x, 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ult i32 [[TMP1]], 3
+; CHECK-NEXT: [[A:%.*]] = select i1 [[TMP2]], i32 %x, i32 0
+; CHECK-NEXT: ret i32 [[A]]
+;
%A = sdiv i32 1, %x
ret i32 %A
-; CHECK-LABEL: @test20(
-; CHECK-NEXT: add i32 %x, 1
-; CHECK-NEXT: icmp ult i32 %{{.*}}, 3
-; CHECK-NEXT: select i1 %{{.*}}, i32 %x, i32 {{.*}}
-; CHECK-NEXT: ret i32
}
define i32 @test21(i32 %a) {
+; CHECK-LABEL: @test21(
+; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 %a, 3
+; CHECK-NEXT: ret i32 [[DIV]]
+;
%shl = shl nsw i32 %a, 2
%div = sdiv i32 %shl, 12
ret i32 %div
-; CHECK-LABEL: @test21(
-; CHECK-NEXT: %div = sdiv i32 %a, 3
-; CHECK-NEXT: ret i32 %div
}
define i32 @test22(i32 %a) {
+; CHECK-LABEL: @test22(
+; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 %a, 4
+; CHECK-NEXT: ret i32 [[DIV]]
+;
%mul = mul nsw i32 %a, 3
%div = sdiv i32 %mul, 12
ret i32 %div
-; CHECK-LABEL: @test22(
-; CHECK-NEXT: %div = sdiv i32 %a, 4
-; CHECK-NEXT: ret i32 %div
}
define i32 @test23(i32 %a) {
+; CHECK-LABEL: @test23(
+; CHECK-NEXT: [[DIV:%.*]] = udiv i32 %a, 3
+; CHECK-NEXT: ret i32 [[DIV]]
+;
%shl = shl nuw i32 %a, 2
%div = udiv i32 %shl, 12
ret i32 %div
-; CHECK-LABEL: @test23(
-; CHECK-NEXT: %div = udiv i32 %a, 3
-; CHECK-NEXT: ret i32 %div
}
define i32 @test24(i32 %a) {
+; CHECK-LABEL: @test24(
+; CHECK-NEXT: [[DIV:%.*]] = lshr i32 %a, 2
+; CHECK-NEXT: ret i32 [[DIV]]
+;
%mul = mul nuw i32 %a, 3
%div = udiv i32 %mul, 12
ret i32 %div
-; CHECK-LABEL: @test24(
-; CHECK-NEXT: %div = lshr i32 %a, 2
-; CHECK-NEXT: ret i32 %div
}
define i32 @test25(i32 %a) {
+; CHECK-LABEL: @test25(
+; CHECK-NEXT: [[DIV:%.*]] = shl nsw i32 %a, 1
+; CHECK-NEXT: ret i32 [[DIV]]
+;
%shl = shl nsw i32 %a, 2
%div = sdiv i32 %shl, 2
ret i32 %div
-; CHECK-LABEL: @test25(
-; CHECK-NEXT: %div = shl nsw i32 %a, 1
-; CHECK-NEXT: ret i32 %div
}
define i32 @test26(i32 %a) {
+; CHECK-LABEL: @test26(
+; CHECK-NEXT: [[DIV:%.*]] = shl nsw i32 %a, 2
+; CHECK-NEXT: ret i32 [[DIV]]
+;
%mul = mul nsw i32 %a, 12
%div = sdiv i32 %mul, 3
ret i32 %div
-; CHECK-LABEL: @test26(
-; CHECK-NEXT: %div = shl nsw i32 %a, 2
-; CHECK-NEXT: ret i32 %div
}
define i32 @test27(i32 %a) {
+; CHECK-LABEL: @test27(
+; CHECK-NEXT: [[DIV:%.*]] = shl nuw i32 %a, 1
+; CHECK-NEXT: ret i32 [[DIV]]
+;
%shl = shl nuw i32 %a, 2
%div = udiv i32 %shl, 2
ret i32 %div
-; CHECK-LABEL: @test27(
-; CHECK-NEXT: %div = shl nuw i32 %a, 1
-; CHECK-NEXT: ret i32 %div
}
define i32 @test28(i32 %a) {
+; CHECK-LABEL: @test28(
+; CHECK-NEXT: [[DIV:%.*]] = mul nuw i32 %a, 12
+; CHECK-NEXT: ret i32 [[DIV]]
+;
%mul = mul nuw i32 %a, 36
%div = udiv i32 %mul, 3
ret i32 %div
-; CHECK-LABEL: @test28(
-; CHECK-NEXT: %div = mul nuw i32 %a, 12
-; CHECK-NEXT: ret i32 %div
}
define i32 @test29(i32 %a) {
+; CHECK-LABEL: @test29(
+; CHECK-NEXT: [[MUL_LOBIT:%.*]] = and i32 %a, 1
+; CHECK-NEXT: ret i32 [[MUL_LOBIT]]
+;
%mul = shl nsw i32 %a, 31
%div = sdiv i32 %mul, -2147483648
ret i32 %div
-; CHECK-LABEL: @test29(
-; CHECK-NEXT: %[[and:.*]] = and i32 %a, 1
-; CHECK-NEXT: ret i32 %[[and]]
}
define i32 @test30(i32 %a) {
+; CHECK-LABEL: @test30(
+; CHECK-NEXT: ret i32 %a
+;
%mul = shl nuw i32 %a, 31
%div = udiv i32 %mul, -2147483648
ret i32 %div
-; CHECK-LABEL: @test30(
-; CHECK-NEXT: ret i32 %a
}
define <2 x i32> @test31(<2 x i32> %x) {
+; CHECK-LABEL: @test31(
+; CHECK-NEXT: ret <2 x i32> zeroinitializer
+;
%shr = lshr <2 x i32> %x, <i32 31, i32 31>
%div = udiv <2 x i32> %shr, <i32 2147483647, i32 2147483647>
ret <2 x i32> %div
-; CHECK-LABEL: @test31(
-; CHECK-NEXT: ret <2 x i32> zeroinitializer
}
define i32 @test32(i32 %a, i32 %b) {
+; CHECK-LABEL: @test32(
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 2, %b
+; CHECK-NEXT: [[DIV:%.*]] = lshr i32 [[SHL]], 2
+; CHECK-NEXT: [[DIV2:%.*]] = udiv i32 %a, [[DIV]]
+; CHECK-NEXT: ret i32 [[DIV2]]
+;
%shl = shl i32 2, %b
%div = lshr i32 %shl, 2
%div2 = udiv i32 %a, %div
ret i32 %div2
-; CHECK-LABEL: @test32(
-; CHECK-NEXT: %[[shl:.*]] = shl i32 2, %b
-; CHECK-NEXT: %[[shr:.*]] = lshr i32 %[[shl]], 2
-; CHECK-NEXT: %[[div:.*]] = udiv i32 %a, %[[shr]]
-; CHECK-NEXT: ret i32
}
define <2 x i64> @test33(<2 x i64> %x) nounwind {
+; CHECK-LABEL: @test33(
+; CHECK-NEXT: [[DIV:%.*]] = udiv exact <2 x i64> %x, <i64 192, i64 192>
+; CHECK-NEXT: ret <2 x i64> [[DIV]]
+;
%shr = lshr exact <2 x i64> %x, <i64 5, i64 5>
%div = udiv exact <2 x i64> %shr, <i64 6, i64 6>
ret <2 x i64> %div
-; CHECK-LABEL: @test33(
-; CHECK-NEXT: udiv exact <2 x i64> %x, <i64 192, i64 192>
-; CHECK-NEXT: ret <2 x i64>
}
define <2 x i64> @test34(<2 x i64> %x) nounwind {
+; CHECK-LABEL: @test34(
+; CHECK-NEXT: [[DIV:%.*]] = sdiv exact <2 x i64> %x, <i64 -3, i64 -4>
+; CHECK-NEXT: ret <2 x i64> [[DIV]]
+;
%neg = sub nsw <2 x i64> zeroinitializer, %x
%div = sdiv exact <2 x i64> %neg, <i64 3, i64 4>
ret <2 x i64> %div
-; CHECK-LABEL: @test34(
-; CHECK-NEXT: sdiv exact <2 x i64> %x, <i64 -3, i64 -4>
-; CHECK-NEXT: ret <2 x i64>
}
define i32 @test35(i32 %A) {
+; CHECK-LABEL: @test35(
+; CHECK-NEXT: [[AND:%.*]] = and i32 %A, 2147483647
+; CHECK-NEXT: [[MUL:%.*]] = udiv exact i32 [[AND]], 2147483647
+; CHECK-NEXT: ret i32 [[MUL]]
+;
%and = and i32 %A, 2147483647
%mul = sdiv exact i32 %and, 2147483647
ret i32 %mul
-; CHECK-LABEL: @test35(
-; CHECK-NEXT: %[[and:.*]] = and i32 %A, 2147483647
-; CHECK-NEXT: %[[udiv:.*]] = udiv exact i32 %[[and]], 2147483647
-; CHECK-NEXT: ret i32 %[[udiv]]
}
define i32 @test36(i32 %A) {
+; CHECK-LABEL: @test36(
+; CHECK-NEXT: [[AND:%.*]] = and i32 %A, 2147483647
+; CHECK-NEXT: [[MUL:%.*]] = lshr exact i32 [[AND]], %A
+; CHECK-NEXT: ret i32 [[MUL]]
+;
%and = and i32 %A, 2147483647
%shl = shl nsw i32 1, %A
%mul = sdiv exact i32 %and, %shl
ret i32 %mul
-; CHECK-LABEL: @test36(
-; CHECK-NEXT: %[[and:.*]] = and i32 %A, 2147483647
-; CHECK-NEXT: %[[shr:.*]] = lshr exact i32 %[[and]], %A
-; CHECK-NEXT: ret i32 %[[shr]]
+}
+
+; FIXME: Vector should get same transform as scalar.
+
+define <2 x i32> @test36vec(<2 x i32> %A) {
+; CHECK-LABEL: @test36vec(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> %A, <i32 2147483647, i32 2147483647>
+; CHECK-NEXT: [[SHL:%.*]] = shl nuw nsw <2 x i32> <i32 1, i32 1>, %A
+; CHECK-NEXT: [[MUL:%.*]] = sdiv exact <2 x i32> [[AND]], [[SHL]]
+; CHECK-NEXT: ret <2 x i32> [[MUL]]
+;
+ %and = and <2 x i32> %A, <i32 2147483647, i32 2147483647>
+ %shl = shl nsw <2 x i32> <i32 1, i32 1>, %A
+ %mul = sdiv exact <2 x i32> %and, %shl
+ ret <2 x i32> %mul
}
define i32 @test37(i32* %b) {
+; CHECK-LABEL: @test37(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: store i32 0, i32* %b, align 4
+; CHECK-NEXT: br i1 undef, label %lor.rhs, label %lor.end
+; CHECK: lor.rhs:
+; CHECK-NEXT: br label %lor.end
+; CHECK: lor.end:
+; CHECK-NEXT: ret i32 0
+;
entry:
store i32 0, i32* %b, align 4
%0 = load i32, i32* %b, align 4
@@ -338,6 +407,92 @@ lor.end: ; preds = %lor.rhs, %entry
%t.0 = phi i32 [ %0, %entry ], [ %mul, %lor.rhs ]
%div = sdiv i32 %t.0, 2
ret i32 %div
-; CHECK-LABEL: @test37(
-; CHECK: ret i32 0
}
+
+; We can perform the division in the smaller type.
+
+define i32 @shrink(i8 %x) {
+; CHECK-LABEL: @shrink(
+; CHECK-NEXT: [[TMP1:%.*]] = sdiv i8 %x, 127
+; CHECK-NEXT: [[DIV:%.*]] = sext i8 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[DIV]]
+;
+ %conv = sext i8 %x to i32
+ %div = sdiv i32 %conv, 127
+ ret i32 %div
+}
+
+; Division in the smaller type can lead to more optimizations.
+
+define i32 @zap(i8 %x) {
+; CHECK-LABEL: @zap(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i8 %x, -128
+; CHECK-NEXT: [[DIV:%.*]] = zext i1 [[TMP1]] to i32
+; CHECK-NEXT: ret i32 [[DIV]]
+;
+ %conv = sext i8 %x to i32
+ %div = sdiv i32 %conv, -128
+ ret i32 %div
+}
+
+; Splat constant divisors should get the same folds.
+
+define <3 x i32> @shrink_vec(<3 x i8> %x) {
+; CHECK-LABEL: @shrink_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = sdiv <3 x i8> %x, <i8 127, i8 127, i8 127>
+; CHECK-NEXT: [[DIV:%.*]] = sext <3 x i8> [[TMP1]] to <3 x i32>
+; CHECK-NEXT: ret <3 x i32> [[DIV]]
+;
+ %conv = sext <3 x i8> %x to <3 x i32>
+ %div = sdiv <3 x i32> %conv, <i32 127, i32 127, i32 127>
+ ret <3 x i32> %div
+}
+
+define <2 x i32> @zap_vec(<2 x i8> %x) {
+; CHECK-LABEL: @zap_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <2 x i8> %x, <i8 -128, i8 -128>
+; CHECK-NEXT: [[DIV:%.*]] = zext <2 x i1> [[TMP1]] to <2 x i32>
+; CHECK-NEXT: ret <2 x i32> [[DIV]]
+;
+ %conv = sext <2 x i8> %x to <2 x i32>
+ %div = sdiv <2 x i32> %conv, <i32 -128, i32 -128>
+ ret <2 x i32> %div
+}
+
+; But we can't do this if the signed constant won't fit in the original type.
+
+define i32 @shrink_no(i8 %x) {
+; CHECK-LABEL: @shrink_no(
+; CHECK-NEXT: [[CONV:%.*]] = sext i8 %x to i32
+; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[CONV]], 128
+; CHECK-NEXT: ret i32 [[DIV]]
+;
+ %conv = sext i8 %x to i32
+ %div = sdiv i32 %conv, 128
+ ret i32 %div
+}
+
+define i32 @shrink_no2(i8 %x) {
+; CHECK-LABEL: @shrink_no2(
+; CHECK-NEXT: [[CONV:%.*]] = sext i8 %x to i32
+; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[CONV]], -129
+; CHECK-NEXT: ret i32 [[DIV]]
+;
+ %conv = sext i8 %x to i32
+ %div = sdiv i32 %conv, -129
+ ret i32 %div
+}
+
+; 17 bits are needed to represent 65535 as a signed value, so this shouldn't fold.
+
+define i32 @shrink_no3(i16 %x) {
+; CHECK-LABEL: @shrink_no3(
+; CHECK-NEXT: [[CONV:%.*]] = sext i16 %x to i32
+; CHECK-NEXT: [[DIV:%.*]] = sdiv i32 [[CONV]], 65535
+; CHECK-NEXT: ret i32 [[DIV]]
+;
+ %conv = sext i16 %x to i32
+ %div = sdiv i32 %conv, 65535
+ ret i32 %div
+}
+
diff --git a/test/Transforms/InstCombine/dom-conditions.ll b/test/Transforms/InstCombine/dom-conditions.ll
deleted file mode 100644
index 426404352681..000000000000
--- a/test/Transforms/InstCombine/dom-conditions.ll
+++ /dev/null
@@ -1,152 +0,0 @@
-; RUN: opt -instcombine -value-tracking-dom-conditions=1 -S < %s | FileCheck %s
-
-target datalayout = "e-p:64:64:64-p1:16:16:16-p2:32:32:32-p3:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-
-define i1 @test_cmp_ult(i64 %A) {
-; CHECK-LABEL: @test_cmp_ult
-entry:
- %cmp = icmp ult i64 %A, 64
- br i1 %cmp, label %taken, label %untaken
-
-taken:
-; CHECK-LABEL: taken:
-; CHECK-NEXT: ret i1 false
- %cmp2 = icmp ugt i64 %A, 64
- ret i1 %cmp2
-untaken:
- ret i1 true
-}
-
-define i1 @test_cmp_ule(i64 %A) {
-; CHECK-LABEL: @test_cmp_ule
-entry:
- %cmp = icmp ule i64 %A, 64
- br i1 %cmp, label %taken, label %untaken
-
-taken:
-; CHECK-LABEL: taken:
-; CHECK-NEXT: ret i1 false
- %cmp2 = icmp ugt i64 %A, 128
- ret i1 %cmp2
-untaken:
- ret i1 true
-}
-
-define i1 @test_cmp_sgt(i32 %A) {
-; CHECK-LABEL: @test_cmp_sgt
-entry:
- %cmp = icmp sgt i32 %A, 10
- br i1 %cmp, label %taken, label %untaken
-
-taken:
-; CHECK-LABEL: taken:
-; CHECK-NEXT: ret i1 true
- %cmp2 = icmp sgt i32 %A, -1
- ret i1 %cmp2
-untaken:
- ret i1 true
-}
-
-define i64 @test_add_zero_bits(i64 %A) {
-; CHECK-LABEL: @test_add_zero_bits
-entry:
- %cmp = icmp eq i64 %A, 2
- br i1 %cmp, label %taken, label %untaken
-
-taken:
-; CHECK-LABEL: taken:
-; CHECK-NEXT: ret i64 3
- %add = add i64 %A, 1
- ret i64 %add
-untaken:
- ret i64 %A
-}
-
-define i64 @test_add_nsw(i64 %A) {
-; CHECK-LABEL: @test_add_nsw
-entry:
- %cmp = icmp ult i64 %A, 20
- br i1 %cmp, label %taken, label %untaken
-
-taken:
-; CHECK-LABEL: taken:
-; CHECK-NEXT: %add = add nuw nsw i64 %A, 1
-; CHECK-NEXT: ret i64 %add
- %add = add i64 %A, 1
- ret i64 %add
-untaken:
- ret i64 %A
-}
-
-; After sinking the instructions into the if block, check that we
-; can simplify some of them using dominating conditions.
-define i32 @test_add_zero_bits_sink(i32 %x) nounwind ssp {
-; CHECK-LABEL: @test_add_zero_bits_sink(
-; CHECK-NOT: sdiv i32
-entry:
- %a = add nsw i32 %x, 16
- %b = sdiv i32 %a, %x
- %cmp = icmp ult i32 %x, 7
- br i1 %cmp, label %bb1, label %bb2
-
-bb1:
-; CHECK-LABEL: bb1:
-; CHECK-NEXT: or i32 %x, 16
-; CHECK-NEXT: udiv i32
- ret i32 %b
-
-bb2:
- ret i32 %x
-}
-
-; A condition in the same block gives no information
-define i32 @test_neg1(i32 %x) nounwind ssp {
-; CHECK-LABEL: @test_neg1
-; CHECK: add
-; CHECK: sdiv
-; CHECK: icmp
-; CHECK: select
-entry:
- %a = add nsw i32 %x, 16
- %b = sdiv i32 %a, %x
- %cmp = icmp ult i32 %x, 7
- %ret = select i1 %cmp, i32 %a, i32 %b
- ret i32 %ret
-}
-
-; A non-dominating edge gives no information
-define i32 @test_neg2(i32 %x) {
-; CHECK-LABEL: @test_neg2
-entry:
- %cmp = icmp ult i32 %x, 7
- br i1 %cmp, label %bb1, label %merge
-
-bb1:
- br label %merge
-
-merge:
-; CHECK-LABEL: merge:
-; CHECK: icmp
-; CHECK: select
- %cmp2 = icmp ult i32 %x, 7
- %ret = select i1 %cmp2, i32 %x, i32 0
- ret i32 %ret
-}
-
-; A unconditional branch expressed as a condition one gives no
-; information (and shouldn't trip any asserts.)
-define i32 @test_neg3(i32 %x) {
-; CHECK-LABEL: @test_neg3
-entry:
- %cmp = icmp ult i32 %x, 7
- br i1 %cmp, label %merge, label %merge
-merge:
-; CHECK-LABEL: merge:
-; CHECK: icmp
-; CHECK: select
- %cmp2 = icmp ult i32 %x, 7
- %ret = select i1 %cmp2, i32 %x, i32 0
- ret i32 %ret
-}
-
-declare i32 @bar()
diff --git a/test/Transforms/InstCombine/double-float-shrink-1.ll b/test/Transforms/InstCombine/double-float-shrink-1.ll
index 74f3ebbf5230..5d015bc99ae9 100644
--- a/test/Transforms/InstCombine/double-float-shrink-1.ll
+++ b/test/Transforms/InstCombine/double-float-shrink-1.ll
@@ -386,30 +386,28 @@ declare fp128 @fmin(fp128, fp128) ; This is not the 'fmin' you're looking for.
declare double @fmax(double, double)
-declare double @tanh(double) #1
-declare double @tan(double) #1
+declare double @tanh(double)
+declare double @tan(double)
; sqrt is a special case: the shrinking optimization
; is valid even without unsafe-fp-math.
declare double @sqrt(double)
declare double @llvm.sqrt.f64(double)
-declare double @sin(double) #1
-declare double @log2(double) #1
-declare double @log1p(double) #1
-declare double @log10(double) #1
-declare double @log(double) #1
-declare double @logb(double) #1
-declare double @exp10(double) #1
-declare double @expm1(double) #1
-declare double @exp(double) #1
-declare double @cbrt(double) #1
-declare double @atanh(double) #1
-declare double @atan(double) #1
-declare double @acos(double) #1
-declare double @acosh(double) #1
-declare double @asin(double) #1
-declare double @asinh(double) #1
-
-attributes #1 = { "unsafe-fp-math"="true" }
+declare double @sin(double)
+declare double @log2(double)
+declare double @log1p(double)
+declare double @log10(double)
+declare double @log(double)
+declare double @logb(double)
+declare double @exp10(double)
+declare double @expm1(double)
+declare double @exp(double)
+declare double @cbrt(double)
+declare double @atanh(double)
+declare double @atan(double)
+declare double @acos(double)
+declare double @acosh(double)
+declare double @asin(double)
+declare double @asinh(double)
diff --git a/test/Transforms/InstCombine/exact.ll b/test/Transforms/InstCombine/exact.ll
index 868d60ac46b1..9edcd2491ffa 100644
--- a/test/Transforms/InstCombine/exact.ll
+++ b/test/Transforms/InstCombine/exact.ll
@@ -1,109 +1,133 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
-; CHECK-LABEL: @sdiv1(
-; CHECK: sdiv i32 %x, 8
define i32 @sdiv1(i32 %x) {
+; CHECK-LABEL: @sdiv1(
+; CHECK-NEXT: [[Y:%.*]] = sdiv i32 %x, 8
+; CHECK-NEXT: ret i32 [[Y]]
+;
%y = sdiv i32 %x, 8
ret i32 %y
}
-; CHECK-LABEL: @sdiv2(
-; CHECK: ashr exact i32 %x, 3
define i32 @sdiv2(i32 %x) {
+; CHECK-LABEL: @sdiv2(
+; CHECK-NEXT: [[Y:%.*]] = ashr exact i32 %x, 3
+; CHECK-NEXT: ret i32 [[Y]]
+;
%y = sdiv exact i32 %x, 8
ret i32 %y
}
-; CHECK-LABEL: @sdiv3(
-; CHECK: %y = srem i32 %x, 3
-; CHECK: %z = sub i32 %x, %y
-; CHECK: ret i32 %z
+define <2 x i32> @sdiv2_vec(<2 x i32> %x) {
+; CHECK-LABEL: @sdiv2_vec(
+; CHECK-NEXT: [[Y:%.*]] = ashr exact <2 x i32> %x, <i32 7, i32 7>
+; CHECK-NEXT: ret <2 x i32> [[Y]]
+;
+ %y = sdiv exact <2 x i32> %x, <i32 128, i32 128>
+ ret <2 x i32> %y
+}
+
define i32 @sdiv3(i32 %x) {
+; CHECK-LABEL: @sdiv3(
+; CHECK-NEXT: [[Y:%.*]] = srem i32 %x, 3
+; CHECK-NEXT: [[Z:%.*]] = sub i32 %x, [[Y]]
+; CHECK-NEXT: ret i32 [[Z]]
+;
%y = sdiv i32 %x, 3
%z = mul i32 %y, 3
ret i32 %z
}
-; CHECK-LABEL: @sdiv4(
-; CHECK: ret i32 %x
define i32 @sdiv4(i32 %x) {
+; CHECK-LABEL: @sdiv4(
+; CHECK-NEXT: ret i32 %x
+;
%y = sdiv exact i32 %x, 3
%z = mul i32 %y, 3
ret i32 %z
}
-; CHECK: i32 @sdiv5
-; CHECK: %y = srem i32 %x, 3
-; CHECK: %z = sub i32 %y, %x
-; CHECK: ret i32 %z
define i32 @sdiv5(i32 %x) {
+; CHECK-LABEL: @sdiv5(
+; CHECK-NEXT: [[Y:%.*]] = srem i32 %x, 3
+; CHECK-NEXT: [[Z:%.*]] = sub i32 [[Y]], %x
+; CHECK-NEXT: ret i32 [[Z]]
+;
%y = sdiv i32 %x, 3
%z = mul i32 %y, -3
ret i32 %z
}
-; CHECK-LABEL: @sdiv6(
-; CHECK: %z = sub i32 0, %x
-; CHECK: ret i32 %z
define i32 @sdiv6(i32 %x) {
+; CHECK-LABEL: @sdiv6(
+; CHECK-NEXT: [[Z:%.*]] = sub i32 0, %x
+; CHECK-NEXT: ret i32 [[Z]]
+;
%y = sdiv exact i32 %x, 3
%z = mul i32 %y, -3
ret i32 %z
}
-; CHECK-LABEL: @udiv1(
-; CHECK: ret i32 %x
define i32 @udiv1(i32 %x, i32 %w) {
+; CHECK-LABEL: @udiv1(
+; CHECK-NEXT: ret i32 %x
+;
%y = udiv exact i32 %x, %w
%z = mul i32 %y, %w
ret i32 %z
}
-; CHECK-LABEL: @udiv2(
-; CHECK: %z = lshr exact i32 %x, %w
-; CHECK: ret i32 %z
define i32 @udiv2(i32 %x, i32 %w) {
+; CHECK-LABEL: @udiv2(
+; CHECK-NEXT: [[Z:%.*]] = lshr exact i32 %x, %w
+; CHECK-NEXT: ret i32 [[Z]]
+;
%y = shl i32 1, %w
%z = udiv exact i32 %x, %y
ret i32 %z
}
-; CHECK-LABEL: @ashr1(
-; CHECK: %B = ashr exact i64 %A, 2
-; CHECK: ret i64 %B
define i64 @ashr1(i64 %X) nounwind {
+; CHECK-LABEL: @ashr1(
+; CHECK-NEXT: [[A:%.*]] = shl i64 %X, 8
+; CHECK-NEXT: [[B:%.*]] = ashr exact i64 [[A]], 2
+; CHECK-NEXT: ret i64 [[B]]
+;
%A = shl i64 %X, 8
%B = ashr i64 %A, 2 ; X/4
ret i64 %B
}
; PR9120
-; CHECK-LABEL: @ashr_icmp1(
-; CHECK: %B = icmp eq i64 %X, 0
-; CHECK: ret i1 %B
define i1 @ashr_icmp1(i64 %X) nounwind {
+; CHECK-LABEL: @ashr_icmp1(
+; CHECK-NEXT: [[B:%.*]] = icmp eq i64 %X, 0
+; CHECK-NEXT: ret i1 [[B]]
+;
%A = ashr exact i64 %X, 2 ; X/4
%B = icmp eq i64 %A, 0
ret i1 %B
}
-; CHECK-LABEL: @ashr_icmp2(
-; CHECK: %Z = icmp slt i64 %X, 16
-; CHECK: ret i1 %Z
define i1 @ashr_icmp2(i64 %X) nounwind {
- %Y = ashr exact i64 %X, 2 ; x / 4
- %Z = icmp slt i64 %Y, 4 ; x < 16
- ret i1 %Z
+; CHECK-LABEL: @ashr_icmp2(
+; CHECK-NEXT: [[Z:%.*]] = icmp slt i64 %X, 16
+; CHECK-NEXT: ret i1 [[Z]]
+;
+ %Y = ashr exact i64 %X, 2 ; x / 4
+ %Z = icmp slt i64 %Y, 4 ; x < 16
+ ret i1 %Z
}
; PR9998
; Make sure we don't transform the ashr here into an sdiv
-; CHECK-LABEL: @pr9998(
-; CHECK: [[BIT:%[A-Za-z0-9.]+]] = and i32 %V, 1
-; CHECK-NEXT: [[CMP:%[A-Za-z0-9.]+]] = icmp ne i32 [[BIT]], 0
-; CHECK-NEXT: ret i1 [[CMP]]
define i1 @pr9998(i32 %V) nounwind {
-entry:
+; CHECK-LABEL: @pr9998(
+; CHECK-NEXT: [[W_MASK:%.*]] = and i32 %V, 1
+; CHECK-NEXT: [[Z:%.*]] = icmp ne i32 [[W_MASK]], 0
+; CHECK-NEXT: ret i1 [[Z]]
+;
%W = shl i32 %V, 31
%X = ashr exact i32 %W, 31
%Y = sext i32 %X to i64
@@ -111,59 +135,81 @@ entry:
ret i1 %Z
}
-
-
+define i1 @udiv_icmp1(i64 %X) {
; CHECK-LABEL: @udiv_icmp1(
-; CHECK: icmp ne i64 %X, 0
-define i1 @udiv_icmp1(i64 %X) nounwind {
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i64 %X, 0
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
%A = udiv exact i64 %X, 5 ; X/5
%B = icmp ne i64 %A, 0
ret i1 %B
}
+define i1 @udiv_icmp2(i64 %X) {
+; CHECK-LABEL: @udiv_icmp2(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 %X, 0
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %A = udiv exact i64 %X, 5 ; X/5 == 0 --> x == 0
+ %B = icmp eq i64 %A, 0
+ ret i1 %B
+}
+
+define i1 @sdiv_icmp1(i64 %X) {
; CHECK-LABEL: @sdiv_icmp1(
-; CHECK: icmp eq i64 %X, 0
-define i1 @sdiv_icmp1(i64 %X) nounwind {
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 %X, 0
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
%A = sdiv exact i64 %X, 5 ; X/5 == 0 --> x == 0
%B = icmp eq i64 %A, 0
ret i1 %B
}
+define i1 @sdiv_icmp2(i64 %X) {
; CHECK-LABEL: @sdiv_icmp2(
-; CHECK: icmp eq i64 %X, 5
-define i1 @sdiv_icmp2(i64 %X) nounwind {
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 %X, 5
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
%A = sdiv exact i64 %X, 5 ; X/5 == 1 --> x == 5
%B = icmp eq i64 %A, 1
ret i1 %B
}
+define i1 @sdiv_icmp3(i64 %X) {
; CHECK-LABEL: @sdiv_icmp3(
-; CHECK: icmp eq i64 %X, -5
-define i1 @sdiv_icmp3(i64 %X) nounwind {
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 %X, -5
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
%A = sdiv exact i64 %X, 5 ; X/5 == -1 --> x == -5
%B = icmp eq i64 %A, -1
ret i1 %B
}
+define i1 @sdiv_icmp4(i64 %X) {
; CHECK-LABEL: @sdiv_icmp4(
-; CHECK: icmp eq i64 %X, 0
-define i1 @sdiv_icmp4(i64 %X) nounwind {
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 %X, 0
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
%A = sdiv exact i64 %X, -5 ; X/-5 == 0 --> x == 0
%B = icmp eq i64 %A, 0
ret i1 %B
}
+define i1 @sdiv_icmp5(i64 %X) {
; CHECK-LABEL: @sdiv_icmp5(
-; CHECK: icmp eq i64 %X, -5
-define i1 @sdiv_icmp5(i64 %X) nounwind {
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 %X, -5
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
%A = sdiv exact i64 %X, -5 ; X/-5 == 1 --> x == -5
%B = icmp eq i64 %A, 1
ret i1 %B
}
+define i1 @sdiv_icmp6(i64 %X) {
; CHECK-LABEL: @sdiv_icmp6(
-; CHECK: icmp eq i64 %X, 5
-define i1 @sdiv_icmp6(i64 %X) nounwind {
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i64 %X, 5
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
%A = sdiv exact i64 %X, -5 ; X/-5 == 1 --> x == 5
%B = icmp eq i64 %A, -1
ret i1 %B
diff --git a/test/Transforms/InstCombine/fabs.ll b/test/Transforms/InstCombine/fabs.ll
index 941270df0e97..0479549bea3f 100644
--- a/test/Transforms/InstCombine/fabs.ll
+++ b/test/Transforms/InstCombine/fabs.ll
@@ -41,7 +41,6 @@ define fp128 @square_fabs_call_f128(fp128 %x) {
declare float @llvm.fabs.f32(float)
declare double @llvm.fabs.f64(double)
declare fp128 @llvm.fabs.f128(fp128)
-declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
define float @square_fabs_intrinsic_f32(float %x) {
%mul = fmul float %x, %x
@@ -99,27 +98,3 @@ define float @square_fabs_shrink_call2(float %x) {
; CHECK-NEXT: ret float %sq
}
-; A scalar fabs op makes the sign bit zero, so masking off all of the other bits means we can return zero.
-
-define i32 @fabs_value_tracking_f32(float %x) {
- %call = call float @llvm.fabs.f32(float %x)
- %bc = bitcast float %call to i32
- %and = and i32 %bc, 2147483648
- ret i32 %and
-
-; CHECK-LABEL: fabs_value_tracking_f32(
-; CHECK: ret i32 0
-}
-
-; TODO: A vector fabs op makes the sign bits zero, so masking off all of the other bits means we can return zero.
-
-define <4 x i32> @fabs_value_tracking_v4f32(<4 x float> %x) {
- %call = call <4 x float> @llvm.fabs.v4f32(<4 x float> %x)
- %bc = bitcast <4 x float> %call to <4 x i32>
- %and = and <4 x i32> %bc, <i32 2147483648, i32 2147483648, i32 2147483648, i32 2147483648>
- ret <4 x i32> %and
-
-; CHECK-LABEL: fabs_value_tracking_v4f32(
-; CHECK: ret <4 x i32> %and
-}
-
diff --git a/test/Transforms/InstCombine/fast-math-scalarization.ll b/test/Transforms/InstCombine/fast-math-scalarization.ll
new file mode 100644
index 000000000000..406ebebfdd26
--- /dev/null
+++ b/test/Transforms/InstCombine/fast-math-scalarization.ll
@@ -0,0 +1,39 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+; CHECK-LABEL: test_scalarize_phi
+; CHECK: fmul fast float
+define void @test_scalarize_phi(i32 * %n, float * %inout) {
+entry:
+ %0 = load volatile float, float * %inout, align 4
+ %splat.splatinsert = insertelement <4 x float> undef, float %0, i32 0
+ %splat.splat = shufflevector <4 x float> %splat.splatinsert, <4 x float> undef, <4 x i32> zeroinitializer
+ %splat.splatinsert1 = insertelement <4 x float> undef, float 3.0, i32 0
+ br label %for.cond
+
+for.cond:
+ %x.0 = phi <4 x float> [ %splat.splat, %entry ], [ %mul, %for.body ]
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %1 = load i32, i32 * %n, align 4
+ %cmp = icmp ne i32 %i.0, %1
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ %2 = extractelement <4 x float> %x.0, i32 1
+ store volatile float %2, float * %inout, align 4
+ %mul = fmul fast <4 x float> %x.0, <float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000>
+ %inc = add nsw i32 %i.0, 1
+ br label %for.cond
+
+for.end:
+ ret void
+}
+
+; CHECK-LABEL: test_extract_element_fastmath
+; CHECK: fadd fast float
+define float @test_extract_element_fastmath(<4 x float> %x) #0 {
+entry:
+ %add = fadd fast <4 x float> %x, <float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000, float 0x4002A3D700000000>
+ %0 = extractelement <4 x float> %add, i32 2
+ ret float %0
+}
+
diff --git a/test/Transforms/InstCombine/fcmp-special.ll b/test/Transforms/InstCombine/fcmp-special.ll
index a39021e08d1d..df424617b42e 100644
--- a/test/Transforms/InstCombine/fcmp-special.ll
+++ b/test/Transforms/InstCombine/fcmp-special.ll
@@ -1,154 +1,175 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
; Infinity
-; CHECK: inf0
-; CHECK: ret i1 false
-define i1 @inf0(double %arg) nounwind readnone {
+define i1 @inf0(double %arg) {
+; CHECK-LABEL: @inf0(
+; CHECK-NEXT: ret i1 false
+;
%tmp = fcmp ogt double %arg, 0x7FF0000000000000
ret i1 %tmp
}
-; CHECK: inf1
-; CHECK: ret i1 true
-define i1 @inf1(double %arg) nounwind readnone {
+define i1 @inf1(double %arg) {
+; CHECK-LABEL: @inf1(
+; CHECK-NEXT: ret i1 true
+;
%tmp = fcmp ule double %arg, 0x7FF0000000000000
ret i1 %tmp
}
; Negative infinity
-; CHECK: ninf0
-; CHECK: ret i1 false
-define i1 @ninf0(double %arg) nounwind readnone {
+define i1 @ninf0(double %arg) {
+; CHECK-LABEL: @ninf0(
+; CHECK-NEXT: ret i1 false
+;
%tmp = fcmp olt double %arg, 0xFFF0000000000000
ret i1 %tmp
}
-; CHECK: ninf1
-; CHECK: ret i1 true
-define i1 @ninf1(double %arg) nounwind readnone {
+define i1 @ninf1(double %arg) {
+; CHECK-LABEL: @ninf1(
+; CHECK-NEXT: ret i1 true
+;
%tmp = fcmp uge double %arg, 0xFFF0000000000000
ret i1 %tmp
}
; NaNs
-; CHECK: nan0
-; CHECK: ret i1 false
-define i1 @nan0(double %arg) nounwind readnone {
+define i1 @nan0(double %arg) {
+; CHECK-LABEL: @nan0(
+; CHECK-NEXT: ret i1 false
+;
%tmp = fcmp ord double %arg, 0x7FF00000FFFFFFFF
ret i1 %tmp
}
-; CHECK: nan1
-; CHECK: ret i1 false
-define i1 @nan1(double %arg) nounwind readnone {
+define i1 @nan1(double %arg) {
+; CHECK-LABEL: @nan1(
+; CHECK-NEXT: ret i1 false
+;
%tmp = fcmp oeq double %arg, 0x7FF00000FFFFFFFF
ret i1 %tmp
}
-; CHECK: nan2
-; CHECK: ret i1 false
-define i1 @nan2(double %arg) nounwind readnone {
+define i1 @nan2(double %arg) {
+; CHECK-LABEL: @nan2(
+; CHECK-NEXT: ret i1 false
+;
%tmp = fcmp olt double %arg, 0x7FF00000FFFFFFFF
ret i1 %tmp
}
-; CHECK: nan3
-; CHECK: ret i1 true
-define i1 @nan3(double %arg) nounwind readnone {
+define i1 @nan3(double %arg) {
+; CHECK-LABEL: @nan3(
+; CHECK-NEXT: ret i1 true
+;
%tmp = fcmp uno double %arg, 0x7FF00000FFFFFFFF
ret i1 %tmp
}
-; CHECK: nan4
-; CHECK: ret i1 true
-define i1 @nan4(double %arg) nounwind readnone {
+define i1 @nan4(double %arg) {
+; CHECK-LABEL: @nan4(
+; CHECK-NEXT: ret i1 true
+;
%tmp = fcmp une double %arg, 0x7FF00000FFFFFFFF
ret i1 %tmp
}
-; CHECK: nan5
-; CHECK: ret i1 true
-define i1 @nan5(double %arg) nounwind readnone {
+define i1 @nan5(double %arg) {
+; CHECK-LABEL: @nan5(
+; CHECK-NEXT: ret i1 true
+;
%tmp = fcmp ult double %arg, 0x7FF00000FFFFFFFF
ret i1 %tmp
}
; Negative NaN.
-; CHECK: nnan0
-; CHECK: ret i1 false
-define i1 @nnan0(double %arg) nounwind readnone {
+define i1 @nnan0(double %arg) {
+; CHECK-LABEL: @nnan0(
+; CHECK-NEXT: ret i1 false
+;
%tmp = fcmp ord double %arg, 0xFFF00000FFFFFFFF
ret i1 %tmp
}
-; CHECK: nnan1
-; CHECK: ret i1 false
-define i1 @nnan1(double %arg) nounwind readnone {
+define i1 @nnan1(double %arg) {
+; CHECK-LABEL: @nnan1(
+; CHECK-NEXT: ret i1 false
+;
%tmp = fcmp oeq double %arg, 0xFFF00000FFFFFFFF
ret i1 %tmp
}
-; CHECK: nnan2
-; CHECK: ret i1 false
-define i1 @nnan2(double %arg) nounwind readnone {
+define i1 @nnan2(double %arg) {
+; CHECK-LABEL: @nnan2(
+; CHECK-NEXT: ret i1 false
+;
%tmp = fcmp olt double %arg, 0xFFF00000FFFFFFFF
ret i1 %tmp
}
-; CHECK: nnan3
-; CHECK: ret i1 true
-define i1 @nnan3(double %arg) nounwind readnone {
+define i1 @nnan3(double %arg) {
+; CHECK-LABEL: @nnan3(
+; CHECK-NEXT: ret i1 true
+;
%tmp = fcmp uno double %arg, 0xFFF00000FFFFFFFF
ret i1 %tmp
}
-; CHECK: nnan4
-; CHECK: ret i1 true
-define i1 @nnan4(double %arg) nounwind readnone {
+define i1 @nnan4(double %arg) {
+; CHECK-LABEL: @nnan4(
+; CHECK-NEXT: ret i1 true
+;
%tmp = fcmp une double %arg, 0xFFF00000FFFFFFFF
ret i1 %tmp
}
-; CHECK: nnan5
-; CHECK: ret i1 true
-define i1 @nnan5(double %arg) nounwind readnone {
+define i1 @nnan5(double %arg) {
+; CHECK-LABEL: @nnan5(
+; CHECK-NEXT: ret i1 true
+;
%tmp = fcmp ult double %arg, 0xFFF00000FFFFFFFF
ret i1 %tmp
}
; Negative zero.
-; CHECK: nzero0
-; CHECK: ret i1 true
define i1 @nzero0() {
+; CHECK-LABEL: @nzero0(
+; CHECK-NEXT: ret i1 true
+;
%tmp = fcmp oeq double 0.0, -0.0
ret i1 %tmp
}
-; CHECK: nzero1
-; CHECK: ret i1 false
define i1 @nzero1() {
+; CHECK-LABEL: @nzero1(
+; CHECK-NEXT: ret i1 false
+;
%tmp = fcmp ogt double 0.0, -0.0
ret i1 %tmp
}
; Misc.
-; CHECK: misc0
-; CHECK: %tmp = fcmp ord double %arg, 0.000000e+00
-; CHECK: ret i1 %tmp
define i1 @misc0(double %arg) {
+; CHECK-LABEL: @misc0(
+; CHECK-NEXT: [[TMP:%.*]] = fcmp ord double %arg, 0.000000e+00
+; CHECK-NEXT: ret i1 [[TMP]]
+;
%tmp = fcmp oeq double %arg, %arg
ret i1 %tmp
}
-; CHECK: misc1
-; CHECK: ret i1 false
define i1 @misc1(double %arg) {
+; CHECK-LABEL: @misc1(
+; CHECK-NEXT: ret i1 false
+;
%tmp = fcmp one double %arg, %arg
ret i1 %tmp
}
diff --git a/test/Transforms/InstCombine/fmul.ll b/test/Transforms/InstCombine/fmul.ll
index ac3000fc0514..25353e2ef260 100644
--- a/test/Transforms/InstCombine/fmul.ll
+++ b/test/Transforms/InstCombine/fmul.ll
@@ -152,3 +152,32 @@ define double @sqrt_squared2(double %f) {
; CHECK-NEXT: %mul2 = fmul double %sqrt, %f
; CHECK-NEXT: ret double %mul2
}
+
+declare float @llvm.fabs.f32(float) nounwind readnone
+
+; CHECK-LABEL @fabs_squared(
+; CHECK: %mul = fmul float %x, %x
+define float @fabs_squared(float %x) {
+ %x.fabs = call float @llvm.fabs.f32(float %x)
+ %mul = fmul float %x.fabs, %x.fabs
+ ret float %mul
+}
+
+; CHECK-LABEL @fabs_squared_fast(
+; CHECK: %mul = fmul fast float %x, %x
+define float @fabs_squared_fast(float %x) {
+ %x.fabs = call float @llvm.fabs.f32(float %x)
+ %mul = fmul fast float %x.fabs, %x.fabs
+ ret float %mul
+}
+
+; CHECK-LABEL @fabs_x_fabs(
+; CHECK: call float @llvm.fabs.f32(float %x)
+; CHECK: call float @llvm.fabs.f32(float %y)
+; CHECK: %mul = fmul float %x.fabs, %y.fabs
+define float @fabs_x_fabs(float %x, float %y) {
+ %x.fabs = call float @llvm.fabs.f32(float %x)
+ %y.fabs = call float @llvm.fabs.f32(float %y)
+ %mul = fmul float %x.fabs, %y.fabs
+ ret float %mul
+}
diff --git a/test/Transforms/InstCombine/fputs-opt-size.ll b/test/Transforms/InstCombine/fputs-opt-size.ll
new file mode 100644
index 000000000000..ea8ef4203e95
--- /dev/null
+++ b/test/Transforms/InstCombine/fputs-opt-size.ll
@@ -0,0 +1,28 @@
+; When optimising for size, we don't want to rewrite fputs to fwrite
+; because it requires more arguments and thus extra MOVs are required.
+;
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+%struct._IO_FILE = type { i32, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, i8*, %struct._IO_marker*, %struct._IO_FILE*, i32, i32, i32, i16, i8, [1 x i8], i8*, i64, i8*, i8*, i8*, i8*, i32, i32, [40 x i8] }
+%struct._IO_marker = type { %struct._IO_marker*, %struct._IO_FILE*, i32 }
+
+@.str = private unnamed_addr constant [10 x i8] c"mylog.txt\00", align 1
+@.str.1 = private unnamed_addr constant [2 x i8] c"a\00", align 1
+@.str.2 = private unnamed_addr constant [27 x i8] c"Hello world this is a test\00", align 1
+
+define i32 @main() local_unnamed_addr #0 {
+entry:
+; CHECK-LABEL: @main(
+; CHECK-NOT: call i64 @fwrite
+; CHECK: call i32 @fputs
+
+ %call = tail call %struct._IO_FILE* @fopen(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i32 0, i32 0), i8* getelementptr inbounds ([2 x i8], [2 x i8]* @.str.1, i32 0, i32 0)) #2
+ %call1 = tail call i32 @fputs(i8* getelementptr inbounds ([27 x i8], [27 x i8]* @.str.2, i32 0, i32 0), %struct._IO_FILE* %call) #2
+ ret i32 0
+}
+
+declare noalias %struct._IO_FILE* @fopen(i8* nocapture readonly, i8* nocapture readonly) local_unnamed_addr #1
+declare i32 @fputs(i8* nocapture readonly, %struct._IO_FILE* nocapture) local_unnamed_addr #1
+
+attributes #0 = { nounwind optsize }
+attributes #1 = { nounwind optsize }
diff --git a/test/Transforms/InstCombine/gc.relocate.ll b/test/Transforms/InstCombine/gc.relocate.ll
index 308258a19417..78b3b5f42f91 100644
--- a/test/Transforms/InstCombine/gc.relocate.ll
+++ b/test/Transforms/InstCombine/gc.relocate.ll
@@ -9,17 +9,6 @@ declare zeroext i1 @return_i1()
declare token @llvm.experimental.gc.statepoint.p0f_i1f(i64, i32, i1 ()*, i32, i32, ...)
declare i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token, i32, i32)
-define i32 addrspace(1)* @deref(i32 addrspace(1)* dereferenceable(8) %dparam) gc "statepoint-example" {
-; Checks that a dereferenceabler pointer
-; CHECK-LABEL: @deref
-; CHECK: call dereferenceable(8)
-entry:
- %load = load i32, i32 addrspace(1)* %dparam
- %tok = tail call token (i64, i32, i1 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i1f(i64 0, i32 0, i1 ()* @return_i1, i32 0, i32 0, i32 0, i32 0, i32 addrspace(1)* %dparam)
- %relocate = call i32 addrspace(1)* @llvm.experimental.gc.relocate.p1i32(token %tok, i32 7, i32 7)
- ret i32 addrspace(1)* %relocate
-}
-
define i32 @explicit_nonnull(i32 addrspace(1)* nonnull %dparam) gc "statepoint-example" {
; Checks that a nonnull pointer
; CHECK-LABEL: @explicit_nonnull
@@ -50,3 +39,21 @@ gc:
no_gc:
unreachable
}
+
+
+; Make sure we don't crash when processing vectors
+define <2 x i8 addrspace(1)*> @vector(<2 x i8 addrspace(1)*> %obj) gc "statepoint-example" {
+entry:
+; CHECK-LABEL: @vector
+; CHECK: gc.statepoint
+; CHECK: gc.relocate
+ %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0, <2 x i8 addrspace(1)*> %obj)
+ %obj.relocated = call coldcc <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token %safepoint_token, i32 7, i32 7) ; (%obj, %obj)
+ ret <2 x i8 addrspace(1)*> %obj.relocated
+}
+
+declare void @do_safepoint()
+
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+declare i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token, i32, i32)
+declare <2 x i8 addrspace(1)*> @llvm.experimental.gc.relocate.v2p1i8(token, i32, i32)
diff --git a/test/Transforms/InstCombine/getelementptr-folding.ll b/test/Transforms/InstCombine/getelementptr-folding.ll
new file mode 100644
index 000000000000..11e7e43a6b44
--- /dev/null
+++ b/test/Transforms/InstCombine/getelementptr-folding.ll
@@ -0,0 +1,13 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+%struct.matrix_float3x3 = type { [3 x <3 x float>] }
+
+; We used to fold this by rewriting the indices to 0, 0, 2, 0. This is
+; invalid because there is a 4-byte padding after each <3 x float> field.
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+@matrix_identity_float3x3 = external global %struct.matrix_float3x3, align 16
+@bbb = global float* getelementptr inbounds (%struct.matrix_float3x3, %struct.matrix_float3x3* @matrix_identity_float3x3, i64 0, i32 0, i64 1, i64 3)
+; CHECK: @bbb = global float* getelementptr inbounds (%struct.matrix_float3x3, %struct.matrix_float3x3* @matrix_identity_float3x3, i64 0, i32 0, i64 1, i64 3)
diff --git a/test/Transforms/InstCombine/getelementptr.ll b/test/Transforms/InstCombine/getelementptr.ll
index 276ada91f3c6..7446734e210c 100644
--- a/test/Transforms/InstCombine/getelementptr.ll
+++ b/test/Transforms/InstCombine/getelementptr.ll
@@ -624,15 +624,11 @@ define i32 @test35() nounwind {
; CHECK: call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([17 x i8], [17 x i8]* @"\01LC8", i64 0, i64 0), i8* getelementptr inbounds (%t0, %t0* @s, i64 0, i32 1, i64 0)) [[NUW:#[0-9]+]]
}
-; Instcombine should constant-fold the GEP so that indices that have
-; static array extents are within bounds of those array extents.
-; In the below, -1 is not in the range [0,11). After the transformation,
-; the same address is computed, but 3 is in the range of [0,11).
-
+; Don't treat signed offsets as unsigned.
define i8* @test36() nounwind {
ret i8* getelementptr ([11 x i8], [11 x i8]* @array, i32 0, i64 -1)
; CHECK-LABEL: @test36(
-; CHECK: ret i8* getelementptr ([11 x i8], [11 x i8]* @array, i64 1676976733973595601, i64 4)
+; CHECK: ret i8* getelementptr ([11 x i8], [11 x i8]* @array, i64 0, i64 -1)
}
; Instcombine shouldn't assume that gep(A,0,1) != gep(A,1,0).
diff --git a/test/Transforms/InstCombine/icmp-vec.ll b/test/Transforms/InstCombine/icmp-vec.ll
new file mode 100644
index 000000000000..6fe3f631701e
--- /dev/null
+++ b/test/Transforms/InstCombine/icmp-vec.ll
@@ -0,0 +1,193 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; Canonicalize vector ge/le comparisons with constants to gt/lt.
+
+; Normal types are ConstantDataVectors. Test the constant values adjacent to the
+; min/max values that we're not allowed to transform.
+
+define <2 x i1> @sge(<2 x i8> %x) {
+; CHECK-LABEL: @sge(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> %x, <i8 -128, i8 126>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %cmp = icmp sge <2 x i8> %x, <i8 -127, i8 -129>
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @uge(<2 x i8> %x) {
+; CHECK-LABEL: @uge(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <2 x i8> %x, <i8 -2, i8 0>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %cmp = icmp uge <2 x i8> %x, <i8 -1, i8 1>
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @sle(<2 x i8> %x) {
+; CHECK-LABEL: @sle(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> %x, <i8 127, i8 -127>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %cmp = icmp sle <2 x i8> %x, <i8 126, i8 128>
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @ule(<2 x i8> %x) {
+; CHECK-LABEL: @ule(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i8> %x, <i8 -1, i8 1>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %cmp = icmp ule <2 x i8> %x, <i8 254, i8 0>
+ ret <2 x i1> %cmp
+}
+
+; Zeros are special: they're ConstantAggregateZero.
+
+define <2 x i1> @sge_zero(<2 x i8> %x) {
+; CHECK-LABEL: @sge_zero(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <2 x i8> %x, <i8 -1, i8 -1>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %cmp = icmp sge <2 x i8> %x, <i8 0, i8 0>
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @uge_zero(<2 x i8> %x) {
+; CHECK-LABEL: @uge_zero(
+; CHECK-NEXT: ret <2 x i1> <i1 true, i1 true>
+;
+ %cmp = icmp uge <2 x i8> %x, <i8 0, i8 0>
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @sle_zero(<2 x i8> %x) {
+; CHECK-LABEL: @sle_zero(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> %x, <i8 1, i8 1>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %cmp = icmp sle <2 x i8> %x, <i8 0, i8 0>
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @ule_zero(<2 x i8> %x) {
+; CHECK-LABEL: @ule_zero(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult <2 x i8> %x, <i8 1, i8 1>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %cmp = icmp ule <2 x i8> %x, <i8 0, i8 0>
+ ret <2 x i1> %cmp
+}
+
+; Weird types are ConstantVectors, not ConstantDataVectors. For an i3 type:
+; Signed min = -4
+; Unsigned min = 0
+; Signed max = 3
+; Unsigned max = 7
+
+define <3 x i1> @sge_weird(<3 x i3> %x) {
+; CHECK-LABEL: @sge_weird(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt <3 x i3> %x, <i3 -4, i3 2, i3 -1>
+; CHECK-NEXT: ret <3 x i1> [[CMP]]
+;
+ %cmp = icmp sge <3 x i3> %x, <i3 -3, i3 -5, i3 0>
+ ret <3 x i1> %cmp
+}
+
+define <3 x i1> @uge_weird(<3 x i3> %x) {
+; CHECK-LABEL: @uge_weird(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt <3 x i3> %x, <i3 -2, i3 0, i3 1>
+; CHECK-NEXT: ret <3 x i1> [[CMP]]
+;
+ %cmp = icmp uge <3 x i3> %x, <i3 -1, i3 1, i3 2>
+ ret <3 x i1> %cmp
+}
+
+define <3 x i1> @sle_weird(<3 x i3> %x) {
+; CHECK-LABEL: @sle_weird(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <3 x i3> %x, <i3 3, i3 -3, i3 1>
+; CHECK-NEXT: ret <3 x i1> [[CMP]]
+;
+ %cmp = icmp sle <3 x i3> %x, <i3 2, i3 4, i3 0>
+ ret <3 x i1> %cmp
+}
+
+define <3 x i1> @ule_weird(<3 x i3> %x) {
+; CHECK-LABEL: @ule_weird(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult <3 x i3> %x, <i3 -1, i3 1, i3 2>
+; CHECK-NEXT: ret <3 x i1> [[CMP]]
+;
+ %cmp = icmp ule <3 x i3> %x, <i3 6, i3 0, i3 1>
+ ret <3 x i1> %cmp
+}
+
+; We can't do the transform if any constants are already at the limits.
+
+define <2 x i1> @sge_min(<2 x i3> %x) {
+; CHECK-LABEL: @sge_min(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sge <2 x i3> %x, <i3 -4, i3 1>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %cmp = icmp sge <2 x i3> %x, <i3 -4, i3 1>
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @uge_min(<2 x i3> %x) {
+; CHECK-LABEL: @uge_min(
+; CHECK-NEXT: [[CMP:%.*]] = icmp uge <2 x i3> %x, <i3 1, i3 0>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %cmp = icmp uge <2 x i3> %x, <i3 1, i3 0>
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @sle_max(<2 x i3> %x) {
+; CHECK-LABEL: @sle_max(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sle <2 x i3> %x, <i3 1, i3 3>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %cmp = icmp sle <2 x i3> %x, <i3 1, i3 3>
+ ret <2 x i1> %cmp
+}
+
+define <2 x i1> @ule_max(<2 x i3> %x) {
+; CHECK-LABEL: @ule_max(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ule <2 x i3> %x, <i3 -1, i3 1>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %cmp = icmp ule <2 x i3> %x, <i3 7, i3 1>
+ ret <2 x i1> %cmp
+}
+
+; If we can't determine if a constant element is min/max (eg, it's a ConstantExpr), do nothing.
+
+define <2 x i1> @PR27756_1(<2 x i8> %a) {
+; CHECK-LABEL: @PR27756_1(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sle <2 x i8> %a, <i8 bitcast (<2 x i4> <i4 1, i4 2> to i8), i8 0>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %cmp = icmp sle <2 x i8> %a, <i8 bitcast (<2 x i4> <i4 1, i4 2> to i8), i8 0>
+ ret <2 x i1> %cmp
+}
+
+; Undef elements don't prevent the transform of the comparison.
+
+define <2 x i1> @PR27756_2(<2 x i8> %a) {
+; CHECK-LABEL: @PR27756_2(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt <2 x i8> %a, <i8 undef, i8 1>
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %cmp = icmp sle <2 x i8> %a, <i8 undef, i8 0>
+ ret <2 x i1> %cmp
+}
+
+@someglobal = global i32 0
+
+define <2 x i1> @PR27786(<2 x i8> %a) {
+; CHECK-LABEL: @PR27786(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sle <2 x i8> %a, bitcast (i16 ptrtoint (i32* @someglobal to i16) to <2 x i8>)
+; CHECK-NEXT: ret <2 x i1> [[CMP]]
+;
+ %cmp = icmp sle <2 x i8> %a, bitcast (i16 ptrtoint (i32* @someglobal to i16) to <2 x i8>)
+ ret <2 x i1> %cmp
+}
diff --git a/test/Transforms/InstCombine/icmp.ll b/test/Transforms/InstCombine/icmp.ll
index 1e64cd7f5820..4575e1017ffe 100644
--- a/test/Transforms/InstCombine/icmp.ll
+++ b/test/Transforms/InstCombine/icmp.ll
@@ -1,219 +1,236 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "e-p:64:64:64-p1:16:16:16-p2:32:32:32-p3:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
define i32 @test1(i32 %X) {
-entry:
- icmp slt i32 %X, 0 ; <i1>:0 [#uses=1]
- zext i1 %0 to i32 ; <i32>:1 [#uses=1]
- ret i32 %1
; CHECK-LABEL: @test1(
-; CHECK: lshr i32 %X, 31
-; CHECK-NEXT: ret i32
+; CHECK-NEXT: [[X_LOBIT:%.*]] = lshr i32 %X, 31
+; CHECK-NEXT: ret i32 [[X_LOBIT]]
+;
+ %a = icmp slt i32 %X, 0
+ %b = zext i1 %a to i32
+ ret i32 %b
}
define i32 @test2(i32 %X) {
-entry:
- icmp ult i32 %X, -2147483648 ; <i1>:0 [#uses=1]
- zext i1 %0 to i32 ; <i32>:1 [#uses=1]
- ret i32 %1
; CHECK-LABEL: @test2(
-; CHECK: lshr i32 %X, 31
-; CHECK-NEXT: xor i32
-; CHECK-NEXT: ret i32
+; CHECK-NEXT: [[X_LOBIT:%.*]] = lshr i32 %X, 31
+; CHECK-NEXT: [[X_LOBIT_NOT:%.*]] = xor i32 [[X_LOBIT]], 1
+; CHECK-NEXT: ret i32 [[X_LOBIT_NOT]]
+;
+ %a = icmp ult i32 %X, -2147483648
+ %b = zext i1 %a to i32
+ ret i32 %b
}
define i32 @test3(i32 %X) {
-entry:
- icmp slt i32 %X, 0 ; <i1>:0 [#uses=1]
- sext i1 %0 to i32 ; <i32>:1 [#uses=1]
- ret i32 %1
; CHECK-LABEL: @test3(
-; CHECK: ashr i32 %X, 31
-; CHECK-NEXT: ret i32
+; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr i32 %X, 31
+; CHECK-NEXT: ret i32 [[X_LOBIT]]
+;
+ %a = icmp slt i32 %X, 0
+ %b = sext i1 %a to i32
+ ret i32 %b
}
define i32 @test4(i32 %X) {
-entry:
- icmp ult i32 %X, -2147483648 ; <i1>:0 [#uses=1]
- sext i1 %0 to i32 ; <i32>:1 [#uses=1]
- ret i32 %1
; CHECK-LABEL: @test4(
-; CHECK: ashr i32 %X, 31
-; CHECK-NEXT: xor i32
-; CHECK-NEXT: ret i32
+; CHECK-NEXT: [[X_LOBIT:%.*]] = ashr i32 %X, 31
+; CHECK-NEXT: [[X_LOBIT_NOT:%.*]] = xor i32 [[X_LOBIT]], -1
+; CHECK-NEXT: ret i32 [[X_LOBIT_NOT]]
+;
+ %a = icmp ult i32 %X, -2147483648
+ %b = sext i1 %a to i32
+ ret i32 %b
}
; PR4837
define <2 x i1> @test5(<2 x i64> %x) {
-entry:
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: ret <2 x i1> <i1 true, i1 true>
+;
%V = icmp eq <2 x i64> %x, undef
ret <2 x i1> %V
-; CHECK-LABEL: @test5(
-; CHECK: ret <2 x i1> <i1 true, i1 true>
}
define i32 @test6(i32 %a, i32 %b) {
- %c = icmp sle i32 %a, -1
- %d = zext i1 %c to i32
- %e = sub i32 0, %d
- %f = and i32 %e, %b
- ret i32 %f
; CHECK-LABEL: @test6(
-; CHECK-NEXT: ashr i32 %a, 31
-; CHECK-NEXT: %f = and i32 %e, %b
-; CHECK-NEXT: ret i32 %f
+; CHECK-NEXT: [[E:%.*]] = ashr i32 %a, 31
+; CHECK-NEXT: [[F:%.*]] = and i32 [[E]], %b
+; CHECK-NEXT: ret i32 [[F]]
+;
+ %c = icmp sle i32 %a, -1
+ %d = zext i1 %c to i32
+ %e = sub i32 0, %d
+ %f = and i32 %e, %b
+ ret i32 %f
}
define i1 @test7(i32 %x) {
-entry:
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[B:%.*]] = icmp ne i32 %x, 0
+; CHECK-NEXT: ret i1 [[B]]
+;
%a = add i32 %x, -1
%b = icmp ult i32 %a, %x
ret i1 %b
-; CHECK-LABEL: @test7(
-; CHECK: %b = icmp ne i32 %x, 0
-; CHECK: ret i1 %b
}
define i1 @test8(i32 %x){
-entry:
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: ret i1 false
+;
%a = add i32 %x, -1
%b = icmp eq i32 %a, %x
ret i1 %b
-; CHECK-LABEL: @test8(
-; CHECK: ret i1 false
}
define i1 @test9(i32 %x) {
-entry:
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: [[B:%.*]] = icmp ugt i32 %x, 1
+; CHECK-NEXT: ret i1 [[B]]
+;
%a = add i32 %x, -2
%b = icmp ugt i32 %x, %a
ret i1 %b
-; CHECK-LABEL: @test9(
-; CHECK: icmp ugt i32 %x, 1
-; CHECK: ret i1 %b
}
define i1 @test10(i32 %x){
-entry:
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: [[B:%.*]] = icmp ne i32 %x, -2147483648
+; CHECK-NEXT: ret i1 [[B]]
+;
%a = add i32 %x, -1
%b = icmp slt i32 %a, %x
ret i1 %b
-; CHECK-LABEL: @test10(
-; CHECK: %b = icmp ne i32 %x, -2147483648
-; CHECK: ret i1 %b
}
define i1 @test11(i32 %x) {
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: ret i1 true
+;
%a = add nsw i32 %x, 8
%b = icmp slt i32 %x, %a
ret i1 %b
-; CHECK-LABEL: @test11(
-; CHECK: ret i1 true
}
; PR6195
define i1 @test12(i1 %A) {
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: [[NOT_A:%.*]] = xor i1 %A, true
+; CHECK-NEXT: ret i1 [[NOT_A]]
+;
%S = select i1 %A, i64 -4294967295, i64 8589934591
%B = icmp ne i64 bitcast (<2 x i32> <i32 1, i32 -1> to i64), %S
ret i1 %B
-; CHECK-LABEL: @test12(
-; CHECK-NEXT: = xor i1 %A, true
-; CHECK-NEXT: ret i1
}
; PR6481
define i1 @test13(i8 %X) nounwind readnone {
-entry:
- %cmp = icmp slt i8 undef, %X
- ret i1 %cmp
; CHECK-LABEL: @test13(
-; CHECK: ret i1 false
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = icmp slt i8 undef, %X
+ ret i1 %cmp
}
define i1 @test14(i8 %X) nounwind readnone {
-entry:
- %cmp = icmp slt i8 undef, -128
- ret i1 %cmp
; CHECK-LABEL: @test14(
-; CHECK: ret i1 false
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = icmp slt i8 undef, -128
+ ret i1 %cmp
}
define i1 @test15() nounwind readnone {
-entry:
- %cmp = icmp eq i8 undef, -128
- ret i1 %cmp
; CHECK-LABEL: @test15(
-; CHECK: ret i1 undef
+; CHECK-NEXT: ret i1 undef
+;
+ %cmp = icmp eq i8 undef, -128
+ ret i1 %cmp
}
define i1 @test16() nounwind readnone {
-entry:
- %cmp = icmp ne i8 undef, -128
- ret i1 %cmp
; CHECK-LABEL: @test16(
-; CHECK: ret i1 undef
+; CHECK-NEXT: ret i1 undef
+;
+ %cmp = icmp ne i8 undef, -128
+ ret i1 %cmp
}
define i1 @test17(i32 %x) nounwind {
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 %x, 3
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%shl = shl i32 1, %x
%and = and i32 %shl, 8
%cmp = icmp eq i32 %and, 0
ret i1 %cmp
-; CHECK-LABEL: @test17(
-; CHECK-NEXT: %cmp = icmp ne i32 %x, 3
}
define i1 @test17a(i32 %x) nounwind {
+; CHECK-LABEL: @test17a(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 %x, 2
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%shl = shl i32 1, %x
%and = and i32 %shl, 7
%cmp = icmp eq i32 %and, 0
ret i1 %cmp
-; CHECK-LABEL: @test17a(
-; CHECK-NEXT: %cmp = icmp ugt i32 %x, 2
}
define i1 @test18(i32 %x) nounwind {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 %x, 3
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%sh = lshr i32 8, %x
%and = and i32 %sh, 1
%cmp = icmp eq i32 %and, 0
ret i1 %cmp
-; CHECK-LABEL: @test18(
-; CHECK-NEXT: %cmp = icmp ne i32 %x, 3
}
define i1 @test19(i32 %x) nounwind {
+; CHECK-LABEL: @test19(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 %x, 3
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%shl = shl i32 1, %x
%and = and i32 %shl, 8
%cmp = icmp eq i32 %and, 8
ret i1 %cmp
-; CHECK-LABEL: @test19(
-; CHECK-NEXT: %cmp = icmp eq i32 %x, 3
}
define i1 @test20(i32 %x) nounwind {
+; CHECK-LABEL: @test20(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 %x, 3
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%shl = shl i32 1, %x
%and = and i32 %shl, 8
%cmp = icmp ne i32 %and, 0
ret i1 %cmp
-; CHECK-LABEL: @test20(
-; CHECK-NEXT: %cmp = icmp eq i32 %x, 3
}
define i1 @test20a(i32 %x) nounwind {
+; CHECK-LABEL: @test20a(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 %x, 3
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%shl = shl i32 1, %x
%and = and i32 %shl, 7
%cmp = icmp ne i32 %and, 0
ret i1 %cmp
-; CHECK-LABEL: @test20a(
-; CHECK-NEXT: %cmp = icmp ult i32 %x, 3
}
define i1 @test21(i8 %x, i8 %y) {
; CHECK-LABEL: @test21(
-; CHECK-NOT: or i8
-; CHECK: icmp ugt
+; CHECK-NEXT: [[B:%.*]] = icmp ugt i8 %x, 3
+; CHECK-NEXT: ret i1 [[B]]
+;
%A = or i8 %x, 1
%B = icmp ugt i8 %A, 3
ret i1 %B
@@ -221,29 +238,33 @@ define i1 @test21(i8 %x, i8 %y) {
define i1 @test22(i8 %x, i8 %y) {
; CHECK-LABEL: @test22(
-; CHECK-NOT: or i8
-; CHECK: icmp ult
+; CHECK-NEXT: [[B:%.*]] = icmp ult i8 %x, 4
+; CHECK-NEXT: ret i1 [[B]]
+;
%A = or i8 %x, 1
%B = icmp ult i8 %A, 4
ret i1 %B
}
; PR2740
+define i1 @test23(i32 %x) {
; CHECK-LABEL: @test23(
-; CHECK: icmp sgt i32 %x, 1328634634
-define i1 @test23(i32 %x) nounwind {
- %i3 = sdiv i32 %x, -1328634635
- %i4 = icmp eq i32 %i3, -1
- ret i1 %i4
+; CHECK-NEXT: [[I4:%.*]] = icmp sgt i32 %x, 1328634634
+; CHECK-NEXT: ret i1 [[I4]]
+;
+ %i3 = sdiv i32 %x, -1328634635
+ %i4 = icmp eq i32 %i3, -1
+ ret i1 %i4
}
@X = global [1000 x i32] zeroinitializer
; PR8882
-; CHECK-LABEL: @test24(
-; CHECK: %cmp = icmp eq i64 %i, 1000
-; CHECK: ret i1 %cmp
define i1 @test24(i64 %i) {
+; CHECK-LABEL: @test24(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i64 %i, 1000
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%p1 = getelementptr inbounds i32, i32* getelementptr inbounds ([1000 x i32], [1000 x i32]* @X, i64 0, i64 0), i64 %i
%cmp = icmp eq i32* %p1, getelementptr inbounds ([1000 x i32], [1000 x i32]* @X, i64 1, i64 0)
ret i1 %cmp
@@ -251,154 +272,168 @@ define i1 @test24(i64 %i) {
@X_as1 = addrspace(1) global [1000 x i32] zeroinitializer
-; CHECK: @test24_as1
-; CHECK: trunc i64 %i to i16
-; CHECK: %cmp = icmp eq i16 %1, 1000
-; CHECK: ret i1 %cmp
define i1 @test24_as1(i64 %i) {
+; CHECK-LABEL: @test24_as1(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 %i to i16
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i16 [[TMP1]], 1000
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%p1 = getelementptr inbounds i32, i32 addrspace(1)* getelementptr inbounds ([1000 x i32], [1000 x i32] addrspace(1)* @X_as1, i64 0, i64 0), i64 %i
%cmp = icmp eq i32 addrspace(1)* %p1, getelementptr inbounds ([1000 x i32], [1000 x i32] addrspace(1)* @X_as1, i64 1, i64 0)
ret i1 %cmp
}
-; CHECK-LABEL: @test25(
-; X + Z > Y + Z -> X > Y if there is no overflow.
-; CHECK: %c = icmp sgt i32 %x, %y
-; CHECK: ret i1 %c
define i1 @test25(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test25(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 %x, %y
+; CHECK-NEXT: ret i1 [[C]]
+;
%lhs = add nsw i32 %x, %z
%rhs = add nsw i32 %y, %z
%c = icmp sgt i32 %lhs, %rhs
ret i1 %c
}
-; CHECK-LABEL: @test26(
; X + Z > Y + Z -> X > Y if there is no overflow.
-; CHECK: %c = icmp ugt i32 %x, %y
-; CHECK: ret i1 %c
define i1 @test26(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test26(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 %x, %y
+; CHECK-NEXT: ret i1 [[C]]
+;
%lhs = add nuw i32 %x, %z
%rhs = add nuw i32 %y, %z
%c = icmp ugt i32 %lhs, %rhs
ret i1 %c
}
-; CHECK-LABEL: @test27(
; X - Z > Y - Z -> X > Y if there is no overflow.
-; CHECK: %c = icmp sgt i32 %x, %y
-; CHECK: ret i1 %c
define i1 @test27(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test27(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 %x, %y
+; CHECK-NEXT: ret i1 [[C]]
+;
%lhs = sub nsw i32 %x, %z
%rhs = sub nsw i32 %y, %z
%c = icmp sgt i32 %lhs, %rhs
ret i1 %c
}
-; CHECK-LABEL: @test28(
; X - Z > Y - Z -> X > Y if there is no overflow.
-; CHECK: %c = icmp ugt i32 %x, %y
-; CHECK: ret i1 %c
define i1 @test28(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test28(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 %x, %y
+; CHECK-NEXT: ret i1 [[C]]
+;
%lhs = sub nuw i32 %x, %z
%rhs = sub nuw i32 %y, %z
%c = icmp ugt i32 %lhs, %rhs
ret i1 %c
}
-; CHECK-LABEL: @test29(
; X + Y > X -> Y > 0 if there is no overflow.
-; CHECK: %c = icmp sgt i32 %y, 0
-; CHECK: ret i1 %c
define i1 @test29(i32 %x, i32 %y) {
+; CHECK-LABEL: @test29(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 %y, 0
+; CHECK-NEXT: ret i1 [[C]]
+;
%lhs = add nsw i32 %x, %y
%c = icmp sgt i32 %lhs, %x
ret i1 %c
}
-; CHECK-LABEL: @test30(
; X + Y > X -> Y > 0 if there is no overflow.
-; CHECK: %c = icmp ne i32 %y, 0
-; CHECK: ret i1 %c
define i1 @test30(i32 %x, i32 %y) {
+; CHECK-LABEL: @test30(
+; CHECK-NEXT: [[C:%.*]] = icmp ne i32 %y, 0
+; CHECK-NEXT: ret i1 [[C]]
+;
%lhs = add nuw i32 %x, %y
%c = icmp ugt i32 %lhs, %x
ret i1 %c
}
-; CHECK-LABEL: @test31(
; X > X + Y -> 0 > Y if there is no overflow.
-; CHECK: %c = icmp slt i32 %y, 0
-; CHECK: ret i1 %c
define i1 @test31(i32 %x, i32 %y) {
+; CHECK-LABEL: @test31(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i32 %y, 0
+; CHECK-NEXT: ret i1 [[C]]
+;
%rhs = add nsw i32 %x, %y
%c = icmp sgt i32 %x, %rhs
ret i1 %c
}
-; CHECK-LABEL: @test32(
; X > X + Y -> 0 > Y if there is no overflow.
-; CHECK: ret i1 false
define i1 @test32(i32 %x, i32 %y) {
+; CHECK-LABEL: @test32(
+; CHECK-NEXT: ret i1 false
+;
%rhs = add nuw i32 %x, %y
%c = icmp ugt i32 %x, %rhs
ret i1 %c
}
-; CHECK-LABEL: @test33(
; X - Y > X -> 0 > Y if there is no overflow.
-; CHECK: %c = icmp slt i32 %y, 0
-; CHECK: ret i1 %c
define i1 @test33(i32 %x, i32 %y) {
+; CHECK-LABEL: @test33(
+; CHECK-NEXT: [[C:%.*]] = icmp slt i32 %y, 0
+; CHECK-NEXT: ret i1 [[C]]
+;
%lhs = sub nsw i32 %x, %y
%c = icmp sgt i32 %lhs, %x
ret i1 %c
}
-; CHECK-LABEL: @test34(
; X - Y > X -> 0 > Y if there is no overflow.
-; CHECK: ret i1 false
define i1 @test34(i32 %x, i32 %y) {
+; CHECK-LABEL: @test34(
+; CHECK-NEXT: ret i1 false
+;
%lhs = sub nuw i32 %x, %y
%c = icmp ugt i32 %lhs, %x
ret i1 %c
}
-; CHECK-LABEL: @test35(
; X > X - Y -> Y > 0 if there is no overflow.
-; CHECK: %c = icmp sgt i32 %y, 0
-; CHECK: ret i1 %c
define i1 @test35(i32 %x, i32 %y) {
+; CHECK-LABEL: @test35(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 %y, 0
+; CHECK-NEXT: ret i1 [[C]]
+;
%rhs = sub nsw i32 %x, %y
%c = icmp sgt i32 %x, %rhs
ret i1 %c
}
-; CHECK-LABEL: @test36(
; X > X - Y -> Y > 0 if there is no overflow.
-; CHECK: %c = icmp ne i32 %y, 0
-; CHECK: ret i1 %c
define i1 @test36(i32 %x, i32 %y) {
+; CHECK-LABEL: @test36(
+; CHECK-NEXT: [[C:%.*]] = icmp ne i32 %y, 0
+; CHECK-NEXT: ret i1 [[C]]
+;
%rhs = sub nuw i32 %x, %y
%c = icmp ugt i32 %x, %rhs
ret i1 %c
}
-; CHECK-LABEL: @test37(
; X - Y > X - Z -> Z > Y if there is no overflow.
-; CHECK: %c = icmp sgt i32 %z, %y
-; CHECK: ret i1 %c
define i1 @test37(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test37(
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 %z, %y
+; CHECK-NEXT: ret i1 [[C]]
+;
%lhs = sub nsw i32 %x, %y
%rhs = sub nsw i32 %x, %z
%c = icmp sgt i32 %lhs, %rhs
ret i1 %c
}
-; CHECK-LABEL: @test38(
; X - Y > X - Z -> Z > Y if there is no overflow.
-; CHECK: %c = icmp ugt i32 %z, %y
-; CHECK: ret i1 %c
define i1 @test38(i32 %x, i32 %y, i32 %z) {
+; CHECK-LABEL: @test38(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 %z, %y
+; CHECK-NEXT: ret i1 [[C]]
+;
%lhs = sub nuw i32 %x, %y
%rhs = sub nuw i32 %x, %z
%c = icmp ugt i32 %lhs, %rhs
@@ -406,67 +441,82 @@ define i1 @test38(i32 %x, i32 %y, i32 %z) {
}
; PR9343 #1
-; CHECK-LABEL: @test39(
-; CHECK: %B = icmp eq i32 %X, 0
define i1 @test39(i32 %X, i32 %Y) {
+; CHECK-LABEL: @test39(
+; CHECK-NEXT: [[B:%.*]] = icmp eq i32 %X, 0
+; CHECK-NEXT: ret i1 [[B]]
+;
%A = ashr exact i32 %X, %Y
%B = icmp eq i32 %A, 0
ret i1 %B
}
-; CHECK-LABEL: @test40(
-; CHECK: %B = icmp ne i32 %X, 0
define i1 @test40(i32 %X, i32 %Y) {
+; CHECK-LABEL: @test40(
+; CHECK-NEXT: [[B:%.*]] = icmp ne i32 %X, 0
+; CHECK-NEXT: ret i1 [[B]]
+;
%A = lshr exact i32 %X, %Y
%B = icmp ne i32 %A, 0
ret i1 %B
}
; PR9343 #3
-; CHECK-LABEL: @test41(
-; CHECK: ret i1 true
define i1 @test41(i32 %X, i32 %Y) {
+; CHECK-LABEL: @test41(
+; CHECK-NEXT: ret i1 true
+;
%A = urem i32 %X, %Y
%B = icmp ugt i32 %Y, %A
ret i1 %B
}
-; CHECK-LABEL: @test42(
-; CHECK: %B = icmp sgt i32 %Y, -1
define i1 @test42(i32 %X, i32 %Y) {
+; CHECK-LABEL: @test42(
+; CHECK-NEXT: [[B:%.*]] = icmp sgt i32 %Y, -1
+; CHECK-NEXT: ret i1 [[B]]
+;
%A = srem i32 %X, %Y
%B = icmp slt i32 %A, %Y
ret i1 %B
}
-; CHECK-LABEL: @test43(
-; CHECK: %B = icmp slt i32 %Y, 0
define i1 @test43(i32 %X, i32 %Y) {
+; CHECK-LABEL: @test43(
+; CHECK-NEXT: [[B:%.*]] = icmp slt i32 %Y, 0
+; CHECK-NEXT: ret i1 [[B]]
+;
%A = srem i32 %X, %Y
%B = icmp slt i32 %Y, %A
ret i1 %B
}
-; CHECK-LABEL: @test44(
-; CHECK: %B = icmp sgt i32 %Y, -1
define i1 @test44(i32 %X, i32 %Y) {
+; CHECK-LABEL: @test44(
+; CHECK-NEXT: [[B:%.*]] = icmp sgt i32 %Y, -1
+; CHECK-NEXT: ret i1 [[B]]
+;
%A = srem i32 %X, %Y
%B = icmp slt i32 %A, %Y
ret i1 %B
}
-; CHECK-LABEL: @test45(
-; CHECK: %B = icmp slt i32 %Y, 0
define i1 @test45(i32 %X, i32 %Y) {
+; CHECK-LABEL: @test45(
+; CHECK-NEXT: [[B:%.*]] = icmp slt i32 %Y, 0
+; CHECK-NEXT: ret i1 [[B]]
+;
%A = srem i32 %X, %Y
%B = icmp slt i32 %Y, %A
ret i1 %B
}
; PR9343 #4
-; CHECK-LABEL: @test46(
-; CHECK: %C = icmp ult i32 %X, %Y
define i1 @test46(i32 %X, i32 %Y, i32 %Z) {
+; CHECK-LABEL: @test46(
+; CHECK-NEXT: [[C:%.*]] = icmp ult i32 %X, %Y
+; CHECK-NEXT: ret i1 [[C]]
+;
%A = ashr exact i32 %X, %Z
%B = ashr exact i32 %Y, %Z
%C = icmp ult i32 %A, %B
@@ -474,9 +524,11 @@ define i1 @test46(i32 %X, i32 %Y, i32 %Z) {
}
; PR9343 #5
-; CHECK-LABEL: @test47(
-; CHECK: %C = icmp ugt i32 %X, %Y
define i1 @test47(i32 %X, i32 %Y, i32 %Z) {
+; CHECK-LABEL: @test47(
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 %X, %Y
+; CHECK-NEXT: ret i1 [[C]]
+;
%A = ashr exact i32 %X, %Z
%B = ashr exact i32 %Y, %Z
%C = icmp ugt i32 %A, %B
@@ -484,9 +536,11 @@ define i1 @test47(i32 %X, i32 %Y, i32 %Z) {
}
; PR9343 #8
-; CHECK-LABEL: @test48(
-; CHECK: %C = icmp eq i32 %X, %Y
define i1 @test48(i32 %X, i32 %Y, i32 %Z) {
+; CHECK-LABEL: @test48(
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 %X, %Y
+; CHECK-NEXT: ret i1 [[C]]
+;
%A = sdiv exact i32 %X, %Z
%B = sdiv exact i32 %Y, %Z
%C = icmp eq i32 %A, %B
@@ -494,9 +548,11 @@ define i1 @test48(i32 %X, i32 %Y, i32 %Z) {
}
; PR8469
-; CHECK-LABEL: @test49(
-; CHECK: ret <2 x i1> <i1 true, i1 true>
define <2 x i1> @test49(<2 x i32> %tmp3) {
+; CHECK-LABEL: @test49(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret <2 x i1> <i1 true, i1 true>
+;
entry:
%tmp11 = and <2 x i32> %tmp3, <i32 3, i32 3>
%cmp = icmp ult <2 x i32> %tmp11, <i32 4, i32 4>
@@ -504,29 +560,35 @@ entry:
}
; PR9343 #7
-; CHECK-LABEL: @test50(
-; CHECK: ret i1 true
define i1 @test50(i16 %X, i32 %Y) {
+; CHECK-LABEL: @test50(
+; CHECK-NEXT: ret i1 true
+;
%A = zext i16 %X to i32
%B = srem i32 %A, %Y
%C = icmp sgt i32 %B, -1
ret i1 %C
}
-; CHECK-LABEL: @test51(
-; CHECK: ret i1 %C
define i1 @test51(i32 %X, i32 %Y) {
+; CHECK-LABEL: @test51(
+; CHECK-NEXT: [[A:%.*]] = and i32 %X, -2147483648
+; CHECK-NEXT: [[B:%.*]] = srem i32 [[A]], %Y
+; CHECK-NEXT: [[C:%.*]] = icmp sgt i32 [[B]], -1
+; CHECK-NEXT: ret i1 [[C]]
+;
%A = and i32 %X, 2147483648
%B = srem i32 %A, %Y
%C = icmp sgt i32 %B, -1
ret i1 %C
}
-; CHECK-LABEL: @test52(
-; CHECK-NEXT: and i32 %x1, 16711935
-; CHECK-NEXT: icmp eq i32 {{.*}}, 4980863
-; CHECK-NEXT: ret i1
define i1 @test52(i32 %x1) nounwind {
+; CHECK-LABEL: @test52(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %x1, 16711935
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 4980863
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
%conv = and i32 %x1, 255
%cmp = icmp eq i32 %conv, 127
%tmp2 = lshr i32 %x1, 16
@@ -538,38 +600,46 @@ define i1 @test52(i32 %x1) nounwind {
}
; PR9838
-; CHECK-LABEL: @test53(
-; CHECK-NEXT: sdiv exact
-; CHECK-NEXT: sdiv
-; CHECK-NEXT: icmp
define i1 @test53(i32 %a, i32 %b) nounwind {
- %x = sdiv exact i32 %a, 30
- %y = sdiv i32 %b, 30
- %z = icmp eq i32 %x, %y
- ret i1 %z
+; CHECK-LABEL: @test53(
+; CHECK-NEXT: [[X:%.*]] = sdiv exact i32 %a, 30
+; CHECK-NEXT: [[Y:%.*]] = sdiv i32 %b, 30
+; CHECK-NEXT: [[Z:%.*]] = icmp eq i32 [[X]], [[Y]]
+; CHECK-NEXT: ret i1 [[Z]]
+;
+ %x = sdiv exact i32 %a, 30
+ %y = sdiv i32 %b, 30
+ %z = icmp eq i32 %x, %y
+ ret i1 %z
}
-; CHECK-LABEL: @test54(
-; CHECK-NEXT: %and = and i8 %a, -64
-; CHECK-NEXT: icmp eq i8 %and, -128
define i1 @test54(i8 %a) nounwind {
+; CHECK-LABEL: @test54(
+; CHECK-NEXT: [[AND:%.*]] = and i8 %a, -64
+; CHECK-NEXT: [[RET:%.*]] = icmp eq i8 [[AND]], -128
+; CHECK-NEXT: ret i1 [[RET]]
+;
%ext = zext i8 %a to i32
%and = and i32 %ext, 192
%ret = icmp eq i32 %and, 128
ret i1 %ret
}
-; CHECK-LABEL: @test55(
-; CHECK-NEXT: icmp eq i32 %a, -123
define i1 @test55(i32 %a) {
+; CHECK-LABEL: @test55(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 %a, -123
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%sub = sub i32 0, %a
%cmp = icmp eq i32 %sub, 123
ret i1 %cmp
}
-; CHECK-LABEL: @test56(
-; CHECK-NEXT: icmp eq i32 %a, -113
define i1 @test56(i32 %a) {
+; CHECK-LABEL: @test56(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 %a, -113
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%sub = sub i32 10, %a
%cmp = icmp eq i32 %sub, 123
ret i1 %cmp
@@ -577,10 +647,13 @@ define i1 @test56(i32 %a) {
; PR10267 Don't make icmps more expensive when no other inst is subsumed.
declare void @foo(i32)
-; CHECK-LABEL: @test57(
-; CHECK: %and = and i32 %a, -2
-; CHECK: %cmp = icmp ne i32 %and, 0
define i1 @test57(i32 %a) {
+; CHECK-LABEL: @test57(
+; CHECK-NEXT: [[AND:%.*]] = and i32 %a, -2
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: call void @foo(i32 [[AND]])
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%and = and i32 %a, -2
%cmp = icmp ne i32 %and, 0
call void @foo(i32 %and)
@@ -588,9 +661,11 @@ define i1 @test57(i32 %a) {
}
; rdar://problem/10482509
-; CHECK-LABEL: @cmpabs1(
-; CHECK-NEXT: icmp ne
define zeroext i1 @cmpabs1(i64 %val) {
+; CHECK-LABEL: @cmpabs1(
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i64 %val, 0
+; CHECK-NEXT: ret i1 [[TOBOOL]]
+;
%sub = sub nsw i64 0, %val
%cmp = icmp slt i64 %val, 0
%sub.val = select i1 %cmp, i64 %sub, i64 %val
@@ -598,9 +673,11 @@ define zeroext i1 @cmpabs1(i64 %val) {
ret i1 %tobool
}
-; CHECK-LABEL: @cmpabs2(
-; CHECK-NEXT: icmp ne
define zeroext i1 @cmpabs2(i64 %val) {
+; CHECK-LABEL: @cmpabs2(
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i64 %val, 0
+; CHECK-NEXT: ret i1 [[TOBOOL]]
+;
%sub = sub nsw i64 0, %val
%cmp = icmp slt i64 %val, 0
%sub.val = select i1 %cmp, i64 %val, i64 %sub
@@ -608,9 +685,11 @@ define zeroext i1 @cmpabs2(i64 %val) {
ret i1 %tobool
}
-; CHECK-LABEL: @test58(
-; CHECK-NEXT: call i32 @test58_d(i64 36029346783166592)
define void @test58() nounwind {
+; CHECK-LABEL: @test58(
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @test58_d(i64 36029346783166592) #1
+; CHECK-NEXT: ret void
+;
%cast = bitcast <1 x i64> <i64 36029346783166592> to i64
%call = call i32 @test58_d( i64 %cast) nounwind
ret void
@@ -618,6 +697,12 @@ define void @test58() nounwind {
declare i32 @test58_d(i64)
define i1 @test59(i8* %foo) {
+; CHECK-LABEL: @test59(
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, i8* %foo, i64 8
+; CHECK-NEXT: [[USE:%.*]] = ptrtoint i8* [[GEP1]] to i64
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @test58_d(i64 [[USE]]) #1
+; CHECK-NEXT: ret i1 true
+;
%bit = bitcast i8* %foo to i32*
%gep1 = getelementptr inbounds i32, i32* %bit, i64 2
%gep2 = getelementptr inbounds i8, i8* %foo, i64 10
@@ -626,11 +711,16 @@ define i1 @test59(i8* %foo) {
%use = ptrtoint i8* %cast1 to i64
%call = call i32 @test58_d(i64 %use) nounwind
ret i1 %cmp
-; CHECK-LABEL: @test59(
-; CHECK: ret i1 true
}
define i1 @test59_as1(i8 addrspace(1)* %foo) {
+; CHECK-LABEL: @test59_as1(
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr inbounds i8, i8 addrspace(1)* %foo, i16 8
+; CHECK-NEXT: [[TMP1:%.*]] = ptrtoint i8 addrspace(1)* [[GEP1]] to i16
+; CHECK-NEXT: [[USE:%.*]] = zext i16 [[TMP1]] to i64
+; CHECK-NEXT: [[CALL:%.*]] = call i32 @test58_d(i64 [[USE]]) #1
+; CHECK-NEXT: ret i1 true
+;
%bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)*
%gep1 = getelementptr inbounds i32, i32 addrspace(1)* %bit, i64 2
%gep2 = getelementptr inbounds i8, i8 addrspace(1)* %foo, i64 10
@@ -639,84 +729,93 @@ define i1 @test59_as1(i8 addrspace(1)* %foo) {
%use = ptrtoint i8 addrspace(1)* %cast1 to i64
%call = call i32 @test58_d(i64 %use) nounwind
ret i1 %cmp
-; CHECK: @test59_as1
-; CHECK: %[[GEP:.+]] = getelementptr inbounds i8, i8 addrspace(1)* %foo, i16 8
-; CHECK: ptrtoint i8 addrspace(1)* %[[GEP]] to i16
-; CHECK: ret i1 true
}
define i1 @test60(i8* %foo, i64 %i, i64 %j) {
+; CHECK-LABEL: @test60(
+; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nuw i64 %i, 2
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i64 [[GEP1_IDX]], %j
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
%bit = bitcast i8* %foo to i32*
%gep1 = getelementptr inbounds i32, i32* %bit, i64 %i
%gep2 = getelementptr inbounds i8, i8* %foo, i64 %j
%cast1 = bitcast i32* %gep1 to i8*
%cmp = icmp ult i8* %cast1, %gep2
ret i1 %cmp
-; CHECK-LABEL: @test60(
-; CHECK-NEXT: %gep1.idx = shl nuw i64 %i, 2
-; CHECK-NEXT: icmp slt i64 %gep1.idx, %j
-; CHECK-NEXT: ret i1
}
define i1 @test60_as1(i8 addrspace(1)* %foo, i64 %i, i64 %j) {
+; CHECK-LABEL: @test60_as1(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 %i to i16
+; CHECK-NEXT: [[TMP2:%.*]] = trunc i64 %j to i16
+; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nuw i16 [[TMP1]], 2
+; CHECK-NEXT: [[TMP3:%.*]] = icmp sgt i16 [[TMP2]], [[GEP1_IDX]]
+; CHECK-NEXT: ret i1 [[TMP3]]
+;
%bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)*
%gep1 = getelementptr inbounds i32, i32 addrspace(1)* %bit, i64 %i
%gep2 = getelementptr inbounds i8, i8 addrspace(1)* %foo, i64 %j
%cast1 = bitcast i32 addrspace(1)* %gep1 to i8 addrspace(1)*
%cmp = icmp ult i8 addrspace(1)* %cast1, %gep2
ret i1 %cmp
-; CHECK: @test60_as1
-; CHECK: trunc i64 %i to i16
-; CHECK: trunc i64 %j to i16
-; CHECK: %gep1.idx = shl nuw i16 %{{.+}}, 2
-; CHECK-NEXT: icmp sgt i16 %{{.+}}, %gep1.idx
-; CHECK-NEXT: ret i1
}
; Same as test60, but look through an addrspacecast instead of a
; bitcast. This uses the same sized addrspace.
define i1 @test60_addrspacecast(i8* %foo, i64 %i, i64 %j) {
+; CHECK-LABEL: @test60_addrspacecast(
+; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nuw i64 %i, 2
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt i64 [[GEP1_IDX]], %j
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
%bit = addrspacecast i8* %foo to i32 addrspace(3)*
%gep1 = getelementptr inbounds i32, i32 addrspace(3)* %bit, i64 %i
%gep2 = getelementptr inbounds i8, i8* %foo, i64 %j
%cast1 = addrspacecast i32 addrspace(3)* %gep1 to i8*
%cmp = icmp ult i8* %cast1, %gep2
ret i1 %cmp
-; CHECK-LABEL: @test60_addrspacecast(
-; CHECK-NEXT: %gep1.idx = shl nuw i64 %i, 2
-; CHECK-NEXT: icmp slt i64 %gep1.idx, %j
-; CHECK-NEXT: ret i1
}
define i1 @test60_addrspacecast_smaller(i8* %foo, i16 %i, i64 %j) {
+; CHECK-LABEL: @test60_addrspacecast_smaller(
+; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nuw i16 %i, 2
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 %j to i16
+; CHECK-NEXT: [[TMP2:%.*]] = icmp sgt i16 [[TMP1]], [[GEP1_IDX]]
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
%bit = addrspacecast i8* %foo to i32 addrspace(1)*
%gep1 = getelementptr inbounds i32, i32 addrspace(1)* %bit, i16 %i
%gep2 = getelementptr inbounds i8, i8* %foo, i64 %j
%cast1 = addrspacecast i32 addrspace(1)* %gep1 to i8*
%cmp = icmp ult i8* %cast1, %gep2
ret i1 %cmp
-; CHECK-LABEL: @test60_addrspacecast_smaller(
-; CHECK-NEXT: %gep1.idx = shl nuw i16 %i, 2
-; CHECK-NEXT: trunc i64 %j to i16
-; CHECK-NEXT: icmp sgt i16 %1, %gep1.idx
-; CHECK-NEXT: ret i1
}
define i1 @test60_addrspacecast_larger(i8 addrspace(1)* %foo, i32 %i, i16 %j) {
+; CHECK-LABEL: @test60_addrspacecast_larger(
+; CHECK-NEXT: [[GEP1_IDX:%.*]] = shl nuw i32 %i, 2
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 [[GEP1_IDX]] to i16
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i16 [[TMP1]], %j
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
%bit = addrspacecast i8 addrspace(1)* %foo to i32 addrspace(2)*
%gep1 = getelementptr inbounds i32, i32 addrspace(2)* %bit, i32 %i
%gep2 = getelementptr inbounds i8, i8 addrspace(1)* %foo, i16 %j
%cast1 = addrspacecast i32 addrspace(2)* %gep1 to i8 addrspace(1)*
%cmp = icmp ult i8 addrspace(1)* %cast1, %gep2
ret i1 %cmp
-; CHECK-LABEL: @test60_addrspacecast_larger(
-; CHECK-NEXT: %gep1.idx = shl nuw i32 %i, 2
-; CHECK-NEXT: trunc i32 %gep1.idx to i16
-; CHECK-NEXT: icmp slt i16 %1, %j
-; CHECK-NEXT: ret i1
}
define i1 @test61(i8* %foo, i64 %i, i64 %j) {
+; CHECK-LABEL: @test61(
+; CHECK-NEXT: [[BIT:%.*]] = bitcast i8* %foo to i32*
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, i32* [[BIT]], i64 %i
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, i8* %foo, i64 %j
+; CHECK-NEXT: [[CAST1:%.*]] = bitcast i32* [[GEP1]] to i8*
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8* [[CAST1]], [[GEP2]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%bit = bitcast i8* %foo to i32*
%gep1 = getelementptr i32, i32* %bit, i64 %i
%gep2 = getelementptr i8, i8* %foo, i64 %j
@@ -724,12 +823,17 @@ define i1 @test61(i8* %foo, i64 %i, i64 %j) {
%cmp = icmp ult i8* %cast1, %gep2
ret i1 %cmp
; Don't transform non-inbounds GEPs.
-; CHECK-LABEL: @test61(
-; CHECK: icmp ult i8* %cast1, %gep2
-; CHECK-NEXT: ret i1
}
define i1 @test61_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) {
+; CHECK-LABEL: @test61_as1(
+; CHECK-NEXT: [[BIT:%.*]] = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)*
+; CHECK-NEXT: [[GEP1:%.*]] = getelementptr i32, i32 addrspace(1)* [[BIT]], i16 %i
+; CHECK-NEXT: [[GEP2:%.*]] = getelementptr i8, i8 addrspace(1)* %foo, i16 %j
+; CHECK-NEXT: [[CAST1:%.*]] = bitcast i32 addrspace(1)* [[GEP1]] to i8 addrspace(1)*
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i8 addrspace(1)* [[CAST1]], [[GEP2]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%bit = bitcast i8 addrspace(1)* %foo to i32 addrspace(1)*
%gep1 = getelementptr i32, i32 addrspace(1)* %bit, i16 %i
%gep2 = getelementptr i8, i8 addrspace(1)* %foo, i16 %j
@@ -737,23 +841,22 @@ define i1 @test61_as1(i8 addrspace(1)* %foo, i16 %i, i16 %j) {
%cmp = icmp ult i8 addrspace(1)* %cast1, %gep2
ret i1 %cmp
; Don't transform non-inbounds GEPs.
-; CHECK: @test61_as1
-; CHECK: icmp ult i8 addrspace(1)* %cast1, %gep2
-; CHECK-NEXT: ret i1
}
define i1 @test62(i8* %a) {
+; CHECK-LABEL: @test62(
+; CHECK-NEXT: ret i1 true
+;
%arrayidx1 = getelementptr inbounds i8, i8* %a, i64 1
%arrayidx2 = getelementptr inbounds i8, i8* %a, i64 10
%cmp = icmp slt i8* %arrayidx1, %arrayidx2
ret i1 %cmp
-; CHECK-LABEL: @test62(
-; CHECK-NEXT: ret i1 true
}
define i1 @test62_as1(i8 addrspace(1)* %a) {
; CHECK-LABEL: @test62_as1(
-; CHECK-NEXT: ret i1 true
+; CHECK-NEXT: ret i1 true
+;
%arrayidx1 = getelementptr inbounds i8, i8 addrspace(1)* %a, i64 1
%arrayidx2 = getelementptr inbounds i8, i8 addrspace(1)* %a, i64 10
%cmp = icmp slt i8 addrspace(1)* %arrayidx1, %arrayidx2
@@ -761,68 +864,78 @@ define i1 @test62_as1(i8 addrspace(1)* %a) {
}
define i1 @test63(i8 %a, i32 %b) nounwind {
+; CHECK-LABEL: @test63(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %b to i8
+; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[TMP1]], %a
+; CHECK-NEXT: ret i1 [[C]]
+;
%z = zext i8 %a to i32
%t = and i32 %b, 255
%c = icmp eq i32 %z, %t
ret i1 %c
-; CHECK-LABEL: @test63(
-; CHECK-NEXT: %1 = trunc i32 %b to i8
-; CHECK-NEXT: %c = icmp eq i8 %1, %a
-; CHECK-NEXT: ret i1 %c
}
define i1 @test64(i8 %a, i32 %b) nounwind {
+; CHECK-LABEL: @test64(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %b to i8
+; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[TMP1]], %a
+; CHECK-NEXT: ret i1 [[C]]
+;
%t = and i32 %b, 255
%z = zext i8 %a to i32
%c = icmp eq i32 %t, %z
ret i1 %c
-; CHECK-LABEL: @test64(
-; CHECK-NEXT: %1 = trunc i32 %b to i8
-; CHECK-NEXT: %c = icmp eq i8 %1, %a
-; CHECK-NEXT: ret i1 %c
}
define i1 @test65(i64 %A, i64 %B) {
+; CHECK-LABEL: @test65(
+; CHECK-NEXT: ret i1 true
+;
%s1 = add i64 %A, %B
%s2 = add i64 %A, %B
%cmp = icmp eq i64 %s1, %s2
-; CHECK-LABEL: @test65(
-; CHECK-NEXT: ret i1 true
ret i1 %cmp
}
define i1 @test66(i64 %A, i64 %B) {
+; CHECK-LABEL: @test66(
+; CHECK-NEXT: ret i1 true
+;
%s1 = add i64 %A, %B
%s2 = add i64 %B, %A
%cmp = icmp eq i64 %s1, %s2
-; CHECK-LABEL: @test66(
-; CHECK-NEXT: ret i1 true
ret i1 %cmp
}
-; CHECK-LABEL: @test67(
-; CHECK: %and = and i32 %x, 96
-; CHECK: %cmp = icmp ne i32 %and, 0
define i1 @test67(i32 %x) nounwind uwtable {
+; CHECK-LABEL: @test67(
+; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 96
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[AND]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%and = and i32 %x, 127
%cmp = icmp sgt i32 %and, 31
ret i1 %cmp
}
-; CHECK-LABEL: @test68(
-; CHECK: %cmp = icmp ugt i32 %and, 30
define i1 @test68(i32 %x) nounwind uwtable {
+; CHECK-LABEL: @test68(
+; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 127
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 [[AND]], 30
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%and = and i32 %x, 127
%cmp = icmp sgt i32 %and, 30
ret i1 %cmp
}
; PR14708
-; CHECK-LABEL: @test69(
-; CHECK: %1 = or i32 %c, 32
-; CHECK: %2 = icmp eq i32 %1, 97
-; CHECK: ret i1 %2
define i1 @test69(i32 %c) nounwind uwtable {
+; CHECK-LABEL: @test69(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 %c, 32
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 97
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
%1 = icmp eq i32 %c, 97
%2 = icmp eq i32 %c, 65
%3 = or i1 %1, %2
@@ -830,50 +943,59 @@ define i1 @test69(i32 %c) nounwind uwtable {
}
; PR15940
-; CHECK-LABEL: @test70(
-; CHECK-NEXT: %A = srem i32 5, %X
-; CHECK-NEXT: %C = icmp ne i32 %A, 2
-; CHECK-NEXT: ret i1 %C
define i1 @test70(i32 %X) {
+; CHECK-LABEL: @test70(
+; CHECK-NEXT: [[A:%.*]] = srem i32 5, %X
+; CHECK-NEXT: [[C:%.*]] = icmp ne i32 [[A]], 2
+; CHECK-NEXT: ret i1 [[C]]
+;
%A = srem i32 5, %X
%B = add i32 %A, 2
%C = icmp ne i32 %B, 4
ret i1 %C
}
-; CHECK-LABEL: @icmp_sext16trunc(
-; CHECK-NEXT: %1 = trunc i32 %x to i16
-; CHECK-NEXT: %cmp = icmp slt i16 %1, 36
define i1 @icmp_sext16trunc(i32 %x) {
+; CHECK-LABEL: @icmp_sext16trunc(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %x to i16
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[TMP1]], 36
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%trunc = trunc i32 %x to i16
%sext = sext i16 %trunc to i32
%cmp = icmp slt i32 %sext, 36
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_sext8trunc(
-; CHECK-NEXT: %1 = trunc i32 %x to i8
-; CHECK-NEXT: %cmp = icmp slt i8 %1, 36
define i1 @icmp_sext8trunc(i32 %x) {
+; CHECK-LABEL: @icmp_sext8trunc(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %x to i8
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], 36
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%trunc = trunc i32 %x to i8
%sext = sext i8 %trunc to i32
%cmp = icmp slt i32 %sext, 36
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_shl16(
-; CHECK-NEXT: %1 = trunc i32 %x to i16
-; CHECK-NEXT: %cmp = icmp slt i16 %1, 36
define i1 @icmp_shl16(i32 %x) {
+; CHECK-LABEL: @icmp_shl16(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %x to i16
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i16 [[TMP1]], 36
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%shl = shl i32 %x, 16
%cmp = icmp slt i32 %shl, 2359296
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_shl24(
-; CHECK-NEXT: %1 = trunc i32 %x to i8
-; CHECK-NEXT: %cmp = icmp slt i8 %1, 36
define i1 @icmp_shl24(i32 %x) {
+; CHECK-LABEL: @icmp_shl24(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %x to i8
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i8 [[TMP1]], 36
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%shl = shl i32 %x, 24
%cmp = icmp slt i32 %shl, 603979776
ret i1 %cmp
@@ -881,58 +1003,74 @@ define i1 @icmp_shl24(i32 %x) {
; If the (shl x, C) preserved the sign and this is a sign test,
; compare the LHS operand instead
-; CHECK-LABEL: @icmp_shl_nsw_sgt(
-; CHECK-NEXT: icmp sgt i32 %x, 0
define i1 @icmp_shl_nsw_sgt(i32 %x) {
+; CHECK-LABEL: @icmp_shl_nsw_sgt(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%shl = shl nsw i32 %x, 21
%cmp = icmp sgt i32 %shl, 0
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_shl_nsw_sge0(
-; CHECK-NEXT: icmp sgt i32 %x, -1
define i1 @icmp_shl_nsw_sge0(i32 %x) {
+; CHECK-LABEL: @icmp_shl_nsw_sge0(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 %x, -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%shl = shl nsw i32 %x, 21
%cmp = icmp sge i32 %shl, 0
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_shl_nsw_sge1(
-; CHECK-NEXT: icmp sgt i32 %x, 0
define i1 @icmp_shl_nsw_sge1(i32 %x) {
+; CHECK-LABEL: @icmp_shl_nsw_sge1(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%shl = shl nsw i32 %x, 21
%cmp = icmp sge i32 %shl, 1
ret i1 %cmp
}
; Checks for icmp (eq|ne) (shl x, C), 0
-; CHECK-LABEL: @icmp_shl_nsw_eq(
-; CHECK-NEXT: icmp eq i32 %x, 0
define i1 @icmp_shl_nsw_eq(i32 %x) {
+; CHECK-LABEL: @icmp_shl_nsw_eq(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 %x, 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%mul = shl nsw i32 %x, 5
%cmp = icmp eq i32 %mul, 0
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_shl_eq(
-; CHECK-NOT: icmp eq i32 %mul, 0
define i1 @icmp_shl_eq(i32 %x) {
+; CHECK-LABEL: @icmp_shl_eq(
+; CHECK-NEXT: [[MUL_MASK:%.*]] = and i32 %x, 134217727
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[MUL_MASK]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%mul = shl i32 %x, 5
%cmp = icmp eq i32 %mul, 0
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_shl_nsw_ne(
-; CHECK-NEXT: icmp ne i32 %x, 0
define i1 @icmp_shl_nsw_ne(i32 %x) {
+; CHECK-LABEL: @icmp_shl_nsw_ne(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 %x, 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%mul = shl nsw i32 %x, 7
%cmp = icmp ne i32 %mul, 0
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_shl_ne(
-; CHECK-NOT: icmp ne i32 %x, 0
define i1 @icmp_shl_ne(i32 %x) {
+; CHECK-LABEL: @icmp_shl_ne(
+; CHECK-NEXT: [[MUL_MASK:%.*]] = and i32 %x, 33554431
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[MUL_MASK]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%mul = shl i32 %x, 7
%cmp = icmp ne i32 %mul, 0
ret i1 %cmp
@@ -940,145 +1078,176 @@ define i1 @icmp_shl_ne(i32 %x) {
; If the (mul x, C) preserved the sign and this is sign test,
; compare the LHS operand instead
-; CHECK-LABEL: @icmp_mul_nsw(
-; CHECK-NEXT: icmp sgt i32 %x, 0
define i1 @icmp_mul_nsw(i32 %x) {
+; CHECK-LABEL: @icmp_mul_nsw(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 %x, 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%mul = mul nsw i32 %x, 12
%cmp = icmp sgt i32 %mul, 0
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_mul_nsw1(
-; CHECK-NEXT: icmp slt i32 %x, 0
define i1 @icmp_mul_nsw1(i32 %x) {
+; CHECK-LABEL: @icmp_mul_nsw1(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 %x, 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%mul = mul nsw i32 %x, 12
%cmp = icmp sle i32 %mul, -1
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_mul_nsw_neg(
-; CHECK-NEXT: icmp slt i32 %x, 1
define i1 @icmp_mul_nsw_neg(i32 %x) {
+; CHECK-LABEL: @icmp_mul_nsw_neg(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 %x, 1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%mul = mul nsw i32 %x, -12
%cmp = icmp sge i32 %mul, 0
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_mul_nsw_neg1(
-; CHECK-NEXT: icmp slt i32 %x, 0
define i1 @icmp_mul_nsw_neg1(i32 %x) {
+; CHECK-LABEL: @icmp_mul_nsw_neg1(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 %x, 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%mul = mul nsw i32 %x, -12
%cmp = icmp sge i32 %mul, 1
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_mul_nsw_0(
-; CHECK-NOT: icmp sgt i32 %x, 0
define i1 @icmp_mul_nsw_0(i32 %x) {
+; CHECK-LABEL: @icmp_mul_nsw_0(
+; CHECK-NEXT: ret i1 false
+;
%mul = mul nsw i32 %x, 0
%cmp = icmp sgt i32 %mul, 0
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_mul(
-; CHECK-NEXT: %mul = mul i32 %x, -12
define i1 @icmp_mul(i32 %x) {
+; CHECK-LABEL: @icmp_mul(
+; CHECK-NEXT: [[MUL:%.*]] = mul i32 %x, -12
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[MUL]], -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%mul = mul i32 %x, -12
%cmp = icmp sge i32 %mul, 0
ret i1 %cmp
}
; Checks for icmp (eq|ne) (mul x, C), 0
-; CHECK-LABEL: @icmp_mul_neq0(
-; CHECK-NEXT: icmp ne i32 %x, 0
define i1 @icmp_mul_neq0(i32 %x) {
+; CHECK-LABEL: @icmp_mul_neq0(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 %x, 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%mul = mul nsw i32 %x, -12
%cmp = icmp ne i32 %mul, 0
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_mul_eq0(
-; CHECK-NEXT: icmp eq i32 %x, 0
define i1 @icmp_mul_eq0(i32 %x) {
+; CHECK-LABEL: @icmp_mul_eq0(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 %x, 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%mul = mul nsw i32 %x, 12
%cmp = icmp eq i32 %mul, 0
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_mul0_eq0(
-; CHECK-NEXT: ret i1 true
define i1 @icmp_mul0_eq0(i32 %x) {
+; CHECK-LABEL: @icmp_mul0_eq0(
+; CHECK-NEXT: ret i1 true
+;
%mul = mul i32 %x, 0
%cmp = icmp eq i32 %mul, 0
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_mul0_ne0(
-; CHECK-NEXT: ret i1 false
define i1 @icmp_mul0_ne0(i32 %x) {
+; CHECK-LABEL: @icmp_mul0_ne0(
+; CHECK-NEXT: ret i1 false
+;
%mul = mul i32 %x, 0
%cmp = icmp ne i32 %mul, 0
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_sub1_sge(
-; CHECK-NEXT: icmp sgt i32 %x, %y
define i1 @icmp_sub1_sge(i32 %x, i32 %y) {
+; CHECK-LABEL: @icmp_sub1_sge(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 %x, %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%sub = add nsw i32 %x, -1
%cmp = icmp sge i32 %sub, %y
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_add1_sgt(
-; CHECK-NEXT: icmp sge i32 %x, %y
define i1 @icmp_add1_sgt(i32 %x, i32 %y) {
+; CHECK-LABEL: @icmp_add1_sgt(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 %x, %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%add = add nsw i32 %x, 1
%cmp = icmp sgt i32 %add, %y
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_sub1_slt(
-; CHECK-NEXT: icmp sle i32 %x, %y
define i1 @icmp_sub1_slt(i32 %x, i32 %y) {
+; CHECK-LABEL: @icmp_sub1_slt(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 %x, %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%sub = add nsw i32 %x, -1
%cmp = icmp slt i32 %sub, %y
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_add1_sle(
-; CHECK-NEXT: icmp slt i32 %x, %y
define i1 @icmp_add1_sle(i32 %x, i32 %y) {
+; CHECK-LABEL: @icmp_add1_sle(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 %x, %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%add = add nsw i32 %x, 1
%cmp = icmp sle i32 %add, %y
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_add20_sge_add57(
-; CHECK-NEXT: [[ADD:%[a-z0-9]+]] = add nsw i32 %y, 37
-; CHECK-NEXT: icmp sle i32 [[ADD]], %x
define i1 @icmp_add20_sge_add57(i32 %x, i32 %y) {
+; CHECK-LABEL: @icmp_add20_sge_add57(
+; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 %y, 37
+; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP1]], %x
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%1 = add nsw i32 %x, 20
%2 = add nsw i32 %y, 57
%cmp = icmp sge i32 %1, %2
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_sub57_sge_sub20(
-; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = add nsw i32 %x, -37
-; CHECK-NEXT: icmp sge i32 [[SUB]], %y
define i1 @icmp_sub57_sge_sub20(i32 %x, i32 %y) {
+; CHECK-LABEL: @icmp_sub57_sge_sub20(
+; CHECK-NEXT: [[TMP1:%.*]] = add nsw i32 %x, -37
+; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[TMP1]], %y
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%1 = add nsw i32 %x, -57
%2 = add nsw i32 %y, -20
%cmp = icmp sge i32 %1, %2
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_and_shl_neg_ne_0(
-; CHECK-NEXT: [[SHL:%[a-z0-9]+]] = shl i32 1, %B
-; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 [[SHL]], %A
-; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp eq i32 [[AND]], 0
-; CHECK-NEXT: ret i1 [[CMP]]
define i1 @icmp_and_shl_neg_ne_0(i32 %A, i32 %B) {
+; CHECK-LABEL: @icmp_and_shl_neg_ne_0(
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 1, %B
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[SHL]], %A
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%neg = xor i32 %A, -1
%shl = shl i32 1, %B
%and = and i32 %shl, %neg
@@ -1086,12 +1255,13 @@ define i1 @icmp_and_shl_neg_ne_0(i32 %A, i32 %B) {
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_and_shl_neg_eq_0(
-; CHECK-NEXT: [[SHL:%[a-z0-9]+]] = shl i32 1, %B
-; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 [[SHL]], %A
-; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 [[AND]], 0
-; CHECK-NEXT: ret i1 [[CMP]]
define i1 @icmp_and_shl_neg_eq_0(i32 %A, i32 %B) {
+; CHECK-LABEL: @icmp_and_shl_neg_eq_0(
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 1, %B
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[SHL]], %A
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%neg = xor i32 %A, -1
%shl = shl i32 1, %B
%and = and i32 %shl, %neg
@@ -1099,11 +1269,12 @@ define i1 @icmp_and_shl_neg_eq_0(i32 %A, i32 %B) {
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_add_and_shr_ne_0(
-; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %X, 240
-; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 [[AND]], 224
-; CHECK-NEXT: ret i1 [[CMP]]
define i1 @icmp_add_and_shr_ne_0(i32 %X) {
+; CHECK-LABEL: @icmp_add_and_shr_ne_0(
+; CHECK-NEXT: [[AND:%.*]] = and i32 %X, 240
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[AND]], 224
+; CHECK-NEXT: ret i1 [[TOBOOL]]
+;
%shr = lshr i32 %X, 4
%and = and i32 %shr, 15
%add = add i32 %and, -14
@@ -1112,9 +1283,10 @@ define i1 @icmp_add_and_shr_ne_0(i32 %X) {
}
; PR16244
-; CHECK-LABEL: define i1 @test71(
-; CHECK-NEXT: ret i1 false
define i1 @test71(i8* %x) {
+; CHECK-LABEL: @test71(
+; CHECK-NEXT: ret i1 false
+;
%a = getelementptr i8, i8* %x, i64 8
%b = getelementptr inbounds i8, i8* %x, i64 8
%c = icmp ugt i8* %a, %b
@@ -1123,187 +1295,218 @@ define i1 @test71(i8* %x) {
define i1 @test71_as1(i8 addrspace(1)* %x) {
; CHECK-LABEL: @test71_as1(
-; CHECK-NEXT: ret i1 false
+; CHECK-NEXT: ret i1 false
+;
%a = getelementptr i8, i8 addrspace(1)* %x, i64 8
%b = getelementptr inbounds i8, i8 addrspace(1)* %x, i64 8
%c = icmp ugt i8 addrspace(1)* %a, %b
ret i1 %c
}
-; CHECK-LABEL: @icmp_shl_1_V_ult_32(
-; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ult i32 %V, 5
-; CHECK-NEXT: ret i1 [[CMP]]
define i1 @icmp_shl_1_V_ult_32(i32 %V) {
+; CHECK-LABEL: @icmp_shl_1_V_ult_32(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 %V, 5
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%shl = shl i32 1, %V
%cmp = icmp ult i32 %shl, 32
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_shl_1_V_eq_32(
-; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp eq i32 %V, 5
-; CHECK-NEXT: ret i1 [[CMP]]
define i1 @icmp_shl_1_V_eq_32(i32 %V) {
+; CHECK-LABEL: @icmp_shl_1_V_eq_32(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 %V, 5
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%shl = shl i32 1, %V
%cmp = icmp eq i32 %shl, 32
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_shl_1_V_ult_30(
-; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ult i32 %V, 5
-; CHECK-NEXT: ret i1 [[CMP]]
define i1 @icmp_shl_1_V_ult_30(i32 %V) {
+; CHECK-LABEL: @icmp_shl_1_V_ult_30(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 %V, 5
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%shl = shl i32 1, %V
%cmp = icmp ult i32 %shl, 30
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_shl_1_V_ugt_30(
-; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ugt i32 %V, 4
-; CHECK-NEXT: ret i1 [[CMP]]
define i1 @icmp_shl_1_V_ugt_30(i32 %V) {
+; CHECK-LABEL: @icmp_shl_1_V_ugt_30(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 %V, 4
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%shl = shl i32 1, %V
%cmp = icmp ugt i32 %shl, 30
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_shl_1_V_ule_30(
-; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ult i32 %V, 5
-; CHECK-NEXT: ret i1 [[CMP]]
define i1 @icmp_shl_1_V_ule_30(i32 %V) {
+; CHECK-LABEL: @icmp_shl_1_V_ule_30(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 %V, 5
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%shl = shl i32 1, %V
%cmp = icmp ule i32 %shl, 30
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_shl_1_V_uge_30(
-; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ugt i32 %V, 4
-; CHECK-NEXT: ret i1 [[CMP]]
define i1 @icmp_shl_1_V_uge_30(i32 %V) {
+; CHECK-LABEL: @icmp_shl_1_V_uge_30(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 %V, 4
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%shl = shl i32 1, %V
%cmp = icmp uge i32 %shl, 30
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_shl_1_V_uge_2147483648(
-; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp eq i32 %V, 31
-; CHECK-NEXT: ret i1 [[CMP]]
define i1 @icmp_shl_1_V_uge_2147483648(i32 %V) {
+; CHECK-LABEL: @icmp_shl_1_V_uge_2147483648(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 %V, 31
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%shl = shl i32 1, %V
%cmp = icmp uge i32 %shl, 2147483648
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_shl_1_V_ult_2147483648(
-; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 %V, 31
-; CHECK-NEXT: ret i1 [[CMP]]
define i1 @icmp_shl_1_V_ult_2147483648(i32 %V) {
+; CHECK-LABEL: @icmp_shl_1_V_ult_2147483648(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 %V, 31
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%shl = shl i32 1, %V
%cmp = icmp ult i32 %shl, 2147483648
ret i1 %cmp
}
-; CHECK-LABEL: @or_icmp_eq_B_0_icmp_ult_A_B(
-; CHECK-NEXT: [[SUB:%[a-z0-9]+]] = add i64 %b, -1
-; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp uge i64 [[SUB]], %a
-; CHECK-NEXT: ret i1 [[CMP]]
define i1 @or_icmp_eq_B_0_icmp_ult_A_B(i64 %a, i64 %b) {
+; CHECK-LABEL: @or_icmp_eq_B_0_icmp_ult_A_B(
+; CHECK-NEXT: [[TMP1:%.*]] = add i64 %b, -1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp uge i64 [[TMP1]], %a
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
%1 = icmp eq i64 %b, 0
%2 = icmp ult i64 %a, %b
%3 = or i1 %1, %2
ret i1 %3
}
-; CHECK-LABEL: @icmp_add_ult_2(
-; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %X, -2
-; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp eq i32 [[AND]], 14
-; CHECK-NEXT: ret i1 [[CMP]]
define i1 @icmp_add_ult_2(i32 %X) {
+; CHECK-LABEL: @icmp_add_ult_2(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %X, -2
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 14
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%add = add i32 %X, -14
%cmp = icmp ult i32 %add, 2
ret i1 %cmp
}
-; CHECK: @icmp_add_X_-14_ult_2
-; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %X, -2
-; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp eq i32 [[AND]], 14
-; CHECK-NEXT: ret i1 [[CMP]]
define i1 @icmp_add_X_-14_ult_2(i32 %X) {
+; CHECK-LABEL: @icmp_add_X_-14_ult_2(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %X, -2
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 14
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%add = add i32 %X, -14
%cmp = icmp ult i32 %add, 2
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_sub_3_X_ult_2(
-; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 %X, 1
-; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp eq i32 [[OR]], 3
-; CHECK-NEXT: ret i1 [[CMP]]
define i1 @icmp_sub_3_X_ult_2(i32 %X) {
+; CHECK-LABEL: @icmp_sub_3_X_ult_2(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 %X, 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], 3
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%add = sub i32 3, %X
%cmp = icmp ult i32 %add, 2
ret i1 %cmp
}
-; CHECK: @icmp_add_X_-14_uge_2
-; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %X, -2
-; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 [[AND]], 14
-; CHECK-NEXT: ret i1 [[CMP]]
define i1 @icmp_add_X_-14_uge_2(i32 %X) {
+; CHECK-LABEL: @icmp_add_X_-14_uge_2(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %X, -2
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP1]], 14
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%add = add i32 %X, -14
%cmp = icmp uge i32 %add, 2
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_sub_3_X_uge_2(
-; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 %X, 1
-; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 [[OR]], 3
-; CHECK-NEXT: ret i1 [[CMP]]
define i1 @icmp_sub_3_X_uge_2(i32 %X) {
+; CHECK-LABEL: @icmp_sub_3_X_uge_2(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 %X, 1
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP1]], 3
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%add = sub i32 3, %X
%cmp = icmp uge i32 %add, 2
ret i1 %cmp
}
-; CHECK: @icmp_and_X_-16_eq-16
-; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ugt i32 %X, -17
-; CHECK-NEXT: ret i1 [[CMP]]
define i1 @icmp_and_X_-16_eq-16(i32 %X) {
+; CHECK-LABEL: @icmp_and_X_-16_eq-16(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 %X, -17
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%and = and i32 %X, -16
%cmp = icmp eq i32 %and, -16
ret i1 %cmp
}
-; CHECK: @icmp_and_X_-16_ne-16
-; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ult i32 %X, -16
-; CHECK-NEXT: ret i1 [[CMP]]
define i1 @icmp_and_X_-16_ne-16(i32 %X) {
+; CHECK-LABEL: @icmp_and_X_-16_ne-16(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 %X, -16
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%and = and i32 %X, -16
%cmp = icmp ne i32 %and, -16
ret i1 %cmp
}
-; CHECK: @icmp_sub_-1_X_ult_4
-; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ugt i32 %X, -5
-; CHECK-NEXT: ret i1 [[CMP]]
define i1 @icmp_sub_-1_X_ult_4(i32 %X) {
+; CHECK-LABEL: @icmp_sub_-1_X_ult_4(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 %X, -5
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%sub = sub i32 -1, %X
%cmp = icmp ult i32 %sub, 4
ret i1 %cmp
}
-; CHECK: @icmp_sub_-1_X_uge_4
-; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ult i32 %X, -4
-; CHECK-NEXT: ret i1 [[CMP]]
define i1 @icmp_sub_-1_X_uge_4(i32 %X) {
+; CHECK-LABEL: @icmp_sub_-1_X_uge_4(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 %X, -4
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%sub = sub i32 -1, %X
%cmp = icmp uge i32 %sub, 4
ret i1 %cmp
}
-; CHECK-LABEL: @icmp_swap_operands_for_cse
-; CHECK: [[CMP:%[a-z0-9]+]] = icmp ult i32 %X, %Y
-; CHECK-NEXT: br i1 [[CMP]], label %true, label %false
-; CHECK: ret i1
define i1 @icmp_swap_operands_for_cse(i32 %X, i32 %Y) {
+; CHECK-LABEL: @icmp_swap_operands_for_cse(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 %X, %Y
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 %X, %Y
+; CHECK-NEXT: br i1 [[CMP]], label %true, label %false
+; CHECK: true:
+; CHECK-NEXT: [[TMP0:%.*]] = and i32 [[SUB]], 1
+; CHECK-NEXT: br label %end
+; CHECK: false:
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 [[SUB]], 16
+; CHECK-NEXT: br label %end
+; CHECK: end:
+; CHECK-NEXT: [[RES_IN:%.*]] = phi i32 [ [[TMP0]], %true ], [ [[TMP1]], %false ]
+; CHECK-NEXT: [[RES:%.*]] = icmp ne i32 [[RES:%.*]].in, 0
+; CHECK-NEXT: ret i1 [[RES]]
+;
entry:
%sub = sub i32 %X, %Y
%cmp = icmp ugt i32 %Y, %X
@@ -1320,11 +1523,25 @@ end:
ret i1 %res
}
-; CHECK-LABEL: @icmp_swap_operands_for_cse2
-; CHECK: [[CMP:%[a-z0-9]+]] = icmp ult i32 %X, %Y
-; CHECK-NEXT: br i1 [[CMP]], label %true, label %false
-; CHECK: ret i1
define i1 @icmp_swap_operands_for_cse2(i32 %X, i32 %Y) {
+; CHECK-LABEL: @icmp_swap_operands_for_cse2(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp ult i32 %X, %Y
+; CHECK-NEXT: br i1 [[CMP]], label %true, label %false
+; CHECK: true:
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 %X, %Y
+; CHECK-NEXT: [[SUB1:%.*]] = sub i32 %X, %Y
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SUB]], [[SUB1]]
+; CHECK-NEXT: br label %end
+; CHECK: false:
+; CHECK-NEXT: [[SUB2:%.*]] = sub i32 %Y, %X
+; CHECK-NEXT: br label %end
+; CHECK: end:
+; CHECK-NEXT: [[RES_IN_IN:%.*]] = phi i32 [ [[ADD]], %true ], [ [[SUB2]], %false ]
+; CHECK-NEXT: [[RES_IN:%.*]] = and i32 [[RES_IN:%.*]].in, 1
+; CHECK-NEXT: [[RES:%.*]] = icmp ne i32 [[RES:%.*]].in, 0
+; CHECK-NEXT: ret i1 [[RES]]
+;
entry:
%cmp = icmp ugt i32 %Y, %X
br i1 %cmp, label %true, label %false
@@ -1343,11 +1560,23 @@ end:
ret i1 %res
}
-; CHECK-LABEL: @icmp_do_not_swap_operands_for_cse
-; CHECK: [[CMP:%[a-z0-9]+]] = icmp ugt i32 %Y, %X
-; CHECK-NEXT: br i1 [[CMP]], label %true, label %false
-; CHECK: ret i1
define i1 @icmp_do_not_swap_operands_for_cse(i32 %X, i32 %Y) {
+; CHECK-LABEL: @icmp_do_not_swap_operands_for_cse(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 %Y, %X
+; CHECK-NEXT: br i1 [[CMP]], label %true, label %false
+; CHECK: true:
+; CHECK-NEXT: [[SUB:%.*]] = sub i32 %X, %Y
+; CHECK-NEXT: br label %end
+; CHECK: false:
+; CHECK-NEXT: [[SUB2:%.*]] = sub i32 %Y, %X
+; CHECK-NEXT: br label %end
+; CHECK: end:
+; CHECK-NEXT: [[RES_IN_IN:%.*]] = phi i32 [ [[SUB]], %true ], [ [[SUB2]], %false ]
+; CHECK-NEXT: [[RES_IN:%.*]] = and i32 [[RES_IN:%.*]].in, 1
+; CHECK-NEXT: [[RES:%.*]] = icmp ne i32 [[RES:%.*]].in, 0
+; CHECK-NEXT: ret i1 [[RES]]
+;
entry:
%cmp = icmp ugt i32 %Y, %X
br i1 %cmp, label %true, label %false
@@ -1364,42 +1593,48 @@ end:
ret i1 %res
}
-; CHECK-LABEL: @icmp_lshr_lshr_eq
-; CHECK: %z.unshifted = xor i32 %a, %b
-; CHECK: %z = icmp ult i32 %z.unshifted, 1073741824
define i1 @icmp_lshr_lshr_eq(i32 %a, i32 %b) nounwind {
- %x = lshr i32 %a, 30
- %y = lshr i32 %b, 30
- %z = icmp eq i32 %x, %y
- ret i1 %z
+; CHECK-LABEL: @icmp_lshr_lshr_eq(
+; CHECK-NEXT: [[Z_UNSHIFTED:%.*]] = xor i32 %a, %b
+; CHECK-NEXT: [[Z:%.*]] = icmp ult i32 [[Z:%.*]].unshifted, 1073741824
+; CHECK-NEXT: ret i1 [[Z]]
+;
+ %x = lshr i32 %a, 30
+ %y = lshr i32 %b, 30
+ %z = icmp eq i32 %x, %y
+ ret i1 %z
}
-; CHECK-LABEL: @icmp_ashr_ashr_ne
-; CHECK: %z.unshifted = xor i32 %a, %b
-; CHECK: %z = icmp ugt i32 %z.unshifted, 255
define i1 @icmp_ashr_ashr_ne(i32 %a, i32 %b) nounwind {
- %x = ashr i32 %a, 8
- %y = ashr i32 %b, 8
- %z = icmp ne i32 %x, %y
- ret i1 %z
+; CHECK-LABEL: @icmp_ashr_ashr_ne(
+; CHECK-NEXT: [[Z_UNSHIFTED:%.*]] = xor i32 %a, %b
+; CHECK-NEXT: [[Z:%.*]] = icmp ugt i32 [[Z:%.*]].unshifted, 255
+; CHECK-NEXT: ret i1 [[Z]]
+;
+ %x = ashr i32 %a, 8
+ %y = ashr i32 %b, 8
+ %z = icmp ne i32 %x, %y
+ ret i1 %z
}
-; CHECK-LABEL: @icmp_neg_cst_slt
-; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp sgt i32 %a, 10
-; CHECK-NEXT: ret i1 [[CMP]]
define i1 @icmp_neg_cst_slt(i32 %a) {
+; CHECK-LABEL: @icmp_neg_cst_slt(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 %a, 10
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
%1 = sub nsw i32 0, %a
%2 = icmp slt i32 %1, -10
ret i1 %2
}
-; CHECK-LABEL: @icmp_and_or_lshr
-; CHECK-NEXT: [[SHL:%[a-z0-9]+]] = shl nuw i32 1, %y
-; CHECK-NEXT: [[OR:%[a-z0-9]+]] = or i32 [[SHL]], 1
-; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 [[OR]], %x
-; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 [[AND]], 0
-; CHECK-NEXT: ret i1 [[CMP]]
define i1 @icmp_and_or_lshr(i32 %x, i32 %y) {
+; CHECK-LABEL: @icmp_and_or_lshr(
+; CHECK-NEXT: [[SHF1:%.*]] = shl nuw i32 1, %y
+; CHECK-NEXT: [[OR2:%.*]] = or i32 [[SHF1]], 1
+; CHECK-NEXT: [[AND3:%.*]] = and i32 [[OR2]], %x
+; CHECK-NEXT: [[RET:%.*]] = icmp ne i32 [[AND3]], 0
+; CHECK-NEXT: ret i1 [[RET]]
+;
%shf = lshr i32 %x, %y
%or = or i32 %shf, %x
%and = and i32 %or, 1
@@ -1407,11 +1642,12 @@ define i1 @icmp_and_or_lshr(i32 %x, i32 %y) {
ret i1 %ret
}
-; CHECK-LABEL: @icmp_and_or_lshr_cst
-; CHECK-NEXT: [[AND:%[a-z0-9]+]] = and i32 %x, 3
-; CHECK-NEXT: [[CMP:%[a-z0-9]+]] = icmp ne i32 [[AND]], 0
-; CHECK-NEXT: ret i1 [[CMP]]
define i1 @icmp_and_or_lshr_cst(i32 %x) {
+; CHECK-LABEL: @icmp_and_or_lshr_cst(
+; CHECK-NEXT: [[AND1:%.*]] = and i32 %x, 3
+; CHECK-NEXT: [[RET:%.*]] = icmp ne i32 [[AND1]], 0
+; CHECK-NEXT: ret i1 [[RET]]
+;
%shf = lshr i32 %x, 1
%or = or i32 %shf, %x
%and = and i32 %or, 1
@@ -1419,152 +1655,175 @@ define i1 @icmp_and_or_lshr_cst(i32 %x) {
ret i1 %ret
}
-; CHECK-LABEL: @shl_ap1_zero_ap2_non_zero_2
-; CHECK-NEXT: %cmp = icmp ugt i32 %a, 29
-; CHECK-NEXT: ret i1 %cmp
define i1 @shl_ap1_zero_ap2_non_zero_2(i32 %a) {
- %shl = shl i32 4, %a
- %cmp = icmp eq i32 %shl, 0
- ret i1 %cmp
+; CHECK-LABEL: @shl_ap1_zero_ap2_non_zero_2(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 %a, 29
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl i32 4, %a
+ %cmp = icmp eq i32 %shl, 0
+ ret i1 %cmp
}
-; CHECK-LABEL: @shl_ap1_zero_ap2_non_zero_4
-; CHECK-NEXT: %cmp = icmp ugt i32 %a, 30
-; CHECK-NEXT: ret i1 %cmp
define i1 @shl_ap1_zero_ap2_non_zero_4(i32 %a) {
- %shl = shl i32 -2, %a
- %cmp = icmp eq i32 %shl, 0
- ret i1 %cmp
+; CHECK-LABEL: @shl_ap1_zero_ap2_non_zero_4(
+; CHECK-NEXT: [[CMP:%.*]] = icmp ugt i32 %a, 30
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl i32 -2, %a
+ %cmp = icmp eq i32 %shl, 0
+ ret i1 %cmp
}
-; CHECK-LABEL: @shl_ap1_non_zero_ap2_non_zero_both_positive
-; CHECK-NEXT: %cmp = icmp eq i32 %a, 0
-; CHECK-NEXT: ret i1 %cmp
define i1 @shl_ap1_non_zero_ap2_non_zero_both_positive(i32 %a) {
- %shl = shl i32 50, %a
- %cmp = icmp eq i32 %shl, 50
- ret i1 %cmp
+; CHECK-LABEL: @shl_ap1_non_zero_ap2_non_zero_both_positive(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 %a, 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl i32 50, %a
+ %cmp = icmp eq i32 %shl, 50
+ ret i1 %cmp
}
-; CHECK-LABEL: @shl_ap1_non_zero_ap2_non_zero_both_negative
-; CHECK-NEXT: %cmp = icmp eq i32 %a, 0
-; CHECK-NEXT: ret i1 %cmp
define i1 @shl_ap1_non_zero_ap2_non_zero_both_negative(i32 %a) {
- %shl = shl i32 -50, %a
- %cmp = icmp eq i32 %shl, -50
- ret i1 %cmp
+; CHECK-LABEL: @shl_ap1_non_zero_ap2_non_zero_both_negative(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 %a, 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl i32 -50, %a
+ %cmp = icmp eq i32 %shl, -50
+ ret i1 %cmp
}
-; CHECK-LABEL: @shl_ap1_non_zero_ap2_non_zero_ap1_1
-; CHECK-NEXT: ret i1 false
define i1 @shl_ap1_non_zero_ap2_non_zero_ap1_1(i32 %a) {
- %shl = shl i32 50, %a
- %cmp = icmp eq i32 %shl, 25
- ret i1 %cmp
+; CHECK-LABEL: @shl_ap1_non_zero_ap2_non_zero_ap1_1(
+; CHECK-NEXT: ret i1 false
+;
+ %shl = shl i32 50, %a
+ %cmp = icmp eq i32 %shl, 25
+ ret i1 %cmp
}
-; CHECK-LABEL: @shl_ap1_non_zero_ap2_non_zero_ap1_2
-; CHECK-NEXT: %cmp = icmp eq i32 %a, 1
-; CHECK-NEXT: ret i1 %cmp
define i1 @shl_ap1_non_zero_ap2_non_zero_ap1_2(i32 %a) {
- %shl = shl i32 25, %a
- %cmp = icmp eq i32 %shl, 50
- ret i1 %cmp
+; CHECK-LABEL: @shl_ap1_non_zero_ap2_non_zero_ap1_2(
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 %a, 1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %shl = shl i32 25, %a
+ %cmp = icmp eq i32 %shl, 50
+ ret i1 %cmp
}
-; CHECK-LABEL: @shl_ap1_non_zero_ap2_non_zero_ap1_3
-; CHECK-NEXT: ret i1 false
define i1 @shl_ap1_non_zero_ap2_non_zero_ap1_3(i32 %a) {
- %shl = shl i32 26, %a
- %cmp = icmp eq i32 %shl, 50
- ret i1 %cmp
+; CHECK-LABEL: @shl_ap1_non_zero_ap2_non_zero_ap1_3(
+; CHECK-NEXT: ret i1 false
+;
+ %shl = shl i32 26, %a
+ %cmp = icmp eq i32 %shl, 50
+ ret i1 %cmp
}
-; CHECK-LABEL: @icmp_sgt_zero_add_nsw
-; CHECK-NEXT: icmp sgt i32 %a, -1
define i1 @icmp_sgt_zero_add_nsw(i32 %a) {
- %add = add nsw i32 %a, 1
- %cmp = icmp sgt i32 %add, 0
- ret i1 %cmp
+; CHECK-LABEL: @icmp_sgt_zero_add_nsw(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 %a, -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %add = add nsw i32 %a, 1
+ %cmp = icmp sgt i32 %add, 0
+ ret i1 %cmp
}
-; CHECK-LABEL: @icmp_sge_zero_add_nsw
-; CHECK-NEXT: icmp sgt i32 %a, -2
define i1 @icmp_sge_zero_add_nsw(i32 %a) {
- %add = add nsw i32 %a, 1
- %cmp = icmp sge i32 %add, 0
- ret i1 %cmp
+; CHECK-LABEL: @icmp_sge_zero_add_nsw(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 %a, -2
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %add = add nsw i32 %a, 1
+ %cmp = icmp sge i32 %add, 0
+ ret i1 %cmp
}
-; CHECK-LABEL: @icmp_slt_zero_add_nsw
-; CHECK-NEXT: icmp slt i32 %a, -1
define i1 @icmp_slt_zero_add_nsw(i32 %a) {
- %add = add nsw i32 %a, 1
- %cmp = icmp slt i32 %add, 0
- ret i1 %cmp
+; CHECK-LABEL: @icmp_slt_zero_add_nsw(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 %a, -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %add = add nsw i32 %a, 1
+ %cmp = icmp slt i32 %add, 0
+ ret i1 %cmp
}
-; CHECK-LABEL: @icmp_sle_zero_add_nsw
-; CHECK-NEXT: icmp slt i32 %a, 0
define i1 @icmp_sle_zero_add_nsw(i32 %a) {
- %add = add nsw i32 %a, 1
- %cmp = icmp sle i32 %add, 0
- ret i1 %cmp
+; CHECK-LABEL: @icmp_sle_zero_add_nsw(
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 %a, 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %add = add nsw i32 %a, 1
+ %cmp = icmp sle i32 %add, 0
+ ret i1 %cmp
}
-; CHECK-LABEL: @icmp_cmpxchg_strong
-; CHECK-NEXT: %[[xchg:.*]] = cmpxchg i32* %sc, i32 %old_val, i32 %new_val seq_cst seq_cst
-; CHECK-NEXT: %[[icmp:.*]] = extractvalue { i32, i1 } %[[xchg]], 1
-; CHECK-NEXT: ret i1 %[[icmp]]
define zeroext i1 @icmp_cmpxchg_strong(i32* %sc, i32 %old_val, i32 %new_val) {
+; CHECK-LABEL: @icmp_cmpxchg_strong(
+; CHECK-NEXT: [[XCHG:%.*]] = cmpxchg i32* %sc, i32 %old_val, i32 %new_val seq_cst seq_cst
+; CHECK-NEXT: [[ICMP:%.*]] = extractvalue { i32, i1
+;
%xchg = cmpxchg i32* %sc, i32 %old_val, i32 %new_val seq_cst seq_cst
%xtrc = extractvalue { i32, i1 } %xchg, 0
%icmp = icmp eq i32 %xtrc, %old_val
ret i1 %icmp
}
-; CHECK-LABEL: @f1
-; CHECK-NEXT: %[[cmp:.*]] = icmp sge i64 %a, %b
-; CHECK-NEXT: ret i1 %[[cmp]]
define i1 @f1(i64 %a, i64 %b) {
+; CHECK-LABEL: @f1(
+; CHECK-NEXT: [[V:%.*]] = icmp sge i64 %a, %b
+; CHECK-NEXT: ret i1 [[V]]
+;
%t = sub nsw i64 %a, %b
%v = icmp sge i64 %t, 0
ret i1 %v
}
-; CHECK-LABEL: @f2
-; CHECK-NEXT: %[[cmp:.*]] = icmp sgt i64 %a, %b
-; CHECK-NEXT: ret i1 %[[cmp]]
define i1 @f2(i64 %a, i64 %b) {
+; CHECK-LABEL: @f2(
+; CHECK-NEXT: [[V:%.*]] = icmp sgt i64 %a, %b
+; CHECK-NEXT: ret i1 [[V]]
+;
%t = sub nsw i64 %a, %b
%v = icmp sgt i64 %t, 0
ret i1 %v
}
-; CHECK-LABEL: @f3
-; CHECK-NEXT: %[[cmp:.*]] = icmp slt i64 %a, %b
-; CHECK-NEXT: ret i1 %[[cmp]]
define i1 @f3(i64 %a, i64 %b) {
+; CHECK-LABEL: @f3(
+; CHECK-NEXT: [[V:%.*]] = icmp slt i64 %a, %b
+; CHECK-NEXT: ret i1 [[V]]
+;
%t = sub nsw i64 %a, %b
%v = icmp slt i64 %t, 0
ret i1 %v
}
-; CHECK-LABEL: @f4
-; CHECK-NEXT: %[[cmp:.*]] = icmp sle i64 %a, %b
-; CHECK-NEXT: ret i1 %[[cmp]]
define i1 @f4(i64 %a, i64 %b) {
+; CHECK-LABEL: @f4(
+; CHECK-NEXT: [[V:%.*]] = icmp sle i64 %a, %b
+; CHECK-NEXT: ret i1 [[V]]
+;
%t = sub nsw i64 %a, %b
%v = icmp sle i64 %t, 0
ret i1 %v
}
-; CHECK-LABEL: @f5
-; CHECK: %[[cmp:.*]] = icmp slt i32 %[[sub:.*]], 0
-; CHECK: %[[neg:.*]] = sub nsw i32 0, %[[sub]]
-; CHECK: %[[sel:.*]] = select i1 %[[cmp]], i32 %[[neg]], i32 %[[sub]]
-; CHECK: ret i32 %[[sel]]
define i32 @f5(i8 %a, i8 %b) {
+; CHECK-LABEL: @f5(
+; CHECK-NEXT: [[CONV:%.*]] = zext i8 %a to i32
+; CHECK-NEXT: [[CONV3:%.*]] = zext i8 %b to i32
+; CHECK-NEXT: [[SUB:%.*]] = sub nsw i32 [[CONV]], [[CONV3]]
+; CHECK-NEXT: [[CMP4:%.*]] = icmp slt i32 [[SUB]], 0
+; CHECK-NEXT: [[SUB7:%.*]] = sub nsw i32 0, [[SUB]]
+; CHECK-NEXT: [[SUB7_SUB:%.*]] = select i1 [[CMP4]], i32 [[SUB7]], i32 [[SUB]]
+; CHECK-NEXT: ret i32 [[SUB7_SUB]]
+;
%conv = zext i8 %a to i32
%conv3 = zext i8 %b to i32
%sub = sub nsw i32 %conv, %conv3
@@ -1574,13 +1833,14 @@ define i32 @f5(i8 %a, i8 %b) {
ret i32 %sub7.sub
}
-; CHECK-LABEL: @f6
-; CHECK: %cmp.unshifted = xor i32 %a, %b
-; CHECK-NEXT: %cmp.mask = and i32 %cmp.unshifted, 255
-; CHECK-NEXT: %cmp = icmp eq i32 %cmp.mask, 0
-; CHECK-NEXT: %s = select i1 %cmp, i32 10000, i32 0
-; CHECK-NEXT: ret i32 %s
define i32 @f6(i32 %a, i32 %b) {
+; CHECK-LABEL: @f6(
+; CHECK-NEXT: [[CMP_UNSHIFTED:%.*]] = xor i32 %a, %b
+; CHECK-NEXT: [[CMP_MASK:%.*]] = and i32 [[CMP_UNSHIFTED]], 255
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[CMP:%.*]].mask, 0
+; CHECK-NEXT: [[S:%.*]] = select i1 [[CMP]], i32 10000, i32 0
+; CHECK-NEXT: ret i32 [[S]]
+;
%sext = shl i32 %a, 24
%conv = ashr i32 %sext, 24
%sext6 = shl i32 %b, 24
@@ -1590,13 +1850,14 @@ define i32 @f6(i32 %a, i32 %b) {
ret i32 %s
}
-; CHECK-LABEL: @f7
-; CHECK: %cmp.unshifted = xor i32 %a, %b
-; CHECK-NEXT: %cmp.mask = and i32 %cmp.unshifted, 511
-; CHECK-NEXT: %cmp = icmp ne i32 %cmp.mask, 0
-; CHECK-NEXT: %s = select i1 %cmp, i32 10000, i32 0
-; CHECK-NEXT: ret i32 %s
define i32 @f7(i32 %a, i32 %b) {
+; CHECK-LABEL: @f7(
+; CHECK-NEXT: [[CMP_UNSHIFTED:%.*]] = xor i32 %a, %b
+; CHECK-NEXT: [[CMP_MASK:%.*]] = and i32 [[CMP_UNSHIFTED]], 511
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[CMP:%.*]].mask, 0
+; CHECK-NEXT: [[S:%.*]] = select i1 [[CMP]], i32 10000, i32 0
+; CHECK-NEXT: ret i32 [[S]]
+;
%sext = shl i32 %a, 23
%sext6 = shl i32 %b, 23
%cmp = icmp ne i32 %sext, %sext6
@@ -1604,83 +1865,256 @@ define i32 @f7(i32 %a, i32 %b) {
ret i32 %s
}
-; CHECK: @f8(
-; CHECK-NEXT: [[RESULT:%[a-z0-9]+]] = icmp ne i32 %lim, 0
-; CHECK-NEXT: ret i1 [[RESULT]]
define i1 @f8(i32 %val, i32 %lim) {
+; CHECK-LABEL: @f8(
+; CHECK-NEXT: [[R:%.*]] = icmp ne i32 %lim, 0
+; CHECK-NEXT: ret i1 [[R]]
+;
%lim.sub = add i32 %lim, -1
%val.and = and i32 %val, %lim.sub
%r = icmp ult i32 %val.and, %lim
ret i1 %r
}
-; CHECK: @f9(
-; CHECK-NEXT: [[RESULT:%[a-z0-9]+]] = icmp ne i32 %lim, 0
-; CHECK-NEXT: ret i1 [[RESULT]]
define i1 @f9(i32 %val, i32 %lim) {
+; CHECK-LABEL: @f9(
+; CHECK-NEXT: [[R:%.*]] = icmp ne i32 %lim, 0
+; CHECK-NEXT: ret i1 [[R]]
+;
%lim.sub = sub i32 %lim, 1
%val.and = and i32 %val, %lim.sub
%r = icmp ult i32 %val.and, %lim
ret i1 %r
}
-; CHECK: @f10(
-; CHECK: [[CMP:%.*]] = icmp uge i16 %p, mul (i16 zext (i8 ptrtoint (i1 (i16)* @f10 to i8) to i16), i16 zext (i8 ptrtoint (i1 (i16)* @f10 to i8) to i16))
-; CHECK-NEXT: ret i1 [[CMP]]
define i1 @f10(i16 %p) {
-entry:
+; CHECK-LABEL: @f10(
+; CHECK-NEXT: [[CMP580:%.*]] = icmp uge i16 %p, mul (i16 zext (i8 ptrtoint (i1 (i16)* @f10 to i8) to i16), i16 zext (i8 ptrtoint (i1 (i16)* @f10 to i8) to i16))
+; CHECK-NEXT: ret i1 [[CMP580]]
+;
%cmp580 = icmp ule i16 mul (i16 zext (i8 ptrtoint (i1 (i16)* @f10 to i8) to i16), i16 zext (i8 ptrtoint (i1 (i16)* @f10 to i8) to i16)), %p
ret i1 %cmp580
}
-; CHECK-LABEL: @cmp_sgt_rhs_dec
-; CHECK-NOT: sub
-; CHECK: icmp sge i32 %conv, %i
define i1 @cmp_sgt_rhs_dec(float %x, i32 %i) {
+; CHECK-LABEL: @cmp_sgt_rhs_dec(
+; CHECK-NEXT: [[CONV:%.*]] = fptosi float %x to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp sge i32 [[CONV]], %i
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%conv = fptosi float %x to i32
%dec = sub nsw i32 %i, 1
%cmp = icmp sgt i32 %conv, %dec
ret i1 %cmp
}
-; CHECK-LABEL: @cmp_sle_rhs_dec
-; CHECK-NOT: sub
-; CHECK: icmp slt i32 %conv, %i
define i1 @cmp_sle_rhs_dec(float %x, i32 %i) {
+; CHECK-LABEL: @cmp_sle_rhs_dec(
+; CHECK-NEXT: [[CONV:%.*]] = fptosi float %x to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[CONV]], %i
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%conv = fptosi float %x to i32
%dec = sub nsw i32 %i, 1
%cmp = icmp sle i32 %conv, %dec
ret i1 %cmp
}
-; CHECK-LABEL: @cmp_sge_rhs_inc
-; CHECK-NOT: add
-; CHECK: icmp sgt i32 %conv, %i
define i1 @cmp_sge_rhs_inc(float %x, i32 %i) {
+; CHECK-LABEL: @cmp_sge_rhs_inc(
+; CHECK-NEXT: [[CONV:%.*]] = fptosi float %x to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], %i
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%conv = fptosi float %x to i32
%inc = add nsw i32 %i, 1
%cmp = icmp sge i32 %conv, %inc
ret i1 %cmp
}
-; CHECK-LABEL: @cmp_slt_rhs_inc
-; CHECK-NOT: add
-; CHECK: icmp sle i32 %conv, %i
define i1 @cmp_slt_rhs_inc(float %x, i32 %i) {
+; CHECK-LABEL: @cmp_slt_rhs_inc(
+; CHECK-NEXT: [[CONV:%.*]] = fptosi float %x to i32
+; CHECK-NEXT: [[CMP:%.*]] = icmp sle i32 [[CONV]], %i
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%conv = fptosi float %x to i32
%inc = add nsw i32 %i, 1
%cmp = icmp slt i32 %conv, %inc
ret i1 %cmp
}
-; CHECK-LABEL: @PR26407
-; CHECK-NEXT: %[[addx:.*]] = add i32 %x, 2147483647
-; CHECK-NEXT: %[[addy:.*]] = add i32 %y, 2147483647
-; CHECK-NEXT: %[[cmp:.*]] = icmp uge i32 %[[addx]], %[[addy]]
-; CHECK-NEXT: ret i1 %[[cmp]]
define i1 @PR26407(i32 %x, i32 %y) {
+; CHECK-LABEL: @PR26407(
+; CHECK-NEXT: [[ADDX:%.*]] = add i32 %x, 2147483647
+; CHECK-NEXT: [[ADDY:%.*]] = add i32 %y, 2147483647
+; CHECK-NEXT: [[CMP:%.*]] = icmp uge i32 [[ADDX]], [[ADDY]]
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%addx = add i32 %x, 2147483647
%addy = add i32 %y, 2147483647
%cmp = icmp uge i32 %addx, %addy
ret i1 %cmp
}
+
+define i1 @cmp_inverse_mask_bits_set_eq(i32 %x) {
+; CHECK-LABEL: @cmp_inverse_mask_bits_set_eq(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %x, -43
+; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[TMP1]], -43
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %or = or i32 %x, 42
+ %cmp = icmp eq i32 %or, -1
+ ret i1 %cmp
+}
+
+define i1 @cmp_inverse_mask_bits_set_ne(i32 %x) {
+; CHECK-LABEL: @cmp_inverse_mask_bits_set_ne(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %x, -43
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i32 [[TMP1]], -43
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %or = or i32 %x, 42
+ %cmp = icmp ne i32 %or, -1
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: @idom_sign_bit_check_edge_dominates
+define void @idom_sign_bit_check_edge_dominates(i64 %a) {
+entry:
+ %cmp = icmp slt i64 %a, 0
+ br i1 %cmp, label %land.lhs.true, label %lor.rhs
+
+land.lhs.true: ; preds = %entry
+ br label %lor.end
+
+; CHECK-LABEL: lor.rhs:
+; CHECK-NOT: icmp sgt i64 %a, 0
+; CHECK: icmp eq i64 %a, 0
+lor.rhs: ; preds = %entry
+ %cmp2 = icmp sgt i64 %a, 0
+ br i1 %cmp2, label %land.rhs, label %lor.end
+
+land.rhs: ; preds = %lor.rhs
+ br label %lor.end
+
+lor.end: ; preds = %land.rhs, %lor.rhs, %land.lhs.true
+ ret void
+}
+
+; CHECK-LABEL: @idom_sign_bit_check_edge_not_dominates
+define void @idom_sign_bit_check_edge_not_dominates(i64 %a) {
+entry:
+ %cmp = icmp slt i64 %a, 0
+ br i1 %cmp, label %land.lhs.true, label %lor.rhs
+
+land.lhs.true: ; preds = %entry
+ br i1 undef, label %lor.end, label %lor.rhs
+
+; CHECK-LABEL: lor.rhs:
+; CHECK: icmp sgt i64 %a, 0
+; CHECK-NOT: icmp eq i64 %a, 0
+lor.rhs: ; preds = %land.lhs.true, %entry
+ %cmp2 = icmp sgt i64 %a, 0
+ br i1 %cmp2, label %land.rhs, label %lor.end
+
+land.rhs: ; preds = %lor.rhs
+ br label %lor.end
+
+lor.end: ; preds = %land.rhs, %lor.rhs, %land.lhs.true
+ ret void
+}
+
+; CHECK-LABEL: @idom_sign_bit_check_edge_dominates_select
+define void @idom_sign_bit_check_edge_dominates_select(i64 %a, i64 %b) {
+entry:
+ %cmp = icmp slt i64 %a, 5
+ br i1 %cmp, label %land.lhs.true, label %lor.rhs
+
+land.lhs.true: ; preds = %entry
+ br label %lor.end
+
+; CHECK-LABEL: lor.rhs:
+; CHECK-NOT: [[B:%.*]] = icmp sgt i64 %a, 5
+; CHECK: [[C:%.*]] = icmp eq i64 %a, %b
+; CHECK-NOT: [[D:%.*]] = select i1 [[B]], i64 %a, i64 5
+; CHECK-NOT: icmp ne i64 [[D]], %b
+; CHECK-NEXT: br i1 [[C]], label %lor.end, label %land.rhs
+lor.rhs: ; preds = %entry
+ %cmp2 = icmp sgt i64 %a, 5
+ %select = select i1 %cmp2, i64 %a, i64 5
+ %cmp3 = icmp ne i64 %select, %b
+ br i1 %cmp3, label %land.rhs, label %lor.end
+
+land.rhs: ; preds = %lor.rhs
+ br label %lor.end
+
+lor.end: ; preds = %land.rhs, %lor.rhs, %land.lhs.true
+ ret void
+}
+
+; CHECK-LABEL: @idom_zbranch
+define void @idom_zbranch(i64 %a) {
+entry:
+ %cmp = icmp sgt i64 %a, 0
+ br i1 %cmp, label %lor.end, label %lor.rhs
+
+; CHECK-LABEL: lor.rhs:
+; CHECK: icmp slt i64 %a, 0
+; CHECK-NOT: icmp eq i64 %a, 0
+lor.rhs: ; preds = %entry
+ %cmp2 = icmp slt i64 %a, 0
+ br i1 %cmp2, label %land.rhs, label %lor.end
+
+land.rhs: ; preds = %lor.rhs
+ br label %lor.end
+
+lor.end: ; preds = %land.rhs, %lor.rhs
+ ret void
+}
+
+; CHECK-LABEL: @idom_not_zbranch
+define void @idom_not_zbranch(i32 %a, i32 %b) {
+entry:
+ %cmp = icmp sgt i32 %a, 0
+ br i1 %cmp, label %return, label %if.end
+
+; CHECK-LABEL: if.end:
+; CHECK-NOT: [[B:%.*]] = icmp slt i32 %a, 0
+; CHECK: [[C:%.*]] = icmp eq i32 %a, %b
+; CHECK-NOT: [[D:%.*]] = select i1 [[B]], i32 %a, i32 0
+; CHECK-NOT: icmp ne i32 [[D]], %b
+; CHECK-NEXT: br i1 [[C]], label %return, label %if.then3
+if.end: ; preds = %entry
+ %cmp1 = icmp slt i32 %a, 0
+ %a. = select i1 %cmp1, i32 %a, i32 0
+ %cmp2 = icmp ne i32 %a., %b
+ br i1 %cmp2, label %if.then3, label %return
+
+if.then3: ; preds = %if.end
+ br label %return
+
+return: ; preds = %if.end, %entry, %if.then3
+ ret void
+}
+
+; When canonicalizing to 'gt/lt', make sure the constant is correct.
+
+define i1 @PR27792(i128 %a) {
+; CHECK-LABEL: @PR27792(
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i128 %a, -1
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %cmp = icmp sge i128 %a, 0
+ ret i1 %cmp
+}
+
+define i1 @PR27792_2(i128 %a) {
+; CHECK-LABEL: @PR27792_2(
+; CHECK-NEXT: [[B:%.*]] = icmp ne i128 %a, 0
+; CHECK-NEXT: ret i1 [[B]]
+;
+ %b = icmp uge i128 %a, 1
+ ret i1 %b
+}
+
diff --git a/test/Transforms/InstCombine/indexed-gep-compares.ll b/test/Transforms/InstCombine/indexed-gep-compares.ll
new file mode 100644
index 000000000000..495881549e25
--- /dev/null
+++ b/test/Transforms/InstCombine/indexed-gep-compares.ll
@@ -0,0 +1,170 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:64"
+
+define i32 *@test1(i32* %A, i32 %Offset) {
+entry:
+ %tmp = getelementptr inbounds i32, i32* %A, i32 %Offset
+ br label %bb
+
+bb:
+ %RHS = phi i32* [ %RHS.next, %bb ], [ %tmp, %entry ]
+ %LHS = getelementptr inbounds i32, i32* %A, i32 100
+ %RHS.next = getelementptr inbounds i32, i32* %RHS, i64 1
+ %cond = icmp ult i32 * %LHS, %RHS
+ br i1 %cond, label %bb2, label %bb
+
+bb2:
+ ret i32* %RHS
+
+; CHECK-LABEL: @test1(
+; CHECK: %[[INDEX:[0-9A-Za-z.]+]] = phi i32 [ %[[ADD:[0-9A-Za-z.]+]], %bb ], [ %Offset, %entry ]
+; CHECK: %[[ADD]] = add nsw i32 %[[INDEX]], 1
+; CHECK: %cond = icmp sgt i32 %[[INDEX]], 100
+; CHECK: br i1 %cond, label %bb2, label %bb
+; CHECK: %[[PTR:[0-9A-Za-z.]+]] = getelementptr inbounds i32, i32* %A, i32 %[[INDEX]]
+; CHECK: ret i32* %[[PTR]]
+}
+
+define i32 *@test2(i32 %A, i32 %Offset) {
+entry:
+ %A.ptr = inttoptr i32 %A to i32*
+ %tmp = getelementptr inbounds i32, i32* %A.ptr, i32 %Offset
+ br label %bb
+
+bb:
+ %RHS = phi i32* [ %RHS.next, %bb ], [ %tmp, %entry ]
+ %LHS = getelementptr inbounds i32, i32* %A.ptr, i32 100
+ %RHS.next = getelementptr inbounds i32, i32* %RHS, i64 1
+ %cmp0 = ptrtoint i32 *%LHS to i32
+ %cmp1 = ptrtoint i32 *%RHS to i32
+ %cond = icmp ult i32 %cmp0, %cmp1
+ br i1 %cond, label %bb2, label %bb
+
+bb2:
+ ret i32* %RHS
+
+; CHECK-LABEL: @test2(
+; CHECK: %[[INDEX:[0-9A-Za-z.]+]] = phi i32 [ %[[ADD:[0-9A-Za-z.]+]], %bb ], [ %Offset, %entry ]
+; CHECK: %[[ADD]] = add nsw i32 %[[INDEX]], 1
+; CHECK: %cond = icmp sgt i32 %[[INDEX]], 100
+; CHECK: br i1 %cond, label %bb2, label %bb
+; CHECK: %[[TOPTR:[0-9A-Za-z.]+]] = inttoptr i32 %[[ADD:[0-9A-Za-z.]+]] to i32*
+; CHECK: %[[PTR:[0-9A-Za-z.]+]] = getelementptr inbounds i32, i32* %[[TOPTR]], i32 %[[INDEX]]
+; CHECK: ret i32* %[[PTR]]
+}
+
+; Perform the transformation only if we know that the GEPs used are inbounds.
+define i32 *@test3(i32* %A, i32 %Offset) {
+entry:
+ %tmp = getelementptr i32, i32* %A, i32 %Offset
+ br label %bb
+
+bb:
+ %RHS = phi i32* [ %RHS.next, %bb ], [ %tmp, %entry ]
+ %LHS = getelementptr i32, i32* %A, i32 100
+ %RHS.next = getelementptr i32, i32* %RHS, i64 1
+ %cond = icmp ult i32 * %LHS, %RHS
+ br i1 %cond, label %bb2, label %bb
+
+bb2:
+ ret i32* %RHS
+
+; CHECK-LABEL: @test3(
+; CHECK-NOT: %cond = icmp sgt i32 %{{[0-9A-Za-z.]+}}, 100
+}
+
+; An inttoptr that requires an extension or truncation will be opaque when determining
+; the base pointer. In this case we can still perform the transformation by considering
+; A.ptr as being the base pointer.
+define i32 *@test4(i16 %A, i32 %Offset) {
+entry:
+ %A.ptr = inttoptr i16 %A to i32*
+ %tmp = getelementptr inbounds i32, i32* %A.ptr, i32 %Offset
+ br label %bb
+
+bb:
+ %RHS = phi i32* [ %RHS.next, %bb ], [ %tmp, %entry ]
+ %LHS = getelementptr inbounds i32, i32* %A.ptr, i32 100
+ %RHS.next = getelementptr inbounds i32, i32* %RHS, i64 1
+ %cmp0 = ptrtoint i32 *%LHS to i32
+ %cmp1 = ptrtoint i32 *%RHS to i32
+ %cond = icmp ult i32 %cmp0, %cmp1
+ br i1 %cond, label %bb2, label %bb
+
+bb2:
+ ret i32* %RHS
+
+; CHECK-LABEL: @test4(
+; CHECK: %cond = icmp sgt i32 %{{[0-9A-Za-z.]+}}, 100
+}
+
+declare i32* @fun_ptr()
+
+define i32 *@test5(i32 %Offset) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ %A = invoke i32 *@fun_ptr() to label %cont unwind label %lpad
+
+cont:
+ %tmp = getelementptr inbounds i32, i32* %A, i32 %Offset
+ br label %bb
+
+bb:
+ %RHS = phi i32* [ %RHS.next, %bb ], [ %tmp, %cont ]
+ %LHS = getelementptr inbounds i32, i32* %A, i32 100
+ %RHS.next = getelementptr inbounds i32, i32* %RHS, i64 1
+ %cond = icmp ult i32 * %LHS, %RHS
+ br i1 %cond, label %bb2, label %bb
+
+bb2:
+ ret i32* %RHS
+
+lpad:
+ %l = landingpad { i8*, i32 } cleanup
+ ret i32* null
+
+; CHECK-LABEL: @test5(
+; CHECK: %[[INDEX:[0-9A-Za-z.]+]] = phi i32 [ %[[ADD:[0-9A-Za-z.]+]], %bb ], [ %Offset, %cont ]
+; CHECK: %[[ADD]] = add nsw i32 %[[INDEX]], 1
+; CHECK: %cond = icmp sgt i32 %[[INDEX]], 100
+; CHECK: br i1 %cond, label %bb2, label %bb
+; CHECK: %[[PTR:[0-9A-Za-z.]+]] = getelementptr inbounds i32, i32* %A, i32 %[[INDEX]]
+; CHECK: ret i32* %[[PTR]]
+}
+
+declare i32 @fun_i32()
+
+define i32 *@test6(i32 %Offset) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ %A = invoke i32 @fun_i32() to label %cont unwind label %lpad
+
+cont:
+ %A.ptr = inttoptr i32 %A to i32*
+ %tmp = getelementptr inbounds i32, i32* %A.ptr, i32 %Offset
+ br label %bb
+
+bb:
+ %RHS = phi i32* [ %RHS.next, %bb ], [ %tmp, %cont ]
+ %LHS = getelementptr inbounds i32, i32* %A.ptr, i32 100
+ %RHS.next = getelementptr inbounds i32, i32* %RHS, i64 1
+ %cond = icmp ult i32 * %LHS, %RHS
+ br i1 %cond, label %bb2, label %bb
+
+bb2:
+ ret i32* %RHS
+
+lpad:
+ %l = landingpad { i8*, i32 } cleanup
+ ret i32* null
+
+; CHECK-LABEL: @test6(
+; CHECK: %[[INDEX:[0-9A-Za-z.]+]] = phi i32 [ %[[ADD:[0-9A-Za-z.]+]], %bb ], [ %Offset, %cont ]
+; CHECK: %[[ADD]] = add nsw i32 %[[INDEX]], 1
+; CHECK: %cond = icmp sgt i32 %[[INDEX]], 100
+; CHECK: br i1 %cond, label %bb2, label %bb
+; CHECK: %[[TOPTR:[0-9A-Za-z.]+]] = inttoptr i32 %[[ADD:[0-9A-Za-z.]+]] to i32*
+; CHECK: %[[PTR:[0-9A-Za-z.]+]] = getelementptr inbounds i32, i32* %[[TOPTR]], i32 %[[INDEX]]
+; CHECK: ret i32* %[[PTR]]
+}
+
+declare i32 @__gxx_personality_v0(...)
diff --git a/test/Transforms/InstCombine/insert-val-extract-elem.ll b/test/Transforms/InstCombine/insert-val-extract-elem.ll
new file mode 100644
index 000000000000..db7b4031f372
--- /dev/null
+++ b/test/Transforms/InstCombine/insert-val-extract-elem.ll
@@ -0,0 +1,74 @@
+; RUN: opt -S -instcombine %s | FileCheck %s
+
+; CHECK-LABEL: julia_2xdouble
+; CHECK-NOT: insertvalue
+; CHECK-NOT: extractelement
+; CHECK: store <2 x double>
+define void @julia_2xdouble([2 x double]* sret, <2 x double>*) {
+top:
+ %x = load <2 x double>, <2 x double>* %1
+ %x0 = extractelement <2 x double> %x, i32 0
+ %i0 = insertvalue [2 x double] undef, double %x0, 0
+ %x1 = extractelement <2 x double> %x, i32 1
+ %i1 = insertvalue [2 x double] %i0, double %x1, 1
+ store [2 x double] %i1, [2 x double]* %0, align 4
+ ret void
+}
+
+; Test with two inserts to the same index
+; CHECK-LABEL: julia_2xi64
+; CHECK-NOT: insertvalue
+; CHECK-NOT: extractelement
+; CHECK: store <2 x i64>
+define void @julia_2xi64([2 x i64]* sret, <2 x i64>*) {
+top:
+ %x = load <2 x i64>, <2 x i64>* %1
+ %x0 = extractelement <2 x i64> %x, i32 1
+ %i0 = insertvalue [2 x i64] undef, i64 %x0, 0
+ %x1 = extractelement <2 x i64> %x, i32 1
+ %i1 = insertvalue [2 x i64] %i0, i64 %x1, 1
+ %x2 = extractelement <2 x i64> %x, i32 0
+ %i2 = insertvalue [2 x i64] %i1, i64 %x2, 0
+ store [2 x i64] %i2, [2 x i64]* %0, align 4
+ ret void
+}
+
+; CHECK-LABEL: julia_4xfloat
+; CHECK-NOT: insertvalue
+; CHECK-NOT: extractelement
+; CHECK: store <4 x float>
+define void @julia_4xfloat([4 x float]* sret, <4 x float>*) {
+top:
+ %x = load <4 x float>, <4 x float>* %1
+ %x0 = extractelement <4 x float> %x, i32 0
+ %i0 = insertvalue [4 x float] undef, float %x0, 0
+ %x1 = extractelement <4 x float> %x, i32 1
+ %i1 = insertvalue [4 x float] %i0, float %x1, 1
+ %x2 = extractelement <4 x float> %x, i32 2
+ %i2 = insertvalue [4 x float] %i1, float %x2, 2
+ %x3 = extractelement <4 x float> %x, i32 3
+ %i3 = insertvalue [4 x float] %i2, float %x3, 3
+ store [4 x float] %i3, [4 x float]* %0, align 4
+ ret void
+}
+
+%pseudovec = type { float, float, float, float }
+
+; CHECK-LABEL: julia_pseudovec
+; CHECK-NOT: insertvalue
+; CHECK-NOT: extractelement
+; CHECK: store <4 x float>
+define void @julia_pseudovec(%pseudovec* sret, <4 x float>*) {
+top:
+ %x = load <4 x float>, <4 x float>* %1
+ %x0 = extractelement <4 x float> %x, i32 0
+ %i0 = insertvalue %pseudovec undef, float %x0, 0
+ %x1 = extractelement <4 x float> %x, i32 1
+ %i1 = insertvalue %pseudovec %i0, float %x1, 1
+ %x2 = extractelement <4 x float> %x, i32 2
+ %i2 = insertvalue %pseudovec %i1, float %x2, 2
+ %x3 = extractelement <4 x float> %x, i32 3
+ %i3 = insertvalue %pseudovec %i2, float %x3, 3
+ store %pseudovec %i3, %pseudovec* %0, align 4
+ ret void
+}
diff --git a/test/Transforms/InstCombine/lifetime.ll b/test/Transforms/InstCombine/lifetime.ll
index e5cbe3401410..c296d29b99b9 100644
--- a/test/Transforms/InstCombine/lifetime.ll
+++ b/test/Transforms/InstCombine/lifetime.ll
@@ -64,11 +64,10 @@ fin:
!llvm.module.flags = !{!22, !23}
!llvm.ident = !{!24}
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 248826) (llvm/trunk 248827)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 248826) (llvm/trunk 248827)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
!1 = !DIFile(filename: "test.cpp", directory: "/home/user")
!2 = !{}
-!3 = !{!4}
-!4 = distinct !DISubprogram(name: "bar", linkageName: "bar", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, variables: !8)
+!4 = distinct !DISubprogram(name: "bar", linkageName: "bar", scope: !1, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !8)
!5 = !DISubroutineType(types: !6)
!6 = !{null, !7}
!7 = !DIBasicType(name: "bool", size: 8, align: 8, encoding: DW_ATE_boolean)
diff --git a/test/Transforms/InstCombine/load-cmp.ll b/test/Transforms/InstCombine/load-cmp.ll
index fe1bf1517539..75952e01c19c 100644
--- a/test/Transforms/InstCombine/load-cmp.ll
+++ b/test/Transforms/InstCombine/load-cmp.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -instcombine -S -default-data-layout="p:32:32:32-p1:16:16:16-n8:16:32:64" < %s | FileCheck %s
@G16 = internal constant [10 x i16] [i16 35, i16 82, i16 69, i16 81, i16 85,
@@ -20,137 +21,150 @@
define i1 @test1(i32 %X) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 %X, 9
+; CHECK-NEXT: ret i1 [[R]]
+;
%P = getelementptr inbounds [10 x i16], [10 x i16]* @G16, i32 0, i32 %X
%Q = load i16, i16* %P
%R = icmp eq i16 %Q, 0
ret i1 %R
-; CHECK-LABEL: @test1(
-; CHECK-NEXT: %R = icmp eq i32 %X, 9
-; CHECK-NEXT: ret i1 %R
}
define i1 @test1_noinbounds(i32 %X) {
+; CHECK-LABEL: @test1_noinbounds(
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 %X, 9
+; CHECK-NEXT: ret i1 [[R]]
+;
%P = getelementptr [10 x i16], [10 x i16]* @G16, i32 0, i32 %X
%Q = load i16, i16* %P
%R = icmp eq i16 %Q, 0
ret i1 %R
-; CHECK-LABEL: @test1_noinbounds(
-; CHECK-NEXT: %R = icmp eq i32 %X, 9
-; CHECK-NEXT: ret i1 %R
}
define i1 @test1_noinbounds_i64(i64 %X) {
+; CHECK-LABEL: @test1_noinbounds_i64(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 %X to i32
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[TMP1]], 9
+; CHECK-NEXT: ret i1 [[R]]
+;
%P = getelementptr [10 x i16], [10 x i16]* @G16, i64 0, i64 %X
%Q = load i16, i16* %P
%R = icmp eq i16 %Q, 0
ret i1 %R
-; CHECK-LABEL: @test1_noinbounds_i64(
-; CHECK: %R = icmp eq i32 %1, 9
-; CHECK-NEXT: ret i1 %R
}
define i1 @test1_noinbounds_as1(i32 %x) {
+; CHECK-LABEL: @test1_noinbounds_as1(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %x to i16
+; CHECK-NEXT: [[R:%.*]] = icmp eq i16 [[TMP1]], 9
+; CHECK-NEXT: ret i1 [[R]]
+;
%p = getelementptr [10 x i16], [10 x i16] addrspace(1)* @G16_as1, i16 0, i32 %x
%q = load i16, i16 addrspace(1)* %p
%r = icmp eq i16 %q, 0
ret i1 %r
-; CHECK-LABEL: @test1_noinbounds_as1(
-; CHECK-NEXT: trunc i32 %x to i16
-; CHECK-NEXT: %r = icmp eq i16 %1, 9
-; CHECK-NEXT: ret i1 %r
}
define i1 @test2(i32 %X) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[R:%.*]] = icmp ne i32 %X, 4
+; CHECK-NEXT: ret i1 [[R]]
+;
%P = getelementptr inbounds [10 x i16], [10 x i16]* @G16, i32 0, i32 %X
%Q = load i16, i16* %P
%R = icmp slt i16 %Q, 85
ret i1 %R
-; CHECK-LABEL: @test2(
-; CHECK-NEXT: %R = icmp ne i32 %X, 4
-; CHECK-NEXT: ret i1 %R
}
define i1 @test3(i32 %X) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 %X, 1
+; CHECK-NEXT: ret i1 [[R]]
+;
%P = getelementptr inbounds [6 x double], [6 x double]* @GD, i32 0, i32 %X
%Q = load double, double* %P
%R = fcmp oeq double %Q, 1.0
ret i1 %R
-; CHECK-LABEL: @test3(
-; CHECK-NEXT: %R = icmp eq i32 %X, 1
-; CHECK-NEXT: ret i1 %R
}
define i1 @test4(i32 %X) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 933, %X
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], 1
+; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
%P = getelementptr inbounds [10 x i16], [10 x i16]* @G16, i32 0, i32 %X
%Q = load i16, i16* %P
%R = icmp sle i16 %Q, 73
ret i1 %R
-; CHECK-LABEL: @test4(
-; CHECK-NEXT: lshr i32 933, %X
-; CHECK-NEXT: and i32 {{.*}}, 1
-; CHECK-NEXT: %R = icmp ne i32 {{.*}}, 0
-; CHECK-NEXT: ret i1 %R
}
define i1 @test4_i16(i16 %X) {
+; CHECK-LABEL: @test4_i16(
+; CHECK-NEXT: [[TMP1:%.*]] = sext i16 %X to i32
+; CHECK-NEXT: [[TMP2:%.*]] = lshr i32 933, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = and i32 [[TMP2]], 1
+; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP3]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
%P = getelementptr inbounds [10 x i16], [10 x i16]* @G16, i32 0, i16 %X
%Q = load i16, i16* %P
%R = icmp sle i16 %Q, 73
ret i1 %R
-; CHECK-LABEL: @test4_i16(
-; CHECK-NEXT: sext i16 %X to i32
-; CHECK-NEXT: lshr i32 933, %1
-; CHECK-NEXT: and i32 {{.*}}, 1
-; CHECK-NEXT: %R = icmp ne i32 {{.*}}, 0
-; CHECK-NEXT: ret i1 %R
}
define i1 @test5(i32 %X) {
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 %X, 2
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 %X, 7
+; CHECK-NEXT: [[R:%.*]] = or i1 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i1 [[R]]
+;
%P = getelementptr inbounds [10 x i16], [10 x i16]* @G16, i32 0, i32 %X
%Q = load i16, i16* %P
%R = icmp eq i16 %Q, 69
ret i1 %R
-; CHECK-LABEL: @test5(
-; CHECK-NEXT: icmp eq i32 %X, 2
-; CHECK-NEXT: icmp eq i32 %X, 7
-; CHECK-NEXT: %R = or i1
-; CHECK-NEXT: ret i1 %R
}
define i1 @test6(i32 %X) {
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 %X, -1
+; CHECK-NEXT: [[R:%.*]] = icmp ult i32 [[TMP1]], 3
+; CHECK-NEXT: ret i1 [[R]]
+;
%P = getelementptr inbounds [6 x double], [6 x double]* @GD, i32 0, i32 %X
%Q = load double, double* %P
%R = fcmp ogt double %Q, 0.0
ret i1 %R
-; CHECK-LABEL: @test6(
-; CHECK-NEXT: add i32 %X, -1
-; CHECK-NEXT: %R = icmp ult i32 {{.*}}, 3
-; CHECK-NEXT: ret i1 %R
}
define i1 @test7(i32 %X) {
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 %X, -1
+; CHECK-NEXT: [[R:%.*]] = icmp ugt i32 [[TMP1]], 2
+; CHECK-NEXT: ret i1 [[R]]
+;
%P = getelementptr inbounds [6 x double], [6 x double]* @GD, i32 0, i32 %X
%Q = load double, double* %P
%R = fcmp olt double %Q, 0.0
ret i1 %R
-; CHECK-LABEL: @test7(
-; CHECK-NEXT: add i32 %X, -1
-; CHECK-NEXT: %R = icmp ugt i32 {{.*}}, 2
-; CHECK-NEXT: ret i1 %R
}
define i1 @test8(i32 %X) {
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 %X, 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 9
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
%P = getelementptr inbounds [10 x i16], [10 x i16]* @G16, i32 0, i32 %X
%Q = load i16, i16* %P
%R = and i16 %Q, 3
%S = icmp eq i16 %R, 0
ret i1 %S
-; CHECK-LABEL: @test8(
-; CHECK-NEXT: or i32 %X, 1
-; CHECK-NEXT: icmp eq i32 {{.*}}, 9
-; CHECK-NEXT: ret i1
}
@GA = internal constant [4 x { i32, i32 } ] [
@@ -161,19 +175,21 @@ define i1 @test8(i32 %X) {
]
define i1 @test9(i32 %X) {
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: [[X_OFF:%.*]] = add i32 %X, -1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X_OFF]], 2
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
%P = getelementptr inbounds [4 x { i32, i32 } ], [4 x { i32, i32 } ]* @GA, i32 0, i32 %X, i32 1
%Q = load i32, i32* %P
%R = icmp eq i32 %Q, 1
ret i1 %R
-; CHECK-LABEL: @test9(
-; CHECK-NEXT: add i32 %X, -1
-; CHECK-NEXT: icmp ult i32 {{.*}}, 2
-; CHECK-NEXT: ret i1
}
define i1 @test10_struct(i32 %x) {
; CHECK-LABEL: @test10_struct(
-; CHECK: ret i1 false
+; CHECK-NEXT: ret i1 false
+;
%p = getelementptr inbounds %Foo, %Foo* @GS, i32 %x, i32 0
%q = load i32, i32* %p
%r = icmp eq i32 %q, 9
@@ -182,7 +198,11 @@ define i1 @test10_struct(i32 %x) {
define i1 @test10_struct_noinbounds(i32 %x) {
; CHECK-LABEL: @test10_struct_noinbounds(
-; CHECK: getelementptr %Foo, %Foo* @GS, i32 %x, i32 0
+; CHECK-NEXT: [[P:%.*]] = getelementptr %Foo, %Foo* @GS, i32 %x, i32 0
+; CHECK-NEXT: [[Q:%.*]] = load i32, i32* [[P]], align 8
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[Q]], 9
+; CHECK-NEXT: ret i1 [[R]]
+;
%p = getelementptr %Foo, %Foo* @GS, i32 %x, i32 0
%q = load i32, i32* %p
%r = icmp eq i32 %q, 9
@@ -193,7 +213,8 @@ define i1 @test10_struct_noinbounds(i32 %x) {
; Index < ptr size
define i1 @test10_struct_i16(i16 %x){
; CHECK-LABEL: @test10_struct_i16(
-; CHECK: ret i1 false
+; CHECK-NEXT: ret i1 false
+;
%p = getelementptr inbounds %Foo, %Foo* @GS, i16 %x, i32 0
%q = load i32, i32* %p
%r = icmp eq i32 %q, 0
@@ -204,7 +225,8 @@ define i1 @test10_struct_i16(i16 %x){
; Index > ptr size
define i1 @test10_struct_i64(i64 %x){
; CHECK-LABEL: @test10_struct_i64(
-; CHECK: ret i1 false
+; CHECK-NEXT: ret i1 false
+;
%p = getelementptr inbounds %Foo, %Foo* @GS, i64 %x, i32 0
%q = load i32, i32* %p
%r = icmp eq i32 %q, 0
@@ -213,8 +235,12 @@ define i1 @test10_struct_i64(i64 %x){
define i1 @test10_struct_noinbounds_i16(i16 %x) {
; CHECK-LABEL: @test10_struct_noinbounds_i16(
-; CHECK: %1 = sext i16 %x to i32
-; CHECK: getelementptr %Foo, %Foo* @GS, i32 %1, i32 0
+; CHECK-NEXT: [[TMP1:%.*]] = sext i16 %x to i32
+; CHECK-NEXT: [[P:%.*]] = getelementptr %Foo, %Foo* @GS, i32 [[TMP1]], i32 0
+; CHECK-NEXT: [[Q:%.*]] = load i32, i32* [[P]], align 8
+; CHECK-NEXT: [[R:%.*]] = icmp eq i32 [[Q]], 0
+; CHECK-NEXT: ret i1 [[R]]
+;
%p = getelementptr %Foo, %Foo* @GS, i16 %x, i32 0
%q = load i32, i32* %p
%r = icmp eq i32 %q, 0
@@ -223,8 +249,9 @@ define i1 @test10_struct_noinbounds_i16(i16 %x) {
define i1 @test10_struct_arr(i32 %x) {
; CHECK-LABEL: @test10_struct_arr(
-; CHECK-NEXT: %r = icmp ne i32 %x, 1
-; CHECK-NEXT: ret i1 %r
+; CHECK-NEXT: [[R:%.*]] = icmp ne i32 %x, 1
+; CHECK-NEXT: ret i1 [[R]]
+;
%p = getelementptr inbounds [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i32 %x, i32 2
%q = load i32, i32* %p
%r = icmp eq i32 %q, 9
@@ -233,8 +260,9 @@ define i1 @test10_struct_arr(i32 %x) {
define i1 @test10_struct_arr_noinbounds(i32 %x) {
; CHECK-LABEL: @test10_struct_arr_noinbounds(
-; CHECK-NEXT: %r = icmp ne i32 %x, 1
-; CHECK-NEXT: ret i1 %r
+; CHECK-NEXT: [[R:%.*]] = icmp ne i32 %x, 1
+; CHECK-NEXT: ret i1 [[R]]
+;
%p = getelementptr [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i32 %x, i32 2
%q = load i32, i32* %p
%r = icmp eq i32 %q, 9
@@ -243,8 +271,9 @@ define i1 @test10_struct_arr_noinbounds(i32 %x) {
define i1 @test10_struct_arr_i16(i16 %x) {
; CHECK-LABEL: @test10_struct_arr_i16(
-; CHECK-NEXT: %r = icmp ne i16 %x, 1
-; CHECK-NEXT: ret i1 %r
+; CHECK-NEXT: [[R:%.*]] = icmp ne i16 %x, 1
+; CHECK-NEXT: ret i1 [[R]]
+;
%p = getelementptr inbounds [4 x %Foo], [4 x %Foo]* @GStructArr, i16 0, i16 %x, i32 2
%q = load i32, i32* %p
%r = icmp eq i32 %q, 9
@@ -253,9 +282,10 @@ define i1 @test10_struct_arr_i16(i16 %x) {
define i1 @test10_struct_arr_i64(i64 %x) {
; CHECK-LABEL: @test10_struct_arr_i64(
-; CHECK-NEXT: trunc i64 %x to i32
-; CHECK-NEXT: %r = icmp ne i32 %1, 1
-; CHECK-NEXT: ret i1 %r
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 %x to i32
+; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP1]], 1
+; CHECK-NEXT: ret i1 [[R]]
+;
%p = getelementptr inbounds [4 x %Foo], [4 x %Foo]* @GStructArr, i64 0, i64 %x, i32 2
%q = load i32, i32* %p
%r = icmp eq i32 %q, 9
@@ -264,7 +294,9 @@ define i1 @test10_struct_arr_i64(i64 %x) {
define i1 @test10_struct_arr_noinbounds_i16(i16 %x) {
; CHECK-LABEL: @test10_struct_arr_noinbounds_i16(
-; CHECK-NEXT: %r = icmp ne i16 %x, 1
+; CHECK-NEXT: [[R:%.*]] = icmp ne i16 %x, 1
+; CHECK-NEXT: ret i1 [[R]]
+;
%p = getelementptr [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i16 %x, i32 2
%q = load i32, i32* %p
%r = icmp eq i32 %q, 9
@@ -273,8 +305,10 @@ define i1 @test10_struct_arr_noinbounds_i16(i16 %x) {
define i1 @test10_struct_arr_noinbounds_i64(i64 %x) {
; CHECK-LABEL: @test10_struct_arr_noinbounds_i64(
-; CHECK: %r = icmp ne i32 %1, 1
-; CHECK-NEXT: ret i1 %r
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i64 %x to i32
+; CHECK-NEXT: [[R:%.*]] = icmp ne i32 [[TMP1]], 1
+; CHECK-NEXT: ret i1 [[R]]
+;
%p = getelementptr [4 x %Foo], [4 x %Foo]* @GStructArr, i32 0, i64 %x, i32 2
%q = load i32, i32* %p
%r = icmp eq i32 %q, 9
diff --git a/test/Transforms/InstCombine/logical-select.ll b/test/Transforms/InstCombine/logical-select.ll
index f8c06768453d..7d3769fc13dd 100644
--- a/test/Transforms/InstCombine/logical-select.ll
+++ b/test/Transforms/InstCombine/logical-select.ll
@@ -1,7 +1,13 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
-define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
+define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: [[E:%.*]] = icmp slt i32 %a, %b
+; CHECK-NEXT: [[J:%.*]] = select i1 [[E]], i32 %c, i32 %d
+; CHECK-NEXT: ret i32 [[J]]
+;
%e = icmp slt i32 %a, %b
%f = sext i1 %e to i32
%g = and i32 %c, %f
@@ -9,11 +15,14 @@ define i32 @foo(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
%i = and i32 %d, %h
%j = or i32 %g, %i
ret i32 %j
-; CHECK: %e = icmp slt i32 %a, %b
-; CHECK-NEXT: [[result:%.*]] = select i1 %e, i32 %c, i32 %d
-; CHECK-NEXT: ret i32 [[result]]
}
-define i32 @bar(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
+
+define i32 @bar(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-LABEL: @bar(
+; CHECK-NEXT: [[E:%.*]] = icmp slt i32 %a, %b
+; CHECK-NEXT: [[J:%.*]] = select i1 [[E]], i32 %c, i32 %d
+; CHECK-NEXT: ret i32 [[J]]
+;
%e = icmp slt i32 %a, %b
%f = sext i1 %e to i32
%g = and i32 %c, %f
@@ -21,48 +30,419 @@ define i32 @bar(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
%i = and i32 %d, %h
%j = or i32 %i, %g
ret i32 %j
-; CHECK: %e = icmp slt i32 %a, %b
-; CHECK-NEXT: [[result:%.*]] = select i1 %e, i32 %c, i32 %d
-; CHECK-NEXT: ret i32 [[result]]
}
-define i32 @goo(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
-entry:
- %0 = icmp slt i32 %a, %b
- %iftmp.0.0 = select i1 %0, i32 -1, i32 0
- %1 = and i32 %iftmp.0.0, %c
+define i32 @goo(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-LABEL: @goo(
+; CHECK-NEXT: [[T0:%.*]] = icmp slt i32 %a, %b
+; CHECK-NEXT: [[T3:%.*]] = select i1 [[T0]], i32 %c, i32 %d
+; CHECK-NEXT: ret i32 [[T3]]
+;
+ %t0 = icmp slt i32 %a, %b
+ %iftmp.0.0 = select i1 %t0, i32 -1, i32 0
+ %t1 = and i32 %iftmp.0.0, %c
%not = xor i32 %iftmp.0.0, -1
- %2 = and i32 %not, %d
- %3 = or i32 %1, %2
- ret i32 %3
-; CHECK: %0 = icmp slt i32 %a, %b
-; CHECK-NEXT: [[result:%.*]] = select i1 %0, i32 %c, i32 %d
-; CHECK-NEXT: ret i32 [[result]]
-}
-define i32 @poo(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
-entry:
- %0 = icmp slt i32 %a, %b
- %iftmp.0.0 = select i1 %0, i32 -1, i32 0
- %1 = and i32 %iftmp.0.0, %c
- %iftmp = select i1 %0, i32 0, i32 -1
- %2 = and i32 %iftmp, %d
- %3 = or i32 %1, %2
- ret i32 %3
-; CHECK: %0 = icmp slt i32 %a, %b
-; CHECK-NEXT: [[result:%.*]] = select i1 %0, i32 %c, i32 %d
-; CHECK-NEXT: ret i32 [[result]]
-}
-
-define i32 @par(i32 %a, i32 %b, i32 %c, i32 %d) nounwind {
-entry:
- %0 = icmp slt i32 %a, %b
- %iftmp.1.0 = select i1 %0, i32 -1, i32 0
- %1 = and i32 %iftmp.1.0, %c
+ %t2 = and i32 %not, %d
+ %t3 = or i32 %t1, %t2
+ ret i32 %t3
+}
+
+define i32 @poo(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-LABEL: @poo(
+; CHECK-NEXT: [[T0:%.*]] = icmp slt i32 %a, %b
+; CHECK-NEXT: [[T3:%.*]] = select i1 [[T0]], i32 %c, i32 %d
+; CHECK-NEXT: ret i32 [[T3]]
+;
+ %t0 = icmp slt i32 %a, %b
+ %iftmp.0.0 = select i1 %t0, i32 -1, i32 0
+ %t1 = and i32 %iftmp.0.0, %c
+ %iftmp = select i1 %t0, i32 0, i32 -1
+ %t2 = and i32 %iftmp, %d
+ %t3 = or i32 %t1, %t2
+ ret i32 %t3
+}
+
+define i32 @par(i32 %a, i32 %b, i32 %c, i32 %d) {
+; CHECK-LABEL: @par(
+; CHECK-NEXT: [[T0:%.*]] = icmp slt i32 %a, %b
+; CHECK-NEXT: [[T3:%.*]] = select i1 [[T0]], i32 %c, i32 %d
+; CHECK-NEXT: ret i32 [[T3]]
+;
+ %t0 = icmp slt i32 %a, %b
+ %iftmp.1.0 = select i1 %t0, i32 -1, i32 0
+ %t1 = and i32 %iftmp.1.0, %c
%not = xor i32 %iftmp.1.0, -1
- %2 = and i32 %not, %d
- %3 = or i32 %1, %2
- ret i32 %3
-; CHECK: %0 = icmp slt i32 %a, %b
-; CHECK-NEXT: [[result:%.*]] = select i1 %0, i32 %c, i32 %d
-; CHECK-NEXT: ret i32 [[result]]
+ %t2 = and i32 %not, %d
+ %t3 = or i32 %t1, %t2
+ ret i32 %t3
+}
+
+; In the following tests (8 commutation variants), verify that a bitcast doesn't get
+; in the way of a select transform. These bitcasts are common in SSE/AVX and possibly
+; other vector code because of canonicalization to i64 elements for vectors.
+
+; The fptosi instructions are included to avoid commutation canonicalization based on
+; operator weight. Using another cast operator ensures that both operands of all logic
+; ops are equally weighted, and this ensures that we're testing all commutation
+; possibilities.
+
+define <2 x i64> @bitcast_select_swap0(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @bitcast_select_swap0(
+; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> %a to <2 x i64>
+; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> %b to <2 x i64>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> %cmp, <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
+; CHECK-NEXT: [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[OR]]
+;
+ %sia = fptosi <2 x double> %a to <2 x i64>
+ %sib = fptosi <2 x double> %b to <2 x i64>
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %bc1 = bitcast <4 x i32> %sext to <2 x i64>
+ %and1 = and <2 x i64> %bc1, %sia
+ %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %bc2 = bitcast <4 x i32> %neg to <2 x i64>
+ %and2 = and <2 x i64> %bc2, %sib
+ %or = or <2 x i64> %and1, %and2
+ ret <2 x i64> %or
+}
+
+define <2 x i64> @bitcast_select_swap1(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @bitcast_select_swap1(
+; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> %a to <2 x i64>
+; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> %b to <2 x i64>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> %cmp, <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
+; CHECK-NEXT: [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[OR]]
+;
+ %sia = fptosi <2 x double> %a to <2 x i64>
+ %sib = fptosi <2 x double> %b to <2 x i64>
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %bc1 = bitcast <4 x i32> %sext to <2 x i64>
+ %and1 = and <2 x i64> %bc1, %sia
+ %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %bc2 = bitcast <4 x i32> %neg to <2 x i64>
+ %and2 = and <2 x i64> %bc2, %sib
+ %or = or <2 x i64> %and2, %and1
+ ret <2 x i64> %or
+}
+
+define <2 x i64> @bitcast_select_swap2(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @bitcast_select_swap2(
+; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> %a to <2 x i64>
+; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> %b to <2 x i64>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> %cmp, <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
+; CHECK-NEXT: [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[OR]]
+;
+ %sia = fptosi <2 x double> %a to <2 x i64>
+ %sib = fptosi <2 x double> %b to <2 x i64>
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %bc1 = bitcast <4 x i32> %sext to <2 x i64>
+ %and1 = and <2 x i64> %bc1, %sia
+ %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %bc2 = bitcast <4 x i32> %neg to <2 x i64>
+ %and2 = and <2 x i64> %sib, %bc2
+ %or = or <2 x i64> %and1, %and2
+ ret <2 x i64> %or
+}
+
+define <2 x i64> @bitcast_select_swap3(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @bitcast_select_swap3(
+; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> %a to <2 x i64>
+; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> %b to <2 x i64>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> %cmp, <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
+; CHECK-NEXT: [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[OR]]
+;
+ %sia = fptosi <2 x double> %a to <2 x i64>
+ %sib = fptosi <2 x double> %b to <2 x i64>
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %bc1 = bitcast <4 x i32> %sext to <2 x i64>
+ %and1 = and <2 x i64> %bc1, %sia
+ %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %bc2 = bitcast <4 x i32> %neg to <2 x i64>
+ %and2 = and <2 x i64> %sib, %bc2
+ %or = or <2 x i64> %and2, %and1
+ ret <2 x i64> %or
+}
+
+define <2 x i64> @bitcast_select_swap4(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @bitcast_select_swap4(
+; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> %a to <2 x i64>
+; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> %b to <2 x i64>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> %cmp, <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
+; CHECK-NEXT: [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[OR]]
+;
+ %sia = fptosi <2 x double> %a to <2 x i64>
+ %sib = fptosi <2 x double> %b to <2 x i64>
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %bc1 = bitcast <4 x i32> %sext to <2 x i64>
+ %and1 = and <2 x i64> %sia, %bc1
+ %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %bc2 = bitcast <4 x i32> %neg to <2 x i64>
+ %and2 = and <2 x i64> %bc2, %sib
+ %or = or <2 x i64> %and1, %and2
+ ret <2 x i64> %or
+}
+
+define <2 x i64> @bitcast_select_swap5(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @bitcast_select_swap5(
+; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> %a to <2 x i64>
+; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> %b to <2 x i64>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> %cmp, <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
+; CHECK-NEXT: [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[OR]]
+;
+ %sia = fptosi <2 x double> %a to <2 x i64>
+ %sib = fptosi <2 x double> %b to <2 x i64>
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %bc1 = bitcast <4 x i32> %sext to <2 x i64>
+ %and1 = and <2 x i64> %sia, %bc1
+ %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %bc2 = bitcast <4 x i32> %neg to <2 x i64>
+ %and2 = and <2 x i64> %bc2, %sib
+ %or = or <2 x i64> %and2, %and1
+ ret <2 x i64> %or
+}
+
+define <2 x i64> @bitcast_select_swap6(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @bitcast_select_swap6(
+; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> %a to <2 x i64>
+; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> %b to <2 x i64>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> %cmp, <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
+; CHECK-NEXT: [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[OR]]
+;
+ %sia = fptosi <2 x double> %a to <2 x i64>
+ %sib = fptosi <2 x double> %b to <2 x i64>
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %bc1 = bitcast <4 x i32> %sext to <2 x i64>
+ %and1 = and <2 x i64> %sia, %bc1
+ %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %bc2 = bitcast <4 x i32> %neg to <2 x i64>
+ %and2 = and <2 x i64> %sib, %bc2
+ %or = or <2 x i64> %and1, %and2
+ ret <2 x i64> %or
+}
+
+define <2 x i64> @bitcast_select_swap7(<4 x i1> %cmp, <2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @bitcast_select_swap7(
+; CHECK-NEXT: [[SIA:%.*]] = fptosi <2 x double> %a to <2 x i64>
+; CHECK-NEXT: [[SIB:%.*]] = fptosi <2 x double> %b to <2 x i64>
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> [[SIA]] to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <2 x i64> [[SIB]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> %cmp, <4 x i32> [[TMP1]], <4 x i32> [[TMP2]]
+; CHECK-NEXT: [[OR:%.*]] = bitcast <4 x i32> [[TMP3]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[OR]]
+;
+ %sia = fptosi <2 x double> %a to <2 x i64>
+ %sib = fptosi <2 x double> %b to <2 x i64>
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %bc1 = bitcast <4 x i32> %sext to <2 x i64>
+ %and1 = and <2 x i64> %sia, %bc1
+ %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %bc2 = bitcast <4 x i32> %neg to <2 x i64>
+ %and2 = and <2 x i64> %sib, %bc2
+ %or = or <2 x i64> %and2, %and1
+ ret <2 x i64> %or
+}
+
+define <2 x i64> @bitcast_select_multi_uses(<4 x i1> %cmp, <2 x i64> %a, <2 x i64> %b) {
+; CHECK-LABEL: @bitcast_select_multi_uses(
+; CHECK-NEXT: [[SEXT:%.*]] = sext <4 x i1> %cmp to <4 x i32>
+; CHECK-NEXT: [[BC1:%.*]] = bitcast <4 x i32> [[SEXT]] to <2 x i64>
+; CHECK-NEXT: [[AND1:%.*]] = and <2 x i64> [[BC1]], %a
+; CHECK-NEXT: [[NEG:%.*]] = xor <4 x i32> [[SEXT]], <i32 -1, i32 -1, i32 -1, i32 -1>
+; CHECK-NEXT: [[BC2:%.*]] = bitcast <4 x i32> [[NEG]] to <2 x i64>
+; CHECK-NEXT: [[AND2:%.*]] = and <2 x i64> [[BC2]], %b
+; CHECK-NEXT: [[OR:%.*]] = or <2 x i64> [[AND2]], [[AND1]]
+; CHECK-NEXT: [[ADD:%.*]] = add <2 x i64> [[AND2]], [[BC2]]
+; CHECK-NEXT: [[SUB:%.*]] = sub <2 x i64> [[OR]], [[ADD]]
+; CHECK-NEXT: ret <2 x i64> [[SUB]]
+;
+ %sext = sext <4 x i1> %cmp to <4 x i32>
+ %bc1 = bitcast <4 x i32> %sext to <2 x i64>
+ %and1 = and <2 x i64> %a, %bc1
+ %neg = xor <4 x i32> %sext, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %bc2 = bitcast <4 x i32> %neg to <2 x i64>
+ %and2 = and <2 x i64> %b, %bc2
+ %or = or <2 x i64> %and2, %and1
+ %add = add <2 x i64> %and2, %bc2
+ %sub = sub <2 x i64> %or, %add
+ ret <2 x i64> %sub
+}
+
+define i1 @bools(i1 %a, i1 %b, i1 %c) {
+; CHECK-LABEL: @bools(
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 %c, i1 %b, i1 %a
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %not = xor i1 %c, -1
+ %and1 = and i1 %not, %a
+ %and2 = and i1 %c, %b
+ %or = or i1 %and1, %and2
+ ret i1 %or
}
+
+; Form a select if we know we can get replace 2 simple logic ops.
+
+define i1 @bools_multi_uses1(i1 %a, i1 %b, i1 %c) {
+; CHECK-LABEL: @bools_multi_uses1(
+; CHECK-NEXT: [[NOT:%.*]] = xor i1 %c, true
+; CHECK-NEXT: [[AND1:%.*]] = and i1 [[NOT]], %a
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 %c, i1 %b, i1 %a
+; CHECK-NEXT: [[XOR:%.*]] = xor i1 [[TMP1]], [[AND1]]
+; CHECK-NEXT: ret i1 [[XOR]]
+;
+ %not = xor i1 %c, -1
+ %and1 = and i1 %not, %a
+ %and2 = and i1 %c, %b
+ %or = or i1 %and1, %and2
+ %xor = xor i1 %or, %and1
+ ret i1 %xor
+}
+
+; Don't replace a cheap logic op with a potentially expensive select
+; unless we can also eliminate one of the other original ops.
+
+define i1 @bools_multi_uses2(i1 %a, i1 %b, i1 %c) {
+; CHECK-LABEL: @bools_multi_uses2(
+; CHECK-NEXT: [[NOT:%.*]] = xor i1 %c, true
+; CHECK-NEXT: [[AND1:%.*]] = and i1 [[NOT]], %a
+; CHECK-NEXT: [[AND2:%.*]] = and i1 %c, %b
+; CHECK-NEXT: [[ADD:%.*]] = xor i1 [[AND1]], [[AND2]]
+; CHECK-NEXT: ret i1 [[ADD]]
+;
+ %not = xor i1 %c, -1
+ %and1 = and i1 %not, %a
+ %and2 = and i1 %c, %b
+ %or = or i1 %and1, %and2
+ %add = add i1 %and1, %and2
+ %and3 = and i1 %or, %add
+ ret i1 %and3
+}
+
+define <4 x i1> @vec_of_bools(<4 x i1> %a, <4 x i1> %b, <4 x i1> %c) {
+; CHECK-LABEL: @vec_of_bools(
+; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> %c, <4 x i1> %b, <4 x i1> %a
+; CHECK-NEXT: ret <4 x i1> [[TMP1]]
+;
+ %not = xor <4 x i1> %c, <i1 true, i1 true, i1 true, i1 true>
+ %and1 = and <4 x i1> %not, %a
+ %and2 = and <4 x i1> %b, %c
+ %or = or <4 x i1> %and2, %and1
+ ret <4 x i1> %or
+}
+
+define i4 @vec_of_casted_bools(i4 %a, i4 %b, <4 x i1> %c) {
+; CHECK-LABEL: @vec_of_casted_bools(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast i4 %a to <4 x i1>
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast i4 %b to <4 x i1>
+; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> %c, <4 x i1> [[TMP2]], <4 x i1> [[TMP1]]
+; CHECK-NEXT: [[TMP4:%.*]] = bitcast <4 x i1> [[TMP3]] to i4
+; CHECK-NEXT: ret i4 [[TMP4]]
+;
+ %not = xor <4 x i1> %c, <i1 true, i1 true, i1 true, i1 true>
+ %bc1 = bitcast <4 x i1> %not to i4
+ %bc2 = bitcast <4 x i1> %c to i4
+ %and1 = and i4 %a, %bc1
+ %and2 = and i4 %bc2, %b
+ %or = or i4 %and1, %and2
+ ret i4 %or
+}
+
+; Inverted 'and' constants mean this is a select.
+
+define <4 x i32> @vec_sel_consts(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @vec_sel_consts(
+; CHECK-NEXT: [[TMP1:%.*]] = select <4 x i1> <i1 true, i1 false, i1 false, i1 true>, <4 x i32> %a, <4 x i32> %b
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %and1 = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 -1>
+ %and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 -1, i32 0>
+ %or = or <4 x i32> %and1, %and2
+ ret <4 x i32> %or
+}
+
+; The select condition constant is always derived from the first operand of the 'or'.
+
+define <3 x i129> @vec_sel_consts_weird(<3 x i129> %a, <3 x i129> %b) {
+; CHECK-LABEL: @vec_sel_consts_weird(
+; CHECK-NEXT: [[TMP1:%.*]] = select <3 x i1> <i1 false, i1 true, i1 false>, <3 x i129> %b, <3 x i129> %a
+; CHECK-NEXT: ret <3 x i129> [[TMP1]]
+;
+ %and1 = and <3 x i129> %a, <i129 -1, i129 0, i129 -1>
+ %and2 = and <3 x i129> %b, <i129 0, i129 -1, i129 0>
+ %or = or <3 x i129> %and2, %and1
+ ret <3 x i129> %or
+}
+
+; The mask elements must be inverted for this to be a select.
+
+define <4 x i32> @vec_not_sel_consts(<4 x i32> %a, <4 x i32> %b) {
+; CHECK-LABEL: @vec_not_sel_consts(
+; CHECK-NEXT: [[AND1:%.*]] = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 0>
+; CHECK-NEXT: [[AND2:%.*]] = and <4 x i32> %b, <i32 0, i32 -1, i32 0, i32 -1>
+; CHECK-NEXT: [[OR:%.*]] = or <4 x i32> [[AND1]], [[AND2]]
+; CHECK-NEXT: ret <4 x i32> [[OR]]
+;
+ %and1 = and <4 x i32> %a, <i32 -1, i32 0, i32 0, i32 0>
+ %and2 = and <4 x i32> %b, <i32 0, i32 -1, i32 0, i32 -1>
+ %or = or <4 x i32> %and1, %and2
+ ret <4 x i32> %or
+}
+
+; The inverted constants may be operands of xor instructions.
+
+define <4 x i32> @vec_sel_xor(<4 x i32> %a, <4 x i32> %b, <4 x i1> %c) {
+; CHECK-LABEL: @vec_sel_xor(
+; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> %c, <i1 false, i1 true, i1 true, i1 true>
+; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> %a, <4 x i32> %b
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
+ %mask = sext <4 x i1> %c to <4 x i32>
+ %mask_flip1 = xor <4 x i32> %mask, <i32 -1, i32 0, i32 0, i32 0>
+ %not_mask_flip1 = xor <4 x i32> %mask, <i32 0, i32 -1, i32 -1, i32 -1>
+ %and1 = and <4 x i32> %not_mask_flip1, %a
+ %and2 = and <4 x i32> %mask_flip1, %b
+ %or = or <4 x i32> %and1, %and2
+ ret <4 x i32> %or
+}
+
+; Allow the transform even if the mask values have multiple uses because
+; there's still a net reduction of instructions from removing the and/and/or.
+
+define <4 x i32> @vec_sel_xor_multi_use(<4 x i32> %a, <4 x i32> %b, <4 x i1> %c) {
+; CHECK-LABEL: @vec_sel_xor_multi_use(
+; CHECK-NEXT: [[MASK:%.*]] = sext <4 x i1> %c to <4 x i32>
+; CHECK-NEXT: [[MASK_FLIP1:%.*]] = xor <4 x i32> [[MASK]], <i32 -1, i32 0, i32 0, i32 0>
+; CHECK-NEXT: [[TMP1:%.*]] = xor <4 x i1> %c, <i1 false, i1 true, i1 true, i1 true>
+; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> %a, <4 x i32> %b
+; CHECK-NEXT: [[ADD:%.*]] = add <4 x i32> [[TMP2]], [[MASK_FLIP1]]
+; CHECK-NEXT: ret <4 x i32> [[ADD]]
+;
+ %mask = sext <4 x i1> %c to <4 x i32>
+ %mask_flip1 = xor <4 x i32> %mask, <i32 -1, i32 0, i32 0, i32 0>
+ %not_mask_flip1 = xor <4 x i32> %mask, <i32 0, i32 -1, i32 -1, i32 -1>
+ %and1 = and <4 x i32> %not_mask_flip1, %a
+ %and2 = and <4 x i32> %mask_flip1, %b
+ %or = or <4 x i32> %and1, %and2
+ %add = add <4 x i32> %or, %mask_flip1
+ ret <4 x i32> %add
+}
+
diff --git a/test/Transforms/InstCombine/masked_intrinsics.ll b/test/Transforms/InstCombine/masked_intrinsics.ll
new file mode 100644
index 000000000000..ce79ce56b5cb
--- /dev/null
+++ b/test/Transforms/InstCombine/masked_intrinsics.ll
@@ -0,0 +1,66 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+declare <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptrs, i32, <2 x i1> %mask, <2 x double> %src0)
+declare void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptrs, i32, <2 x i1> %mask)
+declare <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32, <2 x i1> %mask, <2 x double> %passthru)
+declare void @llvm.masked.scatter.v2f64(<2 x double> %val, <2 x double*> %ptrs, i32, <2 x i1> %mask)
+
+define <2 x double> @load_zeromask(<2 x double>* %ptr, <2 x double> %passthru) {
+ %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 1, <2 x i1> zeroinitializer, <2 x double> %passthru)
+ ret <2 x double> %res
+
+; CHECK-LABEL: @load_zeromask(
+; CHECK-NEXT: ret <2 x double> %passthru
+}
+
+define <2 x double> @load_onemask(<2 x double>* %ptr, <2 x double> %passthru) {
+ %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 1, i1 1>, <2 x double> %passthru)
+ ret <2 x double> %res
+
+; CHECK-LABEL: @load_onemask(
+; CHECK-NEXT: %unmaskedload = load <2 x double>, <2 x double>* %ptr, align 2
+; CHECK-NEXT: ret <2 x double> %unmaskedload
+}
+
+define <2 x double> @load_undefmask(<2 x double>* %ptr, <2 x double> %passthru) {
+ %res = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %ptr, i32 2, <2 x i1> <i1 1, i1 undef>, <2 x double> %passthru)
+ ret <2 x double> %res
+
+; CHECK-LABEL: @load_undefmask(
+; CHECK-NEXT: %unmaskedload = load <2 x double>, <2 x double>* %ptr, align 2
+; CHECK-NEXT: ret <2 x double> %unmaskedload
+}
+
+define void @store_zeromask(<2 x double>* %ptr, <2 x double> %val) {
+ call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptr, i32 3, <2 x i1> zeroinitializer)
+ ret void
+
+; CHECK-LABEL: @store_zeromask(
+; CHECK-NEXT: ret void
+}
+
+define void @store_onemask(<2 x double>* %ptr, <2 x double> %val) {
+ call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %val, <2 x double>* %ptr, i32 4, <2 x i1> <i1 1, i1 1>)
+ ret void
+
+; CHECK-LABEL: @store_onemask(
+; CHECK-NEXT: store <2 x double> %val, <2 x double>* %ptr, align 4
+; CHECK-NEXT: ret void
+}
+
+define <2 x double> @gather_zeromask(<2 x double*> %ptrs, <2 x double> %passthru) {
+ %res = call <2 x double> @llvm.masked.gather.v2f64(<2 x double*> %ptrs, i32 5, <2 x i1> zeroinitializer, <2 x double> %passthru)
+ ret <2 x double> %res
+
+; CHECK-LABEL: @gather_zeromask(
+; CHECK-NEXT: ret <2 x double> %passthru
+}
+
+define void @scatter_zeromask(<2 x double*> %ptrs, <2 x double> %val) {
+ call void @llvm.masked.scatter.v2f64(<2 x double> %val, <2 x double*> %ptrs, i32 6, <2 x i1> zeroinitializer)
+ ret void
+
+; CHECK-LABEL: @scatter_zeromask(
+; CHECK-NEXT: ret void
+}
+
diff --git a/test/Transforms/InstCombine/mem-gep-zidx.ll b/test/Transforms/InstCombine/mem-gep-zidx.ll
index cf021b133703..4499051b2552 100644
--- a/test/Transforms/InstCombine/mem-gep-zidx.ll
+++ b/test/Transforms/InstCombine/mem-gep-zidx.ll
@@ -4,6 +4,7 @@ target triple = "powerpc64-unknown-linux-gnu"
@f.a = private unnamed_addr constant [1 x i32] [i32 12], align 4
@f.b = private unnamed_addr constant [1 x i32] [i32 55], align 4
+@f.c = linkonce unnamed_addr alias [1 x i32], [1 x i32]* @f.b
define signext i32 @test1(i32 signext %x) #0 {
entry:
@@ -44,5 +45,16 @@ entry:
; CHECK: getelementptr inbounds [1 x i32], [1 x i32]* %p, i64 0, i64 0
}
+define signext i32 @test4(i32 signext %x, i1 %y) #0 {
+entry:
+ %idxprom = sext i32 %x to i64
+ %arrayidx = getelementptr inbounds [1 x i32], [1 x i32]* @f.c, i64 0, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ ret i32 %0
+
+; CHECK-LABEL: @test4
+; CHECK: getelementptr inbounds [1 x i32], [1 x i32]* @f.c, i64 0, i64 %idxprom
+}
+
attributes #0 = { nounwind readnone }
diff --git a/test/Transforms/InstCombine/memchr.ll b/test/Transforms/InstCombine/memchr.ll
index 216dba874ccb..b0573567bf60 100644
--- a/test/Transforms/InstCombine/memchr.ll
+++ b/test/Transforms/InstCombine/memchr.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; Test that the memchr library call simplifier works correctly.
; RUN: opt < %s -instcombine -S | FileCheck %s
@@ -15,11 +16,10 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
declare i8* @memchr(i8*, i32, i32)
define void @test1() {
-; CHECK-LABEL: @test1
-; CHECK: store i8* getelementptr inbounds ([14 x i8], [14 x i8]* @hello, i32 0, i32 6)
-; CHECK-NOT: call i8* @memchr
-; CHECK: ret void
-
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: store i8* getelementptr inbounds ([14 x i8], [14 x i8]* @hello, i32 0, i32 6), i8** @chp, align 4
+; CHECK-NEXT: ret void
+;
%str = getelementptr [14 x i8], [14 x i8]* @hello, i32 0, i32 0
%dst = call i8* @memchr(i8* %str, i32 119, i32 14)
store i8* %dst, i8** @chp
@@ -27,11 +27,10 @@ define void @test1() {
}
define void @test2() {
-; CHECK-LABEL: @test2
-; CHECK: store i8* null, i8** @chp, align 4
-; CHECK-NOT: call i8* @memchr
-; CHECK: ret void
-
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: store i8* null, i8** @chp, align 4
+; CHECK-NEXT: ret void
+;
%str = getelementptr [1 x i8], [1 x i8]* @null, i32 0, i32 0
%dst = call i8* @memchr(i8* %str, i32 119, i32 1)
store i8* %dst, i8** @chp
@@ -39,11 +38,10 @@ define void @test2() {
}
define void @test3() {
-; CHECK-LABEL: @test3
-; CHECK: store i8* getelementptr inbounds ([14 x i8], [14 x i8]* @hello, i32 0, i32 13)
-; CHECK-NOT: call i8* @memchr
-; CHECK: ret void
-
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: store i8* getelementptr inbounds ([14 x i8], [14 x i8]* @hello, i32 0, i32 13), i8** @chp, align 4
+; CHECK-NEXT: ret void
+;
%src = getelementptr [14 x i8], [14 x i8]* @hello, i32 0, i32 0
%dst = call i8* @memchr(i8* %src, i32 0, i32 14)
store i8* %dst, i8** @chp
@@ -51,11 +49,11 @@ define void @test3() {
}
define void @test4(i32 %chr) {
-; CHECK-LABEL: @test4
-; CHECK: call i8* @memchr
-; CHECK-NOT: call i8* @memchr
-; CHECK: ret void
-
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[DST:%.*]] = call i8* @memchr(i8* getelementptr inbounds ([14 x i8], [14 x i8]* @hello, i32 0, i32 0), i32 %chr, i32 14)
+; CHECK-NEXT: store i8* [[DST]], i8** @chp, align 4
+; CHECK-NEXT: ret void
+;
%src = getelementptr [14 x i8], [14 x i8]* @hello, i32 0, i32 0
%dst = call i8* @memchr(i8* %src, i32 %chr, i32 14)
store i8* %dst, i8** @chp
@@ -63,11 +61,10 @@ define void @test4(i32 %chr) {
}
define void @test5() {
-; CHECK-LABEL: @test5
-; CHECK: store i8* getelementptr inbounds ([14 x i8], [14 x i8]* @hello, i32 0, i32 13)
-; CHECK-NOT: call i8* @memchr
-; CHECK: ret void
-
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: store i8* getelementptr inbounds ([14 x i8], [14 x i8]* @hello, i32 0, i32 13), i8** @chp, align 4
+; CHECK-NEXT: ret void
+;
%src = getelementptr [14 x i8], [14 x i8]* @hello, i32 0, i32 0
%dst = call i8* @memchr(i8* %src, i32 65280, i32 14)
store i8* %dst, i8** @chp
@@ -75,11 +72,10 @@ define void @test5() {
}
define void @test6() {
-; CHECK-LABEL: @test6
-; CHECK: store i8* getelementptr inbounds ([14 x i8], [14 x i8]* @hello, i32 0, i32 6)
-; CHECK-NOT: call i8* @memchr
-; CHECK: ret void
-
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: store i8* getelementptr inbounds ([14 x i8], [14 x i8]* @hello, i32 0, i32 6), i8** @chp, align 4
+; CHECK-NEXT: ret void
+;
%src = getelementptr [14 x i8], [14 x i8]* @hello, i32 0, i32 0
; Overflow, but we still find the right thing.
%dst = call i8* @memchr(i8* %src, i32 119, i32 100)
@@ -88,11 +84,10 @@ define void @test6() {
}
define void @test7() {
-; CHECK-LABEL: @test7
-; CHECK: store i8* null, i8** @chp, align 4
-; CHECK-NOT: call i8* @memchr
-; CHECK: ret void
-
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: store i8* null, i8** @chp, align 4
+; CHECK-NEXT: ret void
+;
%src = getelementptr [14 x i8], [14 x i8]* @hello, i32 0, i32 0
; Overflow
%dst = call i8* @memchr(i8* %src, i32 120, i32 100)
@@ -101,11 +96,10 @@ define void @test7() {
}
define void @test8() {
-; CHECK-LABEL: @test8
-; CHECK: store i8* getelementptr inbounds ([14 x i8], [14 x i8]* @hellonull, i32 0, i32 6)
-; CHECK-NOT: call i8* @memchr
-; CHECK: ret void
-
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: store i8* getelementptr inbounds ([14 x i8], [14 x i8]* @hellonull, i32 0, i32 6), i8** @chp, align 4
+; CHECK-NEXT: ret void
+;
%str = getelementptr [14 x i8], [14 x i8]* @hellonull, i32 0, i32 0
%dst = call i8* @memchr(i8* %str, i32 119, i32 14)
store i8* %dst, i8** @chp
@@ -113,11 +107,10 @@ define void @test8() {
}
define void @test9() {
-; CHECK-LABEL: @test9
-; CHECK: store i8* getelementptr inbounds ([14 x i8], [14 x i8]* @hellonull, i32 0, i32 6)
-; CHECK-NOT: call i8* @memchr
-; CHECK: ret void
-
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: store i8* getelementptr inbounds ([14 x i8], [14 x i8]* @hellonull, i32 0, i32 6), i8** @chp, align 4
+; CHECK-NEXT: ret void
+;
%str = getelementptr [14 x i8], [14 x i8]* @hellonull, i32 0, i32 2
%dst = call i8* @memchr(i8* %str, i32 119, i32 12)
store i8* %dst, i8** @chp
@@ -125,11 +118,10 @@ define void @test9() {
}
define void @test10() {
-; CHECK-LABEL: @test10
-; CHECK: store i8* null, i8** @chp, align 4
-; CHECK-NOT: call i8* @memchr
-; CHECK: ret void
-
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: store i8* null, i8** @chp, align 4
+; CHECK-NEXT: ret void
+;
%str = getelementptr [14 x i8], [14 x i8]* @hello, i32 0, i32 0
%dst = call i8* @memchr(i8* %str, i32 119, i32 6)
store i8* %dst, i8** @chp
@@ -138,15 +130,15 @@ define void @test10() {
; Check transformation memchr("\r\n", C, 2) != nullptr -> (C & 9216) != 0
define i1 @test11(i32 %C) {
-; CHECK-LABEL: @test11
-; CHECK-NEXT: [[TRUNC:%.*]] = trunc i32 %C to i16
-; CHECK-NEXT: %memchr.bounds = icmp ult i16 [[TRUNC]], 16
-; CHECK-NEXT: [[SHL:%.*]] = shl i16 1, [[TRUNC]]
-; CHECK-NEXT: [[AND:%.*]] = and i16 [[SHL]], 9216
-; CHECK-NEXT: %memchr.bits = icmp ne i16 [[AND]], 0
-; CHECK-NEXT: %memchr = and i1 %memchr.bounds, %memchr.bits
-; CHECK-NEXT: ret i1 %memchr
-
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: [[TMP1:%.*]] = trunc i32 %C to i16
+; CHECK-NEXT: [[MEMCHR_BOUNDS:%.*]] = icmp ult i16 [[TMP1]], 16
+; CHECK-NEXT: [[TMP2:%.*]] = shl i16 1, [[TMP1]]
+; CHECK-NEXT: [[TMP3:%.*]] = and i16 [[TMP2]], 9216
+; CHECK-NEXT: [[MEMCHR_BITS:%.*]] = icmp ne i16 [[TMP3]], 0
+; CHECK-NEXT: [[MEMCHR:%.*]] = and i1 [[MEMCHR_BOUNDS]], [[MEMCHR_BITS]]
+; CHECK-NEXT: ret i1 [[MEMCHR]]
+;
%dst = call i8* @memchr(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @newlines, i64 0, i64 0), i32 %C, i32 2)
%cmp = icmp ne i8* %dst, null
ret i1 %cmp
@@ -154,46 +146,46 @@ define i1 @test11(i32 %C) {
; No 64 bits here
define i1 @test12(i32 %C) {
-; CHECK-LABEL: @test12
-; CHECK-NEXT: %dst = call i8* @memchr(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @spaces, i32 0, i32 0), i32 %C, i32 3)
-; CHECK-NEXT: %cmp = icmp ne i8* %dst, null
-; CHECK-NEXT: ret i1 %cmp
-
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: [[DST:%.*]] = call i8* @memchr(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @spaces, i32 0, i32 0), i32 %C, i32 3)
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8* [[DST]], null
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%dst = call i8* @memchr(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @spaces, i64 0, i64 0), i32 %C, i32 3)
%cmp = icmp ne i8* %dst, null
ret i1 %cmp
}
define i1 @test13(i32 %C) {
-; CHECK-LABEL: @test13
-; CHECK-NEXT: %memchr.bounds = icmp ult i32 %C, 32
-; CHECK-NEXT: [[SHL:%.*]] = shl i32 1, %C
-; CHECK-NEXT: [[AND:%.*]] = and i32 [[SHL]], -2147483647
-; CHECK-NEXT: %memchr.bits = icmp ne i32 [[AND]], 0
-; CHECK-NEXT: %memchr = and i1 %memchr.bounds, %memchr.bits
-; CHECK-NEXT: ret i1 %memchr
-
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: [[MEMCHR_BOUNDS:%.*]] = icmp ult i32 %C, 32
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 1, %C
+; CHECK-NEXT: [[TMP2:%.*]] = and i32 [[TMP1]], -2147483647
+; CHECK-NEXT: [[MEMCHR_BITS:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK-NEXT: [[MEMCHR:%.*]] = and i1 [[MEMCHR_BOUNDS]], [[MEMCHR_BITS]]
+; CHECK-NEXT: ret i1 [[MEMCHR]]
+;
%dst = call i8* @memchr(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @single, i64 0, i64 0), i32 %C, i32 2)
%cmp = icmp ne i8* %dst, null
ret i1 %cmp
}
define i1 @test14(i32 %C) {
-; CHECK-LABEL: @test14
-; CHECK-NEXT: icmp eq i32 %C, 31
-; CHECK-NEXT: ret
-
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32 %C, 31
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
%dst = call i8* @memchr(i8* getelementptr inbounds ([2 x i8], [2 x i8]* @single, i64 0, i64 0), i32 %C, i32 1)
%cmp = icmp ne i8* %dst, null
ret i1 %cmp
}
define i1 @test15(i32 %C) {
-; CHECK-LABEL: @test15
-; CHECK-NEXT: %dst = call i8* @memchr(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @negative, i32 0, i32 0), i32 %C, i32 3)
-; CHECK-NEXT: %cmp = icmp ne i8* %dst, null
-; CHECK-NEXT: ret i1 %cmp
-
+; CHECK-LABEL: @test15(
+; CHECK-NEXT: [[DST:%.*]] = call i8* @memchr(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @negative, i32 0, i32 0), i32 %C, i32 3)
+; CHECK-NEXT: [[CMP:%.*]] = icmp ne i8* [[DST]], null
+; CHECK-NEXT: ret i1 [[CMP]]
+;
%dst = call i8* @memchr(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @negative, i64 0, i64 0), i32 %C, i32 3)
%cmp = icmp ne i8* %dst, null
ret i1 %cmp
diff --git a/test/Transforms/InstCombine/memset-1.ll b/test/Transforms/InstCombine/memset-1.ll
index 991567d6b597..7310e5f4faf8 100644
--- a/test/Transforms/InstCombine/memset-1.ll
+++ b/test/Transforms/InstCombine/memset-1.ll
@@ -15,3 +15,51 @@ define i8* @test_simplify1(i8* %mem, i32 %val, i32 %size) {
ret i8* %ret
; CHECK: ret i8* %mem
}
+
+define i8* @pr25892_lite(i32 %size) #0 {
+ %call1 = call i8* @malloc(i32 %size) #1
+ %call2 = call i8* @memset(i8* %call1, i32 0, i32 %size) #1
+ ret i8* %call2
+
+; CHECK-LABEL: @pr25892_lite(
+; CHECK-NEXT: %calloc = call i8* @calloc(i32 1, i32 %size)
+; CHECK-NEXT: ret i8* %calloc
+}
+
+; FIXME: memset(malloc(x), 0, x) -> calloc(1, x)
+; This doesn't fire currently because the malloc has more than one use.
+
+define float* @pr25892(i32 %size) #0 {
+entry:
+ %call = tail call i8* @malloc(i32 %size) #1
+ %cmp = icmp eq i8* %call, null
+ br i1 %cmp, label %cleanup, label %if.end
+if.end:
+ %bc = bitcast i8* %call to float*
+ %call2 = tail call i8* @memset(i8* nonnull %call, i32 0, i32 %size) #1
+ br label %cleanup
+cleanup:
+ %retval.0 = phi float* [ %bc, %if.end ], [ null, %entry ]
+ ret float* %retval.0
+
+; CHECK-LABEL: @pr25892(
+; CHECK: entry:
+; CHECK-NEXT: %call = tail call i8* @malloc(i32 %size) #1
+; CHECK-NEXT: %cmp = icmp eq i8* %call, null
+; CHECK-NEXT: br i1 %cmp, label %cleanup, label %if.end
+; CHECK: if.end:
+; CHECK-NEXT: %bc = bitcast i8* %call to float*
+; CHECK-NEXT: call void @llvm.memset.p0i8.i32(i8* nonnull %call, i8 0, i32 %size, i32 1, i1 false)
+; CHECK-NEXT: br label %cleanup
+; CHECK: cleanup:
+; CHECK-NEXT: %retval.0 = phi float* [ %bc, %if.end ], [ null, %entry ]
+; CHECK-NEXT: ret float* %retval.0
+}
+
+declare noalias i8* @malloc(i32) #1
+declare i64 @llvm.objectsize.i64.p0i8(i8*, i1) #2
+
+attributes #0 = { nounwind ssp uwtable }
+attributes #1 = { nounwind }
+attributes #2 = { nounwind readnone }
+
diff --git a/test/Transforms/InstCombine/memset_chk-1.ll b/test/Transforms/InstCombine/memset_chk-1.ll
index 56ea14c8292d..9d08e96cb49b 100644
--- a/test/Transforms/InstCombine/memset_chk-1.ll
+++ b/test/Transforms/InstCombine/memset_chk-1.ll
@@ -90,3 +90,41 @@ declare i64 @strlen(i8* nocapture)
declare i64 @llvm.objectsize.i64.p0i8(i8*, i1)
declare i8* @__memset_chk(i8*, i32, i64, i64)
+
+; FIXME: memset(malloc(x), 0, x) -> calloc(1, x)
+
+define float* @pr25892(i64 %size) #0 {
+entry:
+ %call = tail call i8* @malloc(i64 %size) #1
+ %cmp = icmp eq i8* %call, null
+ br i1 %cmp, label %cleanup, label %if.end
+if.end:
+ %bc = bitcast i8* %call to float*
+ %call2 = tail call i64 @llvm.objectsize.i64.p0i8(i8* nonnull %call, i1 false)
+ %call3 = tail call i8* @__memset_chk(i8* nonnull %call, i32 0, i64 %size, i64 %call2) #1
+ br label %cleanup
+cleanup:
+ %retval.0 = phi float* [ %bc, %if.end ], [ null, %entry ]
+ ret float* %retval.0
+
+; CHECK-LABEL: @pr25892(
+; CHECK: entry:
+; CHECK-NEXT: %call = tail call i8* @malloc(i64 %size)
+; CHECK-NEXT: %cmp = icmp eq i8* %call, null
+; CHECK-NEXT: br i1 %cmp, label %cleanup, label %if.end
+; CHECK: if.end:
+; CHECK-NEXT: %bc = bitcast i8* %call to float*
+; CHECK-NEXT: %call2 = tail call i64 @llvm.objectsize.i64.p0i8(i8* nonnull %call, i1 false)
+; CHECK-NEXT: %call3 = tail call i8* @__memset_chk(i8* nonnull %call, i32 0, i64 %size, i64 %call2)
+; CHECK-NEXT: br label %cleanup
+; CHECK: cleanup:
+; CHECK-NEXT: %retval.0 = phi float* [ %bc, %if.end ], [ null, %entry ]
+; CHECK-NEXT: ret float* %retval.0
+}
+
+declare noalias i8* @malloc(i64) #1
+
+attributes #0 = { nounwind ssp uwtable }
+attributes #1 = { nounwind }
+attributes #2 = { nounwind readnone }
+
diff --git a/test/Transforms/InstCombine/min-positive.ll b/test/Transforms/InstCombine/min-positive.ll
new file mode 100644
index 000000000000..9bbdb2944a32
--- /dev/null
+++ b/test/Transforms/InstCombine/min-positive.ll
@@ -0,0 +1,34 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+@g = external global i32
+
+define i1 @test(i32 %other) {
+; CHECK-LABEL: @test
+; CHECK: %test = icmp sgt i32 %other, 0
+ %positive = load i32, i32* @g, !range !{i32 1, i32 2048}
+ %cmp = icmp slt i32 %positive, %other
+ %sel = select i1 %cmp, i32 %positive, i32 %other
+ %test = icmp sgt i32 %sel, 0
+ ret i1 %test
+}
+
+define i1 @test2(i32 %other) {
+; CHECK-LABEL: @test2
+; CHECK: %test = icmp sgt i32 %other, 0
+ %positive = load i32, i32* @g, !range !{i32 1, i32 2048}
+ %cmp = icmp slt i32 %other, %positive
+ %sel = select i1 %cmp, i32 %other, i32 %positive
+ %test = icmp sgt i32 %sel, 0
+ ret i1 %test
+}
+
+; %positive might be zero
+define i1 @test3(i32 %other) {
+; CHECK-LABEL: @test3
+; CHECK: %test = icmp sgt i32 %sel, 0
+ %positive = load i32, i32* @g, !range !{i32 0, i32 2048}
+ %cmp = icmp slt i32 %positive, %other
+ %sel = select i1 %cmp, i32 %positive, i32 %other
+ %test = icmp sgt i32 %sel, 0
+ ret i1 %test
+}
diff --git a/test/Transforms/InstCombine/minmax-fp.ll b/test/Transforms/InstCombine/minmax-fp.ll
index b90afe3405f7..b6eb1bb68348 100644
--- a/test/Transforms/InstCombine/minmax-fp.ll
+++ b/test/Transforms/InstCombine/minmax-fp.ll
@@ -154,3 +154,29 @@ define i8 @t15(float %a) {
%3 = select i1 %1, i8 %2, i8 0
ret i8 %3
}
+
+; CHECK-LABEL: @t16
+; CHECK: %[[cmp:.*]] = icmp sgt i32 %x, 0
+; CHECK: %[[cst:.*]] = sitofp i32 %x to double
+; CHECK: %[[sel:.*]] = select i1 %[[cmp]], double %[[cst]], double 5.000000e-01
+; CHECK: ret double %[[sel]]
+define double @t16(i32 %x) {
+entry:
+ %cmp = icmp sgt i32 %x, 0
+ %cst = sitofp i32 %x to double
+ %sel = select i1 %cmp, double %cst, double 5.000000e-01
+ ret double %sel
+}
+
+; CHECK-LABEL: @t17
+; CHECK: %[[cmp:.*]] = icmp sgt i32 %x, 2
+; CHECK: %[[sel:.*]] = select i1 %[[cmp]], i32 %x, i32 2
+; CHECK: %[[cst:.*]] = sitofp i32 %[[sel]] to double
+; CHECK: ret double %[[cst]]
+define double @t17(i32 %x) {
+entry:
+ %cmp = icmp sgt i32 %x, 2
+ %cst = sitofp i32 %x to double
+ %sel = select i1 %cmp, double %cst, double 2.0
+ ret double %sel
+}
diff --git a/test/Transforms/InstCombine/misc-2002.ll b/test/Transforms/InstCombine/misc-2002.ll
new file mode 100644
index 000000000000..1c44e17edbd8
--- /dev/null
+++ b/test/Transforms/InstCombine/misc-2002.ll
@@ -0,0 +1,50 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+define void @hang_2002-03-11(i32 %X) {
+; CHECK-LABEL: @hang_2002-03-11(
+; CHECK-NEXT: ret void
+;
+ %reg117 = add i32 %X, 0
+ ret void
+}
+
+; Instcombine was missing a test that caused it to make illegal transformations
+; sometimes. In this case, it transformed the sub into an add:
+
+define i32 @sub_failure_2002-05-14(i32 %i, i32 %j) {
+; CHECK-LABEL: @sub_failure_2002-05-14(
+; CHECK-NEXT: [[A:%.*]] = mul i32 %i, %j
+; CHECK-NEXT: [[B:%.*]] = sub i32 2, [[A]]
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %A = mul i32 %i, %j
+ %B = sub i32 2, %A
+ ret i32 %B
+}
+
+; This testcase was incorrectly getting completely eliminated. There should be
+; SOME instruction named %c here, even if it's a bitwise and.
+
+define i64 @cast_test_2002-08-02(i64 %A) {
+; CHECK-LABEL: @cast_test_2002-08-02(
+; CHECK-NEXT: [[C2:%.*]] = and i64 %A, 255
+; CHECK-NEXT: ret i64 [[C2]]
+;
+ %c1 = trunc i64 %A to i8
+ %c2 = zext i8 %c1 to i64
+ ret i64 %c2
+}
+
+define i32 @missed_const_prop_2002-12-05(i32 %A) {
+; CHECK-LABEL: @missed_const_prop_2002-12-05(
+; CHECK-NEXT: ret i32 0
+;
+ %A.neg = sub i32 0, %A
+ %.neg = sub i32 0, 1
+ %X = add i32 %.neg, 1
+ %Y.neg.ra = add i32 %A, %X
+ %r = add i32 %A.neg, %Y.neg.ra
+ ret i32 %r
+}
+
diff --git a/test/Transforms/InstCombine/mul-masked-bits.ll b/test/Transforms/InstCombine/mul-masked-bits.ll
index a43d5f20beaa..fcff725cdf6f 100644
--- a/test/Transforms/InstCombine/mul-masked-bits.ll
+++ b/test/Transforms/InstCombine/mul-masked-bits.ll
@@ -1,6 +1,15 @@
-; RUN: opt < %s -instcombine -S | grep ashr
+; NOTE: Assertions have been autogenerated by update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
define i32 @foo(i32 %x, i32 %y) {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: [[A:%.*]] = and i32 %x, 7
+; CHECK-NEXT: [[B:%.*]] = and i32 %y, 7
+; CHECK-NEXT: [[C:%.*]] = mul nuw nsw i32 [[A]], [[B]]
+; CHECK-NEXT: [[D:%.*]] = shl nuw i32 [[C]], 26
+; CHECK-NEXT: [[E:%.*]] = ashr exact i32 [[D]], 26
+; CHECK-NEXT: ret i32 [[E]]
+;
%a = and i32 %x, 7
%b = and i32 %y, 7
%c = mul i32 %a, %b
diff --git a/test/Transforms/InstCombine/narrow-switch.ll b/test/Transforms/InstCombine/narrow-switch.ll
index f3f19bae03dd..7cbc5e9f60b8 100644
--- a/test/Transforms/InstCombine/narrow-switch.ll
+++ b/test/Transforms/InstCombine/narrow-switch.ll
@@ -1,20 +1,25 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; Vary legal integer types in data layout.
+; RUN: opt < %s -instcombine -S -default-data-layout=n32 | FileCheck %s --check-prefix=ALL --check-prefix=CHECK32
+; RUN: opt < %s -instcombine -S -default-data-layout=n32:64 | FileCheck %s --check-prefix=ALL --check-prefix=CHECK64
-target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
-
-; CHECK-LABEL: define i32 @positive1
-; CHECK: switch i32
-; CHECK: i32 10, label
-; CHECK: i32 100, label
-; CHECK: i32 1001, label
+; In all cases, the data-layout is irrelevant. We should shrink as much as possible in InstCombine
+; and allow the backend to expand as much as needed to ensure optimal codegen for any target.
define i32 @positive1(i64 %a) {
+; ALL-LABEL: @positive1(
+; ALL: switch i32
+; ALL-NEXT: i32 10, label %return
+; ALL-NEXT: i32 100, label %sw.bb1
+; ALL-NEXT: i32 1001, label %sw.bb2
+; ALL-NEXT: ]
+;
entry:
%and = and i64 %a, 4294967295
switch i64 %and, label %sw.default [
- i64 10, label %return
- i64 100, label %sw.bb1
- i64 1001, label %sw.bb2
+ i64 10, label %return
+ i64 100, label %sw.bb1
+ i64 1001, label %sw.bb2
]
sw.bb1:
@@ -31,19 +36,20 @@ return:
ret i32 %retval.0
}
-; CHECK-LABEL: define i32 @negative1
-; CHECK: switch i32
-; CHECK: i32 -10, label
-; CHECK: i32 -100, label
-; CHECK: i32 -1001, label
-
define i32 @negative1(i64 %a) {
+; ALL-LABEL: @negative1(
+; ALL: switch i32
+; ALL-NEXT: i32 -10, label %return
+; ALL-NEXT: i32 -100, label %sw.bb1
+; ALL-NEXT: i32 -1001, label %sw.bb2
+; ALL-NEXT: ]
+;
entry:
%or = or i64 %a, -4294967296
switch i64 %or, label %sw.default [
- i64 -10, label %return
- i64 -100, label %sw.bb1
- i64 -1001, label %sw.bb2
+ i64 -10, label %return
+ i64 -100, label %sw.bb1
+ i64 -1001, label %sw.bb2
]
sw.bb1:
@@ -63,19 +69,20 @@ return:
; Make sure truncating a constant int larger than 64-bit doesn't trigger an
; assertion.
-; CHECK-LABEL: define i32 @trunc72to68
-; CHECK: switch i68
-; CHECK: i68 10, label
-; CHECK: i68 100, label
-; CHECK: i68 1001, label
-
define i32 @trunc72to68(i72 %a) {
+; ALL-LABEL: @trunc72to68(
+; ALL: switch i68
+; ALL-NEXT: i68 10, label %return
+; ALL-NEXT: i68 100, label %sw.bb1
+; ALL-NEXT: i68 1001, label %sw.bb2
+; ALL-NEXT: ]
+;
entry:
%and = and i72 %a, 295147905179352825855
switch i72 %and, label %sw.default [
- i72 10, label %return
- i72 100, label %sw.bb1
- i72 1001, label %sw.bb2
+ i72 10, label %return
+ i72 100, label %sw.bb1
+ i72 1001, label %sw.bb2
]
sw.bb1:
@@ -95,21 +102,22 @@ return:
; Make sure to avoid assertion crashes and use the type before
; truncation to generate the sub constant expressions that leads
; to the recomputed condition.
-;
-; CHECK-LABEL: @trunc64to59
-; CHECK: switch i59
-; CHECK: i59 0, label
-; CHECK: i59 18717182647723699, label
define void @trunc64to59(i64 %a) {
+; ALL-LABEL: @trunc64to59(
+; ALL: switch i59
+; ALL-NEXT: i59 0, label %sw.bb1
+; ALL-NEXT: i59 18717182647723699, label %sw.bb2
+; ALL-NEXT: ]
+;
entry:
%tmp0 = and i64 %a, 15
%tmp1 = mul i64 %tmp0, -6425668444178048401
%tmp2 = add i64 %tmp1, 5170979678563097242
%tmp3 = mul i64 %tmp2, 1627972535142754813
switch i64 %tmp3, label %sw.default [
- i64 847514119312061490, label %sw.bb1
- i64 866231301959785189, label %sw.bb2
+ i64 847514119312061490, label %sw.bb1
+ i64 866231301959785189, label %sw.bb2
]
sw.bb1:
diff --git a/test/Transforms/InstCombine/opaque.ll b/test/Transforms/InstCombine/opaque.ll
new file mode 100644
index 000000000000..f2a91855d5a9
--- /dev/null
+++ b/test/Transforms/InstCombine/opaque.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -instcombine -disable-output
+; Checks that bitcasts are not converted into GEP when
+; when the size of an aggregate cannot be determined.
+%swift.opaque = type opaque
+%SQ = type <{ [8 x i8] }>
+%Si = type <{ i64 }>
+
+%V = type <{ <{ %Vs4Int8, %Vs4Int8, %Vs4Int8, %Vs4Int8, %Vs4Int8, %Vs4Int8, %Vs4Int8, %Vs4Int8 }>, %Si, %SQ, %SQ, %Si, %swift.opaque }>
+%Vs4Int8 = type <{ i8 }>
+%swift.type = type { i64 }
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly,
+i64, i32, i1) #8
+
+@_swift_slowAlloc = external global i8* (i64, i64)*
+
+declare i8* @rt_swift_slowAlloc(i64, i64)
+
+define %swift.opaque* @_TwTkV([24 x i8]* %dest, %swift.opaque* %src,
+%swift.type* %bios_boot_params) #0 {
+entry:
+ %0 = bitcast %swift.opaque* %src to %V*
+ %1 = call noalias i8* @rt_swift_slowAlloc(i64 40, i64 0) #11
+ %2 = bitcast [24 x i8]* %dest to i8**
+ store i8* %1, i8** %2, align 8
+ %3 = bitcast i8* %1 to %V*
+ %4 = bitcast %V* %3 to i8*
+ %5 = bitcast %V* %0 to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %4, i8* %5, i64 40, i32 1, i1 false)
+ %6 = bitcast %V* %3 to %swift.opaque*
+ ret %swift.opaque* %6
+}
diff --git a/test/Transforms/InstCombine/or-fcmp.ll b/test/Transforms/InstCombine/or-fcmp.ll
index 29963f6c5c24..848e2c5d40f1 100644
--- a/test/Transforms/InstCombine/or-fcmp.ll
+++ b/test/Transforms/InstCombine/or-fcmp.ll
@@ -1,58 +1,1456 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
-; CHECK-LABEL: @t1(
-define zeroext i8 @t1(float %x, float %y) nounwind {
- %a = fcmp ueq float %x, %y ; <i1> [#uses=1]
- %b = fcmp uno float %x, %y ; <i1> [#uses=1]
- %c = or i1 %a, %b
-; CHECK-NOT: fcmp uno
-; CHECK: fcmp ueq
- %retval = zext i1 %c to i8
- ret i8 %retval
-}
-
-; CHECK-LABEL: @t2(
-define zeroext i8 @t2(float %x, float %y) nounwind {
- %a = fcmp olt float %x, %y ; <i1> [#uses=1]
- %b = fcmp oeq float %x, %y ; <i1> [#uses=1]
-; CHECK-NOT: fcmp olt
-; CHECK-NOT: fcmp oeq
-; CHECK: fcmp ole
- %c = or i1 %a, %b
- %retval = zext i1 %c to i8
- ret i8 %retval
-}
-
-; CHECK-LABEL: @t3(
-define zeroext i8 @t3(float %x, float %y) nounwind {
- %a = fcmp ult float %x, %y ; <i1> [#uses=1]
- %b = fcmp uge float %x, %y ; <i1> [#uses=1]
- %c = or i1 %a, %b
- %retval = zext i1 %c to i8
-; CHECK: ret i8 1
- ret i8 %retval
-}
-
-; CHECK-LABEL: @t4(
-define zeroext i8 @t4(float %x, float %y) nounwind {
- %a = fcmp ult float %x, %y ; <i1> [#uses=1]
- %b = fcmp ugt float %x, %y ; <i1> [#uses=1]
- %c = or i1 %a, %b
-; CHECK-NOT: fcmp ult
-; CHECK-NOT: fcmp ugt
-; CHECK: fcmp une
- %retval = zext i1 %c to i8
- ret i8 %retval
-}
-
-; CHECK-LABEL: @t5(
-define zeroext i8 @t5(float %x, float %y) nounwind {
- %a = fcmp olt float %x, %y ; <i1> [#uses=1]
- %b = fcmp oge float %x, %y ; <i1> [#uses=1]
- %c = or i1 %a, %b
-; CHECK-NOT: fcmp olt
-; CHECK-NOT: fcmp oge
-; CHECK: fcmp ord
- %retval = zext i1 %c to i8
- ret i8 %retval
+define i1 @auto_gen_0(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_0(
+; CHECK-NEXT: ret i1 false
+;
+ %cmp = fcmp false double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_1(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_1(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oeq double %a, %b
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %cmp = fcmp oeq double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_2(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_2(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oeq double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp oeq double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_3(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_3(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt double %a, %b
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %cmp = fcmp ogt double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_4(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_4(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oge double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ogt double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_5(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_5(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ogt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ogt double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_6(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_6(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp oge double %a, %b
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %cmp = fcmp oge double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_7(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_7(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oge double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp oge double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_8(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_8(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oge double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp oge double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_9(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_9(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp oge double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp oge double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_10(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_10(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt double %a, %b
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %cmp = fcmp olt double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_11(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_11(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ole double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp olt double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_12(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_12(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp olt double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_13(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_13(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp olt double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_14(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_14(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp olt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp olt double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_15(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_15(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ole double %a, %b
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %cmp = fcmp ole double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_16(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_16(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ole double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ole double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_17(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_17(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ole double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_18(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_18(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ole double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_19(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_19(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ole double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ole double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_20(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_20(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ole double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ole double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_21(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_21(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp one double %a, %b
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %cmp = fcmp one double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_22(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_22(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp one double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_23(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_23(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp one double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_24(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_24(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp one double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_25(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_25(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp one double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_26(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_26(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp one double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_27(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_27(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp one double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp one double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_28(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_28(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ord double %a, %b
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %cmp = fcmp ord double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_29(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_29(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ord double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_30(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_30(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ord double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_31(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_31(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ord double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_32(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_32(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ord double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_33(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_33(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ord double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_34(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_34(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ord double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_35(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_35(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ord double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ord double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_36(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_36(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ueq double %a, %b
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_37(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_37(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_38(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_38(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_39(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_39(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_40(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_40(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_41(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_41(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_42(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_42(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_43(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_43(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_44(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_44(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ueq double %a, %b
+ %cmp1 = fcmp ueq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_45(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_45(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ugt double %a, %b
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_46(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_46(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_47(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_47(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ugt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_48(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_48(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_49(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_49(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_50(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_50(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_51(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_51(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_52(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_52(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_53(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_53(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp ueq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_54(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_54(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ugt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ugt double %a, %b
+ %cmp1 = fcmp ugt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_55(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_55(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp uge double %a, %b
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_56(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_56(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_57(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_57(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_58(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_58(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_59(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_59(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_60(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_60(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_61(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_61(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_62(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_62(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_63(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_63(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp ueq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_64(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_64(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp ugt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_65(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_65(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uge double %a, %b
+ %cmp1 = fcmp uge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_66(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_66(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ult double %a, %b
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_67(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_67(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_68(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_68(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_69(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_69(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_70(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_70(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_71(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_71(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_72(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_72(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_73(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_73(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_74(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_74(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp ueq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_75(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_75(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp ugt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_76(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_76(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp uge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_77(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_77(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ult double %a, %b
+ %cmp1 = fcmp ult double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_78(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_78(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp ule double %a, %b
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_79(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_79(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_80(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_80(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_81(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_81(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_82(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_82(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_83(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_83(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_84(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_84(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_85(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_85(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_86(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_86(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp ueq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_87(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_87(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp ugt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_88(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_88(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp uge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_89(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_89(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp ult double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_90(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_90(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp ule double %a, %b
+ %cmp1 = fcmp ule double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_91(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_91(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp une double %a, %b
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_92(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_92(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_93(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_93(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_94(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_94(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_95(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_95(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_96(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_96(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_97(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_97(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_98(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_98(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_99(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_99(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp ueq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_100(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_100(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp ugt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_101(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_101(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp uge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_102(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_102(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp ult double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_103(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_103(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp ule double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_104(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_104(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp une double %a, %b
+ %cmp1 = fcmp une double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_105(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_105(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp uno double %a, %b
+; CHECK-NEXT: ret i1 [[CMP]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_106(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_106(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_107(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_107(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ugt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_108(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_108(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_109(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_109(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_110(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_110(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_111(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_111(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_112(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_112(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_113(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_113(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ueq double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp ueq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_114(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_114(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ugt double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp ugt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_115(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_115(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uge double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp uge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_116(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_116(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ult double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp ult double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_117(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_117(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp ule double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp ule double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_118(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_118(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp une double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp une double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_119(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_119(
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double %a, %b
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %cmp = fcmp uno double %a, %b
+ %cmp1 = fcmp uno double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_120(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_120(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp false double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_121(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_121(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp oeq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_122(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_122(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp ogt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_123(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_123(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp oge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_124(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_124(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp olt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_125(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_125(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp ole double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_126(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_126(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp one double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_127(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_127(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp ord double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_128(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_128(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp ueq double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_129(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_129(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp ugt double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_130(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_130(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp uge double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_131(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_131(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp ult double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_132(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_132(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp ule double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_133(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_133(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp une double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_134(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_134(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp uno double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
+}
+
+define i1 @auto_gen_135(double %a, double %b) {
+; CHECK-LABEL: @auto_gen_135(
+; CHECK-NEXT: ret i1 true
+;
+ %cmp = fcmp true double %a, %b
+ %cmp1 = fcmp true double %a, %b
+ %retval = or i1 %cmp, %cmp1
+ ret i1 %retval
}
diff --git a/test/Transforms/InstCombine/or-to-xor.ll b/test/Transforms/InstCombine/or-to-xor.ll
index 8847cb73281a..84567906f843 100644
--- a/test/Transforms/InstCombine/or-to-xor.ll
+++ b/test/Transforms/InstCombine/or-to-xor.ll
@@ -1,42 +1,55 @@
-; RUN: opt < %s -instcombine -S | grep "xor i32 %a, %b" | count 4
-; RUN: opt < %s -instcombine -S | not grep "and"
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
-define i32 @func1(i32 %a, i32 %b) nounwind readnone {
-entry:
- %b_not = xor i32 %b, -1
- %0 = and i32 %a, %b_not
- %a_not = xor i32 %a, -1
- %1 = and i32 %a_not, %b
- %2 = or i32 %0, %1
- ret i32 %2
+define i32 @func1(i32 %a, i32 %b) {
+; CHECK-LABEL: @func1(
+; CHECK-NEXT: [[T2:%.*]] = xor i32 %a, %b
+; CHECK-NEXT: ret i32 [[T2]]
+;
+ %b_not = xor i32 %b, -1
+ %t0 = and i32 %a, %b_not
+ %a_not = xor i32 %a, -1
+ %t1 = and i32 %a_not, %b
+ %t2 = or i32 %t0, %t1
+ ret i32 %t2
}
-define i32 @func2(i32 %a, i32 %b) nounwind readnone {
-entry:
- %b_not = xor i32 %b, -1
- %0 = and i32 %b_not, %a
- %a_not = xor i32 %a, -1
- %1 = and i32 %a_not, %b
- %2 = or i32 %0, %1
- ret i32 %2
+define i32 @func2(i32 %a, i32 %b) {
+; CHECK-LABEL: @func2(
+; CHECK-NEXT: [[T2:%.*]] = xor i32 %a, %b
+; CHECK-NEXT: ret i32 [[T2]]
+;
+ %b_not = xor i32 %b, -1
+ %t0 = and i32 %b_not, %a
+ %a_not = xor i32 %a, -1
+ %t1 = and i32 %a_not, %b
+ %t2 = or i32 %t0, %t1
+ ret i32 %t2
}
-define i32 @func3(i32 %a, i32 %b) nounwind readnone {
-entry:
- %b_not = xor i32 %b, -1
- %0 = and i32 %a, %b_not
- %a_not = xor i32 %a, -1
- %1 = and i32 %b, %a_not
- %2 = or i32 %0, %1
- ret i32 %2
+define i32 @func3(i32 %a, i32 %b) {
+; CHECK-LABEL: @func3(
+; CHECK-NEXT: [[T2:%.*]] = xor i32 %a, %b
+; CHECK-NEXT: ret i32 [[T2]]
+;
+ %b_not = xor i32 %b, -1
+ %t0 = and i32 %a, %b_not
+ %a_not = xor i32 %a, -1
+ %t1 = and i32 %b, %a_not
+ %t2 = or i32 %t0, %t1
+ ret i32 %t2
}
-define i32 @func4(i32 %a, i32 %b) nounwind readnone {
-entry:
- %b_not = xor i32 %b, -1
- %0 = and i32 %b_not, %a
- %a_not = xor i32 %a, -1
- %1 = and i32 %b, %a_not
- %2 = or i32 %0, %1
- ret i32 %2
+define i32 @func4(i32 %a, i32 %b) {
+; CHECK-LABEL: @func4(
+; CHECK-NEXT: [[T2:%.*]] = xor i32 %a, %b
+; CHECK-NEXT: ret i32 [[T2]]
+;
+ %b_not = xor i32 %b, -1
+ %t0 = and i32 %b_not, %a
+ %a_not = xor i32 %a, -1
+ %t1 = and i32 %b, %a_not
+ %t2 = or i32 %t0, %t1
+ ret i32 %t2
}
+
diff --git a/test/Transforms/InstCombine/or.ll b/test/Transforms/InstCombine/or.ll
index a2bc4e7d9832..53cb48809fc3 100644
--- a/test/Transforms/InstCombine/or.ll
+++ b/test/Transforms/InstCombine/or.ll
@@ -1,328 +1,361 @@
-; This test makes sure that these instructions are properly eliminated.
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
define i32 @test1(i32 %A) {
- %B = or i32 %A, 0
- ret i32 %B
; CHECK-LABEL: @test1(
-; CHECK: ret i32 %A
+; CHECK-NEXT: ret i32 %A
+;
+ %B = or i32 %A, 0
+ ret i32 %B
}
define i32 @test2(i32 %A) {
- %B = or i32 %A, -1
- ret i32 %B
; CHECK-LABEL: @test2(
-; CHECK: ret i32 -1
+; CHECK-NEXT: ret i32 -1
+;
+ %B = or i32 %A, -1
+ ret i32 %B
}
define i8 @test2a(i8 %A) {
- %B = or i8 %A, -1
- ret i8 %B
; CHECK-LABEL: @test2a(
-; CHECK: ret i8 -1
+; CHECK-NEXT: ret i8 -1
+;
+ %B = or i8 %A, -1
+ ret i8 %B
}
define i1 @test3(i1 %A) {
- %B = or i1 %A, false
- ret i1 %B
; CHECK-LABEL: @test3(
-; CHECK: ret i1 %A
+; CHECK-NEXT: ret i1 %A
+;
+ %B = or i1 %A, false
+ ret i1 %B
}
define i1 @test4(i1 %A) {
- %B = or i1 %A, true
- ret i1 %B
; CHECK-LABEL: @test4(
-; CHECK: ret i1 true
+; CHECK-NEXT: ret i1 true
+;
+ %B = or i1 %A, true
+ ret i1 %B
}
define i1 @test5(i1 %A) {
- %B = or i1 %A, %A
- ret i1 %B
; CHECK-LABEL: @test5(
-; CHECK: ret i1 %A
+; CHECK-NEXT: ret i1 %A
+;
+ %B = or i1 %A, %A
+ ret i1 %B
}
define i32 @test6(i32 %A) {
- %B = or i32 %A, %A
- ret i32 %B
; CHECK-LABEL: @test6(
-; CHECK: ret i32 %A
+; CHECK-NEXT: ret i32 %A
+;
+ %B = or i32 %A, %A
+ ret i32 %B
}
; A | ~A == -1
define i32 @test7(i32 %A) {
- %NotA = xor i32 -1, %A
- %B = or i32 %A, %NotA
- ret i32 %B
; CHECK-LABEL: @test7(
-; CHECK: ret i32 -1
+; CHECK-NEXT: ret i32 -1
+;
+ %NotA = xor i32 -1, %A
+ %B = or i32 %A, %NotA
+ ret i32 %B
}
define i8 @test8(i8 %A) {
- %B = or i8 %A, -2
- %C = or i8 %B, 1
- ret i8 %C
; CHECK-LABEL: @test8(
-; CHECK: ret i8 -1
+; CHECK-NEXT: ret i8 -1
+;
+ %B = or i8 %A, -2
+ %C = or i8 %B, 1
+ ret i8 %C
}
; Test that (A|c1)|(B|c2) == (A|B)|(c1|c2)
define i8 @test9(i8 %A, i8 %B) {
- %C = or i8 %A, 1
- %D = or i8 %B, -2
- %E = or i8 %C, %D
- ret i8 %E
; CHECK-LABEL: @test9(
-; CHECK: ret i8 -1
+; CHECK-NEXT: ret i8 -1
+;
+ %C = or i8 %A, 1
+ %D = or i8 %B, -2
+ %E = or i8 %C, %D
+ ret i8 %E
}
define i8 @test10(i8 %A) {
- %B = or i8 %A, 1
- %C = and i8 %B, -2
- ; (X & C1) | C2 --> (X | C2) & (C1|C2)
- %D = or i8 %C, -2
- ret i8 %D
; CHECK-LABEL: @test10(
-; CHECK: ret i8 -2
+; CHECK-NEXT: ret i8 -2
+;
+ %B = or i8 %A, 1
+ %C = and i8 %B, -2
+ ; (X & C1) | C2 --> (X | C2) & (C1|C2)
+ %D = or i8 %C, -2
+ ret i8 %D
}
define i8 @test11(i8 %A) {
- %B = or i8 %A, -2
- %C = xor i8 %B, 13
- ; (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2)
- %D = or i8 %C, 1
- %E = xor i8 %D, 12
- ret i8 %E
; CHECK-LABEL: @test11(
-; CHECK: ret i8 -1
+; CHECK-NEXT: ret i8 -1
+;
+ %B = or i8 %A, -2
+ %C = xor i8 %B, 13
+ ; (X ^ C1) | C2 --> (X | C2) ^ (C1&~C2)
+ %D = or i8 %C, 1
+ %E = xor i8 %D, 12
+ ret i8 %E
}
define i32 @test12(i32 %A) {
; Should be eliminated
- %B = or i32 %A, 4
- %C = and i32 %B, 8
- ret i32 %C
; CHECK-LABEL: @test12(
-; CHECK: %C = and i32 %A, 8
-; CHECK: ret i32 %C
+; CHECK-NEXT: [[C:%.*]] = and i32 %A, 8
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = or i32 %A, 4
+ %C = and i32 %B, 8
+ ret i32 %C
}
define i32 @test13(i32 %A) {
- %B = or i32 %A, 12
- ; Always equal to 8
- %C = and i32 %B, 8
- ret i32 %C
; CHECK-LABEL: @test13(
-; CHECK: ret i32 8
+; CHECK-NEXT: ret i32 8
+;
+ %B = or i32 %A, 12
+ ; Always equal to 8
+ %C = and i32 %B, 8
+ ret i32 %C
}
define i1 @test14(i32 %A, i32 %B) {
- %C1 = icmp ult i32 %A, %B
- %C2 = icmp ugt i32 %A, %B
- ; (A < B) | (A > B) === A != B
- %D = or i1 %C1, %C2
- ret i1 %D
; CHECK-LABEL: @test14(
-; CHECK: icmp ne i32 %A, %B
-; CHECK: ret i1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 %A, %B
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %C1 = icmp ult i32 %A, %B
+ %C2 = icmp ugt i32 %A, %B
+ ; (A < B) | (A > B) === A != B
+ %D = or i1 %C1, %C2
+ ret i1 %D
}
define i1 @test15(i32 %A, i32 %B) {
- %C1 = icmp ult i32 %A, %B
- %C2 = icmp eq i32 %A, %B
- ; (A < B) | (A == B) === A <= B
- %D = or i1 %C1, %C2
- ret i1 %D
; CHECK-LABEL: @test15(
-; CHECK: icmp ule i32 %A, %B
-; CHECK: ret i1
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ule i32 %A, %B
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %C1 = icmp ult i32 %A, %B
+ %C2 = icmp eq i32 %A, %B
+ ; (A < B) | (A == B) === A <= B
+ %D = or i1 %C1, %C2
+ ret i1 %D
}
define i32 @test16(i32 %A) {
- %B = and i32 %A, 1
- ; -2 = ~1
- %C = and i32 %A, -2
- ; %D = and int %B, -1 == %B
- %D = or i32 %B, %C
- ret i32 %D
; CHECK-LABEL: @test16(
-; CHECK: ret i32 %A
+; CHECK-NEXT: ret i32 %A
+;
+ %B = and i32 %A, 1
+ ; -2 = ~1
+ %C = and i32 %A, -2
+ ; %D = and int %B, -1 == %B
+ %D = or i32 %B, %C
+ ret i32 %D
}
define i32 @test17(i32 %A) {
- %B = and i32 %A, 1
- %C = and i32 %A, 4
- ; %D = and int %B, 5
- %D = or i32 %B, %C
- ret i32 %D
; CHECK-LABEL: @test17(
-; CHECK: %D = and i32 %A, 5
-; CHECK: ret i32 %D
+; CHECK-NEXT: [[D:%.*]] = and i32 %A, 5
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %B = and i32 %A, 1
+ %C = and i32 %A, 4
+ ; %D = and int %B, 5
+ %D = or i32 %B, %C
+ ret i32 %D
}
define i1 @test18(i32 %A) {
- %B = icmp sge i32 %A, 100
- %C = icmp slt i32 %A, 50
- ;; (A-50) >u 50
- %D = or i1 %B, %C
- ret i1 %D
; CHECK-LABEL: @test18(
-; CHECK: add i32
-; CHECK: icmp ugt
-; CHECK: ret i1
+; CHECK-NEXT: [[A_OFF:%.*]] = add i32 %A, -50
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt i32 [[A_OFF]], 49
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %B = icmp sge i32 %A, 100
+ %C = icmp slt i32 %A, 50
+ ;; (A-50) >u 50
+ %D = or i1 %B, %C
+ ret i1 %D
}
define i1 @test19(i32 %A) {
- %B = icmp eq i32 %A, 50
- %C = icmp eq i32 %A, 51
- ;; (A&-2) == 50
- %D = or i1 %B, %C
- ret i1 %D
; CHECK-LABEL: @test19(
-; CHECK: or i32
-; CHECK: icmp eq
-; CHECK: ret i1
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 %A, 1
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 51
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %B = icmp eq i32 %A, 50
+ %C = icmp eq i32 %A, 51
+ ;; (A&-2) == 50
+ %D = or i1 %B, %C
+ ret i1 %D
}
define i32 @test20(i32 %x) {
- %y = and i32 %x, 123
- %z = or i32 %y, %x
- ret i32 %z
; CHECK-LABEL: @test20(
-; CHECK: ret i32 %x
+; CHECK-NEXT: ret i32 %x
+;
+ %y = and i32 %x, 123
+ %z = or i32 %y, %x
+ ret i32 %z
}
define i32 @test21(i32 %tmp.1) {
- %tmp.1.mask1 = add i32 %tmp.1, 2
- %tmp.3 = and i32 %tmp.1.mask1, -2
- %tmp.5 = and i32 %tmp.1, 1
- ;; add tmp.1, 2
- %tmp.6 = or i32 %tmp.5, %tmp.3
- ret i32 %tmp.6
; CHECK-LABEL: @test21(
-; CHECK: add i32 %{{[^,]*}}, 2
-; CHECK: ret i32
+; CHECK-NEXT: [[TMP_1_MASK1:%.*]] = add i32 %tmp.1, 2
+; CHECK-NEXT: ret i32 [[TMP_1_MASK1]]
+;
+ %tmp.1.mask1 = add i32 %tmp.1, 2
+ %tmp.3 = and i32 %tmp.1.mask1, -2
+ %tmp.5 = and i32 %tmp.1, 1
+ ;; add tmp.1, 2
+ %tmp.6 = or i32 %tmp.5, %tmp.3
+ ret i32 %tmp.6
}
define i32 @test22(i32 %B) {
- %ELIM41 = and i32 %B, 1
- %ELIM7 = and i32 %B, -2
- %ELIM5 = or i32 %ELIM41, %ELIM7
- ret i32 %ELIM5
; CHECK-LABEL: @test22(
-; CHECK: ret i32 %B
+; CHECK-NEXT: ret i32 %B
+;
+ %ELIM41 = and i32 %B, 1
+ %ELIM7 = and i32 %B, -2
+ %ELIM5 = or i32 %ELIM41, %ELIM7
+ ret i32 %ELIM5
}
define i16 @test23(i16 %A) {
- %B = lshr i16 %A, 1
- ;; fold or into xor
- %C = or i16 %B, -32768
- %D = xor i16 %C, 8193
- ret i16 %D
; CHECK-LABEL: @test23(
-; CHECK: %B = lshr i16 %A, 1
-; CHECK: %D = xor i16 %B, -24575
-; CHECK: ret i16 %D
+; CHECK-NEXT: [[B:%.*]] = lshr i16 %A, 1
+; CHECK-NEXT: [[D:%.*]] = xor i16 [[B]], -24575
+; CHECK-NEXT: ret i16 [[D]]
+;
+ %B = lshr i16 %A, 1
+ ;; fold or into xor
+ %C = or i16 %B, -32768
+ %D = xor i16 %C, 8193
+ ret i16 %D
}
; PR1738
define i1 @test24(double %X, double %Y) {
- %tmp9 = fcmp uno double %X, 0.000000e+00 ; <i1> [#uses=1]
- %tmp13 = fcmp uno double %Y, 0.000000e+00 ; <i1> [#uses=1]
- %bothcond = or i1 %tmp13, %tmp9 ; <i1> [#uses=1]
- ret i1 %bothcond
-
; CHECK-LABEL: @test24(
-; CHECK: = fcmp uno double %Y, %X
-; CHECK: ret i1
+; CHECK-NEXT: [[TMP1:%.*]] = fcmp uno double %Y, %X
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %tmp9 = fcmp uno double %X, 0.000000e+00
+ %tmp13 = fcmp uno double %Y, 0.000000e+00
+ %bothcond = or i1 %tmp13, %tmp9
+ ret i1 %bothcond
}
; PR3266 & PR5276
define i1 @test25(i32 %A, i32 %B) {
+; CHECK-LABEL: @test25(
+; CHECK-NEXT: [[NOTLHS:%.*]] = icmp ne i32 %A, 0
+; CHECK-NEXT: [[NOTRHS:%.*]] = icmp ne i32 %B, 57
+; CHECK-NEXT: [[F:%.*]] = and i1 [[NOTRHS]], [[NOTLHS]]
+; CHECK-NEXT: ret i1 [[F]]
+;
%C = icmp eq i32 %A, 0
%D = icmp eq i32 %B, 57
%E = or i1 %C, %D
%F = xor i1 %E, -1
ret i1 %F
-
-; CHECK-LABEL: @test25(
-; CHECK: icmp ne i32 %A, 0
-; CHECK-NEXT: icmp ne i32 %B, 57
-; CHECK-NEXT: %F = and i1
-; CHECK-NEXT: ret i1 %F
}
; PR5634
define i1 @test26(i32 %A, i32 %B) {
- %C1 = icmp eq i32 %A, 0
- %C2 = icmp eq i32 %B, 0
- ; (A == 0) & (A == 0) --> (A|B) == 0
- %D = and i1 %C1, %C2
- ret i1 %D
; CHECK-LABEL: @test26(
-; CHECK: or i32 %A, %B
-; CHECK: icmp eq i32 {{.*}}, 0
-; CHECK: ret i1
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 %A, %B
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %C1 = icmp eq i32 %A, 0
+ %C2 = icmp eq i32 %B, 0
+ ; (A == 0) & (A == 0) --> (A|B) == 0
+ %D = and i1 %C1, %C2
+ ret i1 %D
}
define i1 @test27(i32* %A, i32* %B) {
+; CHECK-LABEL: @test27(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq i32* %A, null
+; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32* %B, null
+; CHECK-NEXT: [[E:%.*]] = and i1 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i1 [[E]]
+;
%C1 = ptrtoint i32* %A to i32
%C2 = ptrtoint i32* %B to i32
%D = or i32 %C1, %C2
%E = icmp eq i32 %D, 0
ret i1 %E
-; CHECK-LABEL: @test27(
-; CHECK: icmp eq i32* %A, null
-; CHECK: icmp eq i32* %B, null
-; CHECK: and i1
-; CHECK: ret i1
}
; PR5634
define i1 @test28(i32 %A, i32 %B) {
- %C1 = icmp ne i32 %A, 0
- %C2 = icmp ne i32 %B, 0
- ; (A != 0) | (A != 0) --> (A|B) != 0
- %D = or i1 %C1, %C2
- ret i1 %D
; CHECK-LABEL: @test28(
-; CHECK: or i32 %A, %B
-; CHECK: icmp ne i32 {{.*}}, 0
-; CHECK: ret i1
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 %A, %B
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %C1 = icmp ne i32 %A, 0
+ %C2 = icmp ne i32 %B, 0
+ ; (A != 0) | (A != 0) --> (A|B) != 0
+ %D = or i1 %C1, %C2
+ ret i1 %D
}
define i1 @test29(i32* %A, i32* %B) {
+; CHECK-LABEL: @test29(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32* %A, null
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32* %B, null
+; CHECK-NEXT: [[E:%.*]] = or i1 [[TMP1]], [[TMP2]]
+; CHECK-NEXT: ret i1 [[E]]
+;
%C1 = ptrtoint i32* %A to i32
%C2 = ptrtoint i32* %B to i32
%D = or i32 %C1, %C2
%E = icmp ne i32 %D, 0
ret i1 %E
-; CHECK-LABEL: @test29(
-; CHECK: icmp ne i32* %A, null
-; CHECK: icmp ne i32* %B, null
-; CHECK: or i1
-; CHECK: ret i1
}
; PR4216
define i32 @test30(i32 %A) {
-entry:
+; CHECK-LABEL: @test30(
+; CHECK-NEXT: [[D:%.*]] = and i32 %A, -58312
+; CHECK-NEXT: [[E:%.*]] = or i32 [[D]], 32962
+; CHECK-NEXT: ret i32 [[E]]
+;
%B = or i32 %A, 32962
%C = and i32 %A, -65536
%D = and i32 %B, 40186
%E = or i32 %D, %C
ret i32 %E
-; CHECK-LABEL: @test30(
-; CHECK: %D = and i32 %A, -58312
-; CHECK: %E = or i32 %D, 32962
-; CHECK: ret i32 %E
}
; PR4216
-define i64 @test31(i64 %A) nounwind readnone ssp noredzone {
+define i64 @test31(i64 %A) {
+; CHECK-LABEL: @test31(
+; CHECK-NEXT: [[E:%.*]] = and i64 %A, 4294908984
+; CHECK-NEXT: [[F:%.*]] = or i64 [[E]], 32962
+; CHECK-NEXT: ret i64 [[F]]
+;
%B = or i64 %A, 194
%D = and i64 %B, 250
@@ -331,150 +364,199 @@ define i64 @test31(i64 %A) nounwind readnone ssp noredzone {
%F = or i64 %D, %E
ret i64 %F
-; CHECK-LABEL: @test31(
-; CHECK-NEXT: %E = and i64 %A, 4294908984
-; CHECK-NEXT: %F = or i64 %E, 32962
-; CHECK-NEXT: ret i64 %F
}
-define <4 x i32> @test32(<4 x i1> %and.i1352, <4 x i32> %vecinit6.i176, <4 x i32> %vecinit6.i191) {
- %and.i135 = sext <4 x i1> %and.i1352 to <4 x i32> ; <<4 x i32>> [#uses=2]
- %and.i129 = and <4 x i32> %vecinit6.i176, %and.i135 ; <<4 x i32>> [#uses=1]
- %neg.i = xor <4 x i32> %and.i135, <i32 -1, i32 -1, i32 -1, i32 -1> ; <<4 x i32>> [#uses=1]
- %and.i = and <4 x i32> %vecinit6.i191, %neg.i ; <<4 x i32>> [#uses=1]
- %or.i = or <4 x i32> %and.i, %and.i129 ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %or.i
; codegen is mature enough to handle vector selects.
+define <4 x i32> @test32(<4 x i1> %and.i1352, <4 x i32> %vecinit6.i176, <4 x i32> %vecinit6.i191) {
; CHECK-LABEL: @test32(
-; CHECK: select <4 x i1> %and.i1352, <4 x i32> %vecinit6.i176, <4 x i32> %vecinit6.i191
+; CHECK-NEXT: [[OR_I:%.*]] = select <4 x i1> %and.i1352, <4 x i32> %vecinit6.i176, <4 x i32> %vecinit6.i191
+; CHECK-NEXT: ret <4 x i32> [[OR_I]]
+;
+ %and.i135 = sext <4 x i1> %and.i1352 to <4 x i32>
+ %and.i129 = and <4 x i32> %vecinit6.i176, %and.i135
+ %neg.i = xor <4 x i32> %and.i135, <i32 -1, i32 -1, i32 -1, i32 -1>
+ %and.i = and <4 x i32> %vecinit6.i191, %neg.i
+ %or.i = or <4 x i32> %and.i, %and.i129
+ ret <4 x i32> %or.i
}
define i1 @test33(i1 %X, i1 %Y) {
+; CHECK-LABEL: @test33(
+; CHECK-NEXT: [[B:%.*]] = or i1 %X, %Y
+; CHECK-NEXT: ret i1 [[B]]
+;
%a = or i1 %X, %Y
%b = or i1 %a, %X
ret i1 %b
-; CHECK-LABEL: @test33(
-; CHECK-NEXT: or i1 %X, %Y
-; CHECK-NEXT: ret
}
define i32 @test34(i32 %X, i32 %Y) {
+; CHECK-LABEL: @test34(
+; CHECK-NEXT: [[B:%.*]] = or i32 %X, %Y
+; CHECK-NEXT: ret i32 [[B]]
+;
%a = or i32 %X, %Y
%b = or i32 %Y, %a
ret i32 %b
-; CHECK-LABEL: @test34(
-; CHECK-NEXT: or i32 %X, %Y
-; CHECK-NEXT: ret
}
define i32 @test35(i32 %a, i32 %b) {
+; CHECK-LABEL: @test35(
+; CHECK-NEXT: [[TMP1:%.*]] = or i32 %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = or i32 [[TMP1]], 1135
+; CHECK-NEXT: ret i32 [[TMP2]]
+;
%1 = or i32 %a, 1135
%2 = or i32 %1, %b
ret i32 %2
- ; CHECK-LABEL: @test35(
- ; CHECK-NEXT: or i32 %a, %b
- ; CHECK-NEXT: or i32 %1, 1135
}
define i1 @test36(i32 %x) {
+; CHECK-LABEL: @test36(
+; CHECK-NEXT: [[X_OFF:%.*]] = add i32 %x, -23
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[X_OFF]], 3
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
%cmp1 = icmp eq i32 %x, 23
%cmp2 = icmp eq i32 %x, 24
%ret1 = or i1 %cmp1, %cmp2
%cmp3 = icmp eq i32 %x, 25
%ret2 = or i1 %ret1, %cmp3
ret i1 %ret2
-; CHECK-LABEL: @test36(
-; CHECK-NEXT: %x.off = add i32 %x, -23
-; CHECK-NEXT: icmp ult i32 %x.off, 3
-; CHECK-NEXT: ret i1
}
-define i32 @test37(i32* %xp, i32 %y) {
-; CHECK-LABEL: @test37(
-; CHECK: select i1 %tobool, i32 -1, i32 %x
- %tobool = icmp ne i32 %y, 0
- %sext = sext i1 %tobool to i32
- %x = load i32, i32* %xp
+define i32 @orsext_to_sel(i32 %x, i1 %y) {
+; CHECK-LABEL: @orsext_to_sel(
+; CHECK-NEXT: [[OR:%.*]] = select i1 %y, i32 -1, i32 %x
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %sext = sext i1 %y to i32
%or = or i32 %sext, %x
ret i32 %or
}
-define i32 @test38(i32* %xp, i32 %y) {
-; CHECK-LABEL: @test38(
-; CHECK: select i1 %tobool, i32 -1, i32 %x
- %tobool = icmp ne i32 %y, 0
- %sext = sext i1 %tobool to i32
- %x = load i32, i32* %xp
+define i32 @orsext_to_sel_swap(i32 %x, i1 %y) {
+; CHECK-LABEL: @orsext_to_sel_swap(
+; CHECK-NEXT: [[OR:%.*]] = select i1 %y, i32 -1, i32 %x
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %sext = sext i1 %y to i32
%or = or i32 %x, %sext
ret i32 %or
}
+define i32 @orsext_to_sel_multi_use(i32 %x, i1 %y) {
+; CHECK-LABEL: @orsext_to_sel_multi_use(
+; CHECK-NEXT: [[SEXT:%.*]] = sext i1 %y to i32
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SEXT]], %x
+; CHECK-NEXT: [[ADD:%.*]] = add i32 [[SEXT]], [[OR]]
+; CHECK-NEXT: ret i32 [[ADD]]
+;
+ %sext = sext i1 %y to i32
+ %or = or i32 %sext, %x
+ %add = add i32 %sext, %or
+ ret i32 %add
+}
+
+define <2 x i32> @orsext_to_sel_vec(<2 x i32> %x, <2 x i1> %y) {
+; CHECK-LABEL: @orsext_to_sel_vec(
+; CHECK-NEXT: [[OR:%.*]] = select <2 x i1> %y, <2 x i32> <i32 -1, i32 -1>, <2 x i32> %x
+; CHECK-NEXT: ret <2 x i32> [[OR]]
+;
+ %sext = sext <2 x i1> %y to <2 x i32>
+ %or = or <2 x i32> %sext, %x
+ ret <2 x i32> %or
+}
+
+define <2 x i132> @orsext_to_sel_vec_swap(<2 x i132> %x, <2 x i1> %y) {
+; CHECK-LABEL: @orsext_to_sel_vec_swap(
+; CHECK-NEXT: [[OR:%.*]] = select <2 x i1> %y, <2 x i132> <i132 -1, i132 -1>, <2 x i132> %x
+; CHECK-NEXT: ret <2 x i132> [[OR]]
+;
+ %sext = sext <2 x i1> %y to <2 x i132>
+ %or = or <2 x i132> %x, %sext
+ ret <2 x i132> %or
+}
+
define i32 @test39(i32 %a, i32 %b) {
-; CHECK-LABEL: test39(
-; CHECK-NEXT: %or = or i32 %a, %b
- %xor = xor i32 %a, -1
- %and = and i32 %xor, %b
- %or = or i32 %and, %a
- ret i32 %or
+; CHECK-LABEL: @test39(
+; CHECK-NEXT: [[OR:%.*]] = or i32 %a, %b
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %xor = xor i32 %a, -1
+ %and = and i32 %xor, %b
+ %or = or i32 %and, %a
+ ret i32 %or
}
define i32 @test40(i32 %a, i32 %b) {
-; CHECK-LABEL: test40(
-; CHECK-NEXT: %1 = xor i32 %a, -1
-; CHECK-NEXT: %or = or i32 %1, %b
- %and = and i32 %a, %b
- %xor = xor i32 %a, -1
- %or = or i32 %and, %xor
- ret i32 %or
+; CHECK-LABEL: @test40(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 %a, -1
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[TMP1]], %b
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %and = and i32 %a, %b
+ %xor = xor i32 %a, -1
+ %or = or i32 %and, %xor
+ ret i32 %or
}
define i32 @test41(i32 %a, i32 %b) {
-; CHECK-LABEL: test41(
-; CHECK-NEXT: %1 = xor i32 %a, -1
-; CHECK-NEXT: %or = xor i32 %1, %b
- %and = and i32 %a, %b
- %nega = xor i32 %a, -1
- %xor = xor i32 %nega, %b
- %or = or i32 %and, %xor
- ret i32 %or
+; CHECK-LABEL: @test41(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 %a, -1
+; CHECK-NEXT: [[OR:%.*]] = xor i32 [[TMP1]], %b
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %and = and i32 %a, %b
+ %nega = xor i32 %a, -1
+ %xor = xor i32 %nega, %b
+ %or = or i32 %and, %xor
+ ret i32 %or
}
define i32 @test42(i32 %a, i32 %b) {
-; CHECK-LABEL: test42(
-; CHECK-NEXT: %1 = xor i32 %a, -1
-; CHECK-NEXT: %or = xor i32 %1, %b
- %nega = xor i32 %a, -1
- %xor = xor i32 %nega, %b
- %and = and i32 %a, %b
- %or = or i32 %xor, %and
- ret i32 %or
+; CHECK-LABEL: @test42(
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 %a, -1
+; CHECK-NEXT: [[OR:%.*]] = xor i32 [[TMP1]], %b
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %nega = xor i32 %a, -1
+ %xor = xor i32 %nega, %b
+ %and = and i32 %a, %b
+ %or = or i32 %xor, %and
+ ret i32 %or
}
define i32 @test43(i32 %a, i32 %b) {
-; CHECK-LABEL: test43(
-; CHECK-NEXT: %or = xor i32 %a, %b
- %neg = xor i32 %b, -1
- %and = and i32 %a, %neg
- %xor = xor i32 %a, %b
- %or = or i32 %and, %xor
- ret i32 %or
+; CHECK-LABEL: @test43(
+; CHECK-NEXT: [[OR:%.*]] = xor i32 %a, %b
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %neg = xor i32 %b, -1
+ %and = and i32 %a, %neg
+ %xor = xor i32 %a, %b
+ %or = or i32 %and, %xor
+ ret i32 %or
}
define i32 @test44(i32 %a, i32 %b) {
-; CHECK-LABEL: test44(
-; CHECK-NEXT: %or = xor i32 %a, %b
- %xor = xor i32 %a, %b
- %neg = xor i32 %b, -1
- %and = and i32 %a, %neg
- %or = or i32 %xor, %and
- ret i32 %or
+; CHECK-LABEL: @test44(
+; CHECK-NEXT: [[OR:%.*]] = xor i32 %a, %b
+; CHECK-NEXT: ret i32 [[OR]]
+;
+ %xor = xor i32 %a, %b
+ %neg = xor i32 %b, -1
+ %and = and i32 %a, %neg
+ %or = or i32 %xor, %and
+ ret i32 %or
}
define i32 @test45(i32 %x, i32 %y, i32 %z) {
-; CHECK-LABEL: test45(
-; CHECK-NEXT: %1 = and i32 %x, %z
-; CHECK-NEXT: %or1 = or i32 %1, %y
-; CHECK-NEXT: ret i32 %or1
+; CHECK-LABEL: @test45(
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %x, %z
+; CHECK-NEXT: [[OR1:%.*]] = or i32 [[TMP1]], %y
+; CHECK-NEXT: ret i32 [[OR1]]
+;
%or = or i32 %y, %z
%and = and i32 %x, %or
%or1 = or i32 %and, %y
@@ -482,37 +564,42 @@ define i32 @test45(i32 %x, i32 %y, i32 %z) {
}
define i1 @test46(i8 signext %c) {
+; CHECK-LABEL: @test46(
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 %c, -33
+; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[TMP1]], -65
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], 26
+; CHECK-NEXT: ret i1 [[TMP3]]
+;
%c.off = add i8 %c, -97
%cmp1 = icmp ult i8 %c.off, 26
%c.off17 = add i8 %c, -65
%cmp2 = icmp ult i8 %c.off17, 26
%or = or i1 %cmp1, %cmp2
ret i1 %or
-; CHECK-LABEL: @test46(
-; CHECK-NEXT: and i8 %c, -33
-; CHECK-NEXT: add i8 %1, -65
-; CHECK-NEXT: icmp ult i8 %2, 26
}
define i1 @test47(i8 signext %c) {
+; CHECK-LABEL: @test47(
+; CHECK-NEXT: [[TMP1:%.*]] = and i8 %c, -33
+; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[TMP1]], -65
+; CHECK-NEXT: [[TMP3:%.*]] = icmp ult i8 [[TMP2]], 27
+; CHECK-NEXT: ret i1 [[TMP3]]
+;
%c.off = add i8 %c, -65
%cmp1 = icmp ule i8 %c.off, 26
%c.off17 = add i8 %c, -97
%cmp2 = icmp ule i8 %c.off17, 26
%or = or i1 %cmp1, %cmp2
ret i1 %or
-; CHECK-LABEL: @test47(
-; CHECK-NEXT: and i8 %c, -33
-; CHECK-NEXT: add i8 %1, -65
-; CHECK-NEXT: icmp ult i8 %2, 27
}
define i1 @test48(i64 %x, i1 %b) {
+; CHECK-LABEL: @test48(
+; CHECK-NEXT: ret i1 true
+;
%1 = icmp ult i64 %x, 2305843009213693952
%2 = icmp ugt i64 %x, 2305843009213693951
%.b = or i1 %2, %b
%3 = or i1 %1, %.b
ret i1 %3
-; CHECK-LABEL: @test48(
-; CHECK-NEXT: ret i1 true
}
diff --git a/test/Transforms/InstCombine/phi-preserve-ir-flags.ll b/test/Transforms/InstCombine/phi-preserve-ir-flags.ll
new file mode 100644
index 000000000000..6e3ae8087cb8
--- /dev/null
+++ b/test/Transforms/InstCombine/phi-preserve-ir-flags.ll
@@ -0,0 +1,89 @@
+; RUN: opt < %s -instcombine -S -o - | FileCheck %s
+
+target datalayout = "e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n32-S64"
+
+; CHECK-LABEL: define float @func1(
+define float @func1(float %a, float %b, float %c, i1 %cond) {
+entry:
+ br i1 %cond, label %cond.true, label %cond.false
+
+cond.true:
+ %sub0 = fsub fast float %a, %b
+ br label %cond.end
+
+cond.false:
+ %sub1 = fsub fast float %a, %c
+ br label %cond.end
+
+; The fast-math flags should always be transfered if possible.
+; CHECK-LABEL: cond.end
+; CHECK [[PHI:%[^ ]*]] = phi float [ %b, %cond.true ], [ %c, %cond.false ]
+; CHECK fsub fast float %a, [[PHI]]
+cond.end:
+ %e = phi float [ %sub0, %cond.true ], [ %sub1, %cond.false ]
+ ret float %e
+}
+
+; CHECK-LABEL: define float @func2(
+define float @func2(float %a, float %b, float %c, i1 %cond) {
+entry:
+ br i1 %cond, label %cond.true, label %cond.false
+
+cond.true:
+ %sub0 = fsub fast float %a, %b
+ br label %cond.end
+
+cond.false:
+ %sub1 = fsub float %a, %c
+ br label %cond.end
+
+; The fast-math flags should always be transfered if possible.
+; CHECK-LABEL: cond.end
+; CHECK [[PHI:%[^ ]*]] = phi float [ %b, %cond.true ], [ %c, %cond.false ]
+; CHECK fsub float %a, [[PHI]]
+cond.end:
+ %e = phi float [ %sub0, %cond.true ], [ %sub1, %cond.false ]
+ ret float %e
+}
+
+; CHECK-LABEL: define float @func3(
+define float @func3(float %a, float %b, float %c, i1 %cond) {
+entry:
+ br i1 %cond, label %cond.true, label %cond.false
+
+cond.true:
+ %sub0 = fsub fast float %a, 2.0
+ br label %cond.end
+
+cond.false:
+ %sub1 = fsub fast float %b, 2.0
+ br label %cond.end
+
+; CHECK-LABEL: cond.end
+; CHECK [[PHI:%[^ ]*]] = phi float [ %a, %cond.true ], [ %b, %cond.false ]
+; CHECK fadd fast float %a, [[PHI]]
+cond.end:
+ %e = phi float [ %sub0, %cond.true ], [ %sub1, %cond.false ]
+ ret float %e
+}
+
+; CHECK-LABEL: define float @func4(
+define float @func4(float %a, float %b, float %c, i1 %cond) {
+entry:
+ br i1 %cond, label %cond.true, label %cond.false
+
+cond.true:
+ %sub0 = fsub fast float %a, 2.0
+ br label %cond.end
+
+cond.false:
+ %sub1 = fsub float %b, 2.0
+ br label %cond.end
+
+; CHECK-LABEL: cond.end
+; CHECK [[PHI:%[^ ]*]] = phi float [ %a, %cond.true ], [ %b, %cond.false ]
+; CHECK fadd float %a, [[PHI]]
+cond.end:
+ %e = phi float [ %sub0, %cond.true ], [ %sub1, %cond.false ]
+ ret float %e
+}
diff --git a/test/Transforms/InstCombine/phi.ll b/test/Transforms/InstCombine/phi.ll
index d0441d76d399..c417737fdf23 100644
--- a/test/Transforms/InstCombine/phi.ll
+++ b/test/Transforms/InstCombine/phi.ll
@@ -760,3 +760,122 @@ epilog:
; CHECK-NEXT: ret i1 %[[RES]]
}
+; CHECK-LABEL: phi_allnonzeroconstant
+; CHECK-NOT: phi i32
+; CHECK: ret i1 false
+define i1 @phi_allnonzeroconstant(i1 %c, i32 %a, i32 %b) {
+entry:
+ br i1 %c, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ br label %if.end
+
+if.else: ; preds = %entry
+ call void @dummy()
+
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ %x.0 = phi i32 [ 1, %if.then ], [ 2, %if.else ]
+ %or = or i32 %x.0, %a
+ %cmp1 = icmp eq i32 %or, 0
+ ret i1 %cmp1
+}
+
+declare void @dummy()
+
+; CHECK-LABEL: @phi_knownnonzero_eq
+; CHECK-LABEL: if.then:
+; CHECK-NOT: select
+; CHECK-LABEL: if.end:
+; CHECK: phi i32 [ 1, %if.then ]
+define i1 @phi_knownnonzero_eq(i32 %n, i32 %s, i32* nocapture readonly %P) {
+entry:
+ %tobool = icmp slt i32 %n, %s
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %0 = load i32, i32* %P
+ %cmp = icmp eq i32 %n, %0
+ %1 = select i1 %cmp, i32 1, i32 2
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ %a.0 = phi i32 [ %1, %if.then ], [ %n, %entry ]
+ %cmp1 = icmp eq i32 %a.0, 0
+ ret i1 %cmp1
+}
+
+; CHECK-LABEL: @phi_knownnonzero_ne
+; CHECK-LABEL: if.then:
+; CHECK-NOT: select
+; CHECK-LABEL: if.end:
+; CHECK: phi i32 [ 1, %if.then ]
+define i1 @phi_knownnonzero_ne(i32 %n, i32 %s, i32* nocapture readonly %P) {
+entry:
+ %tobool = icmp slt i32 %n, %s
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %0 = load i32, i32* %P
+ %cmp = icmp eq i32 %n, %0
+ %1 = select i1 %cmp, i32 1, i32 2
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ %a.0 = phi i32 [ %1, %if.then ], [ %n, %entry ]
+ %cmp1 = icmp ne i32 %a.0, 0
+ ret i1 %cmp1
+}
+
+; CHECK-LABEL: @phi_knownnonzero_eq_2
+; CHECK-LABEL: if.then:
+; CHECK-NOT: select
+; CHECK-LABEL: if.end:
+; CHECK: phi i32 [ 2, %if.else ]
+define i1 @phi_knownnonzero_eq_2(i32 %n, i32 %s, i32* nocapture readonly %P) {
+entry:
+ %tobool = icmp slt i32 %n, %s
+ br i1 %tobool, label %if.then, label %if.end
+
+if.then:
+ %tobool2 = icmp slt i32 %n, %s
+ br i1 %tobool2, label %if.else, label %if.end
+
+if.else: ; preds = %entry
+ %0 = load i32, i32* %P
+ %cmp = icmp eq i32 %n, %0
+ %1 = select i1 %cmp, i32 1, i32 2
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ %a.0 = phi i32 [ %1, %if.else], [ %n, %entry ], [2, %if.then]
+ %cmp1 = icmp eq i32 %a.0, 0
+ ret i1 %cmp1
+}
+
+; CHECK-LABEL: @phi_knownnonzero_ne_2
+; CHECK-LABEL: if.then:
+; CHECK-NOT: select
+; CHECK-LABEL: if.end:
+; CHECK: phi i32 [ 2, %if.else ]
+define i1 @phi_knownnonzero_ne_2(i32 %n, i32 %s, i32* nocapture readonly %P) {
+entry:
+ %tobool = icmp slt i32 %n, %s
+ br i1 %tobool, label %if.then, label %if.end
+
+if.then:
+ %tobool2 = icmp slt i32 %n, %s
+ br i1 %tobool2, label %if.else, label %if.end
+
+if.else: ; preds = %entry
+ %0 = load i32, i32* %P
+ %cmp = icmp eq i32 %n, %0
+ %1 = select i1 %cmp, i32 1, i32 2
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ %a.0 = phi i32 [ %1, %if.else], [ %n, %entry ], [2, %if.then]
+ %cmp1 = icmp ne i32 %a.0, 0
+ ret i1 %cmp1
+}
diff --git a/test/Transforms/InstCombine/pow-4.ll b/test/Transforms/InstCombine/pow-4.ll
index 76ef4c5de923..911ab4d94c6a 100644
--- a/test/Transforms/InstCombine/pow-4.ll
+++ b/test/Transforms/InstCombine/pow-4.ll
@@ -7,40 +7,40 @@ declare double @llvm.pow.f64(double, double)
declare float @llvm.pow.f32(float, float)
; pow(x, 4.0f)
-define float @test_simplify_4f(float %x) #0 {
+define float @test_simplify_4f(float %x) {
; CHECK-LABEL: @test_simplify_4f(
; CHECK-NOT: pow
; CHECK-NEXT: %1 = fmul float %x, %x
; CHECK-NEXT: %2 = fmul float %1, %1
; CHECK-NEXT: ret float %2
- %1 = call float @llvm.pow.f32(float %x, float 4.000000e+00)
+ %1 = call fast float @llvm.pow.f32(float %x, float 4.000000e+00)
ret float %1
}
; pow(x, 3.0)
-define double @test_simplify_3(double %x) #0 {
+define double @test_simplify_3(double %x) {
; CHECK-LABEL: @test_simplify_3(
; CHECK-NOT: pow
; CHECK-NEXT: %1 = fmul double %x, %x
; CHECK-NEXT: %2 = fmul double %1, %x
; CHECK-NEXT: ret double %2
- %1 = call double @llvm.pow.f64(double %x, double 3.000000e+00)
+ %1 = call fast double @llvm.pow.f64(double %x, double 3.000000e+00)
ret double %1
}
; pow(x, 4.0)
-define double @test_simplify_4(double %x) #0 {
+define double @test_simplify_4(double %x) {
; CHECK-LABEL: @test_simplify_4(
; CHECK-NOT: pow
; CHECK-NEXT: %1 = fmul double %x, %x
; CHECK-NEXT: %2 = fmul double %1, %1
; CHECK-NEXT: ret double %2
- %1 = call double @llvm.pow.f64(double %x, double 4.000000e+00)
+ %1 = call fast double @llvm.pow.f64(double %x, double 4.000000e+00)
ret double %1
}
; pow(x, 15.0)
-define double @test_simplify_15(double %x) #0 {
+define double @test_simplify_15(double %x) {
; CHECK-LABEL: @test_simplify_15(
; CHECK-NOT: pow
; CHECK-NEXT: %1 = fmul double %x, %x
@@ -49,12 +49,12 @@ define double @test_simplify_15(double %x) #0 {
; CHECK-NEXT: %4 = fmul double %3, %3
; CHECK-NEXT: %5 = fmul double %2, %4
; CHECK-NEXT: ret double %5
- %1 = call double @llvm.pow.f64(double %x, double 1.500000e+01)
+ %1 = call fast double @llvm.pow.f64(double %x, double 1.500000e+01)
ret double %1
}
; pow(x, -7.0)
-define double @test_simplify_neg_7(double %x) #0 {
+define double @test_simplify_neg_7(double %x) {
; CHECK-LABEL: @test_simplify_neg_7(
; CHECK-NOT: pow
; CHECK-NEXT: %1 = fmul double %x, %x
@@ -63,12 +63,12 @@ define double @test_simplify_neg_7(double %x) #0 {
; CHECK-NEXT: %4 = fmul double %1, %3
; CHECK-NEXT: %5 = fdiv double 1.000000e+00, %4
; CHECK-NEXT: ret double %5
- %1 = call double @llvm.pow.f64(double %x, double -7.000000e+00)
+ %1 = call fast double @llvm.pow.f64(double %x, double -7.000000e+00)
ret double %1
}
; pow(x, -19.0)
-define double @test_simplify_neg_19(double %x) #0 {
+define double @test_simplify_neg_19(double %x) {
; CHECK-LABEL: @test_simplify_neg_19(
; CHECK-NOT: pow
; CHECK-NEXT: %1 = fmul double %x, %x
@@ -79,22 +79,22 @@ define double @test_simplify_neg_19(double %x) #0 {
; CHECK-NEXT: %6 = fmul double %5, %x
; CHECK-NEXT: %7 = fdiv double 1.000000e+00, %6
; CHECK-NEXT: ret double %7
- %1 = call double @llvm.pow.f64(double %x, double -1.900000e+01)
+ %1 = call fast double @llvm.pow.f64(double %x, double -1.900000e+01)
ret double %1
}
; pow(x, 11.23)
-define double @test_simplify_11_23(double %x) #0 {
+define double @test_simplify_11_23(double %x) {
; CHECK-LABEL: @test_simplify_11_23(
; CHECK-NOT: fmul
-; CHECK-NEXT: %1 = call double @llvm.pow.f64(double %x, double 1.123000e+01)
+; CHECK-NEXT: %1 = call fast double @llvm.pow.f64(double %x, double 1.123000e+01)
; CHECK-NEXT: ret double %1
- %1 = call double @llvm.pow.f64(double %x, double 1.123000e+01)
+ %1 = call fast double @llvm.pow.f64(double %x, double 1.123000e+01)
ret double %1
}
; pow(x, 32.0)
-define double @test_simplify_32(double %x) #0 {
+define double @test_simplify_32(double %x) {
; CHECK-LABEL: @test_simplify_32(
; CHECK-NOT: pow
; CHECK-NEXT: %1 = fmul double %x, %x
@@ -103,18 +103,17 @@ define double @test_simplify_32(double %x) #0 {
; CHECK-NEXT: %4 = fmul double %3, %3
; CHECK-NEXT: %5 = fmul double %4, %4
; CHECK-NEXT: ret double %5
- %1 = call double @llvm.pow.f64(double %x, double 3.200000e+01)
+ %1 = call fast double @llvm.pow.f64(double %x, double 3.200000e+01)
ret double %1
}
; pow(x, 33.0)
-define double @test_simplify_33(double %x) #0 {
+define double @test_simplify_33(double %x) {
; CHECK-LABEL: @test_simplify_33(
; CHECK-NOT: fmul
-; CHECK-NEXT: %1 = call double @llvm.pow.f64(double %x, double 3.300000e+01)
+; CHECK-NEXT: %1 = call fast double @llvm.pow.f64(double %x, double 3.300000e+01)
; CHECK-NEXT: ret double %1
- %1 = call double @llvm.pow.f64(double %x, double 3.300000e+01)
+ %1 = call fast double @llvm.pow.f64(double %x, double 3.300000e+01)
ret double %1
}
-attributes #0 = { nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+neon" "unsafe-fp-math"="true" "use-soft-float"="false" }
diff --git a/test/Transforms/InstCombine/pr20678.ll b/test/Transforms/InstCombine/pr20678.ll
new file mode 100644
index 000000000000..4b5fac79449b
--- /dev/null
+++ b/test/Transforms/InstCombine/pr20678.ll
@@ -0,0 +1,8 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+define i1 @test1() {
+entry:
+ ret i1 icmp ne (i16 bitcast (<16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false> to i16), i16 0)
+}
+; CHECK-LABEL: define i1 @test1(
+; CHECK: ret i1 true
diff --git a/test/Transforms/InstCombine/pr21210.ll b/test/Transforms/InstCombine/pr21210.ll
index 1db87949dda0..ac229a89ca50 100644
--- a/test/Transforms/InstCombine/pr21210.ll
+++ b/test/Transforms/InstCombine/pr21210.ll
@@ -31,9 +31,10 @@ entry:
%cmp = icmp ult i32 %len, 4
br i1 %cmp, label %bb, label %b1
bb:
- %cond = select i1 %cmp, i32 %len, i32 8
- %cmp11 = icmp eq i32 %cond, 8
- br i1 %cmp11, label %b0, label %b1
+ %cmp2 = icmp ult i32 %0, 2
+ %cond = select i1 %cmp2, i32 %len, i32 8
+ %cmp3 = icmp eq i32 %cond, 8
+ br i1 %cmp3, label %b0, label %b1
b0:
call void @foo(i32 %len)
diff --git a/test/Transforms/InstCombine/pr21651.ll b/test/Transforms/InstCombine/pr21651.ll
index d2b8d312d8c6..bc8fe6177262 100644
--- a/test/Transforms/InstCombine/pr21651.ll
+++ b/test/Transforms/InstCombine/pr21651.ll
@@ -1,24 +1,24 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
; Provide legal integer types.
target datalayout = "n8:16:32:64"
-
define void @PR21651() {
+; CHECK-LABEL: @PR21651(
+; CHECK-NEXT: switch i1 false, label %out [
+; CHECK-NEXT: i1 false, label %out
+; CHECK-NEXT: i1 true, label %out
+; CHECK-NEXT: ]
+; CHECK: out:
+; CHECK-NEXT: ret void
+;
switch i2 0, label %out [
- i2 0, label %out
- i2 1, label %out
+ i2 0, label %out
+ i2 1, label %out
]
out:
ret void
}
-; CHECK-LABEL: define void @PR21651(
-; CHECK: switch i2 0, label %out [
-; CHECK: i2 0, label %out
-; CHECK: i2 1, label %out
-; CHECK: ]
-; CHECK: out: ; preds = %0, %0, %0
-; CHECK: ret void
-; CHECK: }
diff --git a/test/Transforms/InstCombine/pr26992.ll b/test/Transforms/InstCombine/pr26992.ll
new file mode 100644
index 000000000000..e5bfb5c0e40a
--- /dev/null
+++ b/test/Transforms/InstCombine/pr26992.ll
@@ -0,0 +1,37 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+target triple = "x86_64-pc-windows-msvc"
+
+define i1 @test1(i8* %p) personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ %a = getelementptr i8, i8* %p, i64 1
+ invoke void @may_throw()
+ to label %invoke.cont unwind label %catch.dispatch
+
+invoke.cont:
+ %b = getelementptr inbounds i8, i8* %a, i64 1
+ invoke void @may_throw()
+ to label %exit unwind label %catch.dispatch
+
+catch.dispatch:
+ %c = phi i8* [ %b, %invoke.cont ], [ %a, %entry ]
+ %tmp1 = catchswitch within none [label %catch] unwind to caller
+
+catch:
+ %tmp2 = catchpad within %tmp1 [i8* null, i32 64, i8* null]
+ catchret from %tmp2 to label %exit
+
+exit:
+ %d = phi i8* [ %a, %invoke.cont ], [ %c, %catch ]
+ %cmp = icmp eq i8* %d, %a
+ ret i1 %cmp
+}
+
+; CHECK-LABEL: define i1 @test1(
+; CHECK: %[[gep_a:.*]] = getelementptr i8, i8* %p, i64 1
+; CHECK: %[[gep_b:.*]] = getelementptr inbounds i8, i8* %p, i64 2
+; CHECK: phi i8* [ %[[gep_b]], {{.*}} ], [ %[[gep_a]], {{.*}} ]
+; CHECK: %tmp1 = catchswitch within none [label %catch] unwind to caller
+
+declare void @may_throw()
+
+declare i32 @__CxxFrameHandler3(...)
diff --git a/test/Transforms/InstCombine/pr26993.ll b/test/Transforms/InstCombine/pr26993.ll
new file mode 100644
index 000000000000..14b33d10cc3a
--- /dev/null
+++ b/test/Transforms/InstCombine/pr26993.ll
@@ -0,0 +1,24 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+
+define double @test1() {
+ %sin = call double @__sinpi(double 1.0)
+ ret double %sin
+}
+
+; CHECK-LABEL: define double @test1(
+; CHECK: %[[sin:.*]] = call double @__sinpi(double 1.000000e+00)
+; CHECK-NEXT: ret double %[[sin]]
+
+define double @test2() {
+ %cos = call double @__cospi(double 1.0)
+ ret double %cos
+}
+
+; CHECK-LABEL: define double @test2(
+; CHECK: %[[cos:.*]] = call double @__cospi(double 1.000000e+00)
+; CHECK-NEXT: ret double %[[cos]]
+
+declare double @__sinpi(double %x) #0
+declare double @__cospi(double %x) #0
+
+attributes #0 = { readnone nounwind }
diff --git a/test/Transforms/InstCombine/pr27236.ll b/test/Transforms/InstCombine/pr27236.ll
new file mode 100644
index 000000000000..0b086cd7b8ca
--- /dev/null
+++ b/test/Transforms/InstCombine/pr27236.ll
@@ -0,0 +1,19 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+define float @test1(i32 %scale) {
+entry:
+ %tmp1 = icmp sgt i32 1, %scale
+ %tmp2 = select i1 %tmp1, i32 1, i32 %scale
+ %tmp3 = sitofp i32 %tmp2 to float
+ %tmp4 = icmp sgt i32 %tmp2, 0
+ %sel = select i1 %tmp4, float %tmp3, float 0.000000e+00
+ ret float %sel
+}
+
+; CHECK-LABEL: define float @test1(
+; CHECK: %[[tmp1:.*]] = icmp slt i32 %scale, 1
+; CHECK: %[[tmp2:.*]] = select i1 %[[tmp1]], i32 1, i32 %scale
+; CHECK: %[[tmp3:.*]] = sitofp i32 %[[tmp2]] to float
+; CHECK: %[[tmp4:.*]] = icmp sgt i32 %[[tmp2]], 0
+; CHECK: %[[sel:.*]] = select i1 %[[tmp4]], float %[[tmp3]], float 0.000000e+00
+; CHECK: ret float %[[sel]]
diff --git a/test/Transforms/InstCombine/pr27332.ll b/test/Transforms/InstCombine/pr27332.ll
new file mode 100644
index 000000000000..87e440eed1cf
--- /dev/null
+++ b/test/Transforms/InstCombine/pr27332.ll
@@ -0,0 +1,23 @@
+; RUN: opt -instcombine -S -o - < %s | FileCheck %s
+declare <4 x float> @llvm.fabs.v4f32(<4 x float>)
+
+define <4 x i1> @test1(<4 x float> %V) {
+entry:
+ %abs = call <4 x float> @llvm.fabs.v4f32(<4 x float> %V)
+ %cmp = fcmp olt <4 x float> %abs, zeroinitializer
+ ret <4 x i1> %cmp
+}
+; CHECK-LABEL: define <4 x i1> @test1(
+; CHECK: ret <4 x i1> zeroinitializer
+
+declare float @fabsf()
+
+define i1 @test2() {
+ %call = call float @fabsf()
+ %cmp = fcmp olt float %call, 0.000000e+00
+ ret i1 %cmp
+}
+; CHECK-LABEL: define i1 @test2(
+; CHECK: %[[call:.*]] = call float @fabsf()
+; CHECK: %[[cmp:.*]] = fcmp olt float %[[call]], 0.000000e+00
+; CHECK: ret i1 %[[cmp]]
diff --git a/test/Transforms/InstCombine/pr28143.ll b/test/Transforms/InstCombine/pr28143.ll
new file mode 100644
index 000000000000..9ef273e5ed49
--- /dev/null
+++ b/test/Transforms/InstCombine/pr28143.ll
@@ -0,0 +1,12 @@
+; RUN: opt -S -instcombine < %s | FileCheck %s
+
+define void @test1() {
+entry:
+ call void @tan()
+ ret void
+}
+; CHECK-LABEL: define void @test1(
+; CHECK: call void @tan()
+; CHECK-NEXT: ret void
+
+declare void @tan()
diff --git a/test/Transforms/InstCombine/pr8547.ll b/test/Transforms/InstCombine/pr8547.ll
deleted file mode 100644
index 6d74b4002c11..000000000000
--- a/test/Transforms/InstCombine/pr8547.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
-; Converting the 2 shifts to SHL 6 without the AND is wrong. PR 8547.
-
-@g_2 = global i32 0, align 4
-@.str = constant [10 x i8] c"g_2 = %d\0A\00"
-
-declare i32 @printf(i8*, ...)
-
-define i32 @main() nounwind {
-codeRepl:
- br label %for.cond
-
-for.cond: ; preds = %for.cond, %codeRepl
- %storemerge = phi i32 [ 0, %codeRepl ], [ 5, %for.cond ]
- store i32 %storemerge, i32* @g_2, align 4
- %shl = shl i32 %storemerge, 30
- %conv2 = lshr i32 %shl, 24
-; CHECK: %0 = shl nuw nsw i32 %storemerge, 6
-; CHECK: %conv2 = and i32 %0, 64
- %tobool = icmp eq i32 %conv2, 0
- br i1 %tobool, label %for.cond, label %codeRepl2
-
-codeRepl2: ; preds = %for.cond
- %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([10 x i8], [10 x i8]* @.str, i64 0, i64 0), i32 %conv2) nounwind
- ret i32 0
-}
diff --git a/test/Transforms/InstCombine/prevent-cmp-merge.ll b/test/Transforms/InstCombine/prevent-cmp-merge.ll
new file mode 100644
index 000000000000..ab37c7d56232
--- /dev/null
+++ b/test/Transforms/InstCombine/prevent-cmp-merge.ll
@@ -0,0 +1,41 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+;
+; This test makes sure that InstCombine does not replace the sequence of
+; xor/sub instruction followed by cmp instruction into a single cmp instruction
+; if there is more than one use of xor/sub.
+
+define zeroext i1 @test1(i32 %lhs, i32 %rhs) {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: %xor = xor i32 %lhs, 5
+; CHECK-NEXT: %cmp1 = icmp eq i32 %xor, 10
+
+ %xor = xor i32 %lhs, 5
+ %cmp1 = icmp eq i32 %xor, 10
+ %cmp2 = icmp eq i32 %xor, %rhs
+ %sel = or i1 %cmp1, %cmp2
+ ret i1 %sel
+}
+
+define zeroext i1 @test2(i32 %lhs, i32 %rhs) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: %xor = xor i32 %lhs, %rhs
+; CHECK-NEXT: %cmp1 = icmp eq i32 %xor, 0
+
+ %xor = xor i32 %lhs, %rhs
+ %cmp1 = icmp eq i32 %xor, 0
+ %cmp2 = icmp eq i32 %xor, 32
+ %sel = xor i1 %cmp1, %cmp2
+ ret i1 %sel
+}
+
+define zeroext i1 @test3(i32 %lhs, i32 %rhs) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: %sub = sub nsw i32 %lhs, %rhs
+; CHECK-NEXT: %cmp1 = icmp eq i32 %sub, 0
+
+ %sub = sub nsw i32 %lhs, %rhs
+ %cmp1 = icmp eq i32 %sub, 0
+ %cmp2 = icmp eq i32 %sub, 31
+ %sel = or i1 %cmp1, %cmp2
+ ret i1 %sel
+}
diff --git a/test/Transforms/InstCombine/printf-1.ll b/test/Transforms/InstCombine/printf-1.ll
index 75e11ce7b7b4..2a513d0de248 100644
--- a/test/Transforms/InstCombine/printf-1.ll
+++ b/test/Transforms/InstCombine/printf-1.ll
@@ -7,6 +7,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
@hello_world = constant [13 x i8] c"hello world\0A\00"
@h = constant [2 x i8] c"h\00"
+@h2 = constant [3 x i8] c"%%\00"
@percent = constant [2 x i8] c"%\00"
@percent_c = constant [3 x i8] c"%c\00"
@percent_d = constant [3 x i8] c"%d\00"
@@ -38,6 +39,17 @@ define void @test_simplify2() {
; CHECK-NEXT: ret void
}
+; Special case: printf("%%") -> putchar('%').
+
+define void @test_simplify2b() {
+; CHECK-LABEL: @test_simplify2b(
+ %fmt = getelementptr [3 x i8], [3 x i8]* @h2, i32 0, i32 0
+ call i32 (i8*, ...) @printf(i8* %fmt)
+; CHECK-NEXT: call i32 @putchar(i32 37)
+ ret void
+; CHECK-NEXT: ret void
+}
+
define void @test_simplify3() {
; CHECK-LABEL: @test_simplify3(
%fmt = getelementptr [2 x i8], [2 x i8]* @percent, i32 0, i32 0
diff --git a/test/Transforms/InstCombine/printf-2.ll b/test/Transforms/InstCombine/printf-2.ll
index d6769856e3d8..fbd5b1bb96c4 100644
--- a/test/Transforms/InstCombine/printf-2.ll
+++ b/test/Transforms/InstCombine/printf-2.ll
@@ -7,6 +7,8 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
@hello_world = constant [13 x i8] c"hello world\0A\00"
@h = constant [2 x i8] c"h\00"
@percent_s = constant [4 x i8] c"%s\0A\00"
+@format_str = constant [3 x i8] c"%s\00"
+@charstr = constant [2 x i8] c"a\00"
declare void @printf(i8*, ...)
@@ -39,3 +41,13 @@ define void @test_simplify6() {
ret void
; CHECK-NEXT: ret void
}
+
+define void @test_simplify7() {
+; CHECK-LABEL: @test_simplify7(
+ %fmt = getelementptr [3 x i8], [3 x i8]* @format_str, i32 0, i32 0
+ %str = getelementptr [2 x i8], [2 x i8]* @charstr, i32 0, i32 0
+ call void (i8*, ...) @printf(i8* %fmt, i8* %str)
+; CHECK-NEXT: call i32 @putchar(i32 97)
+ ret void
+; CHECK-NEXT: ret void
+}
diff --git a/test/Transforms/InstCombine/r600-intrinsics.ll b/test/Transforms/InstCombine/r600-intrinsics.ll
deleted file mode 100644
index 1db6b0d28bf5..000000000000
--- a/test/Transforms/InstCombine/r600-intrinsics.ll
+++ /dev/null
@@ -1,47 +0,0 @@
-; RUN: opt -instcombine -S < %s | FileCheck %s
-
-declare float @llvm.AMDGPU.rcp.f32(float) nounwind readnone
-declare double @llvm.AMDGPU.rcp.f64(double) nounwind readnone
-
-; CHECK-LABEL: @test_constant_fold_rcp_f32_1
-; CHECK-NEXT: ret float 1.000000e+00
-define float @test_constant_fold_rcp_f32_1() nounwind {
- %val = call float @llvm.AMDGPU.rcp.f32(float 1.0) nounwind readnone
- ret float %val
-}
-
-; CHECK-LABEL: @test_constant_fold_rcp_f64_1
-; CHECK-NEXT: ret double 1.000000e+00
-define double @test_constant_fold_rcp_f64_1() nounwind {
- %val = call double @llvm.AMDGPU.rcp.f64(double 1.0) nounwind readnone
- ret double %val
-}
-
-; CHECK-LABEL: @test_constant_fold_rcp_f32_half
-; CHECK-NEXT: ret float 2.000000e+00
-define float @test_constant_fold_rcp_f32_half() nounwind {
- %val = call float @llvm.AMDGPU.rcp.f32(float 0.5) nounwind readnone
- ret float %val
-}
-
-; CHECK-LABEL: @test_constant_fold_rcp_f64_half
-; CHECK-NEXT: ret double 2.000000e+00
-define double @test_constant_fold_rcp_f64_half() nounwind {
- %val = call double @llvm.AMDGPU.rcp.f64(double 0.5) nounwind readnone
- ret double %val
-}
-
-; CHECK-LABEL: @test_constant_fold_rcp_f32_43
-; CHECK-NEXT: call float @llvm.AMDGPU.rcp.f32(float 4.300000e+01)
-define float @test_constant_fold_rcp_f32_43() nounwind {
- %val = call float @llvm.AMDGPU.rcp.f32(float 4.300000e+01) nounwind readnone
- ret float %val
-}
-
-; CHECK-LABEL: @test_constant_fold_rcp_f64_43
-; CHECK-NEXT: call double @llvm.AMDGPU.rcp.f64(double 4.300000e+01)
-define double @test_constant_fold_rcp_f64_43() nounwind {
- %val = call double @llvm.AMDGPU.rcp.f64(double 4.300000e+01) nounwind readnone
- ret double %val
-}
-
diff --git a/test/Transforms/InstCombine/rem.ll b/test/Transforms/InstCombine/rem.ll
index 0595a67393a6..ae331eed1a58 100644
--- a/test/Transforms/InstCombine/rem.ll
+++ b/test/Transforms/InstCombine/rem.ll
@@ -1,4 +1,4 @@
-; This test makes sure that urem instructions are properly eliminated.
+; This test makes sure that rem instructions are properly eliminated.
;
; RUN: opt < %s -instcombine -S | FileCheck %s
; END.
@@ -25,6 +25,24 @@ define i32 @test3(i32 %A) {
ret i32 %B
}
+define <2 x i32> @vec_power_of_2_constant_splat_divisor(<2 x i32> %A) {
+; CHECK-LABEL: @vec_power_of_2_constant_splat_divisor(
+; CHECK-NEXT: [[B:%.*]] = and <2 x i32> %A, <i32 7, i32 7>
+; CHECK-NEXT: ret <2 x i32> [[B]]
+;
+ %B = urem <2 x i32> %A, <i32 8, i32 8>
+ ret <2 x i32> %B
+}
+
+define <2 x i19> @weird_vec_power_of_2_constant_splat_divisor(<2 x i19> %A) {
+; CHECK-LABEL: @weird_vec_power_of_2_constant_splat_divisor(
+; CHECK-NEXT: [[B:%.*]] = and <2 x i19> %A, <i19 7, i19 7>
+; CHECK-NEXT: ret <2 x i19> [[B]]
+;
+ %B = urem <2 x i19> %A, <i19 8, i19 8>
+ ret <2 x i19> %B
+}
+
define i1 @test3a(i32 %A) {
; CHECK-LABEL: @test3a(
; CHECK-NEXT: [[AND:%.*]] = and i32 %A, 7
@@ -213,3 +231,142 @@ define <2 x i64> @test20(<2 x i64> %X, <2 x i1> %C) {
%R = urem <2 x i64> %V, <i64 2, i64 3>
ret <2 x i64> %R
}
+
+define i32 @test21(i1 %c0, i32* %val) {
+; CHECK-LABEL: @test21(
+entry:
+ br i1 %c0, label %if.then, label %if.end
+
+if.then:
+; CHECK: if.then:
+; CHECK-NEXT: %v = load volatile i32, i32* %val, align 4
+; CHECK-NEXT: %phitmp = srem i32 %v, 5
+
+ %v = load volatile i32, i32* %val
+ br label %if.end
+
+if.end:
+; CHECK: if.end:
+; CHECK-NEXT: %lhs = phi i32 [ %phitmp, %if.then ], [ 0, %entry ]
+; CHECK-NEXT: ret i32 %lhs
+
+ %lhs = phi i32 [ %v, %if.then ], [ 5, %entry ]
+ %rem = srem i32 %lhs, 5
+ ret i32 %rem
+}
+
+@a = common global [5 x i16] zeroinitializer, align 2
+@b = common global i16 0, align 2
+
+define i32 @pr27968_0(i1 %c0, i32* %val) {
+; CHECK-LABEL: @pr27968_0(
+entry:
+ br i1 %c0, label %if.then, label %if.end
+
+if.then:
+ %v = load volatile i32, i32* %val
+ br label %if.end
+
+; CHECK: if.then:
+; CHECK-NOT: srem
+; CHECK: br label %if.end
+
+if.end:
+ %lhs = phi i32 [ %v, %if.then ], [ 5, %entry ]
+ br i1 icmp eq (i16* getelementptr inbounds ([5 x i16], [5 x i16]* @a, i64 0, i64 4), i16* @b), label %rem.is.safe, label %rem.is.unsafe
+
+rem.is.safe:
+; CHECK: rem.is.safe:
+; CHECK-NEXT: %rem = srem i32 %lhs, zext (i1 icmp eq (i16* getelementptr inbounds ([5 x i16], [5 x i16]* @a, i64 0, i64 4), i16* @b) to i32)
+; CHECK-NEXT: ret i32 %rem
+
+ %rem = srem i32 %lhs, zext (i1 icmp eq (i16* getelementptr inbounds ([5 x i16], [5 x i16]* @a, i64 0, i64 4), i16* @b) to i32)
+ ret i32 %rem
+
+rem.is.unsafe:
+ ret i32 0
+}
+
+define i32 @pr27968_1(i1 %c0, i1 %always_false, i32* %val) {
+; CHECK-LABEL: @pr27968_1(
+entry:
+ br i1 %c0, label %if.then, label %if.end
+
+if.then:
+ %v = load volatile i32, i32* %val
+ br label %if.end
+
+; CHECK: if.then:
+; CHECK-NOT: srem
+; CHECK: br label %if.end
+
+if.end:
+ %lhs = phi i32 [ %v, %if.then ], [ 5, %entry ]
+ br i1 %always_false, label %rem.is.safe, label %rem.is.unsafe
+
+rem.is.safe:
+ %rem = srem i32 %lhs, -2147483648
+ ret i32 %rem
+
+; CHECK: rem.is.safe:
+; CHECK-NEXT: %rem = srem i32 %lhs, -2147483648
+; CHECK-NEXT: ret i32 %rem
+
+rem.is.unsafe:
+ ret i32 0
+}
+
+define i32 @pr27968_2(i1 %c0, i32* %val) {
+; CHECK-LABEL: @pr27968_2(
+entry:
+ br i1 %c0, label %if.then, label %if.end
+
+if.then:
+ %v = load volatile i32, i32* %val
+ br label %if.end
+
+; CHECK: if.then:
+; CHECK-NOT: urem
+; CHECK: br label %if.end
+
+if.end:
+ %lhs = phi i32 [ %v, %if.then ], [ 5, %entry ]
+ br i1 icmp eq (i16* getelementptr inbounds ([5 x i16], [5 x i16]* @a, i64 0, i64 4), i16* @b), label %rem.is.safe, label %rem.is.unsafe
+
+rem.is.safe:
+; CHECK: rem.is.safe:
+; CHECK-NEXT: %rem = urem i32 %lhs, zext (i1 icmp eq (i16* getelementptr inbounds ([5 x i16], [5 x i16]* @a, i64 0, i64 4), i16* @b) to i32)
+; CHECK-NEXT: ret i32 %rem
+
+ %rem = urem i32 %lhs, zext (i1 icmp eq (i16* getelementptr inbounds ([5 x i16], [5 x i16]* @a, i64 0, i64 4), i16* @b) to i32)
+ ret i32 %rem
+
+rem.is.unsafe:
+ ret i32 0
+}
+
+define i32 @pr27968_3(i1 %c0, i1 %always_false, i32* %val) {
+; CHECK-LABEL: @pr27968_3(
+entry:
+ br i1 %c0, label %if.then, label %if.end
+
+if.then:
+ %v = load volatile i32, i32* %val
+ br label %if.end
+
+; CHECK: if.then:
+; CHECK-NEXT: %v = load volatile i32, i32* %val, align 4
+; CHECK-NEXT: %phitmp = and i32 %v, 2147483647
+; CHECK-NEXT: br label %if.end
+
+if.end:
+ %lhs = phi i32 [ %v, %if.then ], [ 5, %entry ]
+ br i1 %always_false, label %rem.is.safe, label %rem.is.unsafe
+
+rem.is.safe:
+ %rem = urem i32 %lhs, -2147483648
+ ret i32 %rem
+
+rem.is.unsafe:
+ ret i32 0
+}
diff --git a/test/Transforms/InstCombine/select-implied.ll b/test/Transforms/InstCombine/select-implied.ll
new file mode 100644
index 000000000000..2100e3eae008
--- /dev/null
+++ b/test/Transforms/InstCombine/select-implied.ll
@@ -0,0 +1,123 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; A == B implies A >u B is false.
+; CHECK-LABEL: @test1
+; CHECK-NOT: select
+; CHECK: call void @foo(i32 10)
+define void @test1(i32 %a, i32 %b) {
+ %cmp1 = icmp eq i32 %a, %b
+ br i1 %cmp1, label %taken, label %end
+
+taken:
+ %cmp2 = icmp ugt i32 %a, %b
+ %c = select i1 %cmp2, i32 0, i32 10
+ call void @foo(i32 %c)
+ br label %end
+
+end:
+ ret void
+}
+
+; If A == B is false then A != B is true.
+; CHECK-LABEL: @test2
+; CHECK-NOT: select
+; CHECK: call void @foo(i32 20)
+define void @test2(i32 %a, i32 %b) {
+ %cmp1 = icmp eq i32 %a, %b
+ br i1 %cmp1, label %end, label %taken
+
+taken:
+ %cmp2 = icmp ne i32 %a, %b
+ %c = select i1 %cmp2, i32 20, i32 0
+ call void @foo(i32 %c)
+ br label %end
+
+end:
+ ret void
+}
+
+; A >u 10 implies A >u 10 is true.
+; CHECK-LABEL: @test3
+; CHECK-NOT: select
+; CHECK: call void @foo(i32 30)
+define void @test3(i32 %a) {
+ %cmp1 = icmp ugt i32 %a, 10
+ br i1 %cmp1, label %taken, label %end
+
+taken:
+ %cmp2 = icmp ugt i32 %a, 10
+ %c = select i1 %cmp2, i32 30, i32 0
+ call void @foo(i32 %c)
+ br label %end
+
+end:
+ ret void
+}
+
+; CHECK-LABEL: @PR23333
+; CHECK-NOT: select
+; CHECK: ret i8 1
+define i8 @PR23333(i8 addrspace(1)* %ptr) {
+ %cmp = icmp eq i8 addrspace(1)* %ptr, null
+ br i1 %cmp, label %taken, label %end
+
+taken:
+ %cmp2 = icmp ne i8 addrspace(1)* %ptr, null
+ %res = select i1 %cmp2, i8 2, i8 1
+ ret i8 %res
+
+end:
+ ret i8 0
+}
+
+; We know the condition of the select is true based on a dominating condition.
+; Therefore, we can replace %cond with %len. However, now the inner icmp is
+; always false and can be elided.
+; CHECK-LABEL: @test4
+; CHECK-NOT: select
+define void @test4(i32 %len) {
+entry:
+ %0 = call i32 @bar(i32 %len);
+ %cmp = icmp ult i32 %len, 4
+ br i1 %cmp, label %bb, label %b1
+bb:
+ %cond = select i1 %cmp, i32 %len, i32 8
+; CHECK-NOT: %cmp11 = icmp eq i32 %{{.*}}, 8
+ %cmp11 = icmp eq i32 %cond, 8
+; CHECK: br i1 false, label %b0, label %b1
+ br i1 %cmp11, label %b0, label %b1
+
+b0:
+ call void @foo(i32 %len)
+ br label %b1
+
+b1:
+; CHECK: phi i32 [ %len, %bb ], [ undef, %b0 ], [ %0, %entry ]
+ %1 = phi i32 [ %cond, %bb ], [ undef, %b0 ], [ %0, %entry ]
+ br label %ret
+
+ret:
+ call void @foo(i32 %1)
+ ret void
+}
+
+; A >u 10 implies A >u 9 is true.
+; CHECK-LABEL: @test5
+; CHECK-NOT: select
+; CHECK: call void @foo(i32 30)
+define void @test5(i32 %a) {
+ %cmp1 = icmp ugt i32 %a, 10
+ br i1 %cmp1, label %taken, label %end
+
+taken:
+ %cmp2 = icmp ugt i32 %a, 9
+ %c = select i1 %cmp2, i32 30, i32 0
+ call void @foo(i32 %c)
+ br label %end
+
+end:
+ ret void
+}
+
+declare void @foo(i32)
+declare i32 @bar(i32)
diff --git a/test/Transforms/InstCombine/select.ll b/test/Transforms/InstCombine/select.ll
index fdf1199a66fb..e0e7bfccff93 100644
--- a/test/Transforms/InstCombine/select.ll
+++ b/test/Transforms/InstCombine/select.ll
@@ -1,18 +1,19 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
-; This test makes sure that these instructions are properly eliminated.
+
; PR1822
target datalayout = "e-p:64:64-p1:16:16-p2:32:32:32-p3:64:64:64"
define i32 @test1(i32 %A, i32 %B) {
- %C = select i1 false, i32 %A, i32 %B
+ %C = select i1 false, i32 %A, i32 %B
ret i32 %C
; CHECK-LABEL: @test1(
; CHECK: ret i32 %B
}
define i32 @test2(i32 %A, i32 %B) {
- %C = select i1 true, i32 %A, i32 %B
+ %C = select i1 true, i32 %A, i32 %B
ret i32 %C
; CHECK-LABEL: @test2(
; CHECK: ret i32 %A
@@ -21,7 +22,7 @@ define i32 @test2(i32 %A, i32 %B) {
define i32 @test3(i1 %C, i32 %I) {
; V = I
- %V = select i1 %C, i32 %I, i32 %I
+ %V = select i1 %C, i32 %I, i32 %I
ret i32 %V
; CHECK-LABEL: @test3(
; CHECK: ret i32 %I
@@ -29,7 +30,7 @@ define i32 @test3(i1 %C, i32 %I) {
define i1 @test4(i1 %C) {
; V = C
- %V = select i1 %C, i1 true, i1 false
+ %V = select i1 %C, i1 true, i1 false
ret i1 %V
; CHECK-LABEL: @test4(
; CHECK: ret i1 %C
@@ -37,16 +38,16 @@ define i1 @test4(i1 %C) {
define i1 @test5(i1 %C) {
; V = !C
- %V = select i1 %C, i1 false, i1 true
+ %V = select i1 %C, i1 false, i1 true
ret i1 %V
; CHECK-LABEL: @test5(
; CHECK: xor i1 %C, true
; CHECK: ret i1
}
-define i32 @test6(i1 %C) {
+define i32 @test6(i1 %C) {
; V = cast C to int
- %V = select i1 %C, i32 1, i32 0
+ %V = select i1 %C, i32 1, i32 0
ret i32 %V
; CHECK-LABEL: @test6(
; CHECK: %V = zext i1 %C to i32
@@ -54,46 +55,164 @@ define i32 @test6(i1 %C) {
}
define i1 @test7(i1 %C, i1 %X) {
- ; R = or C, X
- %R = select i1 %C, i1 true, i1 %X
- ret i1 %R
; CHECK-LABEL: @test7(
-; CHECK: %R = or i1 %C, %X
-; CHECK: ret i1 %R
+; CHECK-NEXT: [[R:%.*]] = or i1 %C, %X
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %R = select i1 %C, i1 true, i1 %X
+ ret i1 %R
+}
+
+define <2 x i1> @test7vec(<2 x i1> %C, <2 x i1> %X) {
+; CHECK-LABEL: @test7vec(
+; CHECK-NEXT: [[R:%.*]] = or <2 x i1> %C, %X
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %R = select <2 x i1> %C, <2 x i1> <i1 true, i1 true>, <2 x i1> %X
+ ret <2 x i1> %R
}
define i1 @test8(i1 %C, i1 %X) {
- ; R = and C, X
- %R = select i1 %C, i1 %X, i1 false
- ret i1 %R
; CHECK-LABEL: @test8(
-; CHECK: %R = and i1 %C, %X
-; CHECK: ret i1 %R
+; CHECK-NEXT: [[R:%.*]] = and i1 %C, %X
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %R = select i1 %C, i1 %X, i1 false
+ ret i1 %R
+}
+
+define <2 x i1> @test8vec(<2 x i1> %C, <2 x i1> %X) {
+; CHECK-LABEL: @test8vec(
+; CHECK-NEXT: [[R:%.*]] = and <2 x i1> %C, %X
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %R = select <2 x i1> %C, <2 x i1> %X, <2 x i1> <i1 false, i1 false>
+ ret <2 x i1> %R
}
define i1 @test9(i1 %C, i1 %X) {
- ; R = and !C, X
- %R = select i1 %C, i1 false, i1 %X
- ret i1 %R
; CHECK-LABEL: @test9(
-; CHECK: xor i1 %C, true
-; CHECK: %R = and i1
-; CHECK: ret i1 %R
+; CHECK-NEXT: [[NOT_C:%.*]] = xor i1 %C, true
+; CHECK-NEXT: [[R:%.*]] = and i1 [[NOT_C]], %X
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %R = select i1 %C, i1 false, i1 %X
+ ret i1 %R
+}
+
+define <2 x i1> @test9vec(<2 x i1> %C, <2 x i1> %X) {
+; CHECK-LABEL: @test9vec(
+; CHECK-NEXT: [[NOT_C:%.*]] = xor <2 x i1> %C, <i1 true, i1 true>
+; CHECK-NEXT: [[R:%.*]] = and <2 x i1> [[NOT_C]], %X
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %R = select <2 x i1> %C, <2 x i1> <i1 false, i1 false>, <2 x i1> %X
+ ret <2 x i1> %R
}
define i1 @test10(i1 %C, i1 %X) {
- ; R = or !C, X
- %R = select i1 %C, i1 %X, i1 true
- ret i1 %R
; CHECK-LABEL: @test10(
-; CHECK: xor i1 %C, true
-; CHECK: %R = or i1
-; CHECK: ret i1 %R
+; CHECK-NEXT: [[NOT_C:%.*]] = xor i1 %C, true
+; CHECK-NEXT: [[R:%.*]] = or i1 [[NOT_C]], %X
+; CHECK-NEXT: ret i1 [[R]]
+;
+ %R = select i1 %C, i1 %X, i1 true
+ ret i1 %R
+}
+
+define <2 x i1> @test10vec(<2 x i1> %C, <2 x i1> %X) {
+; CHECK-LABEL: @test10vec(
+; CHECK-NEXT: [[NOT_C:%.*]] = xor <2 x i1> %C, <i1 true, i1 true>
+; CHECK-NEXT: [[R:%.*]] = or <2 x i1> [[NOT_C]], %X
+; CHECK-NEXT: ret <2 x i1> [[R]]
+;
+ %R = select <2 x i1> %C, <2 x i1> %X, <2 x i1> <i1 true, i1 true>
+ ret <2 x i1> %R
+}
+
+define i1 @test23(i1 %a, i1 %b) {
+; CHECK-LABEL: @test23(
+; CHECK-NEXT: [[C:%.*]] = and i1 %a, %b
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %c = select i1 %a, i1 %b, i1 %a
+ ret i1 %c
+}
+
+define <2 x i1> @test23vec(<2 x i1> %a, <2 x i1> %b) {
+; CHECK-LABEL: @test23vec(
+; CHECK-NEXT: [[C:%.*]] = and <2 x i1> %a, %b
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %c = select <2 x i1> %a, <2 x i1> %b, <2 x i1> %a
+ ret <2 x i1> %c
+}
+
+define i1 @test24(i1 %a, i1 %b) {
+; CHECK-LABEL: @test24(
+; CHECK-NEXT: [[C:%.*]] = or i1 %a, %b
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %c = select i1 %a, i1 %a, i1 %b
+ ret i1 %c
+}
+
+define <2 x i1> @test24vec(<2 x i1> %a, <2 x i1> %b) {
+; CHECK-LABEL: @test24vec(
+; CHECK-NEXT: [[C:%.*]] = or <2 x i1> %a, %b
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %c = select <2 x i1> %a, <2 x i1> %a, <2 x i1> %b
+ ret <2 x i1> %c
+}
+
+define i1 @test62(i1 %A, i1 %B) {
+; CHECK-LABEL: @test62(
+; CHECK-NEXT: [[NOT:%.*]] = xor i1 %A, true
+; CHECK-NEXT: [[C:%.*]] = and i1 [[NOT]], %B
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %not = xor i1 %A, true
+ %C = select i1 %A, i1 %not, i1 %B
+ ret i1 %C
+}
+
+define <2 x i1> @test62vec(<2 x i1> %A, <2 x i1> %B) {
+; CHECK-LABEL: @test62vec(
+; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> %A, <i1 true, i1 true>
+; CHECK-NEXT: [[C:%.*]] = and <2 x i1> [[NOT]], %B
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %not = xor <2 x i1> %A, <i1 true, i1 true>
+ %C = select <2 x i1> %A, <2 x i1> %not, <2 x i1> %B
+ ret <2 x i1> %C
+}
+
+define i1 @test63(i1 %A, i1 %B) {
+; CHECK-LABEL: @test63(
+; CHECK-NEXT: [[NOT:%.*]] = xor i1 %A, true
+; CHECK-NEXT: [[C:%.*]] = or i1 %B, [[NOT]]
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %not = xor i1 %A, true
+ %C = select i1 %A, i1 %B, i1 %not
+ ret i1 %C
+}
+
+define <2 x i1> @test63vec(<2 x i1> %A, <2 x i1> %B) {
+; CHECK-LABEL: @test63vec(
+; CHECK-NEXT: [[NOT:%.*]] = xor <2 x i1> %A, <i1 true, i1 true>
+; CHECK-NEXT: [[C:%.*]] = or <2 x i1> %B, [[NOT]]
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %not = xor <2 x i1> %A, <i1 true, i1 true>
+ %C = select <2 x i1> %A, <2 x i1> %B, <2 x i1> %not
+ ret <2 x i1> %C
}
define i32 @test11(i32 %a) {
- %C = icmp eq i32 %a, 0
- %R = select i1 %C, i32 0, i32 1
+ %C = icmp eq i32 %a, 0
+ %R = select i1 %C, i32 0, i32 1
ret i32 %R
; CHECK-LABEL: @test11(
; CHECK: icmp ne i32 %a, 0
@@ -102,8 +221,8 @@ define i32 @test11(i32 %a) {
}
define i32 @test12(i1 %cond, i32 %a) {
- %b = or i32 %a, 1
- %c = select i1 %cond, i32 %b, i32 %a
+ %b = or i32 %a, 1
+ %c = select i1 %cond, i32 %b, i32 %a
ret i32 %c
; CHECK-LABEL: @test12(
; CHECK: %b = zext i1 %cond to i32
@@ -112,8 +231,8 @@ define i32 @test12(i1 %cond, i32 %a) {
}
define i32 @test12a(i1 %cond, i32 %a) {
- %b = ashr i32 %a, 1
- %c = select i1 %cond, i32 %b, i32 %a
+ %b = ashr i32 %a, 1
+ %c = select i1 %cond, i32 %b, i32 %a
ret i32 %c
; CHECK-LABEL: @test12a(
; CHECK: %b = zext i1 %cond to i32
@@ -122,8 +241,8 @@ define i32 @test12a(i1 %cond, i32 %a) {
}
define i32 @test12b(i1 %cond, i32 %a) {
- %b = ashr i32 %a, 1
- %c = select i1 %cond, i32 %a, i32 %b
+ %b = ashr i32 %a, 1
+ %c = select i1 %cond, i32 %a, i32 %b
ret i32 %c
; CHECK-LABEL: @test12b(
; CHECK: zext i1 %cond to i32
@@ -133,33 +252,33 @@ define i32 @test12b(i1 %cond, i32 %a) {
}
define i32 @test13(i32 %a, i32 %b) {
- %C = icmp eq i32 %a, %b
- %V = select i1 %C, i32 %a, i32 %b
+ %C = icmp eq i32 %a, %b
+ %V = select i1 %C, i32 %a, i32 %b
ret i32 %V
; CHECK-LABEL: @test13(
; CHECK: ret i32 %b
}
define i32 @test13a(i32 %a, i32 %b) {
- %C = icmp ne i32 %a, %b
- %V = select i1 %C, i32 %a, i32 %b
+ %C = icmp ne i32 %a, %b
+ %V = select i1 %C, i32 %a, i32 %b
ret i32 %V
; CHECK-LABEL: @test13a(
; CHECK: ret i32 %a
}
define i32 @test13b(i32 %a, i32 %b) {
- %C = icmp eq i32 %a, %b
- %V = select i1 %C, i32 %b, i32 %a
+ %C = icmp eq i32 %a, %b
+ %V = select i1 %C, i32 %b, i32 %a
ret i32 %V
; CHECK-LABEL: @test13b(
; CHECK: ret i32 %a
}
define i1 @test14a(i1 %C, i32 %X) {
- %V = select i1 %C, i32 %X, i32 0
+ %V = select i1 %C, i32 %X, i32 0
; (X < 1) | !C
- %R = icmp slt i32 %V, 1
+ %R = icmp slt i32 %V, 1
ret i1 %R
; CHECK-LABEL: @test14a(
; CHECK: icmp slt i32 %X, 1
@@ -169,9 +288,9 @@ define i1 @test14a(i1 %C, i32 %X) {
}
define i1 @test14b(i1 %C, i32 %X) {
- %V = select i1 %C, i32 0, i32 %X
+ %V = select i1 %C, i32 0, i32 %X
; (X < 1) | C
- %R = icmp slt i32 %V, 1
+ %R = icmp slt i32 %V, 1
ret i1 %R
; CHECK-LABEL: @test14b(
; CHECK: icmp slt i32 %X, 1
@@ -181,9 +300,9 @@ define i1 @test14b(i1 %C, i32 %X) {
;; Code sequence for (X & 16) ? 16 : 0
define i32 @test15a(i32 %X) {
- %t1 = and i32 %X, 16
- %t2 = icmp eq i32 %t1, 0
- %t3 = select i1 %t2, i32 0, i32 16
+ %t1 = and i32 %X, 16
+ %t2 = icmp eq i32 %t1, 0
+ %t3 = select i1 %t2, i32 0, i32 16
ret i32 %t3
; CHECK-LABEL: @test15a(
; CHECK: %t1 = and i32 %X, 16
@@ -192,9 +311,9 @@ define i32 @test15a(i32 %X) {
;; Code sequence for (X & 32) ? 0 : 24
define i32 @test15b(i32 %X) {
- %t1 = and i32 %X, 32
- %t2 = icmp eq i32 %t1, 0
- %t3 = select i1 %t2, i32 32, i32 0
+ %t1 = and i32 %X, 32
+ %t2 = icmp eq i32 %t1, 0
+ %t3 = select i1 %t2, i32 32, i32 0
ret i32 %t3
; CHECK-LABEL: @test15b(
; CHECK: %t1 = and i32 %X, 32
@@ -204,9 +323,9 @@ define i32 @test15b(i32 %X) {
;; Alternate code sequence for (X & 16) ? 16 : 0
define i32 @test15c(i32 %X) {
- %t1 = and i32 %X, 16
- %t2 = icmp eq i32 %t1, 16
- %t3 = select i1 %t2, i32 16, i32 0
+ %t1 = and i32 %X, 16
+ %t2 = icmp eq i32 %t1, 16
+ %t3 = select i1 %t2, i32 16, i32 0
ret i32 %t3
; CHECK-LABEL: @test15c(
; CHECK: %t1 = and i32 %X, 16
@@ -215,9 +334,9 @@ define i32 @test15c(i32 %X) {
;; Alternate code sequence for (X & 16) ? 16 : 0
define i32 @test15d(i32 %X) {
- %t1 = and i32 %X, 16
- %t2 = icmp ne i32 %t1, 0
- %t3 = select i1 %t2, i32 16, i32 0
+ %t1 = and i32 %X, 16
+ %t2 = icmp ne i32 %t1, 0
+ %t3 = select i1 %t2, i32 16, i32 0
ret i32 %t3
; CHECK-LABEL: @test15d(
; CHECK: %t1 = and i32 %X, 16
@@ -300,8 +419,8 @@ define i32 @test15j(i32 %X) {
}
define i32 @test16(i1 %C, i32* %P) {
- %P2 = select i1 %C, i32* %P, i32* null
- %V = load i32, i32* %P2
+ %P2 = select i1 %C, i32* %P, i32* null
+ %V = load i32, i32* %P2
ret i32 %V
; CHECK-LABEL: @test16(
; CHECK-NEXT: %V = load i32, i32* %P
@@ -329,8 +448,8 @@ define i32 @test16_neg2(i1 %C, i32 addrspace(1)* %P) {
}
define i1 @test17(i32* %X, i1 %C) {
- %R = select i1 %C, i32* %X, i32* null
- %RV = icmp eq i32* %R, null
+ %R = select i1 %C, i32* %X, i32* null
+ %RV = icmp eq i32* %R, null
ret i1 %RV
; CHECK-LABEL: @test17(
; CHECK: icmp eq i32* %X, null
@@ -340,8 +459,8 @@ define i1 @test17(i32* %X, i1 %C) {
}
define i32 @test18(i32 %X, i32 %Y, i1 %C) {
- %R = select i1 %C, i32 %X, i32 0
- %V = sdiv i32 %Y, %R
+ %R = select i1 %C, i32 %X, i32 0
+ %V = sdiv i32 %Y, %R
ret i32 %V
; CHECK-LABEL: @test18(
; CHECK: %V = sdiv i32 %Y, %X
@@ -349,64 +468,48 @@ define i32 @test18(i32 %X, i32 %Y, i1 %C) {
}
define i32 @test19(i32 %x) {
- %tmp = icmp ugt i32 %x, 2147483647
- %retval = select i1 %tmp, i32 -1, i32 0
+ %tmp = icmp ugt i32 %x, 2147483647
+ %retval = select i1 %tmp, i32 -1, i32 0
ret i32 %retval
; CHECK-LABEL: @test19(
; CHECK-NEXT: ashr i32 %x, 31
-; CHECK-NEXT: ret i32
+; CHECK-NEXT: ret i32
}
define i32 @test20(i32 %x) {
- %tmp = icmp slt i32 %x, 0
- %retval = select i1 %tmp, i32 -1, i32 0
+ %tmp = icmp slt i32 %x, 0
+ %retval = select i1 %tmp, i32 -1, i32 0
ret i32 %retval
; CHECK-LABEL: @test20(
; CHECK-NEXT: ashr i32 %x, 31
-; CHECK-NEXT: ret i32
+; CHECK-NEXT: ret i32
}
define i64 @test21(i32 %x) {
- %tmp = icmp slt i32 %x, 0
- %retval = select i1 %tmp, i64 -1, i64 0
+ %tmp = icmp slt i32 %x, 0
+ %retval = select i1 %tmp, i64 -1, i64 0
ret i64 %retval
; CHECK-LABEL: @test21(
; CHECK-NEXT: ashr i32 %x, 31
-; CHECK-NEXT: sext i32
+; CHECK-NEXT: sext i32
; CHECK-NEXT: ret i64
}
define i16 @test22(i32 %x) {
- %tmp = icmp slt i32 %x, 0
- %retval = select i1 %tmp, i16 -1, i16 0
+ %tmp = icmp slt i32 %x, 0
+ %retval = select i1 %tmp, i16 -1, i16 0
ret i16 %retval
; CHECK-LABEL: @test22(
; CHECK-NEXT: ashr i32 %x, 31
-; CHECK-NEXT: trunc i32
+; CHECK-NEXT: trunc i32
; CHECK-NEXT: ret i16
}
-define i1 @test23(i1 %a, i1 %b) {
- %c = select i1 %a, i1 %b, i1 %a
- ret i1 %c
-; CHECK-LABEL: @test23(
-; CHECK-NEXT: %c = and i1 %a, %b
-; CHECK-NEXT: ret i1 %c
-}
-
-define i1 @test24(i1 %a, i1 %b) {
- %c = select i1 %a, i1 %a, i1 %b
- ret i1 %c
-; CHECK-LABEL: @test24(
-; CHECK-NEXT: %c = or i1 %a, %b
-; CHECK-NEXT: ret i1 %c
-}
-
define i32 @test25(i1 %c) {
entry:
br i1 %c, label %jump, label %ret
jump:
- br label %ret
+ br label %ret
ret:
%a = phi i1 [true, %jump], [false, %entry]
%b = select i1 %a, i32 10, i32 20
@@ -421,7 +524,7 @@ entry:
br i1 %cond, label %jump, label %ret
jump:
%c = or i1 false, false
- br label %ret
+ br label %ret
ret:
%a = phi i1 [true, %entry], [%c, %jump]
%b = select i1 %a, i32 20, i32 10
@@ -435,7 +538,7 @@ define i32 @test27(i1 %c, i32 %A, i32 %B) {
entry:
br i1 %c, label %jump, label %ret
jump:
- br label %ret
+ br label %ret
ret:
%a = phi i1 [true, %jump], [false, %entry]
%b = select i1 %a, i32 %A, i32 %B
@@ -449,7 +552,7 @@ define i32 @test28(i1 %cond, i32 %A, i32 %B) {
entry:
br i1 %cond, label %jump, label %ret
jump:
- br label %ret
+ br label %ret
ret:
%c = phi i32 [%A, %jump], [%B, %entry]
%a = phi i1 [true, %jump], [false, %entry]
@@ -464,12 +567,12 @@ define i32 @test29(i1 %cond, i32 %A, i32 %B) {
entry:
br i1 %cond, label %jump, label %ret
jump:
- br label %ret
+ br label %ret
ret:
%c = phi i32 [%A, %jump], [%B, %entry]
%a = phi i1 [true, %jump], [false, %entry]
br label %next
-
+
next:
%b = select i1 %a, i32 %A, i32 %c
ret i32 %b
@@ -483,7 +586,7 @@ next:
define i32 @test30(i32 %x, i32 %y) {
%cmp = icmp sgt i32 %x, %y
%cond = select i1 %cmp, i32 %x, i32 %y
-
+
%cmp5 = icmp sgt i32 %cond, %x
%retval = select i1 %cmp5, i32 %cond, i32 %x
ret i32 %retval
@@ -493,7 +596,7 @@ define i32 @test30(i32 %x, i32 %y) {
; UMAX(UMAX(x, y), x) -> UMAX(x, y)
define i32 @test31(i32 %x, i32 %y) {
- %cmp = icmp ugt i32 %x, %y
+ %cmp = icmp ugt i32 %x, %y
%cond = select i1 %cmp, i32 %x, i32 %y
%cmp5 = icmp ugt i32 %cond, %x
%retval = select i1 %cmp5, i32 %cond, i32 %x
@@ -723,6 +826,53 @@ define i48 @test51(<3 x i1> %icmp, <3 x i16> %tmp) {
ret i48 %tmp2
}
+; Allow select promotion even if there are multiple uses of bitcasted ops.
+; Hoisting the selects allows later pattern matching to see that these are min/max ops.
+
+define void @min_max_bitcast(<4 x float> %a, <4 x float> %b, <4 x i32>* %ptr1, <4 x i32>* %ptr2) {
+; CHECK-LABEL: @min_max_bitcast(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <4 x float> %a, %b
+; CHECK-NEXT: [[SEL1_V:%.*]] = select <4 x i1> [[CMP]], <4 x float> %a, <4 x float> %b
+; CHECK-NEXT: [[SEL2_V:%.*]] = select <4 x i1> [[CMP]], <4 x float> %b, <4 x float> %a
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <4 x i32>* %ptr1 to <4 x float>*
+; CHECK-NEXT: store <4 x float> [[SEL1_V]], <4 x float>* [[TMP1]], align 16
+; CHECK-NEXT: [[TMP2:%.*]] = bitcast <4 x i32>* %ptr2 to <4 x float>*
+; CHECK-NEXT: store <4 x float> [[SEL2_V]], <4 x float>* [[TMP2]], align 16
+; CHECK-NEXT: ret void
+;
+ %cmp = fcmp olt <4 x float> %a, %b
+ %bc1 = bitcast <4 x float> %a to <4 x i32>
+ %bc2 = bitcast <4 x float> %b to <4 x i32>
+ %sel1 = select <4 x i1> %cmp, <4 x i32> %bc1, <4 x i32> %bc2
+ %sel2 = select <4 x i1> %cmp, <4 x i32> %bc2, <4 x i32> %bc1
+ store <4 x i32> %sel1, <4 x i32>* %ptr1
+ store <4 x i32> %sel2, <4 x i32>* %ptr2
+ ret void
+}
+
+; To avoid potential backend problems, we don't do the same transform for other casts.
+
+define void @truncs_before_selects(<4 x float> %f1, <4 x float> %f2, <4 x i64> %a, <4 x i64> %b, <4 x i32>* %ptr1, <4 x i32>* %ptr2) {
+; CHECK-LABEL: @truncs_before_selects(
+; CHECK-NEXT: [[CMP:%.*]] = fcmp olt <4 x float> %f1, %f2
+; CHECK-NEXT: [[BC1:%.*]] = trunc <4 x i64> %a to <4 x i32>
+; CHECK-NEXT: [[BC2:%.*]] = trunc <4 x i64> %b to <4 x i32>
+; CHECK-NEXT: [[SEL1:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[BC1]], <4 x i32> [[BC2]]
+; CHECK-NEXT: [[SEL2:%.*]] = select <4 x i1> [[CMP]], <4 x i32> [[BC2]], <4 x i32> [[BC1]]
+; CHECK-NEXT: store <4 x i32> [[SEL1]], <4 x i32>* %ptr1, align 16
+; CHECK-NEXT: store <4 x i32> [[SEL2]], <4 x i32>* %ptr2, align 16
+; CHECK-NEXT: ret void
+;
+ %cmp = fcmp olt <4 x float> %f1, %f2
+ %bc1 = trunc <4 x i64> %a to <4 x i32>
+ %bc2 = trunc <4 x i64> %b to <4 x i32>
+ %sel1 = select <4 x i1> %cmp, <4 x i32> %bc1, <4 x i32> %bc2
+ %sel2 = select <4 x i1> %cmp, <4 x i32> %bc2, <4 x i32> %bc1
+ store <4 x i32> %sel1, <4 x i32>* %ptr1, align 16
+ store <4 x i32> %sel2, <4 x i32>* %ptr2, align 16
+ ret void
+}
+
; PR8575
define i32 @test52(i32 %n, i32 %m) nounwind {
@@ -755,7 +905,7 @@ define i32 @test54(i32 %X, i32 %Y) {
; CHECK-NOT: ashr
; CHECK-NOT: select
; CHECK: icmp ne i32 %X, 0
-; CHECK: zext
+; CHECK: zext
; CHECK: ret
}
@@ -831,26 +981,6 @@ define i32 @test61(i32* %ptr) {
; CHECK: ret i32 10
}
-define i1 @test62(i1 %A, i1 %B) {
- %not = xor i1 %A, true
- %C = select i1 %A, i1 %not, i1 %B
- ret i1 %C
-; CHECK-LABEL: @test62(
-; CHECK: %not = xor i1 %A, true
-; CHECK: %C = and i1 %not, %B
-; CHECK: ret i1 %C
-}
-
-define i1 @test63(i1 %A, i1 %B) {
- %not = xor i1 %A, true
- %C = select i1 %A, i1 %B, i1 %not
- ret i1 %C
-; CHECK-LABEL: @test63(
-; CHECK: %not = xor i1 %A, true
-; CHECK: %C = or i1 %B, %not
-; CHECK: ret i1 %C
-}
-
; PR14131
define void @test64(i32 %p, i16 %b) noreturn nounwind {
entry:
@@ -1296,6 +1426,20 @@ entry:
ret i32 %v
}
+define i32 @test78_deref(i1 %flag, i32* dereferenceable(4) %x, i32* dereferenceable(4) %y, i32* %z) {
+; Test that we can speculate the loads around the select even when we can't
+; fold the load completely away.
+; CHECK-LABEL: @test78_deref(
+; CHECK: %[[V1:.*]] = load i32, i32* %x
+; CHECK-NEXT: %[[V2:.*]] = load i32, i32* %y
+; CHECK-NEXT: %[[S:.*]] = select i1 %flag, i32 %[[V1]], i32 %[[V2]]
+; CHECK-NEXT: ret i32 %[[S]]
+entry:
+ %p = select i1 %flag, i32* %x, i32* %y
+ %v = load i32, i32* %p
+ ret i32 %v
+}
+
define i32 @test78_neg(i1 %flag, i32* %x, i32* %y, i32* %z) {
; The same as @test78 but we can't speculate the load because it can trap
; if under-aligned.
@@ -1313,6 +1457,19 @@ entry:
ret i32 %v
}
+define i32 @test78_deref_neg(i1 %flag, i32* dereferenceable(2) %x, i32* dereferenceable(4) %y, i32* %z) {
+; The same as @test78_deref but we can't speculate the load because
+; one of the arguments is not sufficiently dereferenceable.
+; CHECK-LABEL: @test78_deref_neg(
+; CHECK: %p = select i1 %flag, i32* %x, i32* %y
+; CHECK-NEXT: %v = load i32, i32* %p
+; CHECK-NEXT: ret i32 %v
+entry:
+ %p = select i1 %flag, i32* %x, i32* %y
+ %v = load i32, i32* %p
+ ret i32 %v
+}
+
define float @test79(i1 %flag, float* %x, i32* %y, i32* %z) {
; Test that we can speculate the loads around the select even when we can't
; fold the load completely away.
@@ -1562,3 +1719,21 @@ define i32 @PR23757(i32 %x) {
%sel = select i1 %cmp, i32 -2147483648, i32 %add
ret i32 %sel
}
+
+
+define i32 @PR27137(i32 %a) {
+; CHECK-LABEL: @PR27137(
+; CHECK-NEXT: %not_a = xor i32 %a, -1
+; CHECK-NEXT: %c0 = icmp slt i32 %a, 0
+; CHECK-NEXT: %s0 = select i1 %c0, i32 %not_a, i32 -1
+; CHECK-NEXT: %c1 = icmp sgt i32 %s0, -1
+; CHECK-NEXT: %s1 = select i1 %c1, i32 %s0, i32 -1
+; CHECK-NEXT: ret i32 %s1
+
+ %not_a = xor i32 %a, -1
+ %c0 = icmp slt i32 %a, 0
+ %s0 = select i1 %c0, i32 %not_a, i32 -1
+ %c1 = icmp sgt i32 %s0, -1
+ %s1 = select i1 %c1, i32 %s0, i32 -1
+ ret i32 %s1
+}
diff --git a/test/Transforms/InstCombine/set.ll b/test/Transforms/InstCombine/set.ll
index daa9148f6ad9..494a60379011 100644
--- a/test/Transforms/InstCombine/set.ll
+++ b/test/Transforms/InstCombine/set.ll
@@ -1,171 +1,273 @@
-; This test makes sure that these instructions are properly eliminated.
-;
-; RUN: opt < %s -instcombine -S | not grep icmp
-; END.
-
-@X = external global i32 ; <i32*> [#uses=2]
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; This test makes sure that all icmp instructions are eliminated.
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+@X = external global i32
define i1 @test1(i32 %A) {
- %B = icmp eq i32 %A, %A ; <i1> [#uses=1]
- ; Never true
- %C = icmp eq i32* @X, null ; <i1> [#uses=1]
- %D = and i1 %B, %C ; <i1> [#uses=1]
- ret i1 %D
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: ret i1 false
+;
+ %B = icmp eq i32 %A, %A
+ ; Never true
+ %C = icmp eq i32* @X, null
+ %D = and i1 %B, %C
+ ret i1 %D
}
define i1 @test2(i32 %A) {
- %B = icmp ne i32 %A, %A ; <i1> [#uses=1]
- ; Never false
- %C = icmp ne i32* @X, null ; <i1> [#uses=1]
- %D = or i1 %B, %C ; <i1> [#uses=1]
- ret i1 %D
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: ret i1 true
+;
+ %B = icmp ne i32 %A, %A
+ ; Never false
+ %C = icmp ne i32* @X, null
+ %D = or i1 %B, %C
+ ret i1 %D
}
define i1 @test3(i32 %A) {
- %B = icmp slt i32 %A, %A ; <i1> [#uses=1]
- ret i1 %B
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: ret i1 false
+;
+ %B = icmp slt i32 %A, %A
+ ret i1 %B
}
define i1 @test4(i32 %A) {
- %B = icmp sgt i32 %A, %A ; <i1> [#uses=1]
- ret i1 %B
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: ret i1 false
+;
+ %B = icmp sgt i32 %A, %A
+ ret i1 %B
}
define i1 @test5(i32 %A) {
- %B = icmp sle i32 %A, %A ; <i1> [#uses=1]
- ret i1 %B
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: ret i1 true
+;
+ %B = icmp sle i32 %A, %A
+ ret i1 %B
}
define i1 @test6(i32 %A) {
- %B = icmp sge i32 %A, %A ; <i1> [#uses=1]
- ret i1 %B
+; CHECK-LABEL: @test6(
+; CHECK-NEXT: ret i1 true
+;
+ %B = icmp sge i32 %A, %A
+ ret i1 %B
}
define i1 @test7(i32 %A) {
- ; true
- %B = icmp uge i32 %A, 0 ; <i1> [#uses=1]
- ret i1 %B
+; CHECK-LABEL: @test7(
+; CHECK-NEXT: ret i1 true
+;
+ %B = icmp uge i32 %A, 0
+ ret i1 %B
}
define i1 @test8(i32 %A) {
- ; false
- %B = icmp ult i32 %A, 0 ; <i1> [#uses=1]
- ret i1 %B
+; CHECK-LABEL: @test8(
+; CHECK-NEXT: ret i1 false
+;
+ %B = icmp ult i32 %A, 0
+ ret i1 %B
}
;; test operations on boolean values these should all be eliminated$a
define i1 @test9(i1 %A) {
- ; false
- %B = icmp ult i1 %A, false ; <i1> [#uses=1]
- ret i1 %B
+; CHECK-LABEL: @test9(
+; CHECK-NEXT: ret i1 false
+;
+ %B = icmp ult i1 %A, false
+ ret i1 %B
}
define i1 @test10(i1 %A) {
- ; false
- %B = icmp ugt i1 %A, true ; <i1> [#uses=1]
- ret i1 %B
+; CHECK-LABEL: @test10(
+; CHECK-NEXT: ret i1 false
+;
+ %B = icmp ugt i1 %A, true
+ ret i1 %B
}
define i1 @test11(i1 %A) {
- ; true
- %B = icmp ule i1 %A, true ; <i1> [#uses=1]
- ret i1 %B
+; CHECK-LABEL: @test11(
+; CHECK-NEXT: ret i1 true
+;
+ %B = icmp ule i1 %A, true
+ ret i1 %B
}
define i1 @test12(i1 %A) {
- ; true
- %B = icmp uge i1 %A, false ; <i1> [#uses=1]
- ret i1 %B
+; CHECK-LABEL: @test12(
+; CHECK-NEXT: ret i1 true
+;
+ %B = icmp uge i1 %A, false
+ ret i1 %B
}
define i1 @test13(i1 %A, i1 %B) {
- ; A | ~B
- %C = icmp uge i1 %A, %B ; <i1> [#uses=1]
- ret i1 %C
+; CHECK-LABEL: @test13(
+; CHECK-NEXT: [[CTMP:%.*]] = xor i1 %B, true
+; CHECK-NEXT: [[C:%.*]] = or i1 [[CTMP]], %A
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %C = icmp uge i1 %A, %B
+ ret i1 %C
+}
+
+define <2 x i1> @test13vec(<2 x i1> %A, <2 x i1> %B) {
+; CHECK-LABEL: @test13vec(
+; CHECK-NEXT: [[CTMP:%.*]] = xor <2 x i1> %B, <i1 true, i1 true>
+; CHECK-NEXT: [[C:%.*]] = or <2 x i1> [[CTMP]], %A
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %C = icmp uge <2 x i1> %A, %B
+ ret <2 x i1> %C
}
define i1 @test14(i1 %A, i1 %B) {
- ; ~(A ^ B)
- %C = icmp eq i1 %A, %B ; <i1> [#uses=1]
- ret i1 %C
+; CHECK-LABEL: @test14(
+; CHECK-NEXT: [[CTMP:%.*]] = xor i1 %A, %B
+; CHECK-NEXT: [[C:%.*]] = xor i1 [[CTMP]], true
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %C = icmp eq i1 %A, %B
+ ret i1 %C
+}
+
+define <3 x i1> @test14vec(<3 x i1> %A, <3 x i1> %B) {
+; CHECK-LABEL: @test14vec(
+; CHECK-NEXT: [[CTMP:%.*]] = xor <3 x i1> %A, %B
+; CHECK-NEXT: [[C:%.*]] = xor <3 x i1> [[CTMP]], <i1 true, i1 true, i1 true>
+; CHECK-NEXT: ret <3 x i1> [[C]]
+;
+ %C = icmp eq <3 x i1> %A, %B
+ ret <3 x i1> %C
}
define i1 @test16(i32 %A) {
- %B = and i32 %A, 5 ; <i32> [#uses=1]
- ; Is never true
- %C = icmp eq i32 %B, 8 ; <i1> [#uses=1]
- ret i1 %C
+; CHECK-LABEL: @test16(
+; CHECK-NEXT: ret i1 false
+;
+ %B = and i32 %A, 5
+ ; Is never true
+ %C = icmp eq i32 %B, 8
+ ret i1 %C
}
define i1 @test17(i8 %A) {
- %B = or i8 %A, 1 ; <i8> [#uses=1]
- ; Always false
- %C = icmp eq i8 %B, 2 ; <i1> [#uses=1]
- ret i1 %C
+; CHECK-LABEL: @test17(
+; CHECK-NEXT: ret i1 false
+;
+ %B = or i8 %A, 1
+ ; Always false
+ %C = icmp eq i8 %B, 2
+ ret i1 %C
}
define i1 @test18(i1 %C, i32 %a) {
+; CHECK-LABEL: @test18(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br i1 %C, label %endif, label %else
+; CHECK: else:
+; CHECK-NEXT: br label %endif
+; CHECK: endif:
+; CHECK-NEXT: ret i1 true
+;
entry:
- br i1 %C, label %endif, label %else
+ br i1 %C, label %endif, label %else
-else: ; preds = %entry
- br label %endif
+else:
+ br label %endif
-endif: ; preds = %else, %entry
- %b.0 = phi i32 [ 0, %entry ], [ 1, %else ] ; <i32> [#uses=1]
- %tmp.4 = icmp slt i32 %b.0, 123 ; <i1> [#uses=1]
- ret i1 %tmp.4
+endif:
+ %b.0 = phi i32 [ 0, %entry ], [ 1, %else ]
+ %tmp.4 = icmp slt i32 %b.0, 123
+ ret i1 %tmp.4
}
define i1 @test19(i1 %A, i1 %B) {
- %a = zext i1 %A to i32 ; <i32> [#uses=1]
- %b = zext i1 %B to i32 ; <i32> [#uses=1]
- %C = icmp eq i32 %a, %b ; <i1> [#uses=1]
- ret i1 %C
+; CHECK-LABEL: @test19(
+; CHECK-NEXT: [[CTMP:%.*]] = xor i1 %A, %B
+; CHECK-NEXT: [[C:%.*]] = xor i1 [[CTMP]], true
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %a = zext i1 %A to i32
+ %b = zext i1 %B to i32
+ %C = icmp eq i32 %a, %b
+ ret i1 %C
}
define i32 @test20(i32 %A) {
- %B = and i32 %A, 1 ; <i32> [#uses=1]
- %C = icmp ne i32 %B, 0 ; <i1> [#uses=1]
- %D = zext i1 %C to i32 ; <i32> [#uses=1]
- ret i32 %D
+; CHECK-LABEL: @test20(
+; CHECK-NEXT: [[B:%.*]] = and i32 %A, 1
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %B = and i32 %A, 1
+ %C = icmp ne i32 %B, 0
+ %D = zext i1 %C to i32
+ ret i32 %D
}
define i32 @test21(i32 %a) {
- %tmp.6 = and i32 %a, 4 ; <i32> [#uses=1]
- %not.tmp.7 = icmp ne i32 %tmp.6, 0 ; <i1> [#uses=1]
- %retval = zext i1 %not.tmp.7 to i32 ; <i32> [#uses=1]
- ret i32 %retval
+; CHECK-LABEL: @test21(
+; CHECK-NEXT: [[TMP_6:%.*]] = lshr i32 %a, 2
+; CHECK-NEXT: [[TMP_6_LOBIT:%.*]] = and i32 [[TMP_6]], 1
+; CHECK-NEXT: ret i32 [[TMP_6_LOBIT]]
+;
+ %tmp.6 = and i32 %a, 4
+ %not.tmp.7 = icmp ne i32 %tmp.6, 0
+ %retval = zext i1 %not.tmp.7 to i32
+ ret i32 %retval
}
define i1 @test22(i32 %A, i32 %X) {
- %B = and i32 %A, 100663295 ; <i32> [#uses=1]
- %C = icmp ult i32 %B, 268435456 ; <i1> [#uses=1]
- %Y = and i32 %X, 7 ; <i32> [#uses=1]
- %Z = icmp sgt i32 %Y, -1 ; <i1> [#uses=1]
- %R = or i1 %C, %Z ; <i1> [#uses=1]
- ret i1 %R
+; CHECK-LABEL: @test22(
+; CHECK-NEXT: ret i1 true
+;
+ %B = and i32 %A, 100663295
+ %C = icmp ult i32 %B, 268435456
+ %Y = and i32 %X, 7
+ %Z = icmp sgt i32 %Y, -1
+ %R = or i1 %C, %Z
+ ret i1 %R
}
define i32 @test23(i32 %a) {
- %tmp.1 = and i32 %a, 1 ; <i32> [#uses=1]
- %tmp.2 = icmp eq i32 %tmp.1, 0 ; <i1> [#uses=1]
- %tmp.3 = zext i1 %tmp.2 to i32 ; <i32> [#uses=1]
- ret i32 %tmp.3
+; CHECK-LABEL: @test23(
+; CHECK-NEXT: [[TMP_1:%.*]] = and i32 %a, 1
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[TMP_1]], 1
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %tmp.1 = and i32 %a, 1
+ %tmp.2 = icmp eq i32 %tmp.1, 0
+ %tmp.3 = zext i1 %tmp.2 to i32
+ ret i32 %tmp.3
}
define i32 @test24(i32 %a) {
- %tmp1 = and i32 %a, 4 ; <i32> [#uses=1]
- %tmp.1 = lshr i32 %tmp1, 2 ; <i32> [#uses=1]
- %tmp.2 = icmp eq i32 %tmp.1, 0 ; <i1> [#uses=1]
- %tmp.3 = zext i1 %tmp.2 to i32 ; <i32> [#uses=1]
- ret i32 %tmp.3
+; CHECK-LABEL: @test24(
+; CHECK-NEXT: [[TMP_1:%.*]] = lshr i32 %a, 2
+; CHECK-NEXT: [[TMP_1_LOBIT:%.*]] = and i32 [[TMP_1]], 1
+; CHECK-NEXT: [[TMP1:%.*]] = xor i32 [[TMP_1_LOBIT]], 1
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %tmp1 = and i32 %a, 4
+ %tmp.1 = lshr i32 %tmp1, 2
+ %tmp.2 = icmp eq i32 %tmp.1, 0
+ %tmp.3 = zext i1 %tmp.2 to i32
+ ret i32 %tmp.3
}
define i1 @test25(i32 %A) {
- %B = and i32 %A, 2 ; <i32> [#uses=1]
- %C = icmp ugt i32 %B, 2 ; <i1> [#uses=1]
- ret i1 %C
+; CHECK-LABEL: @test25(
+; CHECK-NEXT: ret i1 false
+;
+ %B = and i32 %A, 2
+ %C = icmp ugt i32 %B, 2
+ ret i1 %C
}
diff --git a/test/Transforms/InstCombine/shift-shift.ll b/test/Transforms/InstCombine/shift-shift.ll
new file mode 100644
index 000000000000..6aa262fd931d
--- /dev/null
+++ b/test/Transforms/InstCombine/shift-shift.ll
@@ -0,0 +1,75 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+; These would crash if we didn't check for a negative shift.
+
+; https://llvm.org/bugs/show_bug.cgi?id=12967
+
+define void @pr12967() {
+; CHECK-LABEL: @pr12967(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: br label %loop
+;
+entry:
+ br label %loop
+
+loop:
+ %c = phi i32 [ %shl, %loop ], [ undef, %entry ]
+ %shr = shl i32 %c, 7
+ %shl = lshr i32 %shr, -2
+ br label %loop
+}
+
+; https://llvm.org/bugs/show_bug.cgi?id=26760
+
+define void @pr26760() {
+; CHECK-LABEL: @pr26760(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: br label %loop
+; CHECK: loop:
+; CHECK-NEXT: br label %loop
+;
+entry:
+ br label %loop
+
+loop:
+ %c = phi i32 [ %shl, %loop ], [ undef, %entry ]
+ %shr = lshr i32 %c, 7
+ %shl = shl i32 %shr, -2
+ br label %loop
+}
+
+; Converting the 2 shifts to SHL 6 without the AND is wrong.
+; https://llvm.org/bugs/show_bug.cgi?id=8547
+
+define i32 @pr8547(i32* %g) {
+; CHECK-LABEL: @pr8547(
+; CHECK-NEXT: codeRepl:
+; CHECK-NEXT: br label %for.cond
+; CHECK: for.cond:
+; CHECK-NEXT: [[STOREMERGE:%.*]] = phi i32 [ 0, %codeRepl ], [ 5, %for.cond ]
+; CHECK-NEXT: store i32 [[STOREMERGE]], i32* %g, align 4
+; CHECK-NEXT: [[TMP0:%.*]] = shl nuw nsw i32 [[STOREMERGE]], 6
+; CHECK-NEXT: [[CONV2:%.*]] = and i32 [[TMP0]], 64
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[CONV2]], 0
+; CHECK-NEXT: br i1 [[TOBOOL]], label %for.cond, label %codeRepl2
+; CHECK: codeRepl2:
+; CHECK-NEXT: ret i32 [[CONV2]]
+;
+codeRepl:
+ br label %for.cond
+
+for.cond:
+ %storemerge = phi i32 [ 0, %codeRepl ], [ 5, %for.cond ]
+ store i32 %storemerge, i32* %g, align 4
+ %shl = shl i32 %storemerge, 30
+ %conv2 = lshr i32 %shl, 24
+ %tobool = icmp eq i32 %conv2, 0
+ br i1 %tobool, label %for.cond, label %codeRepl2
+
+codeRepl2:
+ ret i32 %conv2
+}
+
diff --git a/test/Transforms/InstCombine/shift.ll b/test/Transforms/InstCombine/shift.ll
index 0b5b5deb68c5..755c4a7f9f1f 100644
--- a/test/Transforms/InstCombine/shift.ll
+++ b/test/Transforms/InstCombine/shift.ll
@@ -1,144 +1,165 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; This test makes sure that these instructions are properly eliminated.
;
; RUN: opt < %s -instcombine -S | FileCheck %s
define i32 @test1(i32 %A) {
; CHECK-LABEL: @test1(
-; CHECK: ret i32 %A
- %B = shl i32 %A, 0 ; <i32> [#uses=1]
- ret i32 %B
+; CHECK-NEXT: ret i32 %A
+;
+ %B = shl i32 %A, 0 ; <i32> [#uses=1]
+ ret i32 %B
}
define i32 @test2(i8 %A) {
; CHECK-LABEL: @test2(
-; CHECK: ret i32 0
- %shift.upgrd.1 = zext i8 %A to i32 ; <i32> [#uses=1]
- %B = shl i32 0, %shift.upgrd.1 ; <i32> [#uses=1]
- ret i32 %B
+; CHECK-NEXT: ret i32 0
+;
+ %shift.upgrd.1 = zext i8 %A to i32 ; <i32> [#uses=1]
+ %B = shl i32 0, %shift.upgrd.1 ; <i32> [#uses=1]
+ ret i32 %B
}
define i32 @test3(i32 %A) {
; CHECK-LABEL: @test3(
-; CHECK: ret i32 %A
- %B = ashr i32 %A, 0 ; <i32> [#uses=1]
- ret i32 %B
+; CHECK-NEXT: ret i32 %A
+;
+ %B = ashr i32 %A, 0 ; <i32> [#uses=1]
+ ret i32 %B
}
define i32 @test4(i8 %A) {
; CHECK-LABEL: @test4(
-; CHECK: ret i32 0
- %shift.upgrd.2 = zext i8 %A to i32 ; <i32> [#uses=1]
- %B = ashr i32 0, %shift.upgrd.2 ; <i32> [#uses=1]
- ret i32 %B
+; CHECK-NEXT: ret i32 0
+;
+ %shift.upgrd.2 = zext i8 %A to i32 ; <i32> [#uses=1]
+ %B = ashr i32 0, %shift.upgrd.2 ; <i32> [#uses=1]
+ ret i32 %B
}
define i32 @test5(i32 %A) {
; CHECK-LABEL: @test5(
-; CHECK: ret i32 undef
- %B = lshr i32 %A, 32 ;; shift all bits out
- ret i32 %B
+; CHECK-NEXT: ret i32 undef
+;
+ %B = lshr i32 %A, 32 ;; shift all bits out
+ ret i32 %B
}
define <4 x i32> @test5_splat_vector(<4 x i32> %A) {
; CHECK-LABEL: @test5_splat_vector(
-; CHECK: ret <4 x i32> undef
+; CHECK-NEXT: ret <4 x i32> undef
+;
%B = lshr <4 x i32> %A, <i32 32, i32 32, i32 32, i32 32> ;; shift all bits out
ret <4 x i32> %B
}
define <4 x i32> @test5_zero_vector(<4 x i32> %A) {
; CHECK-LABEL: @test5_zero_vector(
-; CHECK-NEXT: ret <4 x i32> %A
+; CHECK-NEXT: ret <4 x i32> %A
+;
%B = lshr <4 x i32> %A, zeroinitializer
ret <4 x i32> %B
}
define <4 x i32> @test5_non_splat_vector(<4 x i32> %A) {
; CHECK-LABEL: @test5_non_splat_vector(
-; CHECK-NOT: ret <4 x i32> undef
+; CHECK-NEXT: [[B:%.*]] = lshr <4 x i32> %A, <i32 32, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x i32> [[B]]
+;
%B = lshr <4 x i32> %A, <i32 32, i32 1, i32 2, i32 3>
ret <4 x i32> %B
}
define i32 @test5a(i32 %A) {
; CHECK-LABEL: @test5a(
-; CHECK: ret i32 undef
- %B = shl i32 %A, 32 ;; shift all bits out
- ret i32 %B
+; CHECK-NEXT: ret i32 undef
+;
+ %B = shl i32 %A, 32 ;; shift all bits out
+ ret i32 %B
}
define <4 x i32> @test5a_splat_vector(<4 x i32> %A) {
; CHECK-LABEL: @test5a_splat_vector(
-; CHECK: ret <4 x i32> undef
+; CHECK-NEXT: ret <4 x i32> undef
+;
%B = shl <4 x i32> %A, <i32 32, i32 32, i32 32, i32 32> ;; shift all bits out
ret <4 x i32> %B
}
define <4 x i32> @test5a_non_splat_vector(<4 x i32> %A) {
; CHECK-LABEL: @test5a_non_splat_vector(
-; CHECK-NOT: ret <4 x i32> undef
+; CHECK-NEXT: [[B:%.*]] = shl <4 x i32> %A, <i32 32, i32 1, i32 2, i32 3>
+; CHECK-NEXT: ret <4 x i32> [[B]]
+;
%B = shl <4 x i32> %A, <i32 32, i32 1, i32 2, i32 3>
ret <4 x i32> %B
}
define i32 @test5b() {
; CHECK-LABEL: @test5b(
-; CHECK: ret i32 0
- %B = ashr i32 undef, 2 ;; top two bits must be equal, so not undef
- ret i32 %B
+; CHECK-NEXT: ret i32 0
+;
+ %B = ashr i32 undef, 2 ;; top two bits must be equal, so not undef
+ ret i32 %B
}
define i32 @test5b2(i32 %A) {
; CHECK-LABEL: @test5b2(
-; CHECK: ret i32 0
- %B = ashr i32 undef, %A ;; top %A bits must be equal, so not undef
- ret i32 %B
+; CHECK-NEXT: ret i32 0
+;
+ %B = ashr i32 undef, %A ;; top %A bits must be equal, so not undef
+ ret i32 %B
}
define i32 @test6(i32 %A) {
; CHECK-LABEL: @test6(
-; CHECK-NEXT: mul i32 %A, 6
-; CHECK-NEXT: ret i32
- %B = shl i32 %A, 1 ;; convert to an mul instruction
- %C = mul i32 %B, 3
- ret i32 %C
+; CHECK-NEXT: [[C:%.*]] = mul i32 %A, 6
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = shl i32 %A, 1 ;; convert to an mul instruction
+ %C = mul i32 %B, 3
+ ret i32 %C
}
define i32 @test6a(i32 %A) {
; CHECK-LABEL: @test6a(
-; CHECK-NEXT: mul i32 %A, 6
-; CHECK-NEXT: ret i32
- %B = mul i32 %A, 3
- %C = shl i32 %B, 1 ;; convert to an mul instruction
- ret i32 %C
+; CHECK-NEXT: [[C:%.*]] = mul i32 %A, 6
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = mul i32 %A, 3
+ %C = shl i32 %B, 1 ;; convert to an mul instruction
+ ret i32 %C
}
define i32 @test7(i8 %A) {
; CHECK-LABEL: @test7(
-; CHECK-NEXT: ret i32 -1
- %shift.upgrd.3 = zext i8 %A to i32
- %B = ashr i32 -1, %shift.upgrd.3 ;; Always equal to -1
- ret i32 %B
+; CHECK-NEXT: ret i32 -1
+;
+ %shift.upgrd.3 = zext i8 %A to i32
+ %B = ashr i32 -1, %shift.upgrd.3 ;; Always equal to -1
+ ret i32 %B
}
;; (A << 5) << 3 === A << 8 == 0
define i8 @test8(i8 %A) {
; CHECK-LABEL: @test8(
-; CHECK: ret i8 0
- %B = shl i8 %A, 5 ; <i8> [#uses=1]
- %C = shl i8 %B, 3 ; <i8> [#uses=1]
- ret i8 %C
+; CHECK-NEXT: ret i8 0
+;
+ %B = shl i8 %A, 5 ; <i8> [#uses=1]
+ %C = shl i8 %B, 3 ; <i8> [#uses=1]
+ ret i8 %C
}
;; (A << 7) >> 7 === A & 1
define i8 @test9(i8 %A) {
; CHECK-LABEL: @test9(
-; CHECK-NEXT: and i8 %A, 1
-; CHECK-NEXT: ret i8
- %B = shl i8 %A, 7 ; <i8> [#uses=1]
- %C = lshr i8 %B, 7 ; <i8> [#uses=1]
- ret i8 %C
+; CHECK-NEXT: [[B:%.*]] = and i8 %A, 1
+; CHECK-NEXT: ret i8 [[B]]
+;
+ %B = shl i8 %A, 7 ; <i8> [#uses=1]
+ %C = lshr i8 %B, 7 ; <i8> [#uses=1]
+ ret i8 %C
}
;; This transformation is deferred to DAGCombine:
@@ -146,20 +167,22 @@ define i8 @test9(i8 %A) {
;; The shl may be valuable to scalar evolution.
define i8 @test10(i8 %A) {
; CHECK-LABEL: @test10(
-; CHECK-NEXT: and i8 %A, -128
-; CHECK-NEXT: ret i8
- %B = lshr i8 %A, 7 ; <i8> [#uses=1]
- %C = shl i8 %B, 7 ; <i8> [#uses=1]
- ret i8 %C
+; CHECK-NEXT: [[B:%.*]] = and i8 %A, -128
+; CHECK-NEXT: ret i8 [[B]]
+;
+ %B = lshr i8 %A, 7 ; <i8> [#uses=1]
+ %C = shl i8 %B, 7 ; <i8> [#uses=1]
+ ret i8 %C
}
;; Allow the simplification when the lshr shift is exact.
define i8 @test10a(i8 %A) {
; CHECK-LABEL: @test10a(
-; CHECK-NEXT: ret i8 %A
- %B = lshr exact i8 %A, 7
- %C = shl i8 %B, 7
- ret i8 %C
+; CHECK-NEXT: ret i8 %A
+;
+ %B = lshr exact i8 %A, 7
+ %C = shl i8 %B, 7
+ ret i8 %C
}
;; This transformation is deferred to DAGCombine:
@@ -167,34 +190,39 @@ define i8 @test10a(i8 %A) {
;; The shl may be valuable to scalar evolution.
define i8 @test11(i8 %A) {
; CHECK-LABEL: @test11(
-; CHECK: shl i8
-; CHECK-NEXT: ret i8
- %a = mul i8 %A, 3 ; <i8> [#uses=1]
- %B = lshr i8 %a, 3 ; <i8> [#uses=1]
- %C = shl i8 %B, 4 ; <i8> [#uses=1]
- ret i8 %C
+; CHECK-NEXT: [[A:%.*]] = mul i8 %A, 3
+; CHECK-NEXT: [[B:%.*]] = lshr i8 [[A]], 3
+; CHECK-NEXT: [[C:%.*]] = shl i8 [[B]], 4
+; CHECK-NEXT: ret i8 [[C]]
+;
+ %a = mul i8 %A, 3 ; <i8> [#uses=1]
+ %B = lshr i8 %a, 3 ; <i8> [#uses=1]
+ %C = shl i8 %B, 4 ; <i8> [#uses=1]
+ ret i8 %C
}
;; Allow the simplification in InstCombine when the lshr shift is exact.
define i8 @test11a(i8 %A) {
; CHECK-LABEL: @test11a(
-; CHECK-NEXT: mul i8 %A, 6
-; CHECK-NEXT: ret i8
- %a = mul i8 %A, 3
- %B = lshr exact i8 %a, 3
- %C = shl i8 %B, 4
- ret i8 %C
+; CHECK-NEXT: [[C:%.*]] = mul i8 %A, 6
+; CHECK-NEXT: ret i8 [[C]]
+;
+ %a = mul i8 %A, 3
+ %B = lshr exact i8 %a, 3
+ %C = shl i8 %B, 4
+ ret i8 %C
}
;; This is deferred to DAGCombine unless %B is single-use.
;; (A >> 8) << 8 === A & -256
define i32 @test12(i32 %A) {
; CHECK-LABEL: @test12(
-; CHECK-NEXT: and i32 %A, -256
-; CHECK-NEXT: ret i32
- %B = ashr i32 %A, 8 ; <i32> [#uses=1]
- %C = shl i32 %B, 8 ; <i32> [#uses=1]
- ret i32 %C
+; CHECK-NEXT: [[B1:%.*]] = and i32 %A, -256
+; CHECK-NEXT: ret i32 [[B1]]
+;
+ %B = ashr i32 %A, 8 ; <i32> [#uses=1]
+ %C = shl i32 %B, 8 ; <i32> [#uses=1]
+ ret i32 %C
}
;; This transformation is deferred to DAGCombine:
@@ -202,190 +230,208 @@ define i32 @test12(i32 %A) {
;; The shl may be valuable to scalar evolution.
define i8 @test13(i8 %A) {
; CHECK-LABEL: @test13(
-; CHECK: shl i8
-; CHECK-NEXT: ret i8
- %a = mul i8 %A, 3 ; <i8> [#uses=1]
- %B = ashr i8 %a, 3 ; <i8> [#uses=1]
- %C = shl i8 %B, 4 ; <i8> [#uses=1]
- ret i8 %C
+; CHECK-NEXT: [[A:%.*]] = mul i8 %A, 3
+; CHECK-NEXT: [[B1:%.*]] = lshr i8 [[A]], 3
+; CHECK-NEXT: [[C:%.*]] = shl i8 [[B1]], 4
+; CHECK-NEXT: ret i8 [[C]]
+;
+ %a = mul i8 %A, 3 ; <i8> [#uses=1]
+ %B = ashr i8 %a, 3 ; <i8> [#uses=1]
+ %C = shl i8 %B, 4 ; <i8> [#uses=1]
+ ret i8 %C
}
define i8 @test13a(i8 %A) {
; CHECK-LABEL: @test13a(
-; CHECK-NEXT: mul i8 %A, 6
-; CHECK-NEXT: ret i8
- %a = mul i8 %A, 3
- %B = ashr exact i8 %a, 3
- %C = shl i8 %B, 4
- ret i8 %C
+; CHECK-NEXT: [[C:%.*]] = mul i8 %A, 6
+; CHECK-NEXT: ret i8 [[C]]
+;
+ %a = mul i8 %A, 3
+ %B = ashr exact i8 %a, 3
+ %C = shl i8 %B, 4
+ ret i8 %C
}
;; D = ((B | 1234) << 4) === ((B << 4)|(1234 << 4)
define i32 @test14(i32 %A) {
; CHECK-LABEL: @test14(
-; CHECK-NEXT: %B = and i32 %A, -19760
-; CHECK-NEXT: or i32 %B, 19744
-; CHECK-NEXT: ret i32
- %B = lshr i32 %A, 4 ; <i32> [#uses=1]
- %C = or i32 %B, 1234 ; <i32> [#uses=1]
- %D = shl i32 %C, 4 ; <i32> [#uses=1]
- ret i32 %D
+; CHECK-NEXT: [[B:%.*]] = and i32 %A, -19760
+; CHECK-NEXT: [[C:%.*]] = or i32 [[B]], 19744
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = lshr i32 %A, 4 ; <i32> [#uses=1]
+ %C = or i32 %B, 1234 ; <i32> [#uses=1]
+ %D = shl i32 %C, 4 ; <i32> [#uses=1]
+ ret i32 %D
}
;; D = ((B | 1234) << 4) === ((B << 4)|(1234 << 4)
define i32 @test14a(i32 %A) {
; CHECK-LABEL: @test14a(
-; CHECK-NEXT: and i32 %A, 77
-; CHECK-NEXT: ret i32
- %B = shl i32 %A, 4 ; <i32> [#uses=1]
- %C = and i32 %B, 1234 ; <i32> [#uses=1]
- %D = lshr i32 %C, 4 ; <i32> [#uses=1]
- ret i32 %D
+; CHECK-NEXT: [[C:%.*]] = and i32 %A, 77
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = shl i32 %A, 4 ; <i32> [#uses=1]
+ %C = and i32 %B, 1234 ; <i32> [#uses=1]
+ %D = lshr i32 %C, 4 ; <i32> [#uses=1]
+ ret i32 %D
}
define i32 @test15(i1 %C) {
; CHECK-LABEL: @test15(
-; CHECK-NEXT: select i1 %C, i32 12, i32 4
-; CHECK-NEXT: ret i32
- %A = select i1 %C, i32 3, i32 1 ; <i32> [#uses=1]
- %V = shl i32 %A, 2 ; <i32> [#uses=1]
- ret i32 %V
+; CHECK-NEXT: [[A:%.*]] = select i1 %C, i32 12, i32 4
+; CHECK-NEXT: ret i32 [[A]]
+;
+ %A = select i1 %C, i32 3, i32 1 ; <i32> [#uses=1]
+ %V = shl i32 %A, 2 ; <i32> [#uses=1]
+ ret i32 %V
}
define i32 @test15a(i1 %C) {
; CHECK-LABEL: @test15a(
-; CHECK-NEXT: select i1 %C, i32 512, i32 128
-; CHECK-NEXT: ret i32
- %A = select i1 %C, i8 3, i8 1 ; <i8> [#uses=1]
- %shift.upgrd.4 = zext i8 %A to i32 ; <i32> [#uses=1]
- %V = shl i32 64, %shift.upgrd.4 ; <i32> [#uses=1]
- ret i32 %V
+; CHECK-NEXT: [[V:%.*]] = select i1 %C, i32 512, i32 128
+; CHECK-NEXT: ret i32 [[V]]
+;
+ %A = select i1 %C, i8 3, i8 1 ; <i8> [#uses=1]
+ %shift.upgrd.4 = zext i8 %A to i32 ; <i32> [#uses=1]
+ %V = shl i32 64, %shift.upgrd.4 ; <i32> [#uses=1]
+ ret i32 %V
}
define i1 @test16(i32 %X) {
; CHECK-LABEL: @test16(
-; CHECK-NEXT: and i32 %X, 16
-; CHECK-NEXT: icmp ne i32
-; CHECK-NEXT: ret i1
- %tmp.3 = ashr i32 %X, 4
- %tmp.6 = and i32 %tmp.3, 1
- %tmp.7 = icmp ne i32 %tmp.6, 0
- ret i1 %tmp.7
+; CHECK-NEXT: [[TMP_6:%.*]] = and i32 %X, 16
+; CHECK-NEXT: [[TMP_7:%.*]] = icmp ne i32 [[TMP_6]], 0
+; CHECK-NEXT: ret i1 [[TMP_7]]
+;
+ %tmp.3 = ashr i32 %X, 4
+ %tmp.6 = and i32 %tmp.3, 1
+ %tmp.7 = icmp ne i32 %tmp.6, 0
+ ret i1 %tmp.7
}
define i1 @test17(i32 %A) {
; CHECK-LABEL: @test17(
-; CHECK-NEXT: and i32 %A, -8
-; CHECK-NEXT: icmp eq i32
-; CHECK-NEXT: ret i1
- %B = lshr i32 %A, 3 ; <i32> [#uses=1]
- %C = icmp eq i32 %B, 1234 ; <i1> [#uses=1]
- ret i1 %C
+; CHECK-NEXT: [[B_MASK:%.*]] = and i32 %A, -8
+; CHECK-NEXT: [[C:%.*]] = icmp eq i32 [[B_MASK]], 9872
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = lshr i32 %A, 3 ; <i32> [#uses=1]
+ %C = icmp eq i32 %B, 1234 ; <i1> [#uses=1]
+ ret i1 %C
}
define i1 @test18(i8 %A) {
; CHECK-LABEL: @test18(
-; CHECK: ret i1 false
-
- %B = lshr i8 %A, 7 ; <i8> [#uses=1]
- ;; false
- %C = icmp eq i8 %B, 123 ; <i1> [#uses=1]
- ret i1 %C
+; CHECK-NEXT: ret i1 false
+;
+ %B = lshr i8 %A, 7 ; <i8> [#uses=1]
+ ;; false
+ %C = icmp eq i8 %B, 123 ; <i1> [#uses=1]
+ ret i1 %C
}
define i1 @test19(i32 %A) {
; CHECK-LABEL: @test19(
-; CHECK-NEXT: icmp ult i32 %A, 4
-; CHECK-NEXT: ret i1
- %B = ashr i32 %A, 2 ; <i32> [#uses=1]
- ;; (X & -4) == 0
- %C = icmp eq i32 %B, 0 ; <i1> [#uses=1]
- ret i1 %C
+; CHECK-NEXT: [[C:%.*]] = icmp ult i32 %A, 4
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = ashr i32 %A, 2 ; <i32> [#uses=1]
+ ;; (X & -4) == 0
+ %C = icmp eq i32 %B, 0 ; <i1> [#uses=1]
+ ret i1 %C
}
define i1 @test19a(i32 %A) {
; CHECK-LABEL: @test19a(
-; CHECK-NEXT: icmp ugt i32 %A, -5
-; CHECK-NEXT: ret i1
- %B = ashr i32 %A, 2 ; <i32> [#uses=1]
- ;; X >u ~4
- %C = icmp eq i32 %B, -1 ; <i1> [#uses=1]
- ret i1 %C
+; CHECK-NEXT: [[C:%.*]] = icmp ugt i32 %A, -5
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = ashr i32 %A, 2 ; <i32> [#uses=1]
+ ;; X >u ~4
+ %C = icmp eq i32 %B, -1 ; <i1> [#uses=1]
+ ret i1 %C
}
define i1 @test20(i8 %A) {
; CHECK-LABEL: @test20(
-; CHECK: ret i1 false
- %B = ashr i8 %A, 7 ; <i8> [#uses=1]
- ;; false
- %C = icmp eq i8 %B, 123 ; <i1> [#uses=1]
- ret i1 %C
+; CHECK-NEXT: ret i1 false
+;
+ %B = ashr i8 %A, 7 ; <i8> [#uses=1]
+ ;; false
+ %C = icmp eq i8 %B, 123 ; <i1> [#uses=1]
+ ret i1 %C
}
define i1 @test21(i8 %A) {
; CHECK-LABEL: @test21(
-; CHECK-NEXT: and i8 %A, 15
-; CHECK-NEXT: icmp eq i8
-; CHECK-NEXT: ret i1
- %B = shl i8 %A, 4 ; <i8> [#uses=1]
- %C = icmp eq i8 %B, -128 ; <i1> [#uses=1]
- ret i1 %C
+; CHECK-NEXT: [[B_MASK:%.*]] = and i8 %A, 15
+; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[B_MASK]], 8
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = shl i8 %A, 4 ; <i8> [#uses=1]
+ %C = icmp eq i8 %B, -128 ; <i1> [#uses=1]
+ ret i1 %C
}
define i1 @test22(i8 %A) {
; CHECK-LABEL: @test22(
-; CHECK-NEXT: and i8 %A, 15
-; CHECK-NEXT: icmp eq i8
-; CHECK-NEXT: ret i1
- %B = shl i8 %A, 4 ; <i8> [#uses=1]
- %C = icmp eq i8 %B, 0 ; <i1> [#uses=1]
- ret i1 %C
+; CHECK-NEXT: [[B_MASK:%.*]] = and i8 %A, 15
+; CHECK-NEXT: [[C:%.*]] = icmp eq i8 [[B_MASK]], 0
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = shl i8 %A, 4 ; <i8> [#uses=1]
+ %C = icmp eq i8 %B, 0 ; <i1> [#uses=1]
+ ret i1 %C
}
define i8 @test23(i32 %A) {
; CHECK-LABEL: @test23(
-; CHECK-NEXT: trunc i32 %A to i8
-; CHECK-NEXT: ret i8
-
- ;; casts not needed
- %B = shl i32 %A, 24 ; <i32> [#uses=1]
- %C = ashr i32 %B, 24 ; <i32> [#uses=1]
- %D = trunc i32 %C to i8 ; <i8> [#uses=1]
- ret i8 %D
+; CHECK-NEXT: [[D:%.*]] = trunc i32 %A to i8
+; CHECK-NEXT: ret i8 [[D]]
+;
+ ;; casts not needed
+ %B = shl i32 %A, 24 ; <i32> [#uses=1]
+ %C = ashr i32 %B, 24 ; <i32> [#uses=1]
+ %D = trunc i32 %C to i8 ; <i8> [#uses=1]
+ ret i8 %D
}
define i8 @test24(i8 %X) {
; CHECK-LABEL: @test24(
-; CHECK-NEXT: and i8 %X, 3
-; CHECK-NEXT: ret i8
- %Y = and i8 %X, -5 ; <i8> [#uses=1]
- %Z = shl i8 %Y, 5 ; <i8> [#uses=1]
- %Q = ashr i8 %Z, 5 ; <i8> [#uses=1]
- ret i8 %Q
+; CHECK-NEXT: [[Z:%.*]] = and i8 %X, 3
+; CHECK-NEXT: ret i8 [[Z]]
+;
+ %Y = and i8 %X, -5 ; <i8> [#uses=1]
+ %Z = shl i8 %Y, 5 ; <i8> [#uses=1]
+ %Q = ashr i8 %Z, 5 ; <i8> [#uses=1]
+ ret i8 %Q
}
define i32 @test25(i32 %tmp.2, i32 %AA) {
; CHECK-LABEL: @test25(
-; CHECK-NEXT: and i32 %tmp.2, -131072
-; CHECK-NEXT: add i32 %{{[^,]*}}, %AA
-; CHECK-NEXT: and i32 %{{[^,]*}}, -131072
-; CHECK-NEXT: ret i32
- %x = lshr i32 %AA, 17 ; <i32> [#uses=1]
- %tmp.3 = lshr i32 %tmp.2, 17 ; <i32> [#uses=1]
- %tmp.5 = add i32 %tmp.3, %x ; <i32> [#uses=1]
- %tmp.6 = shl i32 %tmp.5, 17 ; <i32> [#uses=1]
- ret i32 %tmp.6
+; CHECK-NEXT: [[TMP_3:%.*]] = and i32 %tmp.2, -131072
+; CHECK-NEXT: [[X2:%.*]] = add i32 [[TMP_3]], %AA
+; CHECK-NEXT: [[TMP_6:%.*]] = and i32 [[X2]], -131072
+; CHECK-NEXT: ret i32 [[TMP_6]]
+;
+ %x = lshr i32 %AA, 17 ; <i32> [#uses=1]
+ %tmp.3 = lshr i32 %tmp.2, 17 ; <i32> [#uses=1]
+ %tmp.5 = add i32 %tmp.3, %x ; <i32> [#uses=1]
+ %tmp.6 = shl i32 %tmp.5, 17 ; <i32> [#uses=1]
+ ret i32 %tmp.6
}
define <2 x i32> @test25_vector(<2 x i32> %tmp.2, <2 x i32> %AA) {
; CHECK-LABEL: @test25_vector(
-; CHECK: %tmp.3 = lshr <2 x i32> %tmp.2, <i32 17, i32 17>
-; CHECK-NEXT: shl <2 x i32> %tmp.3, <i32 17, i32 17>
-; CHECK-NEXT: add <2 x i32> %tmp.51, %AA
-; CHECK-NEXT: and <2 x i32> %x2, <i32 -131072, i32 -131072>
-; CHECK-NEXT: ret <2 x i32>
+; CHECK-NEXT: [[TMP_3:%.*]] = lshr <2 x i32> %tmp.2, <i32 17, i32 17>
+; CHECK-NEXT: [[TMP_51:%.*]] = shl <2 x i32> [[TMP_3]], <i32 17, i32 17>
+; CHECK-NEXT: [[X2:%.*]] = add <2 x i32> [[TMP_51]], %AA
+; CHECK-NEXT: [[TMP_6:%.*]] = and <2 x i32> [[X2]], <i32 -131072, i32 -131072>
+; CHECK-NEXT: ret <2 x i32> [[TMP_6]]
+;
%x = lshr <2 x i32> %AA, <i32 17, i32 17>
%tmp.3 = lshr <2 x i32> %tmp.2, <i32 17, i32 17>
%tmp.5 = add <2 x i32> %tmp.3, %x
@@ -396,129 +442,162 @@ define <2 x i32> @test25_vector(<2 x i32> %tmp.2, <2 x i32> %AA) {
;; handle casts between shifts.
define i32 @test26(i32 %A) {
; CHECK-LABEL: @test26(
-; CHECK-NEXT: and i32 %A, -2
-; CHECK-NEXT: ret i32
- %B = lshr i32 %A, 1 ; <i32> [#uses=1]
- %C = bitcast i32 %B to i32 ; <i32> [#uses=1]
- %D = shl i32 %C, 1 ; <i32> [#uses=1]
- ret i32 %D
+; CHECK-NEXT: [[B:%.*]] = and i32 %A, -2
+; CHECK-NEXT: ret i32 [[B]]
+;
+ %B = lshr i32 %A, 1 ; <i32> [#uses=1]
+ %C = bitcast i32 %B to i32 ; <i32> [#uses=1]
+ %D = shl i32 %C, 1 ; <i32> [#uses=1]
+ ret i32 %D
}
define i1 @test27(i32 %x) nounwind {
; CHECK-LABEL: @test27(
-; CHECK-NEXT: and i32 %x, 8
-; CHECK-NEXT: icmp ne i32
-; CHECK-NEXT: ret i1
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %x, 8
+; CHECK-NEXT: [[Z:%.*]] = icmp ne i32 [[TMP1]], 0
+; CHECK-NEXT: ret i1 [[Z]]
+;
%y = lshr i32 %x, 3
%z = trunc i32 %y to i1
ret i1 %z
}
define i8 @test28(i8 %x) {
-entry:
; CHECK-LABEL: @test28(
-; CHECK: icmp slt i8 %x, 0
-; CHECK-NEXT: br i1
- %tmp1 = lshr i8 %x, 7
- %cond1 = icmp ne i8 %tmp1, 0
- br i1 %cond1, label %bb1, label %bb2
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[COND1:%.*]] = icmp slt i8 %x, 0
+; CHECK-NEXT: br i1 [[COND1]], label %bb1, label %bb2
+; CHECK: bb1:
+; CHECK-NEXT: ret i8 0
+; CHECK: bb2:
+; CHECK-NEXT: ret i8 1
+;
+entry:
+ %tmp1 = lshr i8 %x, 7
+ %cond1 = icmp ne i8 %tmp1, 0
+ br i1 %cond1, label %bb1, label %bb2
bb1:
- ret i8 0
+ ret i8 0
bb2:
- ret i8 1
+ ret i8 1
}
define i8 @test28a(i8 %x, i8 %y) {
+; CHECK-LABEL: @test28a(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP1:%.*]] = lshr i8 %x, 7
+; CHECK-NEXT: [[COND1:%.*]] = icmp eq i8 [[TMP1]], 0
+; CHECK-NEXT: br i1 [[COND1]], label %bb2, label %bb1
+; CHECK: bb1:
+; CHECK-NEXT: ret i8 [[TMP1]]
+; CHECK: bb2:
+; CHECK-NEXT: [[TMP2:%.*]] = add i8 [[TMP1]], %y
+; CHECK-NEXT: ret i8 [[TMP2]]
+;
entry:
; This shouldn't be transformed.
-; CHECK-LABEL: @test28a(
-; CHECK: %tmp1 = lshr i8 %x, 7
-; CHECK: %cond1 = icmp eq i8 %tmp1, 0
-; CHECK: br i1 %cond1, label %bb2, label %bb1
- %tmp1 = lshr i8 %x, 7
- %cond1 = icmp ne i8 %tmp1, 0
- br i1 %cond1, label %bb1, label %bb2
+ %tmp1 = lshr i8 %x, 7
+ %cond1 = icmp ne i8 %tmp1, 0
+ br i1 %cond1, label %bb1, label %bb2
bb1:
- ret i8 %tmp1
+ ret i8 %tmp1
bb2:
- %tmp2 = add i8 %tmp1, %y
- ret i8 %tmp2
+ %tmp2 = add i8 %tmp1, %y
+ ret i8 %tmp2
}
define i32 @test29(i64 %d18) {
-entry:
- %tmp916 = lshr i64 %d18, 32
- %tmp917 = trunc i64 %tmp916 to i32
- %tmp10 = lshr i32 %tmp917, 31
- ret i32 %tmp10
; CHECK-LABEL: @test29(
-; CHECK: %tmp916 = lshr i64 %d18, 63
-; CHECK: %tmp10 = trunc i64 %tmp916 to i32
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP916:%.*]] = lshr i64 %d18, 63
+; CHECK-NEXT: [[TMP10:%.*]] = trunc i64 [[TMP916]] to i32
+; CHECK-NEXT: ret i32 [[TMP10]]
+;
+entry:
+ %tmp916 = lshr i64 %d18, 32
+ %tmp917 = trunc i64 %tmp916 to i32
+ %tmp10 = lshr i32 %tmp917, 31
+ ret i32 %tmp10
}
define i32 @test30(i32 %A, i32 %B, i32 %C) {
- %X = shl i32 %A, %C
- %Y = shl i32 %B, %C
- %Z = and i32 %X, %Y
- ret i32 %Z
; CHECK-LABEL: @test30(
-; CHECK: %X1 = and i32 %A, %B
-; CHECK: %Z = shl i32 %X1, %C
+; CHECK-NEXT: [[X1:%.*]] = and i32 %A, %B
+; CHECK-NEXT: [[Z:%.*]] = shl i32 [[X1]], %C
+; CHECK-NEXT: ret i32 [[Z]]
+;
+ %X = shl i32 %A, %C
+ %Y = shl i32 %B, %C
+ %Z = and i32 %X, %Y
+ ret i32 %Z
}
define i32 @test31(i32 %A, i32 %B, i32 %C) {
- %X = lshr i32 %A, %C
- %Y = lshr i32 %B, %C
- %Z = or i32 %X, %Y
- ret i32 %Z
; CHECK-LABEL: @test31(
-; CHECK: %X1 = or i32 %A, %B
-; CHECK: %Z = lshr i32 %X1, %C
+; CHECK-NEXT: [[X1:%.*]] = or i32 %A, %B
+; CHECK-NEXT: [[Z:%.*]] = lshr i32 [[X1]], %C
+; CHECK-NEXT: ret i32 [[Z]]
+;
+ %X = lshr i32 %A, %C
+ %Y = lshr i32 %B, %C
+ %Z = or i32 %X, %Y
+ ret i32 %Z
}
define i32 @test32(i32 %A, i32 %B, i32 %C) {
- %X = ashr i32 %A, %C
- %Y = ashr i32 %B, %C
- %Z = xor i32 %X, %Y
- ret i32 %Z
; CHECK-LABEL: @test32(
-; CHECK: %X1 = xor i32 %A, %B
-; CHECK: %Z = ashr i32 %X1, %C
-; CHECK: ret i32 %Z
+; CHECK-NEXT: [[X1:%.*]] = xor i32 %A, %B
+; CHECK-NEXT: [[Z:%.*]] = ashr i32 [[X1]], %C
+; CHECK-NEXT: ret i32 [[Z]]
+;
+ %X = ashr i32 %A, %C
+ %Y = ashr i32 %B, %C
+ %Z = xor i32 %X, %Y
+ ret i32 %Z
}
define i1 @test33(i32 %X) {
- %tmp1 = shl i32 %X, 7
- %tmp2 = icmp slt i32 %tmp1, 0
- ret i1 %tmp2
; CHECK-LABEL: @test33(
-; CHECK: %tmp1.mask = and i32 %X, 16777216
-; CHECK: %tmp2 = icmp ne i32 %tmp1.mask, 0
+; CHECK-NEXT: [[TMP1_MASK:%.*]] = and i32 %X, 16777216
+; CHECK-NEXT: [[TMP2:%.*]] = icmp ne i32 [[TMP1_MASK]], 0
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %tmp1 = shl i32 %X, 7
+ %tmp2 = icmp slt i32 %tmp1, 0
+ ret i1 %tmp2
}
define i1 @test34(i32 %X) {
- %tmp1 = lshr i32 %X, 7
- %tmp2 = icmp slt i32 %tmp1, 0
- ret i1 %tmp2
; CHECK-LABEL: @test34(
-; CHECK: ret i1 false
+; CHECK-NEXT: ret i1 false
+;
+ %tmp1 = lshr i32 %X, 7
+ %tmp2 = icmp slt i32 %tmp1, 0
+ ret i1 %tmp2
}
define i1 @test35(i32 %X) {
- %tmp1 = ashr i32 %X, 7
- %tmp2 = icmp slt i32 %tmp1, 0
- ret i1 %tmp2
; CHECK-LABEL: @test35(
-; CHECK: %tmp2 = icmp slt i32 %X, 0
-; CHECK: ret i1 %tmp2
+; CHECK-NEXT: [[TMP2:%.*]] = icmp slt i32 %X, 0
+; CHECK-NEXT: ret i1 [[TMP2]]
+;
+ %tmp1 = ashr i32 %X, 7
+ %tmp2 = icmp slt i32 %tmp1, 0
+ ret i1 %tmp2
}
define i128 @test36(i128 %A, i128 %B) {
+; CHECK-LABEL: @test36(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP231:%.*]] = or i128 %B, %A
+; CHECK-NEXT: [[INS:%.*]] = and i128 [[TMP231]], 18446744073709551615
+; CHECK-NEXT: ret i128 [[INS]]
+;
entry:
%tmp27 = shl i128 %A, 64
%tmp23 = shl i128 %B, 64
@@ -526,13 +605,17 @@ entry:
%tmp45 = lshr i128 %ins, 64
ret i128 %tmp45
-; CHECK-LABEL: @test36(
-; CHECK: %tmp231 = or i128 %B, %A
-; CHECK: %ins = and i128 %tmp231, 18446744073709551615
-; CHECK: ret i128 %ins
}
define i64 @test37(i128 %A, i32 %B) {
+; CHECK-LABEL: @test37(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP22:%.*]] = zext i32 %B to i128
+; CHECK-NEXT: [[TMP23:%.*]] = shl nuw nsw i128 [[TMP22]], 32
+; CHECK-NEXT: [[INS:%.*]] = or i128 [[TMP23]], %A
+; CHECK-NEXT: [[TMP46:%.*]] = trunc i128 [[INS]] to i64
+; CHECK-NEXT: ret i64 [[TMP46]]
+;
entry:
%tmp27 = shl i128 %A, 64
%tmp22 = zext i32 %B to i128
@@ -542,28 +625,35 @@ entry:
%tmp46 = trunc i128 %tmp45 to i64
ret i64 %tmp46
-; CHECK-LABEL: @test37(
-; CHECK: %tmp23 = shl nuw nsw i128 %tmp22, 32
-; CHECK: %ins = or i128 %tmp23, %A
-; CHECK: %tmp46 = trunc i128 %ins to i64
}
define i32 @test38(i32 %x) nounwind readnone {
+; CHECK-LABEL: @test38(
+; CHECK-NEXT: [[REM1:%.*]] = and i32 %x, 31
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 1, [[REM1]]
+; CHECK-NEXT: ret i32 [[SHL]]
+;
%rem = srem i32 %x, 32
%shl = shl i32 1, %rem
ret i32 %shl
-; CHECK-LABEL: @test38(
-; CHECK-NEXT: and i32 %x, 31
-; CHECK-NEXT: shl i32 1
-; CHECK-NEXT: ret i32
}
; <rdar://problem/8756731>
-; CHECK-LABEL: @test39(
define i8 @test39(i32 %a0) {
+; CHECK-LABEL: @test39(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP4:%.*]] = trunc i32 %a0 to i8
+; CHECK-NEXT: [[TMP5:%.*]] = shl i8 [[TMP4]], 5
+; CHECK-NEXT: [[TMP49:%.*]] = shl i8 [[TMP4]], 6
+; CHECK-NEXT: [[TMP50:%.*]] = and i8 [[TMP49]], 64
+; CHECK-NEXT: [[TMP51:%.*]] = xor i8 [[TMP50]], [[TMP5]]
+; CHECK-NEXT: [[TMP0:%.*]] = shl i8 [[TMP4]], 2
+; CHECK-NEXT: [[TMP54:%.*]] = and i8 [[TMP0]], 16
+; CHECK-NEXT: [[TMP551:%.*]] = or i8 [[TMP54]], [[TMP51]]
+; CHECK-NEXT: ret i8 [[TMP551]]
+;
entry:
%tmp4 = trunc i32 %a0 to i8
-; CHECK: and i8 %tmp49, 64
%tmp5 = shl i8 %tmp4, 5
%tmp48 = and i8 %tmp5, 32
%tmp49 = lshr i8 %tmp48, 5
@@ -572,215 +662,278 @@ entry:
%tmp52 = and i8 %tmp51, -128
%tmp53 = lshr i8 %tmp52, 7
%tmp54 = mul i8 %tmp53, 16
-; CHECK: %0 = shl i8 %tmp4, 2
-; CHECK: %tmp54 = and i8 %0, 16
%tmp55 = xor i8 %tmp54, %tmp51
-; CHECK: ret i8 %tmp551
ret i8 %tmp55
}
; PR9809
define i32 @test40(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: @test40(
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 %b, 2
+; CHECK-NEXT: [[DIV:%.*]] = lshr i32 %a, [[TMP1]]
+; CHECK-NEXT: ret i32 [[DIV]]
+;
%shl1 = shl i32 1, %b
%shl2 = shl i32 %shl1, 2
%div = udiv i32 %a, %shl2
ret i32 %div
-; CHECK-LABEL: @test40(
-; CHECK-NEXT: add i32 %b, 2
-; CHECK-NEXT: lshr i32 %a
-; CHECK-NEXT: ret i32
}
define i32 @test41(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: @test41(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 8, %b
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
%1 = shl i32 1, %b
%2 = shl i32 %1, 3
ret i32 %2
-; CHECK-LABEL: @test41(
-; CHECK-NEXT: shl i32 8, %b
-; CHECK-NEXT: ret i32
}
define i32 @test42(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: @test42(
+; CHECK-NEXT: [[DIV:%.*]] = lshr exact i32 4096, %b
+; CHECK-NEXT: [[DIV2:%.*]] = udiv i32 %a, [[DIV]]
+; CHECK-NEXT: ret i32 [[DIV2]]
+;
%div = lshr i32 4096, %b ; must be exact otherwise we'd divide by zero
%div2 = udiv i32 %a, %div
ret i32 %div2
-; CHECK-LABEL: @test42(
-; CHECK-NEXT: lshr exact i32 4096, %b
+}
+
+define <2 x i32> @test42vec(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: @test42vec(
+; CHECK-NEXT: [[DIV:%.*]] = lshr exact <2 x i32> <i32 4096, i32 4096>, %b
+; CHECK-NEXT: [[DIV2:%.*]] = udiv <2 x i32> %a, [[DIV]]
+; CHECK-NEXT: ret <2 x i32> [[DIV2]]
+;
+ %div = lshr <2 x i32> <i32 4096, i32 4096>, %b ; must be exact otherwise we'd divide by zero
+ %div2 = udiv <2 x i32> %a, %div
+ ret <2 x i32> %div2
}
define i32 @test43(i32 %a, i32 %b) nounwind {
+; CHECK-LABEL: @test43(
+; CHECK-NEXT: [[TMP1:%.*]] = add i32 %b, 12
+; CHECK-NEXT: [[DIV2:%.*]] = lshr i32 %a, [[TMP1]]
+; CHECK-NEXT: ret i32 [[DIV2]]
+;
%div = shl i32 4096, %b ; must be exact otherwise we'd divide by zero
%div2 = udiv i32 %a, %div
ret i32 %div2
-; CHECK-LABEL: @test43(
-; CHECK-NEXT: add i32 %b, 12
-; CHECK-NEXT: lshr
-; CHECK-NEXT: ret
}
define i32 @test44(i32 %a) nounwind {
+; CHECK-LABEL: @test44(
+; CHECK-NEXT: [[Y:%.*]] = shl i32 %a, 5
+; CHECK-NEXT: ret i32 [[Y]]
+;
%y = shl nuw i32 %a, 1
%z = shl i32 %y, 4
ret i32 %z
-; CHECK-LABEL: @test44(
-; CHECK-NEXT: %y = shl i32 %a, 5
-; CHECK-NEXT: ret i32 %y
}
define i32 @test45(i32 %a) nounwind {
+; CHECK-LABEL: @test45(
+; CHECK-NEXT: [[Y:%.*]] = lshr i32 %a, 5
+; CHECK-NEXT: ret i32 [[Y]]
+;
%y = lshr exact i32 %a, 1
%z = lshr i32 %y, 4
ret i32 %z
-; CHECK-LABEL: @test45(
-; CHECK-NEXT: %y = lshr i32 %a, 5
-; CHECK-NEXT: ret i32 %y
}
define i32 @test46(i32 %a) {
+; CHECK-LABEL: @test46(
+; CHECK-NEXT: [[Z:%.*]] = ashr exact i32 %a, 2
+; CHECK-NEXT: ret i32 [[Z]]
+;
%y = ashr exact i32 %a, 3
%z = shl i32 %y, 1
ret i32 %z
-; CHECK-LABEL: @test46(
-; CHECK-NEXT: %z = ashr exact i32 %a, 2
-; CHECK-NEXT: ret i32 %z
}
define i32 @test47(i32 %a) {
+; CHECK-LABEL: @test47(
+; CHECK-NEXT: [[Z:%.*]] = lshr exact i32 %a, 2
+; CHECK-NEXT: ret i32 [[Z]]
+;
%y = lshr exact i32 %a, 3
%z = shl i32 %y, 1
ret i32 %z
-; CHECK-LABEL: @test47(
-; CHECK-NEXT: %z = lshr exact i32 %a, 2
-; CHECK-NEXT: ret i32 %z
}
define i32 @test48(i32 %x) {
+; CHECK-LABEL: @test48(
+; CHECK-NEXT: [[B:%.*]] = shl i32 %x, 2
+; CHECK-NEXT: ret i32 [[B]]
+;
%A = lshr exact i32 %x, 1
%B = shl i32 %A, 3
ret i32 %B
-; CHECK-LABEL: @test48(
-; CHECK-NEXT: %B = shl i32 %x, 2
-; CHECK-NEXT: ret i32 %B
}
define i32 @test49(i32 %x) {
+; CHECK-LABEL: @test49(
+; CHECK-NEXT: [[B:%.*]] = shl i32 %x, 2
+; CHECK-NEXT: ret i32 [[B]]
+;
%A = ashr exact i32 %x, 1
%B = shl i32 %A, 3
ret i32 %B
-; CHECK-LABEL: @test49(
-; CHECK-NEXT: %B = shl i32 %x, 2
-; CHECK-NEXT: ret i32 %B
}
define i32 @test50(i32 %x) {
+; CHECK-LABEL: @test50(
+; CHECK-NEXT: [[B:%.*]] = ashr i32 %x, 2
+; CHECK-NEXT: ret i32 [[B]]
+;
%A = shl nsw i32 %x, 1
%B = ashr i32 %A, 3
ret i32 %B
-; CHECK-LABEL: @test50(
-; CHECK-NEXT: %B = ashr i32 %x, 2
-; CHECK-NEXT: ret i32 %B
}
define i32 @test51(i32 %x) {
+; CHECK-LABEL: @test51(
+; CHECK-NEXT: [[B:%.*]] = lshr i32 %x, 2
+; CHECK-NEXT: ret i32 [[B]]
+;
%A = shl nuw i32 %x, 1
%B = lshr i32 %A, 3
ret i32 %B
-; CHECK-LABEL: @test51(
-; CHECK-NEXT: %B = lshr i32 %x, 2
-; CHECK-NEXT: ret i32 %B
}
define i32 @test52(i32 %x) {
+; CHECK-LABEL: @test52(
+; CHECK-NEXT: [[B:%.*]] = shl nsw i32 %x, 2
+; CHECK-NEXT: ret i32 [[B]]
+;
%A = shl nsw i32 %x, 3
%B = ashr i32 %A, 1
ret i32 %B
-; CHECK-LABEL: @test52(
-; CHECK-NEXT: %B = shl nsw i32 %x, 2
-; CHECK-NEXT: ret i32 %B
}
define i32 @test53(i32 %x) {
+; CHECK-LABEL: @test53(
+; CHECK-NEXT: [[B:%.*]] = shl nuw i32 %x, 2
+; CHECK-NEXT: ret i32 [[B]]
+;
%A = shl nuw i32 %x, 3
%B = lshr i32 %A, 1
ret i32 %B
-; CHECK-LABEL: @test53(
-; CHECK-NEXT: %B = shl nuw i32 %x, 2
-; CHECK-NEXT: ret i32 %B
}
define i32 @test54(i32 %x) {
+; CHECK-LABEL: @test54(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 %x, 3
+; CHECK-NEXT: [[AND:%.*]] = and i32 [[TMP1]], 16
+; CHECK-NEXT: ret i32 [[AND]]
+;
%shr2 = lshr i32 %x, 1
%shl = shl i32 %shr2, 4
%and = and i32 %shl, 16
ret i32 %and
-; CHECK-LABEL: @test54(
-; CHECK: shl i32 %x, 3
}
define i32 @test55(i32 %x) {
+; CHECK-LABEL: @test55(
+; CHECK-NEXT: [[TMP1:%.*]] = shl i32 %x, 3
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[TMP1]], 8
+; CHECK-NEXT: ret i32 [[OR]]
+;
%shr2 = lshr i32 %x, 1
%shl = shl i32 %shr2, 4
%or = or i32 %shl, 8
ret i32 %or
-; CHECK-LABEL: @test55(
-; CHECK: shl i32 %x, 3
}
define i32 @test56(i32 %x) {
+; CHECK-LABEL: @test56(
+; CHECK-NEXT: [[SHR2:%.*]] = lshr i32 %x, 1
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[SHR2]], 4
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], 7
+; CHECK-NEXT: ret i32 [[OR]]
+;
%shr2 = lshr i32 %x, 1
%shl = shl i32 %shr2, 4
%or = or i32 %shl, 7
ret i32 %or
-; CHECK-LABEL: @test56(
-; CHECK: shl i32 %shr2, 4
}
define i32 @test57(i32 %x) {
+; CHECK-LABEL: @test57(
+; CHECK-NEXT: [[SHR1:%.*]] = lshr i32 %x, 1
+; CHECK-NEXT: [[SHL:%.*]] = shl i32 [[SHR1]], 4
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], 7
+; CHECK-NEXT: ret i32 [[OR]]
+;
%shr = ashr i32 %x, 1
%shl = shl i32 %shr, 4
%or = or i32 %shl, 7
ret i32 %or
-; CHECK-LABEL: @test57(
-; CHECK: %shl = shl i32 %shr1, 4
}
define i32 @test58(i32 %x) {
+; CHECK-LABEL: @test58(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr i32 %x, 3
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[TMP1]], 1
+; CHECK-NEXT: ret i32 [[OR]]
+;
%shr = ashr i32 %x, 4
%shl = shl i32 %shr, 1
%or = or i32 %shl, 1
ret i32 %or
-; CHECK-LABEL: @test58(
-; CHECK: ashr i32 %x, 3
}
define i32 @test59(i32 %x) {
+; CHECK-LABEL: @test59(
+; CHECK-NEXT: [[SHR:%.*]] = ashr i32 %x, 4
+; CHECK-NEXT: [[SHL:%.*]] = shl nsw i32 [[SHR]], 1
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], 2
+; CHECK-NEXT: ret i32 [[OR]]
+;
%shr = ashr i32 %x, 4
%shl = shl i32 %shr, 1
%or = or i32 %shl, 2
ret i32 %or
-; CHECK-LABEL: @test59(
-; CHECK: ashr i32 %x, 4
}
; propagate "exact" trait
define i32 @test60(i32 %x) {
+; CHECK-LABEL: @test60(
+; CHECK-NEXT: [[SHL:%.*]] = ashr exact i32 %x, 3
+; CHECK-NEXT: [[OR:%.*]] = or i32 [[SHL]], 1
+; CHECK-NEXT: ret i32 [[OR]]
+;
%shr = ashr exact i32 %x, 4
%shl = shl i32 %shr, 1
%or = or i32 %shl, 1
ret i32 %or
-; CHECK-LABEL: @test60(
-; CHECK: ashr exact i32 %x, 3
}
; PR17026
-; CHECK-LABEL: @test61(
-; CHECK-NOT: sh
-; CHECK: ret
define void @test61(i128 %arg) {
+; CHECK-LABEL: @test61(
+; CHECK-NEXT: bb:
+; CHECK-NEXT: br i1 undef, label %bb1, label %bb12
+; CHECK: bb1:
+; CHECK-NEXT: br label %bb2
+; CHECK: bb2:
+; CHECK-NEXT: br i1 undef, label %bb3, label %bb7
+; CHECK: bb3:
+; CHECK-NEXT: br label %bb8
+; CHECK: bb7:
+; CHECK-NEXT: br i1 undef, label %bb8, label %bb2
+; CHECK: bb8:
+; CHECK-NEXT: br i1 undef, label %bb11, label %bb12
+; CHECK: bb11:
+; CHECK-NEXT: br i1 undef, label %bb1, label %bb12
+; CHECK: bb12:
+; CHECK-NEXT: ret void
+;
bb:
br i1 undef, label %bb1, label %bb12
@@ -814,27 +967,36 @@ bb12: ; preds = %bb11, %bb8, %bb
define i32 @test62(i32 %a) {
; CHECK-LABEL: @test62(
-; CHECK-NEXT: ret i32 undef
+; CHECK-NEXT: ret i32 undef
+;
%b = ashr i32 %a, 32 ; shift all bits out
ret i32 %b
}
define <4 x i32> @test62_splat_vector(<4 x i32> %a) {
-; CHECK-LABEL: @test62_splat_vector
-; CHECK-NEXT: ret <4 x i32> undef
+; CHECK-LABEL: @test62_splat_vector(
+; CHECK-NEXT: ret <4 x i32> undef
+;
%b = ashr <4 x i32> %a, <i32 32, i32 32, i32 32, i32 32> ; shift all bits out
ret <4 x i32> %b
}
define <4 x i32> @test62_non_splat_vector(<4 x i32> %a) {
-; CHECK-LABEL: @test62_non_splat_vector
-; CHECK-NOT: ret <4 x i32> undef
+; CHECK-LABEL: @test62_non_splat_vector(
+; CHECK-NEXT: [[B:%.*]] = ashr <4 x i32> %a, <i32 32, i32 0, i32 1, i32 2>
+; CHECK-NEXT: ret <4 x i32> [[B]]
+;
%b = ashr <4 x i32> %a, <i32 32, i32 0, i32 1, i32 2> ; shift all bits out
ret <4 x i32> %b
}
define <2 x i65> @test_63(<2 x i64> %t) {
-; CHECK-LABEL: @test_63
+; CHECK-LABEL: @test_63(
+; CHECK-NEXT: [[A:%.*]] = zext <2 x i64> %t to <2 x i65>
+; CHECK-NEXT: [[SEXT:%.*]] = shl <2 x i65> [[A]], <i65 33, i65 33>
+; CHECK-NEXT: [[B:%.*]] = ashr <2 x i65> [[SEXT]], <i65 33, i65 33>
+; CHECK-NEXT: ret <2 x i65> [[B]]
+;
%a = zext <2 x i64> %t to <2 x i65>
%sext = shl <2 x i65> %a, <i65 33, i65 33>
%b = ashr <2 x i65> %sext, <i65 33, i65 33>
diff --git a/test/Transforms/InstCombine/shufflevec-constant.ll b/test/Transforms/InstCombine/shufflevec-constant.ll
index a002b2a853f5..37efba1f5c10 100644
--- a/test/Transforms/InstCombine/shufflevec-constant.ll
+++ b/test/Transforms/InstCombine/shufflevec-constant.ll
@@ -1,14 +1,17 @@
-; RUN: opt < %s -instcombine -S | grep "ret <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0x7FF0000000000000, float 0x7FF0000000000000>"
+; NOTE: Assertions have been autogenerated by update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin9"
define <4 x float> @__inff4() nounwind readnone {
-entry:
- %tmp14 = extractelement <1 x double> bitcast (<2 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000> to <1 x double>), i32 0 ; <double> [#uses=1]
- %tmp4 = bitcast double %tmp14 to i64 ; <i64> [#uses=1]
- %tmp3 = bitcast i64 %tmp4 to <2 x float> ; <<2 x float>> [#uses=1]
- %tmp8 = shufflevector <2 x float> %tmp3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef> ; <<4 x float>> [#uses=1]
- %tmp9 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp8, <4 x i32> <i32 0, i32 1, i32 4, i32 5> ; <<4 x float>> [#uses=0]
- ret <4 x float> %tmp9
+; CHECK-LABEL: @__inff4(
+; CHECK-NEXT: ret <4 x float> <float 0.000000e+00, float 0.000000e+00, float 0x7FF0000000000000, float 0x7FF0000000000000>
+;
+ %tmp14 = extractelement <1 x double> bitcast (<2 x float> <float 0x7FF0000000000000, float 0x7FF0000000000000> to <1 x double>), i32 0
+ %tmp4 = bitcast double %tmp14 to i64
+ %tmp3 = bitcast i64 %tmp4 to <2 x float>
+ %tmp8 = shufflevector <2 x float> %tmp3, <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
+ %tmp9 = shufflevector <4 x float> zeroinitializer, <4 x float> %tmp8, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
+ ret <4 x float> %tmp9
}
diff --git a/test/Transforms/InstCombine/signed-comparison.ll b/test/Transforms/InstCombine/signed-comparison.ll
index 922f4dcb2d42..1fbfc2d14633 100644
--- a/test/Transforms/InstCombine/signed-comparison.ll
+++ b/test/Transforms/InstCombine/signed-comparison.ll
@@ -1,28 +1,25 @@
-; RUN: opt < %s -instcombine -S > %t
-; RUN: not grep zext %t
-; RUN: not grep slt %t
-; RUN: grep "icmp ult" %t
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
-; Instcombine should convert the zext+slt into a simple ult.
+; Convert the zext+slt into a simple ult.
-define void @foo(double* %p) nounwind {
-entry:
- br label %bb
-
-bb:
- %indvar = phi i64 [ 0, %entry ], [ %indvar.next, %bb ]
- %t0 = and i64 %indvar, 65535
- %t1 = getelementptr double, double* %p, i64 %t0
- %t2 = load double, double* %t1, align 8
- %t3 = fmul double %t2, 2.2
- store double %t3, double* %t1, align 8
- %i.04 = trunc i64 %indvar to i16
- %t4 = add i16 %i.04, 1
- %t5 = zext i16 %t4 to i32
- %t6 = icmp slt i32 %t5, 500
- %indvar.next = add i64 %indvar, 1
- br i1 %t6, label %bb, label %return
+define i1 @scalar_zext_slt(i16 %t4) {
+; CHECK-LABEL: @scalar_zext_slt(
+; CHECK-NEXT: [[T6:%.*]] = icmp ult i16 %t4, 500
+; CHECK-NEXT: ret i1 [[T6]]
+;
+ %t5 = zext i16 %t4 to i32
+ %t6 = icmp slt i32 %t5, 500
+ ret i1 %t6
+}
-return:
- ret void
+define <4 x i1> @vector_zext_slt(<4 x i16> %t4) {
+; CHECK-LABEL: @vector_zext_slt(
+; CHECK-NEXT: [[T6:%.*]] = icmp ult <4 x i16> %t4, <i16 500, i16 0, i16 501, i16 -1>
+; CHECK-NEXT: ret <4 x i1> [[T6]]
+;
+ %t5 = zext <4 x i16> %t4 to <4 x i32>
+ %t6 = icmp slt <4 x i32> %t5, <i32 500, i32 0, i32 501, i32 65535>
+ ret <4 x i1> %t6
}
+
diff --git a/test/Transforms/InstCombine/signext.ll b/test/Transforms/InstCombine/signext.ll
index 3a714d7046d3..bccadeb396f2 100644
--- a/test/Transforms/InstCombine/signext.ll
+++ b/test/Transforms/InstCombine/signext.ll
@@ -1,78 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128:n8:16:32:64"
define i32 @test1(i32 %x) {
- %tmp.1 = and i32 %x, 65535 ; <i32> [#uses=1]
- %tmp.2 = xor i32 %tmp.1, -32768 ; <i32> [#uses=1]
- %tmp.3 = add i32 %tmp.2, 32768 ; <i32> [#uses=1]
- ret i32 %tmp.3
; CHECK-LABEL: @test1(
-; CHECK: %sext = shl i32 %x, 16
-; CHECK: %tmp.3 = ashr exact i32 %sext, 16
-; CHECK: ret i32 %tmp.3
+; CHECK-NEXT: [[SEXT:%.*]] = shl i32 %x, 16
+; CHECK-NEXT: [[TMP_3:%.*]] = ashr exact i32 [[SEXT]], 16
+; CHECK-NEXT: ret i32 [[TMP_3]]
+;
+ %tmp.1 = and i32 %x, 65535
+ %tmp.2 = xor i32 %tmp.1, -32768
+ %tmp.3 = add i32 %tmp.2, 32768
+ ret i32 %tmp.3
}
define i32 @test2(i32 %x) {
- %tmp.1 = and i32 %x, 65535 ; <i32> [#uses=1]
- %tmp.2 = xor i32 %tmp.1, 32768 ; <i32> [#uses=1]
- %tmp.3 = add i32 %tmp.2, -32768 ; <i32> [#uses=1]
- ret i32 %tmp.3
; CHECK-LABEL: @test2(
-; CHECK: %sext = shl i32 %x, 16
-; CHECK: %tmp.3 = ashr exact i32 %sext, 16
-; CHECK: ret i32 %tmp.3
+; CHECK-NEXT: [[SEXT:%.*]] = shl i32 %x, 16
+; CHECK-NEXT: [[TMP_3:%.*]] = ashr exact i32 [[SEXT]], 16
+; CHECK-NEXT: ret i32 [[TMP_3]]
+;
+ %tmp.1 = and i32 %x, 65535
+ %tmp.2 = xor i32 %tmp.1, 32768
+ %tmp.3 = add i32 %tmp.2, -32768
+ ret i32 %tmp.3
}
define i32 @test3(i16 %P) {
- %tmp.1 = zext i16 %P to i32 ; <i32> [#uses=1]
- %tmp.4 = xor i32 %tmp.1, 32768 ; <i32> [#uses=1]
- %tmp.5 = add i32 %tmp.4, -32768 ; <i32> [#uses=1]
- ret i32 %tmp.5
; CHECK-LABEL: @test3(
-; CHECK: %tmp.5 = sext i16 %P to i32
-; CHECK: ret i32 %tmp.5
+; CHECK-NEXT: [[TMP_5:%.*]] = sext i16 %P to i32
+; CHECK-NEXT: ret i32 [[TMP_5]]
+;
+ %tmp.1 = zext i16 %P to i32
+ %tmp.4 = xor i32 %tmp.1, 32768
+ %tmp.5 = add i32 %tmp.4, -32768
+ ret i32 %tmp.5
}
define i32 @test4(i32 %x) {
- %tmp.1 = and i32 %x, 255 ; <i32> [#uses=1]
- %tmp.2 = xor i32 %tmp.1, 128 ; <i32> [#uses=1]
- %tmp.3 = add i32 %tmp.2, -128 ; <i32> [#uses=1]
- ret i32 %tmp.3
; CHECK-LABEL: @test4(
-; CHECK: %sext = shl i32 %x, 24
-; CHECK: %tmp.3 = ashr exact i32 %sext, 24
-; CHECK: ret i32 %tmp.3
+; CHECK-NEXT: [[SEXT:%.*]] = shl i32 %x, 24
+; CHECK-NEXT: [[TMP_3:%.*]] = ashr exact i32 [[SEXT]], 24
+; CHECK-NEXT: ret i32 [[TMP_3]]
+;
+ %tmp.1 = and i32 %x, 255
+ %tmp.2 = xor i32 %tmp.1, 128
+ %tmp.3 = add i32 %tmp.2, -128
+ ret i32 %tmp.3
}
define i32 @test5(i32 %x) {
- %tmp.2 = shl i32 %x, 16 ; <i32> [#uses=1]
- %tmp.4 = ashr i32 %tmp.2, 16 ; <i32> [#uses=1]
- ret i32 %tmp.4
; CHECK-LABEL: @test5(
-; CHECK: %tmp.2 = shl i32 %x, 16
-; CHECK: %tmp.4 = ashr exact i32 %tmp.2, 16
-; CHECK: ret i32 %tmp.4
+; CHECK-NEXT: [[TMP_2:%.*]] = shl i32 %x, 16
+; CHECK-NEXT: [[TMP_4:%.*]] = ashr exact i32 [[TMP_2]], 16
+; CHECK-NEXT: ret i32 [[TMP_4]]
+;
+ %tmp.2 = shl i32 %x, 16
+ %tmp.4 = ashr i32 %tmp.2, 16
+ ret i32 %tmp.4
}
define i32 @test6(i16 %P) {
- %tmp.1 = zext i16 %P to i32 ; <i32> [#uses=1]
- %sext1 = shl i32 %tmp.1, 16 ; <i32> [#uses=1]
- %tmp.5 = ashr i32 %sext1, 16 ; <i32> [#uses=1]
- ret i32 %tmp.5
; CHECK-LABEL: @test6(
-; CHECK: %tmp.5 = sext i16 %P to i32
-; CHECK: ret i32 %tmp.5
+; CHECK-NEXT: [[TMP_5:%.*]] = sext i16 %P to i32
+; CHECK-NEXT: ret i32 [[TMP_5]]
+;
+ %tmp.1 = zext i16 %P to i32
+ %sext1 = shl i32 %tmp.1, 16
+ %tmp.5 = ashr i32 %sext1, 16
+ ret i32 %tmp.5
}
-define i32 @test7(i32 %x) nounwind readnone {
-entry:
- %shr = lshr i32 %x, 5 ; <i32> [#uses=1]
- %xor = xor i32 %shr, 67108864 ; <i32> [#uses=1]
- %sub = add i32 %xor, -67108864 ; <i32> [#uses=1]
- ret i32 %sub
+define i32 @test7(i32 %x) {
; CHECK-LABEL: @test7(
-; CHECK: %sub = ashr i32 %x, 5
-; CHECK: ret i32 %sub
+; CHECK-NEXT: [[SUB:%.*]] = ashr i32 %x, 5
+; CHECK-NEXT: ret i32 [[SUB]]
+;
+ %shr = lshr i32 %x, 5
+ %xor = xor i32 %shr, 67108864
+ %sub = add i32 %xor, -67108864
+ ret i32 %sub
}
diff --git a/test/Transforms/InstCombine/sink-into-catchswitch.ll b/test/Transforms/InstCombine/sink-into-catchswitch.ll
new file mode 100644
index 000000000000..04a62250fc5d
--- /dev/null
+++ b/test/Transforms/InstCombine/sink-into-catchswitch.ll
@@ -0,0 +1,45 @@
+; RUN: opt -instcombine -S < %s | FileCheck %s
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc18.0.0"
+
+%struct.B = type { i64, i64 }
+
+define void @test1(%struct.B* %p) personality i32 (...)* @__CxxFrameHandler3 {
+invoke.cont:
+ %0 = bitcast %struct.B* %p to <2 x i64>*
+ %1 = load <2 x i64>, <2 x i64>* %0, align 8
+ %2 = extractelement <2 x i64> %1, i32 0
+ invoke void @throw()
+ to label %unreachable unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %invoke.cont
+ %cs = catchswitch within none [label %invoke.cont1] unwind label %ehcleanup
+
+invoke.cont1: ; preds = %catch.dispatch
+ %catch = catchpad within %cs [i8* null, i32 64, i8* null]
+ invoke void @throw() [ "funclet"(token %catch) ]
+ to label %unreachable unwind label %ehcleanup
+
+ehcleanup: ; preds = %invoke.cont1, %catch.dispatch
+ %phi = phi i64 [ %2, %catch.dispatch ], [ 9, %invoke.cont1 ]
+ %cleanup = cleanuppad within none []
+ call void @release(i64 %phi) [ "funclet"(token %cleanup) ]
+ cleanupret from %cleanup unwind to caller
+
+unreachable: ; preds = %invoke.cont1, %invoke.cont
+ unreachable
+}
+
+; CHECK-LABEL: define void @test1(
+; CHECK: %[[bc:.*]] = bitcast %struct.B* %p to <2 x i64>*
+; CHECK: %[[ld:.*]] = load <2 x i64>, <2 x i64>* %[[bc]], align 8
+; CHECK: %[[ee:.*]] = extractelement <2 x i64> %[[ld]], i32 0
+
+; CHECK: %[[phi:.*]] = phi i64 [ %[[ee]], {{.*}} ], [ 9, {{.*}} ]
+; CHECK: call void @release(i64 %[[phi]])
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare void @throw()
+
+declare void @release(i64)
diff --git a/test/Transforms/InstCombine/stacksaverestore.ll b/test/Transforms/InstCombine/stacksaverestore.ll
index de400e91a751..9eb0efb1911b 100644
--- a/test/Transforms/InstCombine/stacksaverestore.ll
+++ b/test/Transforms/InstCombine/stacksaverestore.ll
@@ -1,4 +1,6 @@
-; RUN: opt < %s -instcombine -S | grep "call.*stackrestore" | count 1
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+@glob = global i32 0
declare i8* @llvm.stacksave()
declare void @llvm.stackrestore(i8*)
@@ -11,11 +13,19 @@ define i32* @test1(i32 %P) {
ret i32* %A
}
+; CHECK-LABEL: define i32* @test1(
+; CHECK-NOT: call void @llvm.stackrestore
+; CHECK: ret i32*
+
define void @test2(i8* %X) {
call void @llvm.stackrestore( i8* %X ) ;; no allocas before return.
ret void
}
+; CHECK-LABEL: define void @test2(
+; CHECK-NOT: call void @llvm.stackrestore
+; CHECK: ret void
+
define void @foo(i32 %size) nounwind {
entry:
%tmp118124 = icmp sgt i32 %size, 0 ; <i1> [#uses=1]
@@ -52,5 +62,51 @@ return: ; preds = %bb, %entry
ret void
}
+; CHECK-LABEL: define void @foo(
+; CHECK: %tmp = call i8* @llvm.stacksave()
+; CHECK: alloca i8
+; CHECK-NOT: stacksave
+; CHECK: call void @bar(
+; CHECK-NEXT: call void @llvm.stackrestore(i8* %tmp)
+; CHECK: ret void
+
declare void @bar(i32, i8*, i8*, i8*, i8*, i32)
+declare void @inalloca_callee(i32* inalloca)
+
+define void @test3(i32 %c) {
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [0, %entry], [%i1, %loop]
+ %save1 = call i8* @llvm.stacksave()
+ %argmem = alloca inalloca i32
+ store i32 0, i32* %argmem
+ call void @inalloca_callee(i32* inalloca %argmem)
+
+ ; This restore cannot be deleted, the restore below does not make it dead.
+ call void @llvm.stackrestore(i8* %save1)
+
+ ; FIXME: We should be able to remove this save/restore pair, but we don't.
+ %save2 = call i8* @llvm.stacksave()
+ store i32 0, i32* @glob
+ call void @llvm.stackrestore(i8* %save2)
+ %i1 = add i32 1, %i
+ %done = icmp eq i32 %i1, %c
+ br i1 %done, label %loop, label %return
+
+return:
+ ret void
+}
+
+; CHECK-LABEL: define void @test3(
+; CHECK: loop:
+; CHECK: %i = phi i32 [ 0, %entry ], [ %i1, %loop ]
+; CHECK: %save1 = call i8* @llvm.stacksave()
+; CHECK: %argmem = alloca inalloca i32
+; CHECK: store i32 0, i32* %argmem
+; CHECK: call void @inalloca_callee(i32* inalloca {{.*}} %argmem)
+; CHECK: call void @llvm.stackrestore(i8* %save1)
+; CHECK: br i1 %done, label %loop, label %return
+; CHECK: ret void
diff --git a/test/Transforms/InstCombine/strlen-1.ll b/test/Transforms/InstCombine/strlen-1.ll
index fe453699a60e..f3287c0de35f 100644
--- a/test/Transforms/InstCombine/strlen-1.ll
+++ b/test/Transforms/InstCombine/strlen-1.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; Test that the strlen library call simplifier works correctly.
;
; RUN: opt < %s -instcombine -S | FileCheck %s
@@ -10,6 +11,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f3
@null_hello = constant [7 x i8] c"\00hello\00"
@nullstring = constant i8 0
@a = common global [32 x i8] zeroinitializer, align 1
+@null_hello_mid = constant [13 x i8] c"hello wor\00ld\00"
declare i32 @strlen(i8*)
@@ -17,93 +19,159 @@ declare i32 @strlen(i8*)
define i32 @test_simplify1() {
; CHECK-LABEL: @test_simplify1(
+; CHECK-NEXT: ret i32 5
+;
%hello_p = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0
%hello_l = call i32 @strlen(i8* %hello_p)
ret i32 %hello_l
-; CHECK-NEXT: ret i32 5
}
define i32 @test_simplify2() {
; CHECK-LABEL: @test_simplify2(
+; CHECK-NEXT: ret i32 0
+;
%null_p = getelementptr [1 x i8], [1 x i8]* @null, i32 0, i32 0
%null_l = call i32 @strlen(i8* %null_p)
ret i32 %null_l
-; CHECK-NEXT: ret i32 0
}
define i32 @test_simplify3() {
; CHECK-LABEL: @test_simplify3(
+; CHECK-NEXT: ret i32 0
+;
%null_hello_p = getelementptr [7 x i8], [7 x i8]* @null_hello, i32 0, i32 0
%null_hello_l = call i32 @strlen(i8* %null_hello_p)
ret i32 %null_hello_l
-; CHECK-NEXT: ret i32 0
}
define i32 @test_simplify4() {
; CHECK-LABEL: @test_simplify4(
+; CHECK-NEXT: ret i32 0
+;
%len = tail call i32 @strlen(i8* @nullstring) nounwind
ret i32 %len
-; CHECK-NEXT: ret i32 0
}
; Check strlen(x) == 0 --> *x == 0.
define i1 @test_simplify5() {
; CHECK-LABEL: @test_simplify5(
+; CHECK-NEXT: ret i1 false
+;
%hello_p = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0
%hello_l = call i32 @strlen(i8* %hello_p)
%eq_hello = icmp eq i32 %hello_l, 0
ret i1 %eq_hello
-; CHECK-NEXT: ret i1 false
}
define i1 @test_simplify6() {
; CHECK-LABEL: @test_simplify6(
+; CHECK-NEXT: ret i1 true
+;
%null_p = getelementptr [1 x i8], [1 x i8]* @null, i32 0, i32 0
%null_l = call i32 @strlen(i8* %null_p)
%eq_null = icmp eq i32 %null_l, 0
ret i1 %eq_null
-; CHECK-NEXT: ret i1 true
}
; Check strlen(x) != 0 --> *x != 0.
define i1 @test_simplify7() {
; CHECK-LABEL: @test_simplify7(
+; CHECK-NEXT: ret i1 true
+;
%hello_p = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0
%hello_l = call i32 @strlen(i8* %hello_p)
%ne_hello = icmp ne i32 %hello_l, 0
ret i1 %ne_hello
-; CHECK-NEXT: ret i1 true
}
define i1 @test_simplify8() {
; CHECK-LABEL: @test_simplify8(
+; CHECK-NEXT: ret i1 false
+;
%null_p = getelementptr [1 x i8], [1 x i8]* @null, i32 0, i32 0
%null_l = call i32 @strlen(i8* %null_p)
%ne_null = icmp ne i32 %null_l, 0
ret i1 %ne_null
-; CHECK-NEXT: ret i1 false
}
define i32 @test_simplify9(i1 %x) {
-; CHECK-LABEL: @test_simplify9
+; CHECK-LABEL: @test_simplify9(
+; CHECK-NEXT: [[TMP1:%.*]] = select i1 %x, i32 5, i32 6
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
%hello = getelementptr [6 x i8], [6 x i8]* @hello, i32 0, i32 0
%longer = getelementptr [7 x i8], [7 x i8]* @longer, i32 0, i32 0
%s = select i1 %x, i8* %hello, i8* %longer
%l = call i32 @strlen(i8* %s)
-; CHECK-NEXT: select i1 %x, i32 5, i32 6
ret i32 %l
-; CHECK-NEXT: ret
+}
+
+; Check the case that should be simplified to a sub instruction.
+; strlen(@hello + x) --> 5 - x
+
+define i32 @test_simplify10(i32 %x) {
+; CHECK-LABEL: @test_simplify10(
+; CHECK-NEXT: [[TMP1:%.*]] = sub i32 5, %x
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %hello_p = getelementptr inbounds [6 x i8], [6 x i8]* @hello, i32 0, i32 %x
+ %hello_l = call i32 @strlen(i8* %hello_p)
+ ret i32 %hello_l
+}
+
+; strlen(@null_hello_mid + (x & 7)) --> 9 - (x & 7)
+
+define i32 @test_simplify11(i32 %x) {
+; CHECK-LABEL: @test_simplify11(
+; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 7
+; CHECK-NEXT: [[TMP1:%.*]] = sub nsw i32 9, [[AND]]
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %and = and i32 %x, 7
+ %hello_p = getelementptr inbounds [13 x i8], [13 x i8]* @null_hello_mid, i32 0, i32 %and
+ %hello_l = call i32 @strlen(i8* %hello_p)
+ ret i32 %hello_l
}
; Check cases that shouldn't be simplified.
define i32 @test_no_simplify1() {
; CHECK-LABEL: @test_no_simplify1(
+; CHECK-NEXT: [[A_L:%.*]] = call i32 @strlen(i8* getelementptr inbounds ([32 x i8], [32 x i8]* @a, i32 0, i32 0))
+; CHECK-NEXT: ret i32 [[A_L]]
+;
%a_p = getelementptr [32 x i8], [32 x i8]* @a, i32 0, i32 0
%a_l = call i32 @strlen(i8* %a_p)
-; CHECK-NEXT: %a_l = call i32 @strlen
ret i32 %a_l
-; CHECK-NEXT: ret i32 %a_l
}
+
+; strlen(@null_hello + x) should not be simplified to a sub instruction.
+
+define i32 @test_no_simplify2(i32 %x) {
+; CHECK-LABEL: @test_no_simplify2(
+; CHECK-NEXT: [[HELLO_P:%.*]] = getelementptr inbounds [7 x i8], [7 x i8]* @null_hello, i32 0, i32 %x
+; CHECK-NEXT: [[HELLO_L:%.*]] = call i32 @strlen(i8* [[HELLO_P]])
+; CHECK-NEXT: ret i32 [[HELLO_L]]
+;
+ %hello_p = getelementptr inbounds [7 x i8], [7 x i8]* @null_hello, i32 0, i32 %x
+ %hello_l = call i32 @strlen(i8* %hello_p)
+ ret i32 %hello_l
+}
+
+; strlen(@null_hello_mid + (x & 15)) should not be simplified to a sub instruction.
+
+define i32 @test_no_simplify3(i32 %x) {
+; CHECK-LABEL: @test_no_simplify3(
+; CHECK-NEXT: [[AND:%.*]] = and i32 %x, 15
+; CHECK-NEXT: [[HELLO_P:%.*]] = getelementptr inbounds [13 x i8], [13 x i8]* @null_hello_mid, i32 0, i32 [[AND]]
+; CHECK-NEXT: [[HELLO_L:%.*]] = call i32 @strlen(i8* [[HELLO_P]])
+; CHECK-NEXT: ret i32 [[HELLO_L]]
+;
+ %and = and i32 %x, 15
+ %hello_p = getelementptr inbounds [13 x i8], [13 x i8]* @null_hello_mid, i32 0, i32 %and
+ %hello_l = call i32 @strlen(i8* %hello_p)
+ ret i32 %hello_l
+}
+
diff --git a/test/Transforms/InstCombine/tbaa-store-to-load.ll b/test/Transforms/InstCombine/tbaa-store-to-load.ll
new file mode 100644
index 000000000000..707be7350139
--- /dev/null
+++ b/test/Transforms/InstCombine/tbaa-store-to-load.ll
@@ -0,0 +1,17 @@
+; RUN: opt -S -instcombine < %s 2>&1 | FileCheck %s
+
+define i64 @f(i64* %p1, i64* %p2) {
+top:
+ ; check that the tbaa is preserved
+ ; CHECK-LABEL: @f(
+ ; CHECK: %v1 = load i64, i64* %p1, align 8, !tbaa !0
+ ; CHECK: store i64 %v1, i64* %p2, align 8
+ ; CHECK: ret i64 %v1
+ %v1 = load i64, i64* %p1, align 8, !tbaa !0
+ store i64 %v1, i64* %p2, align 8
+ %v2 = load i64, i64* %p2, align 8
+ ret i64 %v2
+}
+
+!0 = !{!1, !1, i64 0}
+!1 = !{!"load_tbaa"}
diff --git a/test/Transforms/InstCombine/trunc.ll b/test/Transforms/InstCombine/trunc.ll
index 38f6b2804d63..2019b3a6df47 100644
--- a/test/Transforms/InstCombine/trunc.ll
+++ b/test/Transforms/InstCombine/trunc.ll
@@ -160,3 +160,24 @@ define i32 @trunc_bitcast3(<4 x i32> %v) {
; CHECK-NEXT: ret i32 %ext
}
+; CHECK-LABEL: @trunc_shl_infloop(
+; CHECK: %tmp = lshr i64 %arg, 1
+; CHECK: %tmp21 = shl i64 %tmp, 2
+; CHECK: %tmp2 = trunc i64 %tmp21 to i32
+; CHECK: icmp sgt i32 %tmp2, 0
+define void @trunc_shl_infloop(i64 %arg) {
+bb:
+ %tmp = lshr i64 %arg, 1
+ %tmp1 = trunc i64 %tmp to i32
+ %tmp2 = shl i32 %tmp1, 2
+ %tmp3 = icmp sgt i32 %tmp2, 0
+ br i1 %tmp3, label %bb2, label %bb1
+
+bb1:
+ %tmp5 = sub i32 0, %tmp1
+ %tmp6 = sub i32 %tmp5, 1
+ unreachable
+
+bb2:
+ unreachable
+}
diff --git a/test/Transforms/InstCombine/unpack-fca.ll b/test/Transforms/InstCombine/unpack-fca.ll
index 435983924b77..47e747ccc468 100644
--- a/test/Transforms/InstCombine/unpack-fca.ll
+++ b/test/Transforms/InstCombine/unpack-fca.ll
@@ -58,6 +58,27 @@ define void @storeStructOfArrayOfA({ [1 x %A] }* %saa.ptr) {
ret void
}
+define void @storeArrayOfB([2 x %B]* %ab.ptr, [2 x %B] %ab) {
+; CHECK-LABEL: storeArrayOfB
+; CHECK-NEXT: [[EVB0:%[a-z0-9\.]+]] = extractvalue [2 x %B] %ab, 0
+; CHECK-NEXT: [[GEP0:%[a-z0-9\.]+]] = getelementptr inbounds [2 x %B], [2 x %B]* %ab.ptr, i64 0, i64 0, i32 0
+; CHECK-NEXT: [[EV0:%[a-z0-9\.]+]] = extractvalue %B [[EVB0]], 0
+; CHECK-NEXT: store i8* [[EV0]], i8** [[GEP0]], align 8
+; CHECK-NEXT: [[GEP1:%[a-z0-9\.]+]] = getelementptr inbounds [2 x %B], [2 x %B]* %ab.ptr, i64 0, i64 0, i32 1
+; CHECK-NEXT: [[EV1:%[a-z0-9\.]+]] = extractvalue %B [[EVB0]], 1
+; CHECK-NEXT: store i64 [[EV1]], i64* [[GEP1]], align 8
+; CHECK-NEXT: [[EVB1:%[a-z0-9\.]+]] = extractvalue [2 x %B] %ab, 1
+; CHECK-NEXT: [[GEP2:%[a-z0-9\.]+]] = getelementptr inbounds [2 x %B], [2 x %B]* %ab.ptr, i64 0, i64 1, i32 0
+; CHECK-NEXT: [[EV2:%[a-z0-9\.]+]] = extractvalue %B [[EVB1]], 0
+; CHECK-NEXT: store i8* [[EV2]], i8** [[GEP2]], align 8
+; CHECK-NEXT: [[GEP3:%[a-z0-9\.]+]] = getelementptr inbounds [2 x %B], [2 x %B]* %ab.ptr, i64 0, i64 1, i32 1
+; CHECK-NEXT: [[EV3:%[a-z0-9\.]+]] = extractvalue %B [[EVB1]], 1
+; CHECK-NEXT: store i64 [[EV3]], i64* [[GEP3]], align 8
+; CHECK-NEXT: ret void
+ store [2 x %B] %ab, [2 x %B]* %ab.ptr, align 8
+ ret void
+}
+
define %A @loadA(%A* %a.ptr) {
; CHECK-LABEL: loadA
; CHECK-NEXT: [[GEP:%[a-z0-9\.]+]] = getelementptr inbounds %A, %A* %a.ptr, i64 0, i32 0
@@ -137,6 +158,27 @@ define %B @structB(%B* %b.ptr) {
ret %B %1
}
+define [2 x %B] @loadArrayOfB([2 x %B]* %ab.ptr) {
+; CHECK-LABEL: loadArrayOfB
+; CHECK-NEXT: [[GEP1:%[a-z0-9\.]+]] = getelementptr inbounds [2 x %B], [2 x %B]* %ab.ptr, i64 0, i64 0, i32 0
+; CHECK-NEXT: [[LOAD1:%[a-z0-9\.]+]] = load i8*, i8** [[GEP1]], align 8
+; CHECK-NEXT: [[IV1:%[a-z0-9\.]+]] = insertvalue %B undef, i8* [[LOAD1]], 0
+; CHECK-NEXT: [[GEP2:%[a-z0-9\.]+]] = getelementptr inbounds [2 x %B], [2 x %B]* %ab.ptr, i64 0, i64 0, i32 1
+; CHECK-NEXT: [[LOAD2:%[a-z0-9\.]+]] = load i64, i64* [[GEP2]], align 8
+; CHECK-NEXT: [[IV2:%[a-z0-9\.]+]] = insertvalue %B [[IV1]], i64 [[LOAD2]], 1
+; CHECK-NEXT: [[IV3:%[a-z0-9\.]+]] = insertvalue [2 x %B] undef, %B [[IV2]], 0
+; CHECK-NEXT: [[GEP3:%[a-z0-9\.]+]] = getelementptr inbounds [2 x %B], [2 x %B]* %ab.ptr, i64 0, i64 1, i32 0
+; CHECK-NEXT: [[LOAD3:%[a-z0-9\.]+]] = load i8*, i8** [[GEP3]], align 8
+; CHECK-NEXT: [[IV4:%[a-z0-9\.]+]] = insertvalue %B undef, i8* [[LOAD3]], 0
+; CHECK-NEXT: [[GEP4:%[a-z0-9\.]+]] = getelementptr inbounds [2 x %B], [2 x %B]* %ab.ptr, i64 0, i64 1, i32 1
+; CHECK-NEXT: [[LOAD4:%[a-z0-9\.]+]] = load i64, i64* [[GEP4]], align 8
+; CHECK-NEXT: [[IV5:%[a-z0-9\.]+]] = insertvalue %B [[IV4]], i64 [[LOAD4]], 1
+; CHECK-NEXT: [[IV6:%[a-z0-9\.]+]] = insertvalue [2 x %B] [[IV3]], %B [[IV5]], 1
+; CHECK-NEXT: ret [2 x %B] [[IV6]]
+ %1 = load [2 x %B], [2 x %B]* %ab.ptr, align 8
+ ret [2 x %B] %1
+}
+
%struct.S = type <{ i8, %struct.T }>
%struct.T = type { i32, i32 }
@@ -151,3 +193,30 @@ define i32 @packed_alignment(%struct.S* dereferenceable(9) %s) {
%v = extractvalue %struct.T %tv, 1
ret i32 %v
}
+
+%struct.U = type {i8, i8, i8, i8, i8, i8, i8, i8, i64}
+
+define void @check_alignment(%struct.U* %u, %struct.U* %v) {
+; CHECK-LABEL: check_alignment
+; CHECK: load i8, i8* {{.*}}, align 8
+; CHECK: load i8, i8* {{.*}}, align 1
+; CHECK: load i8, i8* {{.*}}, align 2
+; CHECK: load i8, i8* {{.*}}, align 1
+; CHECK: load i8, i8* {{.*}}, align 4
+; CHECK: load i8, i8* {{.*}}, align 1
+; CHECK: load i8, i8* {{.*}}, align 2
+; CHECK: load i8, i8* {{.*}}, align 1
+; CHECK: load i64, i64* {{.*}}, align 8
+; CHECK: store i8 {{.*}}, i8* {{.*}}, align 8
+; CHECK: store i8 {{.*}}, i8* {{.*}}, align 1
+; CHECK: store i8 {{.*}}, i8* {{.*}}, align 2
+; CHECK: store i8 {{.*}}, i8* {{.*}}, align 1
+; CHECK: store i8 {{.*}}, i8* {{.*}}, align 4
+; CHECK: store i8 {{.*}}, i8* {{.*}}, align 1
+; CHECK: store i8 {{.*}}, i8* {{.*}}, align 2
+; CHECK: store i8 {{.*}}, i8* {{.*}}, align 1
+; CHECK: store i64 {{.*}}, i64* {{.*}}, align 8
+ %1 = load %struct.U, %struct.U* %u
+ store %struct.U %1, %struct.U* %v
+ ret void
+}
diff --git a/test/Transforms/InstCombine/urem.ll b/test/Transforms/InstCombine/urem.ll
index 51084224a734..32aa033ce418 100644
--- a/test/Transforms/InstCombine/urem.ll
+++ b/test/Transforms/InstCombine/urem.ll
@@ -1,8 +1,13 @@
-; RUN: opt < %s -instcombine -S | grep urem
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
define i64 @rem_unsigned(i64 %x1, i64 %y2) {
- %r = udiv i64 %x1, %y2
- %r7 = mul i64 %r, %y2
- %r8 = sub i64 %x1, %r7
- ret i64 %r8
+; CHECK-LABEL: @rem_unsigned(
+; CHECK-NEXT: [[R:%.*]] = urem i64 %x1, %y2
+; CHECK-NEXT: ret i64 [[R]]
+;
+ %r = udiv i64 %x1, %y2
+ %r7 = mul i64 %r, %y2
+ %r8 = sub i64 %x1, %r7
+ ret i64 %r8
}
diff --git a/test/Transforms/InstCombine/vararg.ll b/test/Transforms/InstCombine/vararg.ll
new file mode 100644
index 000000000000..263a7425a075
--- /dev/null
+++ b/test/Transforms/InstCombine/vararg.ll
@@ -0,0 +1,30 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+%struct.__va_list = type { i8*, i8*, i8*, i32, i32 }
+
+declare void @llvm.lifetime.start(i64, i8* nocapture)
+declare void @llvm.lifetime.end(i64, i8* nocapture)
+declare void @llvm.va_start(i8*)
+declare void @llvm.va_end(i8*)
+declare void @llvm.va_copy(i8*, i8*)
+
+define i32 @func(i8* nocapture readnone %fmt, ...) {
+; CHECK-LABEL: @func(
+; CHECK: entry:
+; CHECK-NEXT: ret i32 0
+entry:
+ %va0 = alloca %struct.__va_list, align 8
+ %va1 = alloca %struct.__va_list, align 8
+ %0 = bitcast %struct.__va_list* %va0 to i8*
+ %1 = bitcast %struct.__va_list* %va1 to i8*
+ call void @llvm.lifetime.start(i64 32, i8* %0)
+ call void @llvm.va_start(i8* %0)
+ call void @llvm.lifetime.start(i64 32, i8* %1)
+ call void @llvm.va_copy(i8* %1, i8* %0)
+ call void @llvm.va_end(i8* %1)
+ call void @llvm.lifetime.end(i64 32, i8* %1)
+ call void @llvm.va_end(i8* %0)
+ call void @llvm.lifetime.end(i64 32, i8* %0)
+ ret i32 0
+}
+
diff --git a/test/Transforms/InstCombine/vec_demanded_elts.ll b/test/Transforms/InstCombine/vec_demanded_elts.ll
index 0b9663300c39..e744b59ec46e 100644
--- a/test/Transforms/InstCombine/vec_demanded_elts.ll
+++ b/test/Transforms/InstCombine/vec_demanded_elts.ll
@@ -194,66 +194,6 @@ define <4 x float> @test_select(float %f, float %g) {
ret <4 x float> %ret
}
-declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>)
-define <4 x float> @test_vpermilvar_ps(<4 x float> %v) {
-; CHECK-LABEL: @test_vpermilvar_ps(
-; CHECK: shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
- %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
- ret <4 x float> %a
-}
-
-declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>)
-define <8 x float> @test_vpermilvar_ps_256(<8 x float> %v) {
-; CHECK-LABEL: @test_vpermilvar_ps_256(
-; CHECK: shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
- %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
- ret <8 x float> %a
-}
-
-declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>)
-define <2 x double> @test_vpermilvar_pd(<2 x double> %v) {
-; CHECK-LABEL: @test_vpermilvar_pd(
-; CHECK: shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 0>
- %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> <i64 2, i64 0>)
- ret <2 x double> %a
-}
-
-declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>)
-define <4 x double> @test_vpermilvar_pd_256(<4 x double> %v) {
-; CHECK-LABEL: @test_vpermilvar_pd_256(
-; CHECK: shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
- %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> <i64 3, i64 1, i64 2, i64 0>)
- ret <4 x double> %a
-}
-
-define <4 x float> @test_vpermilvar_ps_zero(<4 x float> %v) {
-; CHECK-LABEL: @test_vpermilvar_ps_zero(
-; CHECK: shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
- %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> zeroinitializer)
- ret <4 x float> %a
-}
-
-define <8 x float> @test_vpermilvar_ps_256_zero(<8 x float> %v) {
-; CHECK-LABEL: @test_vpermilvar_ps_256_zero(
-; CHECK: shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
- %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> zeroinitializer)
- ret <8 x float> %a
-}
-
-define <2 x double> @test_vpermilvar_pd_zero(<2 x double> %v) {
-; CHECK-LABEL: @test_vpermilvar_pd_zero(
-; CHECK: shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
- %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> zeroinitializer)
- ret <2 x double> %a
-}
-
-define <4 x double> @test_vpermilvar_pd_256_zero(<4 x double> %v) {
-; CHECK-LABEL: @test_vpermilvar_pd_256_zero(
-; CHECK: shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
- %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> zeroinitializer)
- ret <4 x double> %a
-}
-
define <2 x i64> @PR24922(<2 x i64> %v) {
; CHECK-LABEL: @PR24922
; CHECK: select <2 x i1>
diff --git a/test/Transforms/InstCombine/vec_phi_extract.ll b/test/Transforms/InstCombine/vec_phi_extract.ll
index 1079dc9e7198..9beb98cb1af8 100644
--- a/test/Transforms/InstCombine/vec_phi_extract.ll
+++ b/test/Transforms/InstCombine/vec_phi_extract.ll
@@ -1,8 +1,9 @@
; RUN: opt < %s -instcombine -S | FileCheck %s
define void @f(i64 %val, i32 %limit, i32 *%ptr) {
-;CHECK: %0 = trunc i64
-;CHECK: %1 = phi i32
+; CHECK-LABEL: @f
+; CHECK: %0 = trunc i64 %val to i32
+; CHECK: %1 = phi i32 [ %0, %entry ], [ {{.*}}, %loop ]
entry:
%tempvector = insertelement <16 x i64> undef, i64 %val, i32 0
%vector = shufflevector <16 x i64> %tempvector, <16 x i64> undef, <16 x i32> zeroinitializer
@@ -25,18 +26,72 @@ ret:
ret void
}
+define void @copy(i64 %val, i32 %limit, i32 *%ptr) {
+; CHECK-LABEL: @copy
+; CHECK: %0 = trunc i64 %val to i32
+; CHECK: %1 = phi i32 [ %0, %entry ], [ {{.*}}, %loop ]
+entry:
+ %tempvector = insertelement <16 x i64> undef, i64 %val, i32 0
+ %vector = shufflevector <16 x i64> %tempvector, <16 x i64> undef, <16 x i32> zeroinitializer
+ %0 = add <16 x i64> %vector, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
+ %1 = trunc <16 x i64> %0 to <16 x i32>
+ br label %loop
+
+loop:
+ %2 = phi <16 x i32> [ %1, %entry ], [ %inc, %loop ]
+ %elt = extractelement <16 x i32> %2, i32 0
+ %eltcopy = extractelement <16 x i32> %2, i32 0
+ %end = icmp ult i32 %elt, %limit
+ %3 = add i32 10, %eltcopy
+ %4 = sext i32 %elt to i64
+ %5 = getelementptr i32, i32* %ptr, i64 %4
+ store i32 %3, i32* %5
+ %inc = add <16 x i32> %2, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+ br i1 %end, label %loop, label %ret
+
+ret:
+ ret void
+}
+
+define void @nocopy(i64 %val, i32 %limit, i32 *%ptr) {
+; CHECK-LABEL: @nocopy
+; CHECK-NOT: phi i32
+; CHECK: phi <16 x i32> [ %1, %entry ], [ %inc, %loop ]
+entry:
+ %tempvector = insertelement <16 x i64> undef, i64 %val, i32 0
+ %vector = shufflevector <16 x i64> %tempvector, <16 x i64> undef, <16 x i32> zeroinitializer
+ %0 = add <16 x i64> %vector, <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>
+ %1 = trunc <16 x i64> %0 to <16 x i32>
+ br label %loop
+
+loop:
+ %2 = phi <16 x i32> [ %1, %entry ], [ %inc, %loop ]
+ %elt = extractelement <16 x i32> %2, i32 0
+ %eltcopy = extractelement <16 x i32> %2, i32 1
+ %end = icmp ult i32 %elt, %limit
+ %3 = add i32 10, %eltcopy
+ %4 = sext i32 %elt to i64
+ %5 = getelementptr i32, i32* %ptr, i64 %4
+ store i32 %3, i32* %5
+ %inc = add <16 x i32> %2, <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+ br i1 %end, label %loop, label %ret
+
+ret:
+ ret void
+}
+
define i1 @g(<3 x i32> %input_2) {
-; CHECK: extractelement
+; CHECK-LABEL: @g
+; CHECK: extractelement <3 x i32> %input_2, i32 0
entry:
br label %for.cond
for.cond:
-; CHECK: phi i32
%input_2.addr.0 = phi <3 x i32> [ %input_2, %entry ], [ %div45, %for.body ]
%input_1.addr.1 = phi <3 x i32> [ undef, %entry ], [ %dec43, %for.body ]
br i1 undef, label %for.end, label %for.body
-; CHECK: extractelement
+; CHECK: extractelement <3 x i32> %{{.*}}, i32 0
for.body:
%dec43 = add <3 x i32> %input_1.addr.1, <i32 -1, i32 -1, i32 -1>
%sub44 = sub <3 x i32> <i32 -1, i32 -1, i32 -1>, %dec43
diff --git a/test/Transforms/InstCombine/volatile_store.ll b/test/Transforms/InstCombine/volatile_store.ll
index 7377b6815e29..c2f63d6659f0 100644
--- a/test/Transforms/InstCombine/volatile_store.ll
+++ b/test/Transforms/InstCombine/volatile_store.ll
@@ -1,14 +1,22 @@
-; RUN: opt < %s -instcombine -S | grep "store volatile"
-; RUN: opt < %s -instcombine -S | grep "load volatile"
+; NOTE: Assertions have been autogenerated by update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
-@x = weak global i32 0 ; <i32*> [#uses=2]
+@x = weak global i32 0
define void @self_assign_1() {
+; CHECK-LABEL: @self_assign_1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP:%.*]] = load volatile i32, i32* @x, align 4
+; CHECK-NEXT: store volatile i32 [[TMP]], i32* @x, align 4
+; CHECK-NEXT: br label %return
+; CHECK: return:
+; CHECK-NEXT: ret void
+;
entry:
- %tmp = load volatile i32, i32* @x ; <i32> [#uses=1]
- store volatile i32 %tmp, i32* @x
- br label %return
+ %tmp = load volatile i32, i32* @x
+ store volatile i32 %tmp, i32* @x
+ br label %return
-return: ; preds = %entry
- ret void
+return:
+ ret void
}
diff --git a/test/Transforms/InstCombine/x86-avx.ll b/test/Transforms/InstCombine/x86-avx.ll
new file mode 100644
index 000000000000..12dc22513165
--- /dev/null
+++ b/test/Transforms/InstCombine/x86-avx.ll
@@ -0,0 +1,158 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Verify that instcombine is able to fold identity shuffles.
+
+define <4 x float> @identity_test_vpermilvar_ps(<4 x float> %v) {
+; CHECK-LABEL: @identity_test_vpermilvar_ps(
+; CHECK-NEXT: ret <4 x float> %v
+;
+ %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> <i32 0, i32 1, i32 2, i32 3>)
+ ret <4 x float> %a
+}
+
+define <8 x float> @identity_test_vpermilvar_ps_256(<8 x float> %v) {
+; CHECK-LABEL: @identity_test_vpermilvar_ps_256(
+; CHECK-NEXT: ret <8 x float> %v
+;
+ %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
+ ret <8 x float> %a
+}
+
+define <2 x double> @identity_test_vpermilvar_pd(<2 x double> %v) {
+; CHECK-LABEL: @identity_test_vpermilvar_pd(
+; CHECK-NEXT: ret <2 x double> %v
+;
+ %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> <i64 0, i64 2>)
+ ret <2 x double> %a
+}
+
+define <4 x double> @identity_test_vpermilvar_pd_256(<4 x double> %v) {
+; CHECK-LABEL: @identity_test_vpermilvar_pd_256(
+; CHECK-NEXT: ret <4 x double> %v
+;
+ %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> <i64 0, i64 2, i64 0, i64 2>)
+ ret <4 x double> %a
+}
+
+; Instcombine should be able to fold the following byte shuffle to a builtin shufflevector
+; with a shuffle mask of all zeroes.
+
+define <4 x float> @zero_test_vpermilvar_ps_zero(<4 x float> %v) {
+; CHECK-LABEL: @zero_test_vpermilvar_ps_zero(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> zeroinitializer
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> zeroinitializer)
+ ret <4 x float> %a
+}
+
+define <8 x float> @zero_test_vpermilvar_ps_256_zero(<8 x float> %v) {
+; CHECK-LABEL: @zero_test_vpermilvar_ps_256_zero(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
+; CHECK-NEXT: ret <8 x float> [[TMP1]]
+;
+ %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> zeroinitializer)
+ ret <8 x float> %a
+}
+
+define <2 x double> @zero_test_vpermilvar_pd_zero(<2 x double> %v) {
+; CHECK-LABEL: @zero_test_vpermilvar_pd_zero(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> zeroinitializer
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> zeroinitializer)
+ ret <2 x double> %a
+}
+
+define <4 x double> @zero_test_vpermilvar_pd_256_zero(<4 x double> %v) {
+; CHECK-LABEL: @zero_test_vpermilvar_pd_256_zero(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
+; CHECK-NEXT: ret <4 x double> [[TMP1]]
+;
+ %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> zeroinitializer)
+ ret <4 x double> %a
+}
+
+; Verify that instcombine is able to fold constant shuffles.
+
+define <4 x float> @test_vpermilvar_ps(<4 x float> %v) {
+; CHECK-LABEL: @test_vpermilvar_ps(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
+ ret <4 x float> %a
+}
+
+define <8 x float> @test_vpermilvar_ps_256(<8 x float> %v) {
+; CHECK-LABEL: @test_vpermilvar_ps_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
+; CHECK-NEXT: ret <8 x float> [[TMP1]]
+;
+ %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ ret <8 x float> %a
+}
+
+define <2 x double> @test_vpermilvar_pd(<2 x double> %v) {
+; CHECK-LABEL: @test_vpermilvar_pd(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 1, i32 0>
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> <i64 2, i64 0>)
+ ret <2 x double> %a
+}
+
+define <4 x double> @test_vpermilvar_pd_256(<4 x double> %v) {
+; CHECK-LABEL: @test_vpermilvar_pd_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
+; CHECK-NEXT: ret <4 x double> [[TMP1]]
+;
+ %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> <i64 3, i64 1, i64 2, i64 0>)
+ ret <4 x double> %a
+}
+
+; Verify that instcombine is able to fold constant shuffles with undef mask elements.
+
+define <4 x float> @undef_test_vpermilvar_ps(<4 x float> %v) {
+; CHECK-LABEL: @undef_test_vpermilvar_ps(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> %v, <4 x float> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 undef>
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> <i32 undef, i32 2, i32 1, i32 undef>)
+ ret <4 x float> %a
+}
+
+define <8 x float> @undef_test_vpermilvar_ps_256(<8 x float> %v) {
+; CHECK-LABEL: @undef_test_vpermilvar_ps_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> %v, <8 x float> undef, <8 x i32> <i32 undef, i32 2, i32 1, i32 undef, i32 7, i32 6, i32 5, i32 4>
+; CHECK-NEXT: ret <8 x float> [[TMP1]]
+;
+ %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 undef, i32 6, i32 5, i32 undef, i32 3, i32 2, i32 1, i32 0>)
+ ret <8 x float> %a
+}
+
+define <2 x double> @undef_test_vpermilvar_pd(<2 x double> %v) {
+; CHECK-LABEL: @undef_test_vpermilvar_pd(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 undef, i32 0>
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> <i64 undef, i64 0>)
+ ret <2 x double> %a
+}
+
+define <4 x double> @undef_test_vpermilvar_pd_256(<4 x double> %v) {
+; CHECK-LABEL: @undef_test_vpermilvar_pd_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 undef, i32 0, i32 3, i32 undef>
+; CHECK-NEXT: ret <4 x double> [[TMP1]]
+;
+ %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> <i64 undef, i64 1, i64 2, i64 undef>)
+ ret <4 x double> %a
+}
+
+declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>)
+declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>)
+
+declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>)
+declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>)
diff --git a/test/Transforms/InstCombine/x86-avx2.ll b/test/Transforms/InstCombine/x86-avx2.ll
new file mode 100644
index 000000000000..4c13b4c6ae74
--- /dev/null
+++ b/test/Transforms/InstCombine/x86-avx2.ll
@@ -0,0 +1,85 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+; Verify that instcombine is able to fold identity shuffles.
+
+define <8 x i32> @identity_test_vpermd(<8 x i32> %a0) {
+; CHECK-LABEL: @identity_test_vpermd(
+; CHECK-NEXT: ret <8 x i32> %a0
+;
+ %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
+ ret <8 x i32> %a
+}
+
+define <8 x float> @identity_test_vpermps(<8 x float> %a0) {
+; CHECK-LABEL: @identity_test_vpermps(
+; CHECK-NEXT: ret <8 x float> %a0
+;
+ %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
+ ret <8 x float> %a
+}
+
+; Instcombine should be able to fold the following shuffle to a builtin shufflevector
+; with a shuffle mask of all zeroes.
+
+define <8 x i32> @zero_test_vpermd(<8 x i32> %a0) {
+; CHECK-LABEL: @zero_test_vpermd(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> zeroinitializer)
+ ret <8 x i32> %a
+}
+
+define <8 x float> @zero_test_vpermps(<8 x float> %a0) {
+; CHECK-LABEL: @zero_test_vpermps(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> zeroinitializer
+; CHECK-NEXT: ret <8 x float> [[TMP1]]
+;
+ %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> zeroinitializer)
+ ret <8 x float> %a
+}
+
+; Verify that instcombine is able to fold constant shuffles.
+
+define <8 x i32> @shuffle_test_vpermd(<8 x i32> %a0) {
+; CHECK-LABEL: @shuffle_test_vpermd(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ ret <8 x i32> %a
+}
+
+define <8 x float> @shuffle_test_vpermps(<8 x float> %a0) {
+; CHECK-LABEL: @shuffle_test_vpermps(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <8 x float> [[TMP1]]
+;
+ %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ ret <8 x float> %a
+}
+
+; Verify that instcombine is able to fold constant shuffles with undef mask elements.
+
+define <8 x i32> @undef_test_vpermd(<8 x i32> %a0) {
+; CHECK-LABEL: @undef_test_vpermd(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> %a0, <8 x i32> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %a = tail call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ ret <8 x i32> %a
+}
+
+define <8 x float> @undef_test_vpermps(<8 x float> %a0) {
+; CHECK-LABEL: @undef_test_vpermps(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> %a0, <8 x float> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
+; CHECK-NEXT: ret <8 x float> [[TMP1]]
+;
+ %a = tail call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
+ ret <8 x float> %a
+}
+
+declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>)
+declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>)
diff --git a/test/Transforms/InstCombine/x86-f16c.ll b/test/Transforms/InstCombine/x86-f16c.ll
index e10b339907e3..6b5b6cb26eda 100644
--- a/test/Transforms/InstCombine/x86-f16c.ll
+++ b/test/Transforms/InstCombine/x86-f16c.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
declare <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16>)
@@ -9,9 +10,10 @@ declare <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16>)
; Only bottom 4 elements required.
define <4 x float> @demand_vcvtph2ps_128(<8 x i16> %A) {
-; CHECK-LABEL: @demand_vcvtph2ps_128
-; CHECK-NEXT: %1 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %A)
-; CHECK-NEXT: ret <4 x float> %1
+; CHECK-LABEL: @demand_vcvtph2ps_128(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %A)
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
%1 = shufflevector <8 x i16> %A, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
%2 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> %1)
ret <4 x float> %2
@@ -19,10 +21,11 @@ define <4 x float> @demand_vcvtph2ps_128(<8 x i16> %A) {
; All 8 elements required.
define <8 x float> @demand_vcvtph2ps_256(<8 x i16> %A) {
-; CHECK-LABEL: @demand_vcvtph2ps_256
-; CHECK-NEXT: %1 = shufflevector <8 x i16> %A, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: %2 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %1)
-; CHECK-NEXT: ret <8 x float> %2
+; CHECK-LABEL: @demand_vcvtph2ps_256(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> %A, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> [[TMP1]])
+; CHECK-NEXT: ret <8 x float> [[TMP2]]
+;
%1 = shufflevector <8 x i16> %A, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
%2 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> %1)
ret <8 x float> %2
@@ -33,29 +36,33 @@ define <8 x float> @demand_vcvtph2ps_256(<8 x i16> %A) {
;
define <4 x float> @fold_vcvtph2ps_128() {
-; CHECK-LABEL: @fold_vcvtph2ps_128
-; CHECK-NEXT: ret <4 x float> <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float -0.000000e+00>
+; CHECK-LABEL: @fold_vcvtph2ps_128(
+; CHECK-NEXT: ret <4 x float> <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float -0.000000e+00>
+;
%1 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> <i16 0, i16 14336, i16 15360, i16 32768, i16 16384, i16 31743, i16 48128, i16 49152>)
ret <4 x float> %1
}
define <8 x float> @fold_vcvtph2ps_256() {
-; CHECK-LABEL: @fold_vcvtph2ps_256
-; CHECK-NEXT: ret <8 x float> <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float -0.000000e+00, float 2.000000e+00, float 6.550400e+04, float -1.000000e+00, float -2.000000e+00>
+; CHECK-LABEL: @fold_vcvtph2ps_256(
+; CHECK-NEXT: ret <8 x float> <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float -0.000000e+00, float 2.000000e+00, float 6.550400e+04, float -1.000000e+00, float -2.000000e+00>
+;
%1 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> <i16 0, i16 14336, i16 15360, i16 32768, i16 16384, i16 31743, i16 48128, i16 49152>)
ret <8 x float> %1
}
define <4 x float> @fold_vcvtph2ps_128_zero() {
-; CHECK-LABEL: @fold_vcvtph2ps_128_zero
-; CHECK-NEXT: ret <4 x float> zeroinitializer
+; CHECK-LABEL: @fold_vcvtph2ps_128_zero(
+; CHECK-NEXT: ret <4 x float> zeroinitializer
+;
%1 = tail call <4 x float> @llvm.x86.vcvtph2ps.128(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
ret <4 x float> %1
}
define <8 x float> @fold_vcvtph2ps_256_zero() {
-; CHECK-LABEL: @fold_vcvtph2ps_256_zero
-; CHECK-NEXT: ret <8 x float> zeroinitializer
+; CHECK-LABEL: @fold_vcvtph2ps_256_zero(
+; CHECK-NEXT: ret <8 x float> zeroinitializer
+;
%1 = tail call <8 x float> @llvm.x86.vcvtph2ps.256(<8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0, i16 0>)
ret <8 x float> %1
}
diff --git a/test/Transforms/InstCombine/x86-masked-memops.ll b/test/Transforms/InstCombine/x86-masked-memops.ll
new file mode 100644
index 000000000000..717a2477ef1a
--- /dev/null
+++ b/test/Transforms/InstCombine/x86-masked-memops.ll
@@ -0,0 +1,302 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+;; MASKED LOADS
+
+; If the mask isn't constant, do nothing.
+
+define <4 x float> @mload(i8* %f, <4 x i32> %mask) {
+ %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> %mask)
+ ret <4 x float> %ld
+
+; CHECK-LABEL: @mload(
+; CHECK-NEXT: %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> %mask)
+; CHECK-NEXT: ret <4 x float> %ld
+}
+
+; Zero mask returns a zero vector.
+
+define <4 x float> @mload_zeros(i8* %f) {
+ %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> zeroinitializer)
+ ret <4 x float> %ld
+
+; CHECK-LABEL: @mload_zeros(
+; CHECK-NEXT: ret <4 x float> zeroinitializer
+}
+
+; Only the sign bit matters.
+
+define <4 x float> @mload_fake_ones(i8* %f) {
+ %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> <i32 1, i32 2, i32 3, i32 2147483647>)
+ ret <4 x float> %ld
+
+; CHECK-LABEL: @mload_fake_ones(
+; CHECK-NEXT: ret <4 x float> zeroinitializer
+}
+
+; All mask bits are set, so this is just a vector load.
+
+define <4 x float> @mload_real_ones(i8* %f) {
+ %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> <i32 -1, i32 -2, i32 -3, i32 2147483648>)
+ ret <4 x float> %ld
+
+; CHECK-LABEL: @mload_real_ones(
+; CHECK-NEXT: %castvec = bitcast i8* %f to <4 x float>*
+; CHECK-NEXT: %unmaskedload = load <4 x float>, <4 x float>* %castvec
+; CHECK-NEXT: ret <4 x float> %unmaskedload
+}
+
+; It's a constant mask, so convert to an LLVM intrinsic. The backend should optimize further.
+
+define <4 x float> @mload_one_one(i8* %f) {
+ %ld = tail call <4 x float> @llvm.x86.avx.maskload.ps(i8* %f, <4 x i32> <i32 0, i32 0, i32 0, i32 -1>)
+ ret <4 x float> %ld
+
+; CHECK-LABEL: @mload_one_one(
+; CHECK-NEXT: %castvec = bitcast i8* %f to <4 x float>*
+; CHECK-NEXT: %1 = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* %castvec, i32 1, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x float> zeroinitializer)
+; CHECK-NEXT: ret <4 x float> %1
+}
+
+; Try doubles.
+
+define <2 x double> @mload_one_one_double(i8* %f) {
+ %ld = tail call <2 x double> @llvm.x86.avx.maskload.pd(i8* %f, <2 x i64> <i64 -1, i64 0>)
+ ret <2 x double> %ld
+
+; CHECK-LABEL: @mload_one_one_double(
+; CHECK-NEXT: %castvec = bitcast i8* %f to <2 x double>*
+; CHECK-NEXT: %1 = call <2 x double> @llvm.masked.load.v2f64.p0v2f64(<2 x double>* %castvec, i32 1, <2 x i1> <i1 true, i1 false>, <2 x double> zeroinitializer)
+; CHECK-NEXT: ret <2 x double> %1
+}
+
+; Try 256-bit FP ops.
+
+define <8 x float> @mload_v8f32(i8* %f) {
+ %ld = tail call <8 x float> @llvm.x86.avx.maskload.ps.256(i8* %f, <8 x i32> <i32 0, i32 0, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 0>)
+ ret <8 x float> %ld
+
+; CHECK-LABEL: @mload_v8f32(
+; CHECK-NEXT: %castvec = bitcast i8* %f to <8 x float>*
+; CHECK-NEXT: %1 = call <8 x float> @llvm.masked.load.v8f32.p0v8f32(<8 x float>* %castvec, i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x float> zeroinitializer)
+; CHECK-NEXT: ret <8 x float> %1
+}
+
+define <4 x double> @mload_v4f64(i8* %f) {
+ %ld = tail call <4 x double> @llvm.x86.avx.maskload.pd.256(i8* %f, <4 x i64> <i64 -1, i64 0, i64 0, i64 0>)
+ ret <4 x double> %ld
+
+; CHECK-LABEL: @mload_v4f64(
+; CHECK-NEXT: %castvec = bitcast i8* %f to <4 x double>*
+; CHECK-NEXT: %1 = call <4 x double> @llvm.masked.load.v4f64.p0v4f64(<4 x double>* %castvec, i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x double> zeroinitializer)
+; CHECK-NEXT: ret <4 x double> %1
+}
+
+; Try the AVX2 variants.
+
+define <4 x i32> @mload_v4i32(i8* %f) {
+ %ld = tail call <4 x i32> @llvm.x86.avx2.maskload.d(i8* %f, <4 x i32> <i32 0, i32 0, i32 0, i32 -1>)
+ ret <4 x i32> %ld
+
+; CHECK-LABEL: @mload_v4i32(
+; CHECK-NEXT: %castvec = bitcast i8* %f to <4 x i32>*
+; CHECK-NEXT: %1 = call <4 x i32> @llvm.masked.load.v4i32.p0v4i32(<4 x i32>* %castvec, i32 1, <4 x i1> <i1 false, i1 false, i1 false, i1 true>, <4 x i32> zeroinitializer)
+; CHECK-NEXT: ret <4 x i32> %1
+}
+
+define <2 x i64> @mload_v2i64(i8* %f) {
+ %ld = tail call <2 x i64> @llvm.x86.avx2.maskload.q(i8* %f, <2 x i64> <i64 -1, i64 0>)
+ ret <2 x i64> %ld
+
+; CHECK-LABEL: @mload_v2i64(
+; CHECK-NEXT: %castvec = bitcast i8* %f to <2 x i64>*
+; CHECK-NEXT: %1 = call <2 x i64> @llvm.masked.load.v2i64.p0v2i64(<2 x i64>* %castvec, i32 1, <2 x i1> <i1 true, i1 false>, <2 x i64> zeroinitializer)
+; CHECK-NEXT: ret <2 x i64> %1
+}
+
+define <8 x i32> @mload_v8i32(i8* %f) {
+ %ld = tail call <8 x i32> @llvm.x86.avx2.maskload.d.256(i8* %f, <8 x i32> <i32 0, i32 0, i32 0, i32 -1, i32 0, i32 0, i32 0, i32 0>)
+ ret <8 x i32> %ld
+
+; CHECK-LABEL: @mload_v8i32(
+; CHECK-NEXT: %castvec = bitcast i8* %f to <8 x i32>*
+; CHECK-NEXT: %1 = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %castvec, i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false>, <8 x i32> zeroinitializer)
+; CHECK-NEXT: ret <8 x i32> %1
+}
+
+define <4 x i64> @mload_v4i64(i8* %f) {
+ %ld = tail call <4 x i64> @llvm.x86.avx2.maskload.q.256(i8* %f, <4 x i64> <i64 -1, i64 0, i64 0, i64 0>)
+ ret <4 x i64> %ld
+
+; CHECK-LABEL: @mload_v4i64(
+; CHECK-NEXT: %castvec = bitcast i8* %f to <4 x i64>*
+; CHECK-NEXT: %1 = call <4 x i64> @llvm.masked.load.v4i64.p0v4i64(<4 x i64>* %castvec, i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>, <4 x i64> zeroinitializer)
+; CHECK-NEXT: ret <4 x i64> %1
+}
+
+
+;; MASKED STORES
+
+; If the mask isn't constant, do nothing.
+
+define void @mstore(i8* %f, <4 x i32> %mask, <4 x float> %v) {
+ tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> %mask, <4 x float> %v)
+ ret void
+
+; CHECK-LABEL: @mstore(
+; CHECK-NEXT: tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> %mask, <4 x float> %v)
+; CHECK-NEXT: ret void
+}
+
+; Zero mask is a nop.
+
+define void @mstore_zeros(i8* %f, <4 x float> %v) {
+ tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> zeroinitializer, <4 x float> %v)
+ ret void
+
+; CHECK-LABEL: @mstore_zeros(
+; CHECK-NEXT: ret void
+}
+
+; Only the sign bit matters.
+
+define void @mstore_fake_ones(i8* %f, <4 x float> %v) {
+ tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> <i32 1, i32 2, i32 3, i32 2147483647>, <4 x float> %v)
+ ret void
+
+; CHECK-LABEL: @mstore_fake_ones(
+; CHECK-NEXT: ret void
+}
+
+; All mask bits are set, so this is just a vector store.
+
+define void @mstore_real_ones(i8* %f, <4 x float> %v) {
+ tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> <i32 -1, i32 -2, i32 -3, i32 -2147483648>, <4 x float> %v)
+ ret void
+
+; CHECK-LABEL: @mstore_real_ones(
+; CHECK-NEXT: %castvec = bitcast i8* %f to <4 x float>*
+; CHECK-NEXT: store <4 x float> %v, <4 x float>* %castvec
+; CHECK-NEXT: ret void
+}
+
+; It's a constant mask, so convert to an LLVM intrinsic. The backend should optimize further.
+
+define void @mstore_one_one(i8* %f, <4 x float> %v) {
+ tail call void @llvm.x86.avx.maskstore.ps(i8* %f, <4 x i32> <i32 0, i32 0, i32 0, i32 -1>, <4 x float> %v)
+ ret void
+
+; CHECK-LABEL: @mstore_one_one(
+; CHECK-NEXT: %castvec = bitcast i8* %f to <4 x float>*
+; CHECK-NEXT: call void @llvm.masked.store.v4f32.p0v4f32(<4 x float> %v, <4 x float>* %castvec, i32 1, <4 x i1> <i1 false, i1 false, i1 false, i1 true>)
+; CHECK-NEXT: ret void
+}
+
+; Try doubles.
+
+define void @mstore_one_one_double(i8* %f, <2 x double> %v) {
+ tail call void @llvm.x86.avx.maskstore.pd(i8* %f, <2 x i64> <i64 -1, i64 0>, <2 x double> %v)
+ ret void
+
+; CHECK-LABEL: @mstore_one_one_double(
+; CHECK-NEXT: %castvec = bitcast i8* %f to <2 x double>*
+; CHECK-NEXT: call void @llvm.masked.store.v2f64.p0v2f64(<2 x double> %v, <2 x double>* %castvec, i32 1, <2 x i1> <i1 true, i1 false>)
+; CHECK-NEXT: ret void
+}
+
+; Try 256-bit FP ops.
+
+define void @mstore_v8f32(i8* %f, <8 x float> %v) {
+ tail call void @llvm.x86.avx.maskstore.ps.256(i8* %f, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 -1, i32 -2, i32 -3, i32 -4>, <8 x float> %v)
+ ret void
+
+; CHECK-LABEL: @mstore_v8f32(
+; CHECK-NEXT: %castvec = bitcast i8* %f to <8 x float>*
+; CHECK-NEXT: call void @llvm.masked.store.v8f32.p0v8f32(<8 x float> %v, <8 x float>* %castvec, i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: ret void
+}
+
+define void @mstore_v4f64(i8* %f, <4 x double> %v) {
+ tail call void @llvm.x86.avx.maskstore.pd.256(i8* %f, <4 x i64> <i64 -1, i64 0, i64 1, i64 2>, <4 x double> %v)
+ ret void
+
+; CHECK-LABEL: @mstore_v4f64(
+; CHECK-NEXT: %castvec = bitcast i8* %f to <4 x double>*
+; CHECK-NEXT: call void @llvm.masked.store.v4f64.p0v4f64(<4 x double> %v, <4 x double>* %castvec, i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>)
+; CHECK-NEXT: ret void
+}
+
+; Try the AVX2 variants.
+
+define void @mstore_v4i32(i8* %f, <4 x i32> %v) {
+ tail call void @llvm.x86.avx2.maskstore.d(i8* %f, <4 x i32> <i32 0, i32 1, i32 -1, i32 -2>, <4 x i32> %v)
+ ret void
+
+; CHECK-LABEL: @mstore_v4i32(
+; CHECK-NEXT: %castvec = bitcast i8* %f to <4 x i32>*
+; CHECK-NEXT: call void @llvm.masked.store.v4i32.p0v4i32(<4 x i32> %v, <4 x i32>* %castvec, i32 1, <4 x i1> <i1 false, i1 false, i1 true, i1 true>)
+; CHECK-NEXT: ret void
+}
+
+define void @mstore_v2i64(i8* %f, <2 x i64> %v) {
+ tail call void @llvm.x86.avx2.maskstore.q(i8* %f, <2 x i64> <i64 -1, i64 0>, <2 x i64> %v)
+ ret void
+
+; CHECK-LABEL: @mstore_v2i64(
+; CHECK-NEXT: %castvec = bitcast i8* %f to <2 x i64>*
+; CHECK-NEXT: call void @llvm.masked.store.v2i64.p0v2i64(<2 x i64> %v, <2 x i64>* %castvec, i32 1, <2 x i1> <i1 true, i1 false>)
+; CHECK-NEXT: ret void
+}
+
+define void @mstore_v8i32(i8* %f, <8 x i32> %v) {
+ tail call void @llvm.x86.avx2.maskstore.d.256(i8* %f, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 -1, i32 -2, i32 -3, i32 -4>, <8 x i32> %v)
+ ret void
+
+; CHECK-LABEL: @mstore_v8i32(
+; CHECK-NEXT: %castvec = bitcast i8* %f to <8 x i32>*
+; CHECK-NEXT: call void @llvm.masked.store.v8i32.p0v8i32(<8 x i32> %v, <8 x i32>* %castvec, i32 1, <8 x i1> <i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true, i1 true>)
+; CHECK-NEXT: ret void
+}
+
+define void @mstore_v4i64(i8* %f, <4 x i64> %v) {
+ tail call void @llvm.x86.avx2.maskstore.q.256(i8* %f, <4 x i64> <i64 -1, i64 0, i64 1, i64 2>, <4 x i64> %v)
+ ret void
+
+; CHECK-LABEL: @mstore_v4i64(
+; CHECK-NEXT: %castvec = bitcast i8* %f to <4 x i64>*
+; CHECK-NEXT: call void @llvm.masked.store.v4i64.p0v4i64(<4 x i64> %v, <4 x i64>* %castvec, i32 1, <4 x i1> <i1 true, i1 false, i1 false, i1 false>)
+; CHECK-NEXT: ret void
+}
+
+; The original SSE2 masked store variant.
+
+define void @mstore_v16i8_sse2_zeros(<16 x i8> %d, i8* %p) {
+ tail call void @llvm.x86.sse2.maskmov.dqu(<16 x i8> %d, <16 x i8> zeroinitializer, i8* %p)
+ ret void
+
+; CHECK-LABEL: @mstore_v16i8_sse2_zeros(
+; CHECK-NEXT: ret void
+}
+
+
+declare <4 x float> @llvm.x86.avx.maskload.ps(i8*, <4 x i32>)
+declare <2 x double> @llvm.x86.avx.maskload.pd(i8*, <2 x i64>)
+declare <8 x float> @llvm.x86.avx.maskload.ps.256(i8*, <8 x i32>)
+declare <4 x double> @llvm.x86.avx.maskload.pd.256(i8*, <4 x i64>)
+
+declare <4 x i32> @llvm.x86.avx2.maskload.d(i8*, <4 x i32>)
+declare <2 x i64> @llvm.x86.avx2.maskload.q(i8*, <2 x i64>)
+declare <8 x i32> @llvm.x86.avx2.maskload.d.256(i8*, <8 x i32>)
+declare <4 x i64> @llvm.x86.avx2.maskload.q.256(i8*, <4 x i64>)
+
+declare void @llvm.x86.avx.maskstore.ps(i8*, <4 x i32>, <4 x float>)
+declare void @llvm.x86.avx.maskstore.pd(i8*, <2 x i64>, <2 x double>)
+declare void @llvm.x86.avx.maskstore.ps.256(i8*, <8 x i32>, <8 x float>)
+declare void @llvm.x86.avx.maskstore.pd.256(i8*, <4 x i64>, <4 x double>)
+
+declare void @llvm.x86.avx2.maskstore.d(i8*, <4 x i32>, <4 x i32>)
+declare void @llvm.x86.avx2.maskstore.q(i8*, <2 x i64>, <2 x i64>)
+declare void @llvm.x86.avx2.maskstore.d.256(i8*, <8 x i32>, <8 x i32>)
+declare void @llvm.x86.avx2.maskstore.q.256(i8*, <4 x i64>, <4 x i64>)
+
+declare void @llvm.x86.sse2.maskmov.dqu(<16 x i8>, <16 x i8>, i8*)
+
diff --git a/test/Transforms/InstCombine/x86-movmsk.ll b/test/Transforms/InstCombine/x86-movmsk.ll
new file mode 100644
index 000000000000..11acc1dbca84
--- /dev/null
+++ b/test/Transforms/InstCombine/x86-movmsk.ll
@@ -0,0 +1,324 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+;
+; DemandedBits - MOVMSK zeros the upper bits of the result.
+;
+
+define i32 @test_upper_x86_mmx_pmovmskb(x86_mmx %a0) {
+; CHECK-LABEL: @test_upper_x86_mmx_pmovmskb(
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0)
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0)
+ %2 = and i32 %1, 255
+ ret i32 %2
+}
+
+define i32 @test_upper_x86_sse_movmsk_ps(<4 x float> %a0) {
+; CHECK-LABEL: @test_upper_x86_sse_movmsk_ps(
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
+ %2 = and i32 %1, 15
+ ret i32 %2
+}
+
+define i32 @test_upper_x86_sse2_movmsk_pd(<2 x double> %a0) {
+; CHECK-LABEL: @test_upper_x86_sse2_movmsk_pd(
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
+ %2 = and i32 %1, 3
+ ret i32 %2
+}
+
+define i32 @test_upper_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
+; CHECK-LABEL: @test_upper_x86_sse2_pmovmskb_128(
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
+ %2 = and i32 %1, 65535
+ ret i32 %2
+}
+
+define i32 @test_upper_x86_avx_movmsk_ps_256(<8 x float> %a0) {
+; CHECK-LABEL: @test_upper_x86_avx_movmsk_ps_256(
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
+ %2 = and i32 %1, 255
+ ret i32 %2
+}
+
+define i32 @test_upper_x86_avx_movmsk_pd_256(<4 x double> %a0) {
+; CHECK-LABEL: @test_upper_x86_avx_movmsk_pd_256(
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
+ %2 = and i32 %1, 15
+ ret i32 %2
+}
+
+; llvm.x86.avx2.pmovmskb uses the whole of the 32-bit register.
+
+;
+; DemandedBits - If we don't use the lower bits then we just return zero.
+;
+
+define i32 @test_lower_x86_mmx_pmovmskb(x86_mmx %a0) {
+; CHECK-LABEL: @test_lower_x86_mmx_pmovmskb(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %a0)
+ %2 = and i32 %1, -256
+ ret i32 %2
+}
+
+define i32 @test_lower_x86_sse_movmsk_ps(<4 x float> %a0) {
+; CHECK-LABEL: @test_lower_x86_sse_movmsk_ps(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %a0)
+ %2 = and i32 %1, -16
+ ret i32 %2
+}
+
+define i32 @test_lower_x86_sse2_movmsk_pd(<2 x double> %a0) {
+; CHECK-LABEL: @test_lower_x86_sse2_movmsk_pd(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> %a0)
+ %2 = and i32 %1, -4
+ ret i32 %2
+}
+
+define i32 @test_lower_x86_sse2_pmovmskb_128(<16 x i8> %a0) {
+; CHECK-LABEL: @test_lower_x86_sse2_pmovmskb_128(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> %a0)
+ %2 = and i32 %1, -65536
+ ret i32 %2
+}
+
+define i32 @test_lower_x86_avx_movmsk_ps_256(<8 x float> %a0) {
+; CHECK-LABEL: @test_lower_x86_avx_movmsk_ps_256(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %a0)
+ %2 = and i32 %1, -256
+ ret i32 %2
+}
+
+define i32 @test_lower_x86_avx_movmsk_pd_256(<4 x double> %a0) {
+; CHECK-LABEL: @test_lower_x86_avx_movmsk_pd_256(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> %a0)
+ %2 = and i32 %1, -16
+ ret i32 %2
+}
+
+; llvm.x86.avx2.pmovmskb uses the whole of the 32-bit register.
+
+;
+; Constant Folding (UNDEF -> ZERO)
+;
+
+define i32 @undef_x86_mmx_pmovmskb() {
+; CHECK-LABEL: @undef_x86_mmx_pmovmskb(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx undef)
+ ret i32 %1
+}
+
+define i32 @undef_x86_sse_movmsk_ps() {
+; CHECK-LABEL: @undef_x86_sse_movmsk_ps(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> undef)
+ ret i32 %1
+}
+
+define i32 @undef_x86_sse2_movmsk_pd() {
+; CHECK-LABEL: @undef_x86_sse2_movmsk_pd(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> undef)
+ ret i32 %1
+}
+
+define i32 @undef_x86_sse2_pmovmskb_128() {
+; CHECK-LABEL: @undef_x86_sse2_pmovmskb_128(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> undef)
+ ret i32 %1
+}
+
+define i32 @undef_x86_avx_movmsk_ps_256() {
+; CHECK-LABEL: @undef_x86_avx_movmsk_ps_256(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> undef)
+ ret i32 %1
+}
+
+define i32 @undef_x86_avx_movmsk_pd_256() {
+; CHECK-LABEL: @undef_x86_avx_movmsk_pd_256(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> undef)
+ ret i32 %1
+}
+
+define i32 @undef_x86_avx2_pmovmskb() {
+; CHECK-LABEL: @undef_x86_avx2_pmovmskb(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> undef)
+ ret i32 %1
+}
+
+;
+; Constant Folding (ZERO -> ZERO)
+;
+
+define i32 @zero_x86_mmx_pmovmskb() {
+; CHECK-LABEL: @zero_x86_mmx_pmovmskb(
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx bitcast (<1 x i64> zeroinitializer to x86_mmx))
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %1 = bitcast <1 x i64> zeroinitializer to x86_mmx
+ %2 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %1)
+ ret i32 %2
+}
+
+define i32 @zero_x86_sse_movmsk_ps() {
+; CHECK-LABEL: @zero_x86_sse_movmsk_ps(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> zeroinitializer)
+ ret i32 %1
+}
+
+define i32 @zero_x86_sse2_movmsk_pd() {
+; CHECK-LABEL: @zero_x86_sse2_movmsk_pd(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> zeroinitializer)
+ ret i32 %1
+}
+
+define i32 @zero_x86_sse2_pmovmskb_128() {
+; CHECK-LABEL: @zero_x86_sse2_pmovmskb_128(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> zeroinitializer)
+ ret i32 %1
+}
+
+define i32 @zero_x86_avx_movmsk_ps_256() {
+; CHECK-LABEL: @zero_x86_avx_movmsk_ps_256(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> zeroinitializer)
+ ret i32 %1
+}
+
+define i32 @zero_x86_avx_movmsk_pd_256() {
+; CHECK-LABEL: @zero_x86_avx_movmsk_pd_256(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> zeroinitializer)
+ ret i32 %1
+}
+
+define i32 @zero_x86_avx2_pmovmskb() {
+; CHECK-LABEL: @zero_x86_avx2_pmovmskb(
+; CHECK-NEXT: ret i32 0
+;
+ %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> zeroinitializer)
+ ret i32 %1
+}
+
+;
+; Constant Folding
+;
+
+define i32 @fold_x86_mmx_pmovmskb() {
+; CHECK-LABEL: @fold_x86_mmx_pmovmskb(
+; CHECK-NEXT: [[TMP1:%.*]] = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx bitcast (<8 x i8> <i8 0, i8 -1, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 0> to x86_mmx))
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %1 = bitcast <8 x i8> <i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256> to x86_mmx
+ %2 = call i32 @llvm.x86.mmx.pmovmskb(x86_mmx %1)
+ ret i32 %2
+}
+
+define i32 @fold_x86_sse_movmsk_ps() {
+; CHECK-LABEL: @fold_x86_sse_movmsk_ps(
+; CHECK-NEXT: ret i32 10
+;
+ %1 = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> <float 1.0, float -1.0, float 100.0, float -200.0>)
+ ret i32 %1
+}
+
+define i32 @fold_x86_sse2_movmsk_pd() {
+; CHECK-LABEL: @fold_x86_sse2_movmsk_pd(
+; CHECK-NEXT: ret i32 2
+;
+ %1 = call i32 @llvm.x86.sse2.movmsk.pd(<2 x double> <double 1.0, double -1.0>)
+ ret i32 %1
+}
+
+define i32 @fold_x86_sse2_pmovmskb_128() {
+; CHECK-LABEL: @fold_x86_sse2_pmovmskb_128(
+; CHECK-NEXT: ret i32 5654
+;
+ %1 = call i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8> <i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256, i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256>)
+ ret i32 %1
+}
+
+define i32 @fold_x86_avx_movmsk_ps_256() {
+; CHECK-LABEL: @fold_x86_avx_movmsk_ps_256(
+; CHECK-NEXT: ret i32 170
+;
+ %1 = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> <float 1.0, float -1.0, float 100.0, float -200.0, float +0.0, float -0.0, float 100000.0, float -5000000.0>)
+ ret i32 %1
+}
+
+define i32 @fold_x86_avx_movmsk_pd_256() {
+; CHECK-LABEL: @fold_x86_avx_movmsk_pd_256(
+; CHECK-NEXT: ret i32 10
+;
+ %1 = call i32 @llvm.x86.avx.movmsk.pd.256(<4 x double> <double 1.0, double -1.0, double 100.0, double -200.0>)
+ ret i32 %1
+}
+
+define i32 @fold_x86_avx2_pmovmskb() {
+; CHECK-LABEL: @fold_x86_avx2_pmovmskb(
+; CHECK-NEXT: ret i32 370546176
+;
+ %1 = call i32 @llvm.x86.avx2.pmovmskb(<32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256, i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256, i8 0, i8 255, i8 -1, i8 127, i8 -127, i8 63, i8 64, i8 256>)
+ ret i32 %1
+}
+
+declare i32 @llvm.x86.mmx.pmovmskb(x86_mmx)
+
+declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>)
+declare i32 @llvm.x86.sse2.movmsk.pd(<2 x double>)
+declare i32 @llvm.x86.sse2.pmovmskb.128(<16 x i8>)
+
+declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>)
+declare i32 @llvm.x86.avx.movmsk.pd.256(<4 x double>)
+declare i32 @llvm.x86.avx2.pmovmskb(<32 x i8>)
diff --git a/test/Transforms/InstCombine/x86-pmovsx.ll b/test/Transforms/InstCombine/x86-pmovsx.ll
deleted file mode 100644
index 31bdc59b16a8..000000000000
--- a/test/Transforms/InstCombine/x86-pmovsx.ll
+++ /dev/null
@@ -1,136 +0,0 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-declare <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8>) nounwind readnone
-declare <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8>) nounwind readnone
-declare <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32>) nounwind readnone
-declare <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16>) nounwind readnone
-declare <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16>) nounwind readnone
-
-declare <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8>) nounwind readnone
-declare <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8>) nounwind readnone
-declare <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8>) nounwind readnone
-declare <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32>) nounwind readnone
-declare <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16>) nounwind readnone
-declare <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16>) nounwind readnone
-
-;
-; Basic sign extension tests
-;
-
-define <4 x i32> @sse41_pmovsxbd(<16 x i8> %v) nounwind readnone {
-; CHECK-LABEL: @sse41_pmovsxbd
-; CHECK-NEXT: shufflevector <16 x i8> %v, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: sext <4 x i8> %1 to <4 x i32>
-; CHECK-NEXT: ret <4 x i32> %2
-
- %res = call <4 x i32> @llvm.x86.sse41.pmovsxbd(<16 x i8> %v)
- ret <4 x i32> %res
-}
-
-define <2 x i64> @sse41_pmovsxbq(<16 x i8> %v) nounwind readnone {
-; CHECK-LABEL: @sse41_pmovsxbq
-; CHECK-NEXT: shufflevector <16 x i8> %v, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT: sext <2 x i8> %1 to <2 x i64>
-; CHECK-NEXT: ret <2 x i64> %2
-
- %res = call <2 x i64> @llvm.x86.sse41.pmovsxbq(<16 x i8> %v)
- ret <2 x i64> %res
-}
-
-define <8 x i16> @sse41_pmovsxbw(<16 x i8> %v) nounwind readnone {
-; CHECK-LABEL: @sse41_pmovsxbw
-; CHECK-NEXT: shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: sext <8 x i8> %1 to <8 x i16>
-; CHECK-NEXT: ret <8 x i16> %2
-
- %res = call <8 x i16> @llvm.x86.sse41.pmovsxbw(<16 x i8> %v)
- ret <8 x i16> %res
-}
-
-define <2 x i64> @sse41_pmovsxdq(<4 x i32> %v) nounwind readnone {
-; CHECK-LABEL: @sse41_pmovsxdq
-; CHECK-NEXT: shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT: sext <2 x i32> %1 to <2 x i64>
-; CHECK-NEXT: ret <2 x i64> %2
-
- %res = call <2 x i64> @llvm.x86.sse41.pmovsxdq(<4 x i32> %v)
- ret <2 x i64> %res
-}
-
-define <4 x i32> @sse41_pmovsxwd(<8 x i16> %v) nounwind readnone {
-; CHECK-LABEL: @sse41_pmovsxwd
-; CHECK-NEXT: shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: sext <4 x i16> %1 to <4 x i32>
-; CHECK-NEXT: ret <4 x i32> %2
-
- %res = call <4 x i32> @llvm.x86.sse41.pmovsxwd(<8 x i16> %v)
- ret <4 x i32> %res
-}
-
-define <2 x i64> @sse41_pmovsxwq(<8 x i16> %v) nounwind readnone {
-; CHECK-LABEL: @sse41_pmovsxwq
-; CHECK-NEXT: shufflevector <8 x i16> %v, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT: sext <2 x i16> %1 to <2 x i64>
-; CHECK-NEXT: ret <2 x i64> %2
-
- %res = call <2 x i64> @llvm.x86.sse41.pmovsxwq(<8 x i16> %v)
- ret <2 x i64> %res
-}
-
-define <8 x i32> @avx2_pmovsxbd(<16 x i8> %v) nounwind readnone {
-; CHECK-LABEL: @avx2_pmovsxbd
-; CHECK-NEXT: shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: sext <8 x i8> %1 to <8 x i32>
-; CHECK-NEXT: ret <8 x i32> %2
-
- %res = call <8 x i32> @llvm.x86.avx2.pmovsxbd(<16 x i8> %v)
- ret <8 x i32> %res
-}
-
-define <4 x i64> @avx2_pmovsxbq(<16 x i8> %v) nounwind readnone {
-; CHECK-LABEL: @avx2_pmovsxbq
-; CHECK-NEXT: shufflevector <16 x i8> %v, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: sext <4 x i8> %1 to <4 x i64>
-; CHECK-NEXT: ret <4 x i64> %2
-
- %res = call <4 x i64> @llvm.x86.avx2.pmovsxbq(<16 x i8> %v)
- ret <4 x i64> %res
-}
-
-define <16 x i16> @avx2_pmovsxbw(<16 x i8> %v) nounwind readnone {
-; CHECK-LABEL: @avx2_pmovsxbw
-; CHECK-NEXT: sext <16 x i8> %v to <16 x i16>
-; CHECK-NEXT: ret <16 x i16> %1
-
- %res = call <16 x i16> @llvm.x86.avx2.pmovsxbw(<16 x i8> %v)
- ret <16 x i16> %res
-}
-
-define <4 x i64> @avx2_pmovsxdq(<4 x i32> %v) nounwind readnone {
-; CHECK-LABEL: @avx2_pmovsxdq
-; CHECK-NEXT: sext <4 x i32> %v to <4 x i64>
-; CHECK-NEXT: ret <4 x i64> %1
-
- %res = call <4 x i64> @llvm.x86.avx2.pmovsxdq(<4 x i32> %v)
- ret <4 x i64> %res
-}
-
-define <8 x i32> @avx2_pmovsxwd(<8 x i16> %v) nounwind readnone {
-; CHECK-LABEL: @avx2_pmovsxwd
-; CHECK-NEXT: sext <8 x i16> %v to <8 x i32>
-; CHECK-NEXT: ret <8 x i32> %1
-
- %res = call <8 x i32> @llvm.x86.avx2.pmovsxwd(<8 x i16> %v)
- ret <8 x i32> %res
-}
-
-define <4 x i64> @avx2_pmovsxwq(<8 x i16> %v) nounwind readnone {
-; CHECK-LABEL: @avx2_pmovsxwq
-; CHECK-NEXT: shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: sext <4 x i16> %1 to <4 x i64>
-; CHECK-NEXT: ret <4 x i64> %2
-
- %res = call <4 x i64> @llvm.x86.avx2.pmovsxwq(<8 x i16> %v)
- ret <4 x i64> %res
-}
diff --git a/test/Transforms/InstCombine/x86-pmovzx.ll b/test/Transforms/InstCombine/x86-pmovzx.ll
deleted file mode 100644
index 31028cba26eb..000000000000
--- a/test/Transforms/InstCombine/x86-pmovzx.ll
+++ /dev/null
@@ -1,136 +0,0 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
-
-declare <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8>) nounwind readnone
-declare <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8>) nounwind readnone
-declare <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8>) nounwind readnone
-declare <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32>) nounwind readnone
-declare <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16>) nounwind readnone
-declare <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16>) nounwind readnone
-
-declare <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8>) nounwind readnone
-declare <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8>) nounwind readnone
-declare <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8>) nounwind readnone
-declare <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32>) nounwind readnone
-declare <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16>) nounwind readnone
-declare <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16>) nounwind readnone
-
-;
-; Basic zero extension tests
-;
-
-define <4 x i32> @sse41_pmovzxbd(<16 x i8> %v) nounwind readnone {
-; CHECK-LABEL: @sse41_pmovzxbd
-; CHECK-NEXT: shufflevector <16 x i8> %v, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: zext <4 x i8> %1 to <4 x i32>
-; CHECK-NEXT: ret <4 x i32> %2
-
- %res = call <4 x i32> @llvm.x86.sse41.pmovzxbd(<16 x i8> %v)
- ret <4 x i32> %res
-}
-
-define <2 x i64> @sse41_pmovzxbq(<16 x i8> %v) nounwind readnone {
-; CHECK-LABEL: @sse41_pmovzxbq
-; CHECK-NEXT: shufflevector <16 x i8> %v, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT: zext <2 x i8> %1 to <2 x i64>
-; CHECK-NEXT: ret <2 x i64> %2
-
- %res = call <2 x i64> @llvm.x86.sse41.pmovzxbq(<16 x i8> %v)
- ret <2 x i64> %res
-}
-
-define <8 x i16> @sse41_pmovzxbw(<16 x i8> %v) nounwind readnone {
-; CHECK-LABEL: @sse41_pmovzxbw
-; CHECK-NEXT: shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: zext <8 x i8> %1 to <8 x i16>
-; CHECK-NEXT: ret <8 x i16> %2
-
- %res = call <8 x i16> @llvm.x86.sse41.pmovzxbw(<16 x i8> %v)
- ret <8 x i16> %res
-}
-
-define <2 x i64> @sse41_pmovzxdq(<4 x i32> %v) nounwind readnone {
-; CHECK-LABEL: @sse41_pmovzxdq
-; CHECK-NEXT: shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT: zext <2 x i32> %1 to <2 x i64>
-; CHECK-NEXT: ret <2 x i64> %2
-
- %res = call <2 x i64> @llvm.x86.sse41.pmovzxdq(<4 x i32> %v)
- ret <2 x i64> %res
-}
-
-define <4 x i32> @sse41_pmovzxwd(<8 x i16> %v) nounwind readnone {
-; CHECK-LABEL: @sse41_pmovzxwd
-; CHECK-NEXT: shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: zext <4 x i16> %1 to <4 x i32>
-; CHECK-NEXT: ret <4 x i32> %2
-
- %res = call <4 x i32> @llvm.x86.sse41.pmovzxwd(<8 x i16> %v)
- ret <4 x i32> %res
-}
-
-define <2 x i64> @sse41_pmovzxwq(<8 x i16> %v) nounwind readnone {
-; CHECK-LABEL: @sse41_pmovzxwq
-; CHECK-NEXT: shufflevector <8 x i16> %v, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
-; CHECK-NEXT: zext <2 x i16> %1 to <2 x i64>
-; CHECK-NEXT: ret <2 x i64> %2
-
- %res = call <2 x i64> @llvm.x86.sse41.pmovzxwq(<8 x i16> %v)
- ret <2 x i64> %res
-}
-
-define <8 x i32> @avx2_pmovzxbd(<16 x i8> %v) nounwind readnone {
-; CHECK-LABEL: @avx2_pmovzxbd
-; CHECK-NEXT: shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-; CHECK-NEXT: zext <8 x i8> %1 to <8 x i32>
-; CHECK-NEXT: ret <8 x i32> %2
-
- %res = call <8 x i32> @llvm.x86.avx2.pmovzxbd(<16 x i8> %v)
- ret <8 x i32> %res
-}
-
-define <4 x i64> @avx2_pmovzxbq(<16 x i8> %v) nounwind readnone {
-; CHECK-LABEL: @avx2_pmovzxbq
-; CHECK-NEXT: shufflevector <16 x i8> %v, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: zext <4 x i8> %1 to <4 x i64>
-; CHECK-NEXT: ret <4 x i64> %2
-
- %res = call <4 x i64> @llvm.x86.avx2.pmovzxbq(<16 x i8> %v)
- ret <4 x i64> %res
-}
-
-define <16 x i16> @avx2_pmovzxbw(<16 x i8> %v) nounwind readnone {
-; CHECK-LABEL: @avx2_pmovzxbw
-; CHECK-NEXT: zext <16 x i8> %v to <16 x i16>
-; CHECK-NEXT: ret <16 x i16> %1
-
- %res = call <16 x i16> @llvm.x86.avx2.pmovzxbw(<16 x i8> %v)
- ret <16 x i16> %res
-}
-
-define <4 x i64> @avx2_pmovzxdq(<4 x i32> %v) nounwind readnone {
-; CHECK-LABEL: @avx2_pmovzxdq
-; CHECK-NEXT: zext <4 x i32> %v to <4 x i64>
-; CHECK-NEXT: ret <4 x i64> %1
-
- %res = call <4 x i64> @llvm.x86.avx2.pmovzxdq(<4 x i32> %v)
- ret <4 x i64> %res
-}
-
-define <8 x i32> @avx2_pmovzxwd(<8 x i16> %v) nounwind readnone {
-; CHECK-LABEL: @avx2_pmovzxwd
-; CHECK-NEXT: zext <8 x i16> %v to <8 x i32>
-; CHECK-NEXT: ret <8 x i32> %1
-
- %res = call <8 x i32> @llvm.x86.avx2.pmovzxwd(<8 x i16> %v)
- ret <8 x i32> %res
-}
-
-define <4 x i64> @avx2_pmovzxwq(<8 x i16> %v) nounwind readnone {
-; CHECK-LABEL: @avx2_pmovzxwq
-; CHECK-NEXT: shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-; CHECK-NEXT: zext <4 x i16> %1 to <4 x i64>
-; CHECK-NEXT: ret <4 x i64> %2
-
- %res = call <4 x i64> @llvm.x86.avx2.pmovzxwq(<8 x i16> %v)
- ret <4 x i64> %res
-}
diff --git a/test/Transforms/InstCombine/x86-pshufb.ll b/test/Transforms/InstCombine/x86-pshufb.ll
index caaaed8910a8..3ada4fbd1662 100644
--- a/test/Transforms/InstCombine/x86-pshufb.ll
+++ b/test/Transforms/InstCombine/x86-pshufb.ll
@@ -1,38 +1,38 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
; Verify that instcombine is able to fold identity shuffles.
define <16 x i8> @identity_test(<16 x i8> %InVec) {
-; CHECK-LABEL: @identity_test
-; CHECK: ret <16 x i8> %InVec
-
+; CHECK-LABEL: @identity_test(
+; CHECK-NEXT: ret <16 x i8> %InVec
+;
%1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
ret <16 x i8> %1
}
define <32 x i8> @identity_test_avx2(<32 x i8> %InVec) {
-; CHECK-LABEL: @identity_test_avx2
-; CHECK: ret <32 x i8> %InVec
-
+; CHECK-LABEL: @identity_test_avx2(
+; CHECK-NEXT: ret <32 x i8> %InVec
+;
%1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
ret <32 x i8> %1
}
-
; Verify that instcombine is able to fold byte shuffles with zero masks.
define <16 x i8> @fold_to_zero_vector(<16 x i8> %InVec) {
-; CHECK-LABEL: @fold_to_zero_vector
-; CHECK: ret <16 x i8> zeroinitializer
-
+; CHECK-LABEL: @fold_to_zero_vector(
+; CHECK-NEXT: ret <16 x i8> zeroinitializer
+;
%1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
ret <16 x i8> %1
}
define <32 x i8> @fold_to_zero_vector_avx2(<32 x i8> %InVec) {
-; CHECK-LABEL: @fold_to_zero_vector_avx2
-; CHECK: ret <32 x i8> zeroinitializer
-
+; CHECK-LABEL: @fold_to_zero_vector_avx2(
+; CHECK-NEXT: ret <32 x i8> zeroinitializer
+;
%1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
ret <32 x i8> %1
}
@@ -41,9 +41,10 @@ define <32 x i8> @fold_to_zero_vector_avx2(<32 x i8> %InVec) {
; with a shuffle mask of all zeroes.
define <16 x i8> @splat_test(<16 x i8> %InVec) {
-; CHECK-LABEL: @splat_test
-; CHECK: shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> zeroinitializer
-
+; CHECK-LABEL: @splat_test(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> zeroinitializer
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
%1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> zeroinitializer)
ret <16 x i8> %1
}
@@ -54,9 +55,10 @@ define <16 x i8> @splat_test(<16 x i8> %InVec) {
; the lower byte in the high 128-bit lane of %InVec (shuffle index 16).
define <32 x i8> @splat_test_avx2(<32 x i8> %InVec) {
-; CHECK-LABEL: @splat_test_avx2
-; CHECK: shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-
+; CHECK-LABEL: @splat_test_avx2(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT: ret <32 x i8> [[TMP1]]
+;
%1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> zeroinitializer)
ret <32 x i8> %1
}
@@ -65,114 +67,128 @@ define <32 x i8> @splat_test_avx2(<32 x i8> %InVec) {
; vector %InVec and a vector of all zeroes.
define <16 x i8> @blend1(<16 x i8> %InVec) {
-; CHECK-LABEL: @blend1
-; CHECK: shufflevector <16 x i8> %InVec, {{.*}}, <16 x i32> <i32 16, i32 1, i32 16, i32 3, i32 16, i32 5, i32 16, i32 7, i32 16, i32 9, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15>
-
+; CHECK-LABEL: @blend1(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 16, i32 1, i32 16, i32 3, i32 16, i32 5, i32 16, i32 7, i32 16, i32 9, i32 16, i32 11, i32 16, i32 13, i32 16, i32 15>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
%1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15>)
ret <16 x i8> %1
}
define <16 x i8> @blend2(<16 x i8> %InVec) {
-; CHECK-LABEL: @blend2
-; CHECK: shufflevector <16 x i8> %InVec, {{.*}}, <16 x i32> <i32 16, i32 16, i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 16, i32 16, i32 10, i32 11, i32 16, i32 16, i32 14, i32 15>
-
+; CHECK-LABEL: @blend2(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 16, i32 16, i32 2, i32 3, i32 16, i32 16, i32 6, i32 7, i32 16, i32 16, i32 10, i32 11, i32 16, i32 16, i32 14, i32 15>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
%1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15>)
ret <16 x i8> %1
}
define <16 x i8> @blend3(<16 x i8> %InVec) {
-; CHECK-LABEL: @blend3
-; CHECK: shufflevector <16 x i8> %InVec, {{.*}}, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 4, i32 5, i32 6, i32 7, i32 16, i32 16, i32 16, i32 16, i32 12, i32 13, i32 14, i32 15>
-
+; CHECK-LABEL: @blend3(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 4, i32 5, i32 6, i32 7, i32 16, i32 16, i32 16, i32 16, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
%1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15>)
ret <16 x i8> %1
}
define <16 x i8> @blend4(<16 x i8> %InVec) {
-; CHECK-LABEL: @blend4
-; CHECK: shufflevector <16 x i8> %InVec, {{.*}}, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
-
+; CHECK-LABEL: @blend4(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
%1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
ret <16 x i8> %1
}
define <16 x i8> @blend5(<16 x i8> %InVec) {
-; CHECK-LABEL: @blend5
-; CHECK: shufflevector <16 x i8> %InVec, {{.*}}, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-
+; CHECK-LABEL: @blend5(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
%1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
ret <16 x i8> %1
}
define <16 x i8> @blend6(<16 x i8> %InVec) {
-; CHECK-LABEL: @blend6
-; CHECK: shufflevector <16 x i8> %InVec, {{.*}}, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-
+; CHECK-LABEL: @blend6(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 0, i32 1, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
%1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
ret <16 x i8> %1
}
define <32 x i8> @blend1_avx2(<32 x i8> %InVec) {
-; CHECK-LABEL: @blend1_avx2
-; CHECK: shufflevector <32 x i8> %InVec, {{.*}}, <32 x i32> <i32 32, i32 1, i32 32, i32 3, i32 32, i32 5, i32 32, i32 7, i32 32, i32 9, i32 32, i32 11, i32 32, i32 13, i32 32, i32 15, i32 48, i32 17, i32 48, i32 19, i32 48, i32 21, i32 48, i32 23, i32 48, i32 25, i32 48, i32 27, i32 48, i32 29, i32 48, i32 31>
-
+; CHECK-LABEL: @blend1_avx2(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 32, i32 1, i32 32, i32 3, i32 32, i32 5, i32 32, i32 7, i32 32, i32 9, i32 32, i32 11, i32 32, i32 13, i32 32, i32 15, i32 48, i32 17, i32 48, i32 19, i32 48, i32 21, i32 48, i32 23, i32 48, i32 25, i32 48, i32 27, i32 48, i32 29, i32 48, i32 31>
+; CHECK-NEXT: ret <32 x i8> [[TMP1]]
+;
%1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15, i8 -128, i8 1, i8 -128, i8 3, i8 -128, i8 5, i8 -128, i8 7, i8 -128, i8 9, i8 -128, i8 11, i8 -128, i8 13, i8 -128, i8 15>)
ret <32 x i8> %1
}
define <32 x i8> @blend2_avx2(<32 x i8> %InVec) {
-; CHECK-LABEL: @blend2_avx2
-; CHECK: shufflevector <32 x i8> %InVec, {{.*}}, <32 x i32> <i32 32, i32 32, i32 2, i32 3, i32 32, i32 32, i32 6, i32 7, i32 32, i32 32, i32 10, i32 11, i32 32, i32 32, i32 14, i32 15, i32 48, i32 48, i32 18, i32 19, i32 48, i32 48, i32 22, i32 23, i32 48, i32 48, i32 26, i32 27, i32 48, i32 48, i32 30, i32 31>
-
+; CHECK-LABEL: @blend2_avx2(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 32, i32 32, i32 2, i32 3, i32 32, i32 32, i32 6, i32 7, i32 32, i32 32, i32 10, i32 11, i32 32, i32 32, i32 14, i32 15, i32 48, i32 48, i32 18, i32 19, i32 48, i32 48, i32 22, i32 23, i32 48, i32 48, i32 26, i32 27, i32 48, i32 48, i32 30, i32 31>
+; CHECK-NEXT: ret <32 x i8> [[TMP1]]
+;
%1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15, i8 -128, i8 -128, i8 2, i8 3, i8 -128, i8 -128, i8 6, i8 7, i8 -128, i8 -128, i8 10, i8 11, i8 -128, i8 -128, i8 14, i8 15>)
ret <32 x i8> %1
}
define <32 x i8> @blend3_avx2(<32 x i8> %InVec) {
-; CHECK-LABEL: @blend3_avx2
-; CHECK: shufflevector <32 x i8> %InVec, {{.*}}, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 4, i32 5, i32 6, i32 7, i32 32, i32 32, i32 32, i32 32, i32 12, i32 13, i32 14, i32 15, i32 48, i32 48, i32 48, i32 48, i32 20, i32 21, i32 22, i32 23, i32 48, i32 48, i32 48, i32 48, i32 28, i32 29, i32 30, i32 31>
-
+; CHECK-LABEL: @blend3_avx2(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 4, i32 5, i32 6, i32 7, i32 32, i32 32, i32 32, i32 32, i32 12, i32 13, i32 14, i32 15, i32 48, i32 48, i32 48, i32 48, i32 20, i32 21, i32 22, i32 23, i32 48, i32 48, i32 48, i32 48, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT: ret <32 x i8> [[TMP1]]
+;
%1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15, i8 -128, i8 -128, i8 -128, i8 -128, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 12, i8 13, i8 14, i8 15>)
ret <32 x i8> %1
}
define <32 x i8> @blend4_avx2(<32 x i8> %InVec) {
-; CHECK-LABEL: @blend4_avx2
-; CHECK: shufflevector <32 x i8> %InVec, {{.*}}, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
-
+; CHECK-LABEL: @blend4_avx2(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT: ret <32 x i8> [[TMP1]]
+;
%1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
ret <32 x i8> %1
}
define <32 x i8> @blend5_avx2(<32 x i8> %InVec) {
-; CHECK-LABEL: @blend5_avx2
-; CHECK: shufflevector <32 x i8> %InVec, {{.*}}, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>
-
+; CHECK-LABEL: @blend5_avx2(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 18, i32 19, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>
+; CHECK-NEXT: ret <32 x i8> [[TMP1]]
+;
%1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 2, i8 3, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
ret <32 x i8> %1
}
define <32 x i8> @blend6_avx2(<32 x i8> %InVec) {
-; CHECK-LABEL: @blend6_avx2
-; CHECK: shufflevector <32 x i8> %InVec, {{.*}}, <32 x i32> <i32 0, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>
-
+; CHECK-LABEL: @blend6_avx2(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 0, i32 1, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 16, i32 17, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>
+; CHECK-NEXT: ret <32 x i8> [[TMP1]]
+;
%1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 0, i8 1, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
ret <32 x i8> %1
}
; movq idiom.
define <16 x i8> @movq_idiom(<16 x i8> %InVec) {
-; CHECK-LABEL: @movq_idiom
-; CHECK: shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
-
+; CHECK-LABEL: @movq_idiom(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
%1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
ret <16 x i8> %1
}
define <32 x i8> @movq_idiom_avx2(<32 x i8> %InVec) {
-; CHECK-LABEL: @movq_idiom_avx2
-; CHECK: shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>
-
+; CHECK-LABEL: @movq_idiom_avx2(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 32, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48, i32 48>
+; CHECK-NEXT: ret <32 x i8> [[TMP1]]
+;
%1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128, i8 -128>)
ret <32 x i8> %1
}
@@ -180,33 +196,37 @@ define <32 x i8> @movq_idiom_avx2(<32 x i8> %InVec) {
; Vector permutations using byte shuffles.
define <16 x i8> @permute1(<16 x i8> %InVec) {
-; CHECK-LABEL: @permute1
-; CHECK: shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15>
-
+; CHECK-LABEL: @permute1(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
%1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15>)
ret <16 x i8> %1
}
define <16 x i8> @permute2(<16 x i8> %InVec) {
-; CHECK-LABEL: @permute2
-; CHECK: shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-
+; CHECK-LABEL: @permute2(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
%1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
ret <16 x i8> %1
}
define <32 x i8> @permute1_avx2(<32 x i8> %InVec) {
-; CHECK-LABEL: @permute1_avx2
-; CHECK: shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 20, i32 21, i32 22, i32 23, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31>
-
+; CHECK-LABEL: @permute1_avx2(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 20, i32 21, i32 22, i32 23, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT: ret <32 x i8> [[TMP1]]
+;
%1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15, i8 4, i8 5, i8 6, i8 7, i8 4, i8 5, i8 6, i8 7, i8 12, i8 13, i8 14, i8 15, i8 12, i8 13, i8 14, i8 15>)
ret <32 x i8> %1
}
define <32 x i8> @permute2_avx2(<32 x i8> %InVec) {
-; CHECK-LABEL: @permute2_avx2
-; CHECK: shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
-
+; CHECK-LABEL: @permute2_avx2(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
+; CHECK-NEXT: ret <32 x i8> [[TMP1]]
+;
%1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7>)
ret <32 x i8> %1
}
@@ -215,53 +235,90 @@ define <32 x i8> @permute2_avx2(<32 x i8> %InVec) {
; are not -128 and that are not encoded in four bits.
define <16 x i8> @identity_test2_2(<16 x i8> %InVec) {
-; CHECK-LABEL: @identity_test2_2
-; CHECK: ret <16 x i8> %InVec
-
+; CHECK-LABEL: @identity_test2_2(
+; CHECK-NEXT: ret <16 x i8> %InVec
+;
%1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>)
ret <16 x i8> %1
}
define <32 x i8> @identity_test_avx2_2(<32 x i8> %InVec) {
-; CHECK-LABEL: @identity_test_avx2_2
-; CHECK: ret <32 x i8> %InVec
-
+; CHECK-LABEL: @identity_test_avx2_2(
+; CHECK-NEXT: ret <32 x i8> %InVec
+;
%1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 16, i8 33, i8 66, i8 19, i8 36, i8 69, i8 22, i8 39, i8 72, i8 25, i8 42, i8 75, i8 28, i8 45, i8 78, i8 31, i8 48, i8 81, i8 34, i8 51, i8 84, i8 37, i8 54, i8 87, i8 40, i8 57, i8 90, i8 43, i8 60, i8 93, i8 46, i8 63>)
ret <32 x i8> %1
}
define <16 x i8> @fold_to_zero_vector_2(<16 x i8> %InVec) {
-; CHECK-LABEL: @fold_to_zero_vector_2
-; CHECK: ret <16 x i8> zeroinitializer
-
+; CHECK-LABEL: @fold_to_zero_vector_2(
+; CHECK-NEXT: ret <16 x i8> zeroinitializer
+;
%1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 -125, i8 -1, i8 -53, i8 -32, i8 -4, i8 -7, i8 -33, i8 -66, i8 -99, i8 -120, i8 -100, i8 -22, i8 -17, i8 -1, i8 -11, i8 -15>)
ret <16 x i8> %1
}
define <32 x i8> @fold_to_zero_vector_avx2_2(<32 x i8> %InVec) {
-; CHECK-LABEL: @fold_to_zero_vector_avx2_2
-; CHECK: ret <32 x i8> zeroinitializer
-
+; CHECK-LABEL: @fold_to_zero_vector_avx2_2(
+; CHECK-NEXT: ret <32 x i8> zeroinitializer
+;
%1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 -127, i8 -1, i8 -53, i8 -32, i8 -4, i8 -7, i8 -33, i8 -66, i8 -99, i8 -120, i8 -100, i8 -22, i8 -17, i8 -1, i8 -11, i8 -15, i8 -126, i8 -2, i8 -52, i8 -31, i8 -5, i8 -8, i8 -34, i8 -67, i8 -100, i8 -119, i8 -101, i8 -23, i8 -16, i8 -2, i8 -12, i8 -16>)
ret <32 x i8> %1
}
define <16 x i8> @permute3(<16 x i8> %InVec) {
-; CHECK-LABEL: @permute3
-; CHECK: shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
-
+; CHECK-LABEL: @permute3(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
%1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 48, i8 17, i8 34, i8 51, i8 20, i8 37, i8 54, i8 23, i8 16, i8 49, i8 66, i8 19, i8 52, i8 69, i8 22, i8 55>)
ret <16 x i8> %1
}
define <32 x i8> @permute3_avx2(<32 x i8> %InVec) {
-; CHECK-LABEL: @permute3_avx2
-; CHECK: shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 20, i32 21, i32 22, i32 23, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31>
-
+; CHECK-LABEL: @permute3_avx2(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> undef, <32 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15, i32 12, i32 13, i32 14, i32 15, i32 20, i32 21, i32 22, i32 23, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31>
+; CHECK-NEXT: ret <32 x i8> [[TMP1]]
+;
%1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 52, i8 21, i8 38, i8 55, i8 20, i8 37, i8 54, i8 23, i8 28, i8 61, i8 78, i8 31, i8 60, i8 29, i8 30, i8 79, i8 52, i8 21, i8 38, i8 55, i8 20, i8 53, i8 102, i8 23, i8 92, i8 93, i8 94, i8 95, i8 108, i8 109, i8 110, i8 111>)
ret <32 x i8> %1
}
+; FIXME: Verify that instcombine is able to fold constant byte shuffles with undef mask elements.
+
+define <16 x i8> @fold_with_undef_elts(<16 x i8> %InVec) {
+; CHECK-LABEL: @fold_with_undef_elts(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %InVec, <16 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 0, i32 16, i32 undef, i32 16, i32 1, i32 16, i32 undef, i32 16, i32 2, i32 16, i32 undef, i32 16, i32 3, i32 16, i32 undef, i32 16>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> <i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128>)
+ ret <16 x i8> %1
+}
+
+define <32 x i8> @fold_with_undef_elts_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @fold_with_undef_elts_avx2(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> %InVec, <32 x i8> <i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <32 x i32> <i32 0, i32 32, i32 undef, i32 32, i32 1, i32 32, i32 undef, i32 32, i32 2, i32 32, i32 undef, i32 32, i32 3, i32 32, i32 undef, i32 32, i32 16, i32 48, i32 undef, i32 48, i32 17, i32 48, i32 undef, i32 48, i32 18, i32 48, i32 undef, i32 48, i32 19, i32 48, i32 undef, i32 48>
+; CHECK-NEXT: ret <32 x i8> [[TMP1]]
+;
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> <i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128, i8 0, i8 -128, i8 undef, i8 -128, i8 1, i8 -128, i8 undef, i8 -128, i8 2, i8 -128, i8 undef, i8 -128, i8 3, i8 -128, i8 undef, i8 -128>)
+ ret <32 x i8> %1
+}
+
+define <16 x i8> @fold_with_allundef_elts(<16 x i8> %InVec) {
+; CHECK-LABEL: @fold_with_allundef_elts(
+; CHECK-NEXT: ret <16 x i8> undef
+;
+ %1 = tail call <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8> %InVec, <16 x i8> undef)
+ ret <16 x i8> %1
+}
+
+define <32 x i8> @fold_with_allundef_elts_avx2(<32 x i8> %InVec) {
+; CHECK-LABEL: @fold_with_allundef_elts_avx2(
+; CHECK-NEXT: ret <32 x i8> undef
+;
+ %1 = tail call <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8> %InVec, <32 x i8> undef)
+ ret <32 x i8> %1
+}
declare <16 x i8> @llvm.x86.ssse3.pshuf.b.128(<16 x i8>, <16 x i8>)
declare <32 x i8> @llvm.x86.avx2.pshuf.b(<32 x i8>, <32 x i8>)
diff --git a/test/Transforms/InstCombine/x86-sse.ll b/test/Transforms/InstCombine/x86-sse.ll
new file mode 100644
index 000000000000..c465c53e4018
--- /dev/null
+++ b/test/Transforms/InstCombine/x86-sse.ll
@@ -0,0 +1,661 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define float @test_rcp_ss_0(float %a) {
+; CHECK-LABEL: @test_rcp_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
+; CHECK-NEXT: ret float [[TMP3]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
+ %6 = extractelement <4 x float> %5, i32 0
+ ret float %6
+}
+
+define float @test_rcp_ss_1(float %a) {
+; CHECK-LABEL: @test_rcp_ss_1(
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = tail call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %4)
+ %6 = extractelement <4 x float> %5, i32 1
+ ret float %6
+}
+
+define float @test_sqrt_ss_0(float %a) {
+; CHECK-LABEL: @test_sqrt_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
+; CHECK-NEXT: ret float [[TMP3]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %4)
+ %6 = extractelement <4 x float> %5, i32 0
+ ret float %6
+}
+
+define float @test_sqrt_ss_2(float %a) {
+; CHECK-LABEL: @test_sqrt_ss_2(
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = tail call <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float> %4)
+ %6 = extractelement <4 x float> %5, i32 2
+ ret float %6
+}
+
+define float @test_rsqrt_ss_0(float %a) {
+; CHECK-LABEL: @test_rsqrt_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
+; CHECK-NEXT: ret float [[TMP3]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
+ %6 = extractelement <4 x float> %5, i32 0
+ ret float %6
+}
+
+define float @test_rsqrt_ss_3(float %a) {
+; CHECK-LABEL: @test_rsqrt_ss_3(
+; CHECK-NEXT: ret float 3.000000e+00
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = tail call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %4)
+ %6 = extractelement <4 x float> %5, i32 3
+ ret float %6
+}
+
+define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_add_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a, <4 x float> %b)
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %a, <4 x float> %3)
+ ret <4 x float> %4
+}
+
+define float @test_add_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_add_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = fadd float %a, %b
+; CHECK-NEXT: ret float [[TMP1]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %4, <4 x float> %8)
+ %r = extractelement <4 x float> %9, i32 0
+ ret float %r
+}
+
+define float @test_add_ss_1(float %a, float %b) {
+; CHECK-LABEL: @test_add_ss_1(
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = tail call <4 x float> @llvm.x86.sse.add.ss(<4 x float> %4, <4 x float> %5)
+ %7 = extractelement <4 x float> %6, i32 1
+ ret float %7
+}
+
+define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_sub_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a, <4 x float> %b)
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %a, <4 x float> %3)
+ ret <4 x float> %4
+}
+
+define float @test_sub_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_sub_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = fsub float %a, %b
+; CHECK-NEXT: ret float [[TMP1]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %4, <4 x float> %8)
+ %r = extractelement <4 x float> %9, i32 0
+ ret float %r
+}
+
+define float @test_sub_ss_2(float %a, float %b) {
+; CHECK-LABEL: @test_sub_ss_2(
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = tail call <4 x float> @llvm.x86.sse.sub.ss(<4 x float> %4, <4 x float> %5)
+ %7 = extractelement <4 x float> %6, i32 2
+ ret float %7
+}
+
+define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_mul_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a, <4 x float> %b)
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %a, <4 x float> %3)
+ ret <4 x float> %4
+}
+
+define float @test_mul_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_mul_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul float %a, %b
+; CHECK-NEXT: ret float [[TMP1]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %4, <4 x float> %8)
+ %r = extractelement <4 x float> %9, i32 0
+ ret float %r
+}
+
+define float @test_mul_ss_3(float %a, float %b) {
+; CHECK-LABEL: @test_mul_ss_3(
+; CHECK-NEXT: ret float 3.000000e+00
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = tail call <4 x float> @llvm.x86.sse.mul.ss(<4 x float> %4, <4 x float> %5)
+ %7 = extractelement <4 x float> %6, i32 3
+ ret float %7
+}
+
+define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_div_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a, <4 x float> %b)
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %a, <4 x float> %3)
+ ret <4 x float> %4
+}
+
+define float @test_div_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_div_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = fdiv float %a, %b
+; CHECK-NEXT: ret float [[TMP1]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %4, <4 x float> %8)
+ %r = extractelement <4 x float> %9, i32 0
+ ret float %r
+}
+
+define float @test_div_ss_1(float %a, float %b) {
+; CHECK-LABEL: @test_div_ss_1(
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = tail call <4 x float> @llvm.x86.sse.div.ss(<4 x float> %4, <4 x float> %5)
+ %7 = extractelement <4 x float> %6, i32 1
+ ret float %7
+}
+
+define <4 x float> @test_min_ss(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_min_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a, <4 x float> %b)
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %a, <4 x float> %3)
+ ret <4 x float> %4
+}
+
+define float @test_min_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_min_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
+; CHECK-NEXT: ret float [[TMP4]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %4, <4 x float> %8)
+ %10 = extractelement <4 x float> %9, i32 0
+ ret float %10
+}
+
+define float @test_min_ss_2(float %a, float %b) {
+; CHECK-LABEL: @test_min_ss_2(
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = tail call <4 x float> @llvm.x86.sse.min.ss(<4 x float> %4, <4 x float> %5)
+ %7 = extractelement <4 x float> %6, i32 2
+ ret float %7
+}
+
+define <4 x float> @test_max_ss(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_max_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> %b)
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %a, <4 x float> %3)
+ ret <4 x float> %4
+}
+
+define float @test_max_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_max_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
+; CHECK-NEXT: ret float [[TMP4]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %4, <4 x float> %8)
+ %10 = extractelement <4 x float> %9, i32 0
+ ret float %10
+}
+
+define float @test_max_ss_3(float %a, float %b) {
+; CHECK-LABEL: @test_max_ss_3(
+; CHECK-NEXT: ret float 3.000000e+00
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = tail call <4 x float> @llvm.x86.sse.max.ss(<4 x float> %4, <4 x float> %5)
+ %7 = extractelement <4 x float> %6, i32 3
+ ret float %7
+}
+
+define <4 x float> @test_cmp_ss(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_cmp_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a, <4 x float> %b, i8 0)
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %a, <4 x float> %3, i8 0)
+ ret <4 x float> %4
+}
+
+define float @test_cmp_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_cmp_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]], i8 0)
+; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP3]], i32 0
+; CHECK-NEXT: ret float [[R]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %4, <4 x float> %8, i8 0)
+ %r = extractelement <4 x float> %9, i32 0
+ ret float %r
+}
+
+define float @test_cmp_ss_1(float %a, float %b) {
+; CHECK-LABEL: @test_cmp_ss_1(
+; CHECK-NEXT: ret float 1.000000e+00
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = tail call <4 x float> @llvm.x86.sse.cmp.ss(<4 x float> %4, <4 x float> %5, i8 0)
+ %7 = extractelement <4 x float> %6, i32 1
+ ret float %7
+}
+
+define i32 @test_comieq_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_comieq_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call i32 @llvm.x86.sse.comieq.ss(<4 x float> %4, <4 x float> %8)
+ ret i32 %9
+}
+
+define i32 @test_comige_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_comige_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call i32 @llvm.x86.sse.comige.ss(<4 x float> %4, <4 x float> %8)
+ ret i32 %9
+}
+
+define i32 @test_comigt_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_comigt_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call i32 @llvm.x86.sse.comigt.ss(<4 x float> %4, <4 x float> %8)
+ ret i32 %9
+}
+
+define i32 @test_comile_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_comile_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call i32 @llvm.x86.sse.comile.ss(<4 x float> %4, <4 x float> %8)
+ ret i32 %9
+}
+
+define i32 @test_comilt_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_comilt_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call i32 @llvm.x86.sse.comilt.ss(<4 x float> %4, <4 x float> %8)
+ ret i32 %9
+}
+
+define i32 @test_comineq_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_comineq_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call i32 @llvm.x86.sse.comineq.ss(<4 x float> %4, <4 x float> %8)
+ ret i32 %9
+}
+
+define i32 @test_ucomieq_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_ucomieq_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call i32 @llvm.x86.sse.ucomieq.ss(<4 x float> %4, <4 x float> %8)
+ ret i32 %9
+}
+
+define i32 @test_ucomige_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_ucomige_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call i32 @llvm.x86.sse.ucomige.ss(<4 x float> %4, <4 x float> %8)
+ ret i32 %9
+}
+
+define i32 @test_ucomigt_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_ucomigt_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call i32 @llvm.x86.sse.ucomigt.ss(<4 x float> %4, <4 x float> %8)
+ ret i32 %9
+}
+
+define i32 @test_ucomile_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_ucomile_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call i32 @llvm.x86.sse.ucomile.ss(<4 x float> %4, <4 x float> %8)
+ ret i32 %9
+}
+
+define i32 @test_ucomilt_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_ucomilt_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call i32 @llvm.x86.sse.ucomilt.ss(<4 x float> %4, <4 x float> %8)
+ ret i32 %9
+}
+
+define i32 @test_ucomineq_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_ucomineq_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call i32 @llvm.x86.sse.ucomineq.ss(<4 x float> %4, <4 x float> %8)
+ ret i32 %9
+}
+
+declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>)
+declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>)
+declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>)
+
+declare <4 x float> @llvm.x86.sse.add.ss(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.sse.sub.ss(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.sse.mul.ss(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.sse.div.ss(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>)
+declare <4 x float> @llvm.x86.sse.cmp.ss(<4 x float>, <4 x float>, i8)
+
+declare i32 @llvm.x86.sse.comieq.ss(<4 x float>, <4 x float>)
+declare i32 @llvm.x86.sse.comige.ss(<4 x float>, <4 x float>)
+declare i32 @llvm.x86.sse.comigt.ss(<4 x float>, <4 x float>)
+declare i32 @llvm.x86.sse.comile.ss(<4 x float>, <4 x float>)
+declare i32 @llvm.x86.sse.comilt.ss(<4 x float>, <4 x float>)
+declare i32 @llvm.x86.sse.comineq.ss(<4 x float>, <4 x float>)
+
+declare i32 @llvm.x86.sse.ucomieq.ss(<4 x float>, <4 x float>)
+declare i32 @llvm.x86.sse.ucomige.ss(<4 x float>, <4 x float>)
+declare i32 @llvm.x86.sse.ucomigt.ss(<4 x float>, <4 x float>)
+declare i32 @llvm.x86.sse.ucomile.ss(<4 x float>, <4 x float>)
+declare i32 @llvm.x86.sse.ucomilt.ss(<4 x float>, <4 x float>)
+declare i32 @llvm.x86.sse.ucomineq.ss(<4 x float>, <4 x float>)
diff --git a/test/Transforms/InstCombine/x86-sse2.ll b/test/Transforms/InstCombine/x86-sse2.ll
new file mode 100644
index 000000000000..560930bea6ef
--- /dev/null
+++ b/test/Transforms/InstCombine/x86-sse2.ll
@@ -0,0 +1,500 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define double @test_sqrt_sd_0(double %a) {
+; CHECK-LABEL: @test_sqrt_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
+; CHECK-NEXT: ret double [[TMP3]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2)
+ %4 = extractelement <2 x double> %3, i32 0
+ ret double %4
+}
+
+define double @test_sqrt_sd_1(double %a) {
+; CHECK-LABEL: @test_sqrt_sd_1(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = tail call <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double> %2)
+ %4 = extractelement <2 x double> %3, i32 1
+ ret double %4
+}
+
+define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_add_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a, <2 x double> %b)
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+ %2 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %a, <2 x double> %1)
+ ret <2 x double> %2
+}
+
+define double @test_add_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_add_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = fadd double %a, %b
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %2, <2 x double> %4)
+ %6 = extractelement <2 x double> %5, i32 0
+ ret double %6
+}
+
+define double @test_add_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_add_sd_1(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.sse2.add.sd(<2 x double> %2, <2 x double> %4)
+ %6 = extractelement <2 x double> %5, i32 1
+ ret double %6
+}
+
+define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_sub_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a, <2 x double> %b)
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+ %2 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %a, <2 x double> %1)
+ ret <2 x double> %2
+}
+
+define double @test_sub_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_sub_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = fsub double %a, %b
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %2, <2 x double> %4)
+ %6 = extractelement <2 x double> %5, i32 0
+ ret double %6
+}
+
+define double @test_sub_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_sub_sd_1(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.sse2.sub.sd(<2 x double> %2, <2 x double> %4)
+ %6 = extractelement <2 x double> %5, i32 1
+ ret double %6
+}
+
+define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_mul_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a, <2 x double> %b)
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+ %2 = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %a, <2 x double> %1)
+ ret <2 x double> %2
+}
+
+define double @test_mul_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_mul_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = fmul double %a, %b
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %2, <2 x double> %4)
+ %6 = extractelement <2 x double> %5, i32 0
+ ret double %6
+}
+
+define double @test_mul_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_mul_sd_1(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.sse2.mul.sd(<2 x double> %2, <2 x double> %4)
+ %6 = extractelement <2 x double> %5, i32 1
+ ret double %6
+}
+
+define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_div_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a, <2 x double> %b)
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+ %2 = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %a, <2 x double> %1)
+ ret <2 x double> %2
+}
+
+define double @test_div_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_div_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = fdiv double %a, %b
+; CHECK-NEXT: ret double [[TMP1]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %2, <2 x double> %4)
+ %6 = extractelement <2 x double> %5, i32 0
+ ret double %6
+}
+
+define double @test_div_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_div_sd_1(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.sse2.div.sd(<2 x double> %2, <2 x double> %4)
+ %6 = extractelement <2 x double> %5, i32 1
+ ret double %6
+}
+
+define <2 x double> @test_min_sd(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_min_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a, <2 x double> %b)
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+ %2 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %a, <2 x double> %1)
+ ret <2 x double> %2
+}
+
+define double @test_min_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_min_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
+; CHECK-NEXT: ret double [[TMP4]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %2, <2 x double> %4)
+ %6 = extractelement <2 x double> %5, i32 0
+ ret double %6
+}
+
+define double @test_min_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_min_sd_1(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.sse2.min.sd(<2 x double> %2, <2 x double> %4)
+ %6 = extractelement <2 x double> %5, i32 1
+ ret double %6
+}
+
+define <2 x double> @test_max_sd(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_max_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a, <2 x double> %b)
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+ %2 = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %a, <2 x double> %1)
+ ret <2 x double> %2
+}
+
+define double @test_max_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_max_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
+; CHECK-NEXT: ret double [[TMP4]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %2, <2 x double> %4)
+ %6 = extractelement <2 x double> %5, i32 0
+ ret double %6
+}
+
+define double @test_max_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_max_sd_1(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.sse2.max.sd(<2 x double> %2, <2 x double> %4)
+ %6 = extractelement <2 x double> %5, i32 1
+ ret double %6
+}
+
+define <2 x double> @test_cmp_sd(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_cmp_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a, <2 x double> %b, i8 0)
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %1 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+ %2 = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %a, <2 x double> %1, i8 0)
+ ret <2 x double> %2
+}
+
+define double @test_cmp_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_cmp_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]], i8 0)
+; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[TMP3]], i32 0
+; CHECK-NEXT: ret double [[TMP4]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %2, <2 x double> %4, i8 0)
+ %6 = extractelement <2 x double> %5, i32 0
+ ret double %6
+}
+
+define double @test_cmp_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_cmp_sd_1(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double> %2, <2 x double> %4, i8 0)
+ %6 = extractelement <2 x double> %5, i32 1
+ ret double %6
+}
+
+define i32 @test_comieq_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_comieq_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comieq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call i32 @llvm.x86.sse2.comieq.sd(<2 x double> %2, <2 x double> %4)
+ ret i32 %5
+}
+
+define i32 @test_comige_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_comige_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comige.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call i32 @llvm.x86.sse2.comige.sd(<2 x double> %2, <2 x double> %4)
+ ret i32 %5
+}
+
+define i32 @test_comigt_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_comigt_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comigt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call i32 @llvm.x86.sse2.comigt.sd(<2 x double> %2, <2 x double> %4)
+ ret i32 %5
+}
+
+define i32 @test_comile_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_comile_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comile.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call i32 @llvm.x86.sse2.comile.sd(<2 x double> %2, <2 x double> %4)
+ ret i32 %5
+}
+
+define i32 @test_comilt_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_comilt_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comilt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call i32 @llvm.x86.sse2.comilt.sd(<2 x double> %2, <2 x double> %4)
+ ret i32 %5
+}
+
+define i32 @test_comineq_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_comineq_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.comineq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call i32 @llvm.x86.sse2.comineq.sd(<2 x double> %2, <2 x double> %4)
+ ret i32 %5
+}
+
+define i32 @test_ucomieq_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_ucomieq_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call i32 @llvm.x86.sse2.ucomieq.sd(<2 x double> %2, <2 x double> %4)
+ ret i32 %5
+}
+
+define i32 @test_ucomige_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_ucomige_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call i32 @llvm.x86.sse2.ucomige.sd(<2 x double> %2, <2 x double> %4)
+ ret i32 %5
+}
+
+define i32 @test_ucomigt_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_ucomigt_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call i32 @llvm.x86.sse2.ucomigt.sd(<2 x double> %2, <2 x double> %4)
+ ret i32 %5
+}
+
+define i32 @test_ucomile_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_ucomile_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call i32 @llvm.x86.sse2.ucomile.sd(<2 x double> %2, <2 x double> %4)
+ ret i32 %5
+}
+
+define i32 @test_ucomilt_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_ucomilt_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call i32 @llvm.x86.sse2.ucomilt.sd(<2 x double> %2, <2 x double> %4)
+ ret i32 %5
+}
+
+define i32 @test_ucomineq_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_ucomineq_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]])
+; CHECK-NEXT: ret i32 [[TMP3]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call i32 @llvm.x86.sse2.ucomineq.sd(<2 x double> %2, <2 x double> %4)
+ ret i32 %5
+}
+
+declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
+
+declare <2 x double> @llvm.x86.sse2.add.sd(<2 x double>, <2 x double>)
+declare <2 x double> @llvm.x86.sse2.sub.sd(<2 x double>, <2 x double>)
+declare <2 x double> @llvm.x86.sse2.mul.sd(<2 x double>, <2 x double>)
+declare <2 x double> @llvm.x86.sse2.div.sd(<2 x double>, <2 x double>)
+declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>)
+declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>)
+declare <2 x double> @llvm.x86.sse2.cmp.sd(<2 x double>, <2 x double>, i8)
+
+declare i32 @llvm.x86.sse2.comieq.sd(<2 x double>, <2 x double>)
+declare i32 @llvm.x86.sse2.comige.sd(<2 x double>, <2 x double>)
+declare i32 @llvm.x86.sse2.comigt.sd(<2 x double>, <2 x double>)
+declare i32 @llvm.x86.sse2.comile.sd(<2 x double>, <2 x double>)
+declare i32 @llvm.x86.sse2.comilt.sd(<2 x double>, <2 x double>)
+declare i32 @llvm.x86.sse2.comineq.sd(<2 x double>, <2 x double>)
+
+declare i32 @llvm.x86.sse2.ucomieq.sd(<2 x double>, <2 x double>)
+declare i32 @llvm.x86.sse2.ucomige.sd(<2 x double>, <2 x double>)
+declare i32 @llvm.x86.sse2.ucomigt.sd(<2 x double>, <2 x double>)
+declare i32 @llvm.x86.sse2.ucomile.sd(<2 x double>, <2 x double>)
+declare i32 @llvm.x86.sse2.ucomilt.sd(<2 x double>, <2 x double>)
+declare i32 @llvm.x86.sse2.ucomineq.sd(<2 x double>, <2 x double>)
diff --git a/test/Transforms/InstCombine/x86-sse41.ll b/test/Transforms/InstCombine/x86-sse41.ll
new file mode 100644
index 000000000000..16975471b9e1
--- /dev/null
+++ b/test/Transforms/InstCombine/x86-sse41.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define <2 x double> @test_round_sd(<2 x double> %a, <2 x double> %b) {
+; CHECK-LABEL: @test_round_sd(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %a, <2 x double> %b, i32 10)
+; CHECK-NEXT: ret <2 x double> [[TMP1]]
+;
+ %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 0
+ %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
+ %3 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %1, <2 x double> %2, i32 10)
+ ret <2 x double> %3
+}
+
+define double @test_round_sd_0(double %a, double %b) {
+; CHECK-LABEL: @test_round_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %b, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> undef, <2 x double> [[TMP1]], i32 10)
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
+; CHECK-NEXT: ret double [[TMP3]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %2, <2 x double> %4, i32 10)
+ %6 = extractelement <2 x double> %5, i32 0
+ ret double %6
+}
+
+define double @test_round_sd_1(double %a, double %b) {
+; CHECK-LABEL: @test_round_sd_1(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = insertelement <2 x double> undef, double %b, i32 0
+ %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
+ %5 = tail call <2 x double> @llvm.x86.sse41.round.sd(<2 x double> %2, <2 x double> %4, i32 10)
+ %6 = extractelement <2 x double> %5, i32 1
+ ret double %6
+}
+
+define <4 x float> @test_round_ss(<4 x float> %a, <4 x float> %b) {
+; CHECK-LABEL: @test_round_ss(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> <float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>, <4 x float> %b, i32 10)
+; CHECK-NEXT: ret <4 x float> [[TMP1]]
+;
+ %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
+ %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
+ %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
+ %4 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
+ %5 = insertelement <4 x float> %4, float 2.000000e+00, i32 2
+ %6 = insertelement <4 x float> %5, float 3.000000e+00, i32 3
+ %7 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %3, <4 x float> %6, i32 10)
+ ret <4 x float> %7
+}
+
+define float @test_round_ss_0(float %a, float %b) {
+; CHECK-LABEL: @test_round_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %b, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> undef, <4 x float> [[TMP1]], i32 10)
+; CHECK-NEXT: [[R:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
+; CHECK-NEXT: ret float [[R]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %4, <4 x float> %8, i32 10)
+ %r = extractelement <4 x float> %9, i32 0
+ ret float %r
+}
+
+define float @test_round_ss_2(float %a, float %b) {
+; CHECK-LABEL: @test_round_ss_2(
+; CHECK-NEXT: ret float 2.000000e+00
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = insertelement <4 x float> undef, float %b, i32 0
+ %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
+ %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
+ %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
+ %9 = tail call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %4, <4 x float> %8, i32 10)
+ %r = extractelement <4 x float> %9, i32 2
+ ret float %r
+}
+
+declare <2 x double> @llvm.x86.sse41.round.sd(<2 x double>, <2 x double>, i32) nounwind readnone
+declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
diff --git a/test/Transforms/InstCombine/x86-sse4a.ll b/test/Transforms/InstCombine/x86-sse4a.ll
index 815d26bd2254..53353abefb36 100644
--- a/test/Transforms/InstCombine/x86-sse4a.ll
+++ b/test/Transforms/InstCombine/x86-sse4a.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
;
@@ -5,45 +6,51 @@
;
define <2 x i64> @test_extrq_call(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_call
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-LABEL: @test_extrq_call(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) #1
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind
ret <2 x i64> %1
}
define <2 x i64> @test_extrq_zero_arg0(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_zero_arg0
-; CHECK-NEXT: ret <2 x i64> <i64 0, i64 undef>
+; CHECK-LABEL: @test_extrq_zero_arg0(
+; CHECK-NEXT: ret <2 x i64> <i64 0, i64 undef>
+;
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> zeroinitializer, <16 x i8> %y) nounwind
ret <2 x i64> %1
}
define <2 x i64> @test_extrq_zero_arg1(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_zero_arg1
-; CHECK-NEXT: ret <2 x i64> %x
+; CHECK-LABEL: @test_extrq_zero_arg1(
+; CHECK-NEXT: ret <2 x i64> %x
+;
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> zeroinitializer) nounwind
ret <2 x i64> %1
}
define <2 x i64> @test_extrq_to_extqi(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_to_extqi
-; CHECK-NEXT: %1 = call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 15)
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-LABEL: @test_extrq_to_extqi(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 15)
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> <i8 8, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
ret <2 x i64> %1
}
define <2 x i64> @test_extrq_constant(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_constant
-; CHECK-NEXT: ret <2 x i64> <i64 255, i64 undef>
+; CHECK-LABEL: @test_extrq_constant(
+; CHECK-NEXT: ret <2 x i64> <i64 255, i64 undef>
+;
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> <i64 -1, i64 55>, <16 x i8> <i8 8, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
ret <2 x i64> %1
}
define <2 x i64> @test_extrq_constant_undef(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_constant_undef
-; CHECK-NEXT: ret <2 x i64> <i64 65535, i64 undef>
+; CHECK-LABEL: @test_extrq_constant_undef(
+; CHECK-NEXT: ret <2 x i64> <i64 65535, i64 undef>
+;
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> <i64 -1, i64 undef>, <16 x i8> <i8 16, i8 15, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0>) nounwind
ret <2 x i64> %1
}
@@ -53,57 +60,64 @@ define <2 x i64> @test_extrq_constant_undef(<2 x i64> %x, <16 x i8> %y) {
;
define <2 x i64> @test_extrqi_call(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_call
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 23)
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-LABEL: @test_extrqi_call(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 23)
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 23)
ret <2 x i64> %1
}
define <2 x i64> @test_extrqi_shuffle_1zuu(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_shuffle_1zuu
-; CHECK-NEXT: %1 = bitcast <2 x i64> %x to <16 x i8>
-; CHECK-NEXT: %2 = shufflevector <16 x i8> %1, <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64>
-; CHECK-NEXT: ret <2 x i64> %3
+; CHECK-LABEL: @test_extrqi_shuffle_1zuu(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> %x to <16 x i8>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[TMP3]]
+;
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 32, i8 32)
ret <2 x i64> %1
}
define <2 x i64> @test_extrqi_shuffle_2zzzzzzzuuuuuuuu(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_shuffle_2zzzzzzzuuuuuuuu
-; CHECK-NEXT: %1 = bitcast <2 x i64> %x to <16 x i8>
-; CHECK-NEXT: %2 = shufflevector <16 x i8> %1, <16 x i8> <i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 2, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: %3 = bitcast <16 x i8> %2 to <2 x i64>
-; CHECK-NEXT: ret <2 x i64> %3
+; CHECK-LABEL: @test_extrqi_shuffle_2zzzzzzzuuuuuuuu(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> %x to <16 x i8>
+; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> <i8 undef, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef>, <16 x i32> <i32 2, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast <16 x i8> [[TMP2]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[TMP3]]
+;
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 8, i8 16)
ret <2 x i64> %1
}
define <2 x i64> @test_extrqi_undef(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_undef
-; CHECK-NEXT: ret <2 x i64> undef
+; CHECK-LABEL: @test_extrqi_undef(
+; CHECK-NEXT: ret <2 x i64> undef
+;
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 32, i8 33)
ret <2 x i64> %1
}
define <2 x i64> @test_extrqi_zero(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_zero
-; CHECK-NEXT: ret <2 x i64> <i64 0, i64 undef>
+; CHECK-LABEL: @test_extrqi_zero(
+; CHECK-NEXT: ret <2 x i64> <i64 0, i64 undef>
+;
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> zeroinitializer, i8 3, i8 18)
ret <2 x i64> %1
}
define <2 x i64> @test_extrqi_constant(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_constant
-; CHECK-NEXT: ret <2 x i64> <i64 7, i64 undef>
+; CHECK-LABEL: @test_extrqi_constant(
+; CHECK-NEXT: ret <2 x i64> <i64 7, i64 undef>
+;
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> <i64 -1, i64 55>, i8 3, i8 18)
ret <2 x i64> %1
}
define <2 x i64> @test_extrqi_constant_undef(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_constant_undef
-; CHECK-NEXT: ret <2 x i64> <i64 15, i64 undef>
+; CHECK-LABEL: @test_extrqi_constant_undef(
+; CHECK-NEXT: ret <2 x i64> <i64 15, i64 undef>
+;
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> <i64 -1, i64 undef>, i8 4, i8 18)
ret <2 x i64> %1
}
@@ -113,31 +127,35 @@ define <2 x i64> @test_extrqi_constant_undef(<2 x i64> %x) {
;
define <2 x i64> @test_insertq_call(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertq_call
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y)
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-LABEL: @test_insertq_call(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) #1
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
ret <2 x i64> %1
}
define <2 x i64> @test_insertq_to_insertqi(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertq_to_insertqi
-; CHECK-NEXT: %1 = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> <i64 8, i64 undef>, i8 18, i8 2)
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-LABEL: @test_insertq_to_insertqi(
+; CHECK-NEXT: [[TMP1:%.*]] = call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> <i64 8, i64 undef>, i8 18, i8 2)
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> <i64 8, i64 658>) nounwind
ret <2 x i64> %1
}
define <2 x i64> @test_insertq_constant(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertq_constant
-; CHECK-NEXT: ret <2 x i64> <i64 32, i64 undef>
+; CHECK-LABEL: @test_insertq_constant(
+; CHECK-NEXT: ret <2 x i64> <i64 32, i64 undef>
+;
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> <i64 0, i64 0>, <2 x i64> <i64 8, i64 658>) nounwind
ret <2 x i64> %1
}
define <2 x i64> @test_insertq_constant_undef(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertq_constant_undef
-; CHECK-NEXT: ret <2 x i64> <i64 33, i64 undef>
+; CHECK-LABEL: @test_insertq_constant_undef(
+; CHECK-NEXT: ret <2 x i64> <i64 33, i64 undef>
+;
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> <i64 1, i64 undef>, <2 x i64> <i64 8, i64 658>) nounwind
ret <2 x i64> %1
}
@@ -147,9 +165,10 @@ define <2 x i64> @test_insertq_constant_undef(<2 x i64> %x, <2 x i64> %y) {
;
define <16 x i8> @test_insertqi_shuffle_04uu(<16 x i8> %v, <16 x i8> %i) {
-; CHECK-LABEL: @test_insertqi_shuffle_04uu
-; CHECK-NEXT: %1 = shufflevector <16 x i8> %v, <16 x i8> %i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: ret <16 x i8> %1
+; CHECK-LABEL: @test_insertqi_shuffle_04uu(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %v, <16 x i8> %i, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 16, i32 17, i32 18, i32 19, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
%1 = bitcast <16 x i8> %v to <2 x i64>
%2 = bitcast <16 x i8> %i to <2 x i64>
%3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 32, i8 32)
@@ -158,9 +177,10 @@ define <16 x i8> @test_insertqi_shuffle_04uu(<16 x i8> %v, <16 x i8> %i) {
}
define <16 x i8> @test_insertqi_shuffle_8123uuuu(<16 x i8> %v, <16 x i8> %i) {
-; CHECK-LABEL: @test_insertqi_shuffle_8123uuuu
-; CHECK-NEXT: %1 = shufflevector <16 x i8> %v, <16 x i8> %i, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
-; CHECK-NEXT: ret <16 x i8> %1
+; CHECK-LABEL: @test_insertqi_shuffle_8123uuuu(
+; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> %v, <16 x i8> %i, <16 x i32> <i32 16, i32 17, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
+; CHECK-NEXT: ret <16 x i8> [[TMP1]]
+;
%1 = bitcast <16 x i8> %v to <2 x i64>
%2 = bitcast <16 x i8> %i to <2 x i64>
%3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 16, i8 0)
@@ -169,8 +189,9 @@ define <16 x i8> @test_insertqi_shuffle_8123uuuu(<16 x i8> %v, <16 x i8> %i) {
}
define <2 x i64> @test_insertqi_constant(<2 x i64> %v, <2 x i64> %i) {
-; CHECK-LABEL: @test_insertqi_constant
-; CHECK-NEXT: ret <2 x i64> <i64 -131055, i64 undef>
+; CHECK-LABEL: @test_insertqi_constant(
+; CHECK-NEXT: ret <2 x i64> <i64 -131055, i64 undef>
+;
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> <i64 -1, i64 -1>, <2 x i64> <i64 8, i64 0>, i8 16, i8 1)
ret <2 x i64> %1
}
@@ -179,36 +200,41 @@ define <2 x i64> @test_insertqi_constant(<2 x i64> %v, <2 x i64> %i) {
; the result are undefined, and we copy the bottom 64 bits from the
; second arg
define <2 x i64> @testInsert64Bits(<2 x i64> %v, <2 x i64> %i) {
-; CHECK-LABEL: @testInsert64Bits
-; CHECK-NEXT: ret <2 x i64> %i
+; CHECK-LABEL: @testInsert64Bits(
+; CHECK-NEXT: ret <2 x i64> %i
+;
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 0)
ret <2 x i64> %1
}
define <2 x i64> @testZeroLength(<2 x i64> %v, <2 x i64> %i) {
-; CHECK-LABEL: @testZeroLength
-; CHECK-NEXT: ret <2 x i64> %i
+; CHECK-LABEL: @testZeroLength(
+; CHECK-NEXT: ret <2 x i64> %i
+;
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 0)
ret <2 x i64> %1
}
define <2 x i64> @testUndefinedInsertq_1(<2 x i64> %v, <2 x i64> %i) {
-; CHECK-LABEL: @testUndefinedInsertq_1
-; CHECK-NEXT: ret <2 x i64> undef
+; CHECK-LABEL: @testUndefinedInsertq_1(
+; CHECK-NEXT: ret <2 x i64> undef
+;
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 0, i8 16)
ret <2 x i64> %1
}
define <2 x i64> @testUndefinedInsertq_2(<2 x i64> %v, <2 x i64> %i) {
-; CHECK-LABEL: @testUndefinedInsertq_2
-; CHECK-NEXT: ret <2 x i64> undef
+; CHECK-LABEL: @testUndefinedInsertq_2(
+; CHECK-NEXT: ret <2 x i64> undef
+;
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 48, i8 32)
ret <2 x i64> %1
}
define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) {
-; CHECK-LABEL: @testUndefinedInsertq_3
-; CHECK-NEXT: ret <2 x i64> undef
+; CHECK-LABEL: @testUndefinedInsertq_3(
+; CHECK-NEXT: ret <2 x i64> undef
+;
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %v, <2 x i64> %i, i8 64, i8 16)
ret <2 x i64> %1
}
@@ -218,27 +244,30 @@ define <2 x i64> @testUndefinedInsertq_3(<2 x i64> %v, <2 x i64> %i) {
;
define <2 x i64> @test_extrq_arg0(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_arg0
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-LABEL: @test_extrq_arg0(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) #1
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
%1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %y) nounwind
ret <2 x i64> %2
}
define <2 x i64> @test_extrq_arg1(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_arg1
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-LABEL: @test_extrq_arg1(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) #1
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
%1 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%2 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %1) nounwind
ret <2 x i64> %2
}
define <2 x i64> @test_extrq_args01(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_args01
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y)
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-LABEL: @test_extrq_args01(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) #1
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
%1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = shufflevector <16 x i8> %y, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
%3 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %1, <16 x i8> %2) nounwind
@@ -246,69 +275,77 @@ define <2 x i64> @test_extrq_args01(<2 x i64> %x, <16 x i8> %y) {
}
define <2 x i64> @test_extrq_ret(<2 x i64> %x, <16 x i8> %y) {
-; CHECK-LABEL: @test_extrq_ret
-; CHECK-NEXT: ret <2 x i64> undef
+; CHECK-LABEL: @test_extrq_ret(
+; CHECK-NEXT: ret <2 x i64> undef
+;
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrq(<2 x i64> %x, <16 x i8> %y) nounwind
%2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
ret <2 x i64> %2
}
define <2 x i64> @test_extrqi_arg0(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_arg0
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2)
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-LABEL: @test_extrqi_arg0(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2)
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
%1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %1, i8 3, i8 2)
ret <2 x i64> %2
}
define <2 x i64> @test_extrqi_ret(<2 x i64> %x) {
-; CHECK-LABEL: @test_extrqi_ret
-; CHECK-NEXT: ret <2 x i64> undef
+; CHECK-LABEL: @test_extrqi_ret(
+; CHECK-NEXT: ret <2 x i64> undef
+;
%1 = tail call <2 x i64> @llvm.x86.sse4a.extrqi(<2 x i64> %x, i8 3, i8 2) nounwind
%2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
ret <2 x i64> %2
}
define <2 x i64> @test_insertq_arg0(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertq_arg0
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y)
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-LABEL: @test_insertq_arg0(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) #1
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
%1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %1, <2 x i64> %y) nounwind
ret <2 x i64> %2
}
define <2 x i64> @test_insertq_ret(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertq_ret
-; CHECK-NEXT: ret <2 x i64> undef
+; CHECK-LABEL: @test_insertq_ret(
+; CHECK-NEXT: ret <2 x i64> undef
+;
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertq(<2 x i64> %x, <2 x i64> %y) nounwind
%2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
ret <2 x i64> %2
}
define <2 x i64> @test_insertqi_arg0(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertqi_arg0
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2)
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-LABEL: @test_insertqi_arg0(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) #1
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
%1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %y, i8 3, i8 2) nounwind
ret <2 x i64> %2
}
define <2 x i64> @test_insertqi_arg1(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertqi_arg1
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2)
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-LABEL: @test_insertqi_arg1(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) #1
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
%1 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %1, i8 3, i8 2) nounwind
ret <2 x i64> %2
}
define <2 x i64> @test_insertqi_args01(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertqi_args01
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2)
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-LABEL: @test_insertqi_args01(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) #1
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
%1 = shufflevector <2 x i64> %x, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = shufflevector <2 x i64> %y, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%3 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %1, <2 x i64> %2, i8 3, i8 2) nounwind
@@ -316,8 +353,9 @@ define <2 x i64> @test_insertqi_args01(<2 x i64> %x, <2 x i64> %y) {
}
define <2 x i64> @test_insertqi_ret(<2 x i64> %x, <2 x i64> %y) {
-; CHECK-LABEL: @test_insertqi_ret
-; CHECK-NEXT: ret <2 x i64> undef
+; CHECK-LABEL: @test_insertqi_ret(
+; CHECK-NEXT: ret <2 x i64> undef
+;
%1 = tail call <2 x i64> @llvm.x86.sse4a.insertqi(<2 x i64> %x, <2 x i64> %y, i8 3, i8 2) nounwind
%2 = shufflevector <2 x i64> %1, <2 x i64> undef, <2 x i32> <i32 1, i32 1>
ret <2 x i64> %2
diff --git a/test/Transforms/InstCombine/x86-vector-shifts.ll b/test/Transforms/InstCombine/x86-vector-shifts.ll
index 59e445a40bef..012a826a7651 100644
--- a/test/Transforms/InstCombine/x86-vector-shifts.ll
+++ b/test/Transforms/InstCombine/x86-vector-shifts.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
@@ -6,93 +7,105 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
;
define <8 x i16> @sse2_psrai_w_0(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psrai_w_0
-; CHECK-NEXT: ret <8 x i16> %v
+; CHECK-LABEL: @sse2_psrai_w_0(
+; CHECK-NEXT: ret <8 x i16> %v
+;
%1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 0)
ret <8 x i16> %1
}
define <8 x i16> @sse2_psrai_w_15(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psrai_w_15
-; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT: ret <8 x i16> %1
+; CHECK-LABEL: @sse2_psrai_w_15(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
%1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 15)
ret <8 x i16> %1
}
define <8 x i16> @sse2_psrai_w_64(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psrai_w_64
-; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT: ret <8 x i16> %1
+; CHECK-LABEL: @sse2_psrai_w_64(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
%1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %v, i32 64)
ret <8 x i16> %1
}
define <4 x i32> @sse2_psrai_d_0(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psrai_d_0
-; CHECK-NEXT: ret <4 x i32> %v
+; CHECK-LABEL: @sse2_psrai_d_0(
+; CHECK-NEXT: ret <4 x i32> %v
+;
%1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 0)
ret <4 x i32> %1
}
define <4 x i32> @sse2_psrai_d_15(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psrai_d_15
-; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT: ret <4 x i32> %1
+; CHECK-LABEL: @sse2_psrai_d_15(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
%1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 15)
ret <4 x i32> %1
}
define <4 x i32> @sse2_psrai_d_64(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psrai_d_64
-; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
-; CHECK-NEXT: ret <4 x i32> %1
+; CHECK-LABEL: @sse2_psrai_d_64(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
%1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %v, i32 64)
ret <4 x i32> %1
}
define <16 x i16> @avx2_psrai_w_0(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psrai_w_0
-; CHECK-NEXT: ret <16 x i16> %v
+; CHECK-LABEL: @avx2_psrai_w_0(
+; CHECK-NEXT: ret <16 x i16> %v
+;
%1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 0)
ret <16 x i16> %1
}
define <16 x i16> @avx2_psrai_w_15(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psrai_w_15
-; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT: ret <16 x i16> %1
+; CHECK-LABEL: @avx2_psrai_w_15(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
%1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 15)
ret <16 x i16> %1
}
define <16 x i16> @avx2_psrai_w_64(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psrai_w_64
-; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT: ret <16 x i16> %1
+; CHECK-LABEL: @avx2_psrai_w_64(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
%1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %v, i32 64)
ret <16 x i16> %1
}
define <8 x i32> @avx2_psrai_d_0(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psrai_d_0
-; CHECK-NEXT: ret <8 x i32> %v
+; CHECK-LABEL: @avx2_psrai_d_0(
+; CHECK-NEXT: ret <8 x i32> %v
+;
%1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 0)
ret <8 x i32> %1
}
define <8 x i32> @avx2_psrai_d_15(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psrai_d_15
-; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT: ret <8 x i32> %1
+; CHECK-LABEL: @avx2_psrai_d_15(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
%1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 15)
ret <8 x i32> %1
}
define <8 x i32> @avx2_psrai_d_64(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psrai_d_64
-; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
-; CHECK-NEXT: ret <8 x i32> %1
+; CHECK-LABEL: @avx2_psrai_d_64(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
%1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %v, i32 64)
ret <8 x i32> %1
}
@@ -102,133 +115,151 @@ define <8 x i32> @avx2_psrai_d_64(<8 x i32> %v) {
;
define <8 x i16> @sse2_psrli_w_0(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psrli_w_0
-; CHECK-NEXT: ret <8 x i16> %v
+; CHECK-LABEL: @sse2_psrli_w_0(
+; CHECK-NEXT: ret <8 x i16> %v
+;
%1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 0)
ret <8 x i16> %1
}
define <8 x i16> @sse2_psrli_w_15(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psrli_w_15
-; CHECK-NEXT: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT: ret <8 x i16> %1
+; CHECK-LABEL: @sse2_psrli_w_15(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
%1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 15)
ret <8 x i16> %1
}
define <8 x i16> @sse2_psrli_w_64(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psrli_w_64
-; CHECK-NEXT: ret <8 x i16> zeroinitializer
+; CHECK-LABEL: @sse2_psrli_w_64(
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
+;
%1 = tail call <8 x i16> @llvm.x86.sse2.psrli.w(<8 x i16> %v, i32 64)
ret <8 x i16> %1
}
define <4 x i32> @sse2_psrli_d_0(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psrli_d_0
-; CHECK-NEXT: ret <4 x i32> %v
+; CHECK-LABEL: @sse2_psrli_d_0(
+; CHECK-NEXT: ret <4 x i32> %v
+;
%1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 0)
ret <4 x i32> %1
}
define <4 x i32> @sse2_psrli_d_15(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psrli_d_15
-; CHECK-NEXT: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT: ret <4 x i32> %1
+; CHECK-LABEL: @sse2_psrli_d_15(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
%1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 15)
ret <4 x i32> %1
}
define <4 x i32> @sse2_psrli_d_64(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psrli_d_64
-; CHECK-NEXT: ret <4 x i32> zeroinitializer
+; CHECK-LABEL: @sse2_psrli_d_64(
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+;
%1 = tail call <4 x i32> @llvm.x86.sse2.psrli.d(<4 x i32> %v, i32 64)
ret <4 x i32> %1
}
define <2 x i64> @sse2_psrli_q_0(<2 x i64> %v) {
-; CHECK-LABEL: @sse2_psrli_q_0
-; CHECK-NEXT: ret <2 x i64> %v
+; CHECK-LABEL: @sse2_psrli_q_0(
+; CHECK-NEXT: ret <2 x i64> %v
+;
%1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 0)
ret <2 x i64> %1
}
define <2 x i64> @sse2_psrli_q_15(<2 x i64> %v) {
-; CHECK-LABEL: @sse2_psrli_q_15
-; CHECK-NEXT: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-LABEL: @sse2_psrli_q_15(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> %v, <i64 15, i64 15>
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
%1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 15)
ret <2 x i64> %1
}
define <2 x i64> @sse2_psrli_q_64(<2 x i64> %v) {
-; CHECK-LABEL: @sse2_psrli_q_64
-; CHECK-NEXT: ret <2 x i64> zeroinitializer
+; CHECK-LABEL: @sse2_psrli_q_64(
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
+;
%1 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %v, i32 64)
ret <2 x i64> %1
}
define <16 x i16> @avx2_psrli_w_0(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psrli_w_0
-; CHECK-NEXT: ret <16 x i16> %v
+; CHECK-LABEL: @avx2_psrli_w_0(
+; CHECK-NEXT: ret <16 x i16> %v
+;
%1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 0)
ret <16 x i16> %1
}
define <16 x i16> @avx2_psrli_w_15(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psrli_w_15
-; CHECK-NEXT: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT: ret <16 x i16> %1
+; CHECK-LABEL: @avx2_psrli_w_15(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
%1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 15)
ret <16 x i16> %1
}
define <16 x i16> @avx2_psrli_w_64(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psrli_w_64
-; CHECK-NEXT: ret <16 x i16> zeroinitializer
+; CHECK-LABEL: @avx2_psrli_w_64(
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
+;
%1 = tail call <16 x i16> @llvm.x86.avx2.psrli.w(<16 x i16> %v, i32 64)
ret <16 x i16> %1
}
define <8 x i32> @avx2_psrli_d_0(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psrli_d_0
-; CHECK-NEXT: ret <8 x i32> %v
+; CHECK-LABEL: @avx2_psrli_d_0(
+; CHECK-NEXT: ret <8 x i32> %v
+;
%1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 0)
ret <8 x i32> %1
}
define <8 x i32> @avx2_psrli_d_15(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psrli_d_15
-; CHECK-NEXT: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT: ret <8 x i32> %1
+; CHECK-LABEL: @avx2_psrli_d_15(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
%1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 15)
ret <8 x i32> %1
}
define <8 x i32> @avx2_psrli_d_64(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psrli_d_64
-; CHECK-NEXT: ret <8 x i32> zeroinitializer
+; CHECK-LABEL: @avx2_psrli_d_64(
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
+;
%1 = tail call <8 x i32> @llvm.x86.avx2.psrli.d(<8 x i32> %v, i32 64)
ret <8 x i32> %1
}
define <4 x i64> @avx2_psrli_q_0(<4 x i64> %v) {
-; CHECK-LABEL: @avx2_psrli_q_0
-; CHECK-NEXT: ret <4 x i64> %v
+; CHECK-LABEL: @avx2_psrli_q_0(
+; CHECK-NEXT: ret <4 x i64> %v
+;
%1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 0)
ret <4 x i64> %1
}
define <4 x i64> @avx2_psrli_q_15(<4 x i64> %v) {
-; CHECK-LABEL: @avx2_psrli_q_15
-; CHECK-NEXT: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
-; CHECK-NEXT: ret <4 x i64> %1
+; CHECK-LABEL: @avx2_psrli_q_15(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
%1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 15)
ret <4 x i64> %1
}
define <4 x i64> @avx2_psrli_q_64(<4 x i64> %v) {
-; CHECK-LABEL: @avx2_psrli_q_64
-; CHECK-NEXT: ret <4 x i64> zeroinitializer
+; CHECK-LABEL: @avx2_psrli_q_64(
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
+;
%1 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %v, i32 64)
ret <4 x i64> %1
}
@@ -238,133 +269,151 @@ define <4 x i64> @avx2_psrli_q_64(<4 x i64> %v) {
;
define <8 x i16> @sse2_pslli_w_0(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_pslli_w_0
-; CHECK-NEXT: ret <8 x i16> %v
+; CHECK-LABEL: @sse2_pslli_w_0(
+; CHECK-NEXT: ret <8 x i16> %v
+;
%1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 0)
ret <8 x i16> %1
}
define <8 x i16> @sse2_pslli_w_15(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_pslli_w_15
-; CHECK-NEXT: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT: ret <8 x i16> %1
+; CHECK-LABEL: @sse2_pslli_w_15(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
%1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 15)
ret <8 x i16> %1
}
define <8 x i16> @sse2_pslli_w_64(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_pslli_w_64
-; CHECK-NEXT: ret <8 x i16> zeroinitializer
+; CHECK-LABEL: @sse2_pslli_w_64(
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
+;
%1 = tail call <8 x i16> @llvm.x86.sse2.pslli.w(<8 x i16> %v, i32 64)
ret <8 x i16> %1
}
define <4 x i32> @sse2_pslli_d_0(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_pslli_d_0
-; CHECK-NEXT: ret <4 x i32> %v
+; CHECK-LABEL: @sse2_pslli_d_0(
+; CHECK-NEXT: ret <4 x i32> %v
+;
%1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 0)
ret <4 x i32> %1
}
define <4 x i32> @sse2_pslli_d_15(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_pslli_d_15
-; CHECK-NEXT: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT: ret <4 x i32> %1
+; CHECK-LABEL: @sse2_pslli_d_15(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
%1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 15)
ret <4 x i32> %1
}
define <4 x i32> @sse2_pslli_d_64(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_pslli_d_64
-; CHECK-NEXT: ret <4 x i32> zeroinitializer
+; CHECK-LABEL: @sse2_pslli_d_64(
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+;
%1 = tail call <4 x i32> @llvm.x86.sse2.pslli.d(<4 x i32> %v, i32 64)
ret <4 x i32> %1
}
define <2 x i64> @sse2_pslli_q_0(<2 x i64> %v) {
-; CHECK-LABEL: @sse2_pslli_q_0
-; CHECK-NEXT: ret <2 x i64> %v
+; CHECK-LABEL: @sse2_pslli_q_0(
+; CHECK-NEXT: ret <2 x i64> %v
+;
%1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 0)
ret <2 x i64> %1
}
define <2 x i64> @sse2_pslli_q_15(<2 x i64> %v) {
-; CHECK-LABEL: @sse2_pslli_q_15
-; CHECK-NEXT: %1 = shl <2 x i64> %v, <i64 15, i64 15>
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-LABEL: @sse2_pslli_q_15(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> %v, <i64 15, i64 15>
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
%1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 15)
ret <2 x i64> %1
}
define <2 x i64> @sse2_pslli_q_64(<2 x i64> %v) {
-; CHECK-LABEL: @sse2_pslli_q_64
-; CHECK-NEXT: ret <2 x i64> zeroinitializer
+; CHECK-LABEL: @sse2_pslli_q_64(
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
+;
%1 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %v, i32 64)
ret <2 x i64> %1
}
define <16 x i16> @avx2_pslli_w_0(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_pslli_w_0
-; CHECK-NEXT: ret <16 x i16> %v
+; CHECK-LABEL: @avx2_pslli_w_0(
+; CHECK-NEXT: ret <16 x i16> %v
+;
%1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 0)
ret <16 x i16> %1
}
define <16 x i16> @avx2_pslli_w_15(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_pslli_w_15
-; CHECK-NEXT: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT: ret <16 x i16> %1
+; CHECK-LABEL: @avx2_pslli_w_15(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
%1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 15)
ret <16 x i16> %1
}
define <16 x i16> @avx2_pslli_w_64(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_pslli_w_64
-; CHECK-NEXT: ret <16 x i16> zeroinitializer
+; CHECK-LABEL: @avx2_pslli_w_64(
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
+;
%1 = tail call <16 x i16> @llvm.x86.avx2.pslli.w(<16 x i16> %v, i32 64)
ret <16 x i16> %1
}
define <8 x i32> @avx2_pslli_d_0(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_pslli_d_0
-; CHECK-NEXT: ret <8 x i32> %v
+; CHECK-LABEL: @avx2_pslli_d_0(
+; CHECK-NEXT: ret <8 x i32> %v
+;
%1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 0)
ret <8 x i32> %1
}
define <8 x i32> @avx2_pslli_d_15(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_pslli_d_15
-; CHECK-NEXT: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT: ret <8 x i32> %1
+; CHECK-LABEL: @avx2_pslli_d_15(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
%1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 15)
ret <8 x i32> %1
}
define <8 x i32> @avx2_pslli_d_64(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_pslli_d_64
-; CHECK-NEXT: ret <8 x i32> zeroinitializer
+; CHECK-LABEL: @avx2_pslli_d_64(
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
+;
%1 = tail call <8 x i32> @llvm.x86.avx2.pslli.d(<8 x i32> %v, i32 64)
ret <8 x i32> %1
}
define <4 x i64> @avx2_pslli_q_0(<4 x i64> %v) {
-; CHECK-LABEL: @avx2_pslli_q_0
-; CHECK-NEXT: ret <4 x i64> %v
+; CHECK-LABEL: @avx2_pslli_q_0(
+; CHECK-NEXT: ret <4 x i64> %v
+;
%1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 0)
ret <4 x i64> %1
}
define <4 x i64> @avx2_pslli_q_15(<4 x i64> %v) {
-; CHECK-LABEL: @avx2_pslli_q_15
-; CHECK-NEXT: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
-; CHECK-NEXT: ret <4 x i64> %1
+; CHECK-LABEL: @avx2_pslli_q_15(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
%1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 15)
ret <4 x i64> %1
}
define <4 x i64> @avx2_pslli_q_64(<4 x i64> %v) {
-; CHECK-LABEL: @avx2_pslli_q_64
-; CHECK-NEXT: ret <4 x i64> zeroinitializer
+; CHECK-LABEL: @avx2_pslli_q_64(
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
+;
%1 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %v, i32 64)
ret <4 x i64> %1
}
@@ -374,125 +423,141 @@ define <4 x i64> @avx2_pslli_q_64(<4 x i64> %v) {
;
define <8 x i16> @sse2_psra_w_0(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psra_w_0
-; CHECK-NEXT: ret <8 x i16> %v
+; CHECK-LABEL: @sse2_psra_w_0(
+; CHECK-NEXT: ret <8 x i16> %v
+;
%1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> zeroinitializer)
ret <8 x i16> %1
}
define <8 x i16> @sse2_psra_w_15(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psra_w_15
-; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT: ret <8 x i16> %1
+; CHECK-LABEL: @sse2_psra_w_15(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
%1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
ret <8 x i16> %1
}
define <8 x i16> @sse2_psra_w_15_splat(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psra_w_15_splat
-; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT: ret <8 x i16> %1
+; CHECK-LABEL: @sse2_psra_w_15_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
%1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
ret <8 x i16> %1
}
define <8 x i16> @sse2_psra_w_64(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psra_w_64
-; CHECK-NEXT: %1 = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT: ret <8 x i16> %1
+; CHECK-LABEL: @sse2_psra_w_64(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
%1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
ret <8 x i16> %1
}
define <4 x i32> @sse2_psra_d_0(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psra_d_0
-; CHECK-NEXT: ret <4 x i32> %v
+; CHECK-LABEL: @sse2_psra_d_0(
+; CHECK-NEXT: ret <4 x i32> %v
+;
%1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> zeroinitializer)
ret <4 x i32> %1
}
define <4 x i32> @sse2_psra_d_15(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psra_d_15
-; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT: ret <4 x i32> %1
+; CHECK-LABEL: @sse2_psra_d_15(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
%1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
ret <4 x i32> %1
}
define <4 x i32> @sse2_psra_d_15_splat(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psra_d_15_splat
-; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
-; CHECK-NEXT: ret <4 x i32> %1
+; CHECK-LABEL: @sse2_psra_d_15_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
%1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
ret <4 x i32> %1
}
define <4 x i32> @sse2_psra_d_64(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psra_d_64
-; CHECK-NEXT: %1 = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
-; CHECK-NEXT: ret <4 x i32> %1
+; CHECK-LABEL: @sse2_psra_d_64(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
%1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
ret <4 x i32> %1
}
define <16 x i16> @avx2_psra_w_0(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psra_w_0
-; CHECK-NEXT: ret <16 x i16> %v
+; CHECK-LABEL: @avx2_psra_w_0(
+; CHECK-NEXT: ret <16 x i16> %v
+;
%1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> zeroinitializer)
ret <16 x i16> %1
}
define <16 x i16> @avx2_psra_w_15(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psra_w_15
-; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT: ret <16 x i16> %1
+; CHECK-LABEL: @avx2_psra_w_15(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
%1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
ret <16 x i16> %1
}
define <16 x i16> @avx2_psra_w_15_splat(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psra_w_15_splat
-; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT: ret <16 x i16> %1
+; CHECK-LABEL: @avx2_psra_w_15_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
%1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
ret <16 x i16> %1
}
define <16 x i16> @avx2_psra_w_64(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psra_w_64
-; CHECK-NEXT: %1 = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT: ret <16 x i16> %1
+; CHECK-LABEL: @avx2_psra_w_64(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
%1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
ret <16 x i16> %1
}
define <8 x i32> @avx2_psra_d_0(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psra_d_0
-; CHECK-NEXT: ret <8 x i32> %v
+; CHECK-LABEL: @avx2_psra_d_0(
+; CHECK-NEXT: ret <8 x i32> %v
+;
%1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> zeroinitializer)
ret <8 x i32> %1
}
define <8 x i32> @avx2_psra_d_15(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psra_d_15
-; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT: ret <8 x i32> %1
+; CHECK-LABEL: @avx2_psra_d_15(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
%1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
ret <8 x i32> %1
}
define <8 x i32> @avx2_psra_d_15_splat(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psra_d_15_splat
-; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
-; CHECK-NEXT: ret <8 x i32> %1
+; CHECK-LABEL: @avx2_psra_d_15_splat(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
%1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
ret <8 x i32> %1
}
define <8 x i32> @avx2_psra_d_64(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psra_d_64
-; CHECK-NEXT: %1 = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
-; CHECK-NEXT: ret <8 x i32> %1
+; CHECK-LABEL: @avx2_psra_d_64(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
%1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
ret <8 x i32> %1
}
@@ -502,161 +567,183 @@ define <8 x i32> @avx2_psra_d_64(<8 x i32> %v) {
;
define <8 x i16> @sse2_psrl_w_0(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psrl_w_0
-; CHECK-NEXT: ret <8 x i16> %v
+; CHECK-LABEL: @sse2_psrl_w_0(
+; CHECK-NEXT: ret <8 x i16> %v
+;
%1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> zeroinitializer)
ret <8 x i16> %1
}
define <8 x i16> @sse2_psrl_w_15(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psrl_w_15
-; CHECK-NEXT: %1 = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT: ret <8 x i16> %1
+; CHECK-LABEL: @sse2_psrl_w_15(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
%1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
ret <8 x i16> %1
}
define <8 x i16> @sse2_psrl_w_15_splat(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psrl_w_15_splat
-; CHECK-NEXT: ret <8 x i16> zeroinitializer
+; CHECK-LABEL: @sse2_psrl_w_15_splat(
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
+;
%1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
ret <8 x i16> %1
}
define <8 x i16> @sse2_psrl_w_64(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psrl_w_64
-; CHECK-NEXT: ret <8 x i16> zeroinitializer
+; CHECK-LABEL: @sse2_psrl_w_64(
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
+;
%1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
ret <8 x i16> %1
}
define <4 x i32> @sse2_psrl_d_0(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psrl_d_0
-; CHECK-NEXT: ret <4 x i32> %v
+; CHECK-LABEL: @sse2_psrl_d_0(
+; CHECK-NEXT: ret <4 x i32> %v
+;
%1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> zeroinitializer)
ret <4 x i32> %1
}
define <4 x i32> @sse2_psrl_d_15(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psrl_d_15
-; CHECK-NEXT: %1 = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT: ret <4 x i32> %1
+; CHECK-LABEL: @sse2_psrl_d_15(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
%1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
ret <4 x i32> %1
}
define <4 x i32> @sse2_psrl_d_15_splat(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psrl_d_15_splat
-; CHECK-NEXT: ret <4 x i32> zeroinitializer
+; CHECK-LABEL: @sse2_psrl_d_15_splat(
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+;
%1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
ret <4 x i32> %1
}
define <4 x i32> @sse2_psrl_d_64(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psrl_d_64
-; CHECK-NEXT: ret <4 x i32> zeroinitializer
+; CHECK-LABEL: @sse2_psrl_d_64(
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+;
%1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
ret <4 x i32> %1
}
define <2 x i64> @sse2_psrl_q_0(<2 x i64> %v) {
-; CHECK-LABEL: @sse2_psrl_q_0
-; CHECK-NEXT: ret <2 x i64> %v
+; CHECK-LABEL: @sse2_psrl_q_0(
+; CHECK-NEXT: ret <2 x i64> %v
+;
%1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> zeroinitializer)
ret <2 x i64> %1
}
define <2 x i64> @sse2_psrl_q_15(<2 x i64> %v) {
-; CHECK-LABEL: @sse2_psrl_q_15
-; CHECK-NEXT: %1 = lshr <2 x i64> %v, <i64 15, i64 15>
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-LABEL: @sse2_psrl_q_15(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> %v, <i64 15, i64 15>
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
%1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
ret <2 x i64> %1
}
define <2 x i64> @sse2_psrl_q_64(<2 x i64> %v) {
-; CHECK-LABEL: @sse2_psrl_q_64
-; CHECK-NEXT: ret <2 x i64> zeroinitializer
+; CHECK-LABEL: @sse2_psrl_q_64(
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
+;
%1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
ret <2 x i64> %1
}
define <16 x i16> @avx2_psrl_w_0(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psrl_w_0
-; CHECK-NEXT: ret <16 x i16> %v
+; CHECK-LABEL: @avx2_psrl_w_0(
+; CHECK-NEXT: ret <16 x i16> %v
+;
%1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> zeroinitializer)
ret <16 x i16> %1
}
define <16 x i16> @avx2_psrl_w_15(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psrl_w_15
-; CHECK-NEXT: %1 = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT: ret <16 x i16> %1
+; CHECK-LABEL: @avx2_psrl_w_15(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
%1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
ret <16 x i16> %1
}
define <16 x i16> @avx2_psrl_w_15_splat(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psrl_w_15_splat
-; CHECK-NEXT: ret <16 x i16> zeroinitializer
+; CHECK-LABEL: @avx2_psrl_w_15_splat(
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
+;
%1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
ret <16 x i16> %1
}
define <16 x i16> @avx2_psrl_w_64(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psrl_w_64
-; CHECK-NEXT: ret <16 x i16> zeroinitializer
+; CHECK-LABEL: @avx2_psrl_w_64(
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
+;
%1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
ret <16 x i16> %1
}
define <8 x i32> @avx2_psrl_d_0(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psrl_d_0
-; CHECK-NEXT: ret <8 x i32> %v
+; CHECK-LABEL: @avx2_psrl_d_0(
+; CHECK-NEXT: ret <8 x i32> %v
+;
%1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> zeroinitializer)
ret <8 x i32> %1
}
define <8 x i32> @avx2_psrl_d_15(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psrl_d_15
-; CHECK-NEXT: %1 = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT: ret <8 x i32> %1
+; CHECK-LABEL: @avx2_psrl_d_15(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
%1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
ret <8 x i32> %1
}
define <8 x i32> @avx2_psrl_d_15_splat(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psrl_d_15_splat
-; CHECK-NEXT: ret <8 x i32> zeroinitializer
+; CHECK-LABEL: @avx2_psrl_d_15_splat(
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
+;
%1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
ret <8 x i32> %1
}
define <8 x i32> @avx2_psrl_d_64(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psrl_d_64
-; CHECK-NEXT: ret <8 x i32> zeroinitializer
+; CHECK-LABEL: @avx2_psrl_d_64(
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
+;
%1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
ret <8 x i32> %1
}
define <4 x i64> @avx2_psrl_q_0(<4 x i64> %v) {
-; CHECK-LABEL: @avx2_psrl_q_0
-; CHECK-NEXT: ret <4 x i64> %v
+; CHECK-LABEL: @avx2_psrl_q_0(
+; CHECK-NEXT: ret <4 x i64> %v
+;
%1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> zeroinitializer)
ret <4 x i64> %1
}
define <4 x i64> @avx2_psrl_q_15(<4 x i64> %v) {
-; CHECK-LABEL: @avx2_psrl_q_15
-; CHECK-NEXT: %1 = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
-; CHECK-NEXT: ret <4 x i64> %1
+; CHECK-LABEL: @avx2_psrl_q_15(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
%1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
ret <4 x i64> %1
}
define <4 x i64> @avx2_psrl_q_64(<4 x i64> %v) {
-; CHECK-LABEL: @avx2_psrl_q_64
-; CHECK-NEXT: ret <4 x i64> zeroinitializer
+; CHECK-LABEL: @avx2_psrl_q_64(
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
+;
%1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
ret <4 x i64> %1
}
@@ -666,183 +753,643 @@ define <4 x i64> @avx2_psrl_q_64(<4 x i64> %v) {
;
define <8 x i16> @sse2_psll_w_0(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psll_w_0
-; CHECK-NEXT: ret <8 x i16> %v
+; CHECK-LABEL: @sse2_psll_w_0(
+; CHECK-NEXT: ret <8 x i16> %v
+;
%1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> zeroinitializer)
ret <8 x i16> %1
}
define <8 x i16> @sse2_psll_w_15(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psll_w_15
-; CHECK-NEXT: %1 = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT: ret <8 x i16> %1
+; CHECK-LABEL: @sse2_psll_w_15(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
%1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
ret <8 x i16> %1
}
define <8 x i16> @sse2_psll_w_15_splat(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psll_w_15_splat
-; CHECK-NEXT: ret <8 x i16> zeroinitializer
+; CHECK-LABEL: @sse2_psll_w_15_splat(
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
+;
%1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
ret <8 x i16> %1
}
define <8 x i16> @sse2_psll_w_64(<8 x i16> %v) {
-; CHECK-LABEL: @sse2_psll_w_64
-; CHECK-NEXT: ret <8 x i16> zeroinitializer
+; CHECK-LABEL: @sse2_psll_w_64(
+; CHECK-NEXT: ret <8 x i16> zeroinitializer
+;
%1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
ret <8 x i16> %1
}
define <4 x i32> @sse2_psll_d_0(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psll_d_0
-; CHECK-NEXT: ret <4 x i32> %v
+; CHECK-LABEL: @sse2_psll_d_0(
+; CHECK-NEXT: ret <4 x i32> %v
+;
%1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> zeroinitializer)
ret <4 x i32> %1
}
define <4 x i32> @sse2_psll_d_15(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psll_d_15
-; CHECK-NEXT: %1 = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT: ret <4 x i32> %1
+; CHECK-LABEL: @sse2_psll_d_15(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> %v, <i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
%1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
ret <4 x i32> %1
}
define <4 x i32> @sse2_psll_d_15_splat(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psll_d_15_splat
-; CHECK-NEXT: ret <4 x i32> zeroinitializer
+; CHECK-LABEL: @sse2_psll_d_15_splat(
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+;
%1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
ret <4 x i32> %1
}
define <4 x i32> @sse2_psll_d_64(<4 x i32> %v) {
-; CHECK-LABEL: @sse2_psll_d_64
-; CHECK-NEXT: ret <4 x i32> zeroinitializer
+; CHECK-LABEL: @sse2_psll_d_64(
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+;
%1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
ret <4 x i32> %1
}
define <2 x i64> @sse2_psll_q_0(<2 x i64> %v) {
-; CHECK-LABEL: @sse2_psll_q_0
-; CHECK-NEXT: ret <2 x i64> %v
+; CHECK-LABEL: @sse2_psll_q_0(
+; CHECK-NEXT: ret <2 x i64> %v
+;
%1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> zeroinitializer)
ret <2 x i64> %1
}
define <2 x i64> @sse2_psll_q_15(<2 x i64> %v) {
-; CHECK-LABEL: @sse2_psll_q_15
-; CHECK-NEXT: %1 = shl <2 x i64> %v, <i64 15, i64 15>
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-LABEL: @sse2_psll_q_15(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> %v, <i64 15, i64 15>
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
%1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 15, i64 9999>)
ret <2 x i64> %1
}
define <2 x i64> @sse2_psll_q_64(<2 x i64> %v) {
-; CHECK-LABEL: @sse2_psll_q_64
-; CHECK-NEXT: ret <2 x i64> zeroinitializer
+; CHECK-LABEL: @sse2_psll_q_64(
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
+;
%1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> <i64 64, i64 9999>)
ret <2 x i64> %1
}
define <16 x i16> @avx2_psll_w_0(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psll_w_0
-; CHECK-NEXT: ret <16 x i16> %v
+; CHECK-LABEL: @avx2_psll_w_0(
+; CHECK-NEXT: ret <16 x i16> %v
+;
%1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> zeroinitializer)
ret <16 x i16> %1
}
define <16 x i16> @avx2_psll_w_15(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psll_w_15
-; CHECK-NEXT: %1 = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
-; CHECK-NEXT: ret <16 x i16> %1
+; CHECK-LABEL: @avx2_psll_w_15(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <16 x i16> %v, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
%1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
ret <16 x i16> %1
}
define <16 x i16> @avx2_psll_w_15_splat(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psll_w_15_splat
-; CHECK-NEXT: ret <16 x i16> zeroinitializer
+; CHECK-LABEL: @avx2_psll_w_15_splat(
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
+;
%1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>)
ret <16 x i16> %1
}
define <16 x i16> @avx2_psll_w_64(<16 x i16> %v) {
-; CHECK-LABEL: @avx2_psll_w_64
-; CHECK-NEXT: ret <16 x i16> zeroinitializer
+; CHECK-LABEL: @avx2_psll_w_64(
+; CHECK-NEXT: ret <16 x i16> zeroinitializer
+;
%1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> <i16 64, i16 0, i16 0, i16 0, i16 9999, i16 9999, i16 9999, i16 9999>)
ret <16 x i16> %1
}
define <8 x i32> @avx2_psll_d_0(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psll_d_0
-; CHECK-NEXT: ret <8 x i32> %v
+; CHECK-LABEL: @avx2_psll_d_0(
+; CHECK-NEXT: ret <8 x i32> %v
+;
%1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> zeroinitializer)
ret <8 x i32> %1
}
define <8 x i32> @avx2_psll_d_15(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psll_d_15
-; CHECK-NEXT: %1 = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
-; CHECK-NEXT: ret <8 x i32> %1
+; CHECK-LABEL: @avx2_psll_d_15(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> %v, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
%1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 0, i32 9999, i32 9999>)
ret <8 x i32> %1
}
define <8 x i32> @avx2_psll_d_15_splat(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psll_d_15_splat
-; CHECK-NEXT: ret <8 x i32> zeroinitializer
+; CHECK-LABEL: @avx2_psll_d_15_splat(
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
+;
%1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 15, i32 15, i32 15, i32 15>)
ret <8 x i32> %1
}
define <8 x i32> @avx2_psll_d_64(<8 x i32> %v) {
-; CHECK-LABEL: @avx2_psll_d_64
-; CHECK-NEXT: ret <8 x i32> zeroinitializer
+; CHECK-LABEL: @avx2_psll_d_64(
+; CHECK-NEXT: ret <8 x i32> zeroinitializer
+;
%1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> <i32 64, i32 0, i32 9999, i32 9999>)
ret <8 x i32> %1
}
define <4 x i64> @avx2_psll_q_0(<4 x i64> %v) {
-; CHECK-LABEL: @avx2_psll_q_0
-; CHECK-NEXT: ret <4 x i64> %v
+; CHECK-LABEL: @avx2_psll_q_0(
+; CHECK-NEXT: ret <4 x i64> %v
+;
%1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> zeroinitializer)
ret <4 x i64> %1
}
define <4 x i64> @avx2_psll_q_15(<4 x i64> %v) {
-; CHECK-LABEL: @avx2_psll_q_15
-; CHECK-NEXT: %1 = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
-; CHECK-NEXT: ret <4 x i64> %1
+; CHECK-LABEL: @avx2_psll_q_15(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> %v, <i64 15, i64 15, i64 15, i64 15>
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
%1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 15, i64 9999>)
ret <4 x i64> %1
}
define <4 x i64> @avx2_psll_q_64(<4 x i64> %v) {
-; CHECK-LABEL: @avx2_psll_q_64
-; CHECK-NEXT: ret <4 x i64> zeroinitializer
+; CHECK-LABEL: @avx2_psll_q_64(
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
+;
%1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> <i64 64, i64 9999>)
ret <4 x i64> %1
}
;
+; ASHR - Constant Per-Element Vector
+;
+
+define <4 x i32> @avx2_psrav_d_128_0(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psrav_d_128_0(
+; CHECK-NEXT: ret <4 x i32> %v
+;
+ %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> zeroinitializer)
+ ret <4 x i32> %1
+}
+
+define <8 x i32> @avx2_psrav_d_256_0(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrav_d_256_0(
+; CHECK-NEXT: ret <8 x i32> %v
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
+ ret <8 x i32> %1
+}
+
+define <4 x i32> @avx2_psrav_d_128_var(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psrav_d_128_var(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 0, i32 8, i32 16, i32 31>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
+ ret <4 x i32> %1
+}
+
+define <8 x i32> @avx2_psrav_d_256_var(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrav_d_256_var(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>)
+ ret <8 x i32> %1
+}
+
+define <4 x i32> @avx2_psrav_d_128_allbig(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psrav_d_128_allbig(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 31, i32 31, i32 31, i32 undef>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>)
+ ret <4 x i32> %1
+}
+
+define <8 x i32> @avx2_psrav_d_256_allbig(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrav_d_256_allbig(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 undef, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31, i32 31>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
+ ret <8 x i32> %1
+}
+
+define <4 x i32> @avx2_psrav_d_128_undef(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psrav_d_128_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <4 x i32> %v, <i32 undef, i32 8, i32 16, i32 31>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 64>, i32 undef, i32 0
+ %2 = tail call <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32> %v, <4 x i32> %1)
+ ret <4 x i32> %2
+}
+
+define <8 x i32> @avx2_psrav_d_256_undef(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrav_d_256_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = ashr <8 x i32> %v, <i32 0, i32 undef, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 32, i32 24, i32 8, i32 0>, i32 undef, i32 1
+ %2 = tail call <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32> %v, <8 x i32> %1)
+ ret <8 x i32> %2
+}
+
+;
+; LSHR - Constant Per-Element Vector
+;
+
+define <4 x i32> @avx2_psrlv_d_128_0(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psrlv_d_128_0(
+; CHECK-NEXT: ret <4 x i32> %v
+;
+ %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> zeroinitializer)
+ ret <4 x i32> %1
+}
+
+define <8 x i32> @avx2_psrlv_d_256_0(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrlv_d_256_0(
+; CHECK-NEXT: ret <8 x i32> %v
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
+ ret <8 x i32> %1
+}
+
+define <4 x i32> @avx2_psrlv_d_128_var(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psrlv_d_128_var(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> %v, <i32 0, i32 8, i32 16, i32 31>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 31>)
+ ret <4 x i32> %1
+}
+
+define <8 x i32> @avx2_psrlv_d_256_var(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrlv_d_256_var(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>)
+ ret <8 x i32> %1
+}
+
+define <4 x i32> @avx2_psrlv_d_128_big(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psrlv_d_128_big(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
+ ret <4 x i32> %1
+}
+
+define <8 x i32> @avx2_psrlv_d_256_big(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrlv_d_256_big(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
+ ret <8 x i32> %1
+}
+
+define <4 x i32> @avx2_psrlv_d_128_allbig(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psrlv_d_128_allbig(
+; CHECK-NEXT: ret <4 x i32> <i32 0, i32 0, i32 0, i32 undef>
+;
+ %1 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>)
+ ret <4 x i32> %1
+}
+
+define <8 x i32> @avx2_psrlv_d_256_allbig(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrlv_d_256_allbig(
+; CHECK-NEXT: ret <8 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
+ ret <8 x i32> %1
+}
+
+define <4 x i32> @avx2_psrlv_d_128_undef(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psrlv_d_128_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i32> %v, <i32 undef, i32 8, i32 16, i32 31>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 31>, i32 undef, i32 0
+ %2 = tail call <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32> %v, <4 x i32> %1)
+ ret <4 x i32> %2
+}
+
+define <8 x i32> @avx2_psrlv_d_256_undef(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psrlv_d_256_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <8 x i32> %v, <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1
+ %2 = tail call <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32> %v, <8 x i32> %1)
+ ret <8 x i32> %2
+}
+
+define <2 x i64> @avx2_psrlv_q_128_0(<2 x i64> %v) {
+; CHECK-LABEL: @avx2_psrlv_q_128_0(
+; CHECK-NEXT: ret <2 x i64> %v
+;
+ %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> zeroinitializer)
+ ret <2 x i64> %1
+}
+
+define <4 x i64> @avx2_psrlv_q_256_0(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psrlv_q_256_0(
+; CHECK-NEXT: ret <4 x i64> %v
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer)
+ ret <4 x i64> %1
+}
+
+define <2 x i64> @avx2_psrlv_q_128_var(<2 x i64> %v) {
+; CHECK-LABEL: @avx2_psrlv_q_128_var(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> %v, <i64 0, i64 8>
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 8>)
+ ret <2 x i64> %1
+}
+
+define <4 x i64> @avx2_psrlv_q_256_var(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psrlv_q_256_var(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> %v, <i64 0, i64 8, i64 16, i64 31>
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 31>)
+ ret <4 x i64> %1
+}
+
+define <2 x i64> @avx2_psrlv_q_128_big(<2 x i64> %v) {
+; CHECK-LABEL: @avx2_psrlv_q_128_big(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>)
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>)
+ ret <2 x i64> %1
+}
+
+define <4 x i64> @avx2_psrlv_q_256_big(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psrlv_q_256_big(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
+ ret <4 x i64> %1
+}
+
+define <2 x i64> @avx2_psrlv_q_128_allbig(<2 x i64> %v) {
+; CHECK-LABEL: @avx2_psrlv_q_128_allbig(
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
+;
+ %1 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> <i64 128, i64 -64>)
+ ret <2 x i64> %1
+}
+
+define <4 x i64> @avx2_psrlv_q_256_allbig(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psrlv_q_256_allbig(
+; CHECK-NEXT: ret <4 x i64> <i64 0, i64 undef, i64 0, i64 0>
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> <i64 64, i64 undef, i64 -128, i64 -60>)
+ ret <4 x i64> %1
+}
+
+define <2 x i64> @avx2_psrlv_q_128_undef(<2 x i64> %v) {
+; CHECK-LABEL: @avx2_psrlv_q_128_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <2 x i64> %v, <i64 0, i64 undef>
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 undef, i64 1
+ %2 = tail call <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64> %v, <2 x i64> %1)
+ ret <2 x i64> %2
+}
+
+define <4 x i64> @avx2_psrlv_q_256_undef(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psrlv_q_256_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = lshr <4 x i64> %v, <i64 undef, i64 8, i64 16, i64 31>
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
+ %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
+ %2 = tail call <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64> %v, <4 x i64> %1)
+ ret <4 x i64> %2
+}
+
+;
+; SHL - Constant Per-Element Vector
+;
+
+define <4 x i32> @avx2_psllv_d_128_0(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psllv_d_128_0(
+; CHECK-NEXT: ret <4 x i32> %v
+;
+ %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> zeroinitializer)
+ ret <4 x i32> %1
+}
+
+define <8 x i32> @avx2_psllv_d_256_0(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psllv_d_256_0(
+; CHECK-NEXT: ret <8 x i32> %v
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> zeroinitializer)
+ ret <8 x i32> %1
+}
+
+define <4 x i32> @avx2_psllv_d_128_var(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psllv_d_128_var(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> %v, <i32 0, i32 8, i32 16, i32 31>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 31>)
+ ret <4 x i32> %1
+}
+
+define <8 x i32> @avx2_psllv_d_256_var(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psllv_d_256_var(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> %v, <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 24, i32 31, i32 24, i32 8, i32 0>)
+ ret <8 x i32> %1
+}
+
+define <4 x i32> @avx2_psllv_d_128_big(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psllv_d_128_big(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 0, i32 8, i32 16, i32 64>)
+ ret <4 x i32> %1
+}
+
+define <8 x i32> @avx2_psllv_d_256_big(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psllv_d_256_big(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 0, i32 8, i32 16, i32 64, i32 31, i32 24, i32 8, i32 0>)
+ ret <8 x i32> %1
+}
+
+define <4 x i32> @avx2_psllv_d_128_allbig(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psllv_d_128_allbig(
+; CHECK-NEXT: ret <4 x i32> <i32 0, i32 0, i32 0, i32 undef>
+;
+ %1 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> <i32 32, i32 100, i32 -255, i32 undef>)
+ ret <4 x i32> %1
+}
+
+define <8 x i32> @avx2_psllv_d_256_allbig(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psllv_d_256_allbig(
+; CHECK-NEXT: ret <8 x i32> <i32 undef, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
+;
+ %1 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> <i32 undef, i32 100, i32 255, i32 55555, i32 -32, i32 -100, i32 -255, i32 -55555>)
+ ret <8 x i32> %1
+}
+
+define <4 x i32> @avx2_psllv_d_128_undef(<4 x i32> %v) {
+; CHECK-LABEL: @avx2_psllv_d_128_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i32> %v, <i32 undef, i32 8, i32 16, i32 31>
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
+ %1 = insertelement <4 x i32> <i32 0, i32 8, i32 16, i32 31>, i32 undef, i32 0
+ %2 = tail call <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32> %v, <4 x i32> %1)
+ ret <4 x i32> %2
+}
+
+define <8 x i32> @avx2_psllv_d_256_undef(<8 x i32> %v) {
+; CHECK-LABEL: @avx2_psllv_d_256_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <8 x i32> %v, <i32 0, i32 undef, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
+ %1 = insertelement <8 x i32> <i32 0, i32 8, i32 16, i32 31, i32 31, i32 24, i32 8, i32 0>, i32 undef, i32 1
+ %2 = tail call <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32> %v, <8 x i32> %1)
+ ret <8 x i32> %2
+}
+
+define <2 x i64> @avx2_psllv_q_128_0(<2 x i64> %v) {
+; CHECK-LABEL: @avx2_psllv_q_128_0(
+; CHECK-NEXT: ret <2 x i64> %v
+;
+ %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> zeroinitializer)
+ ret <2 x i64> %1
+}
+
+define <4 x i64> @avx2_psllv_q_256_0(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psllv_q_256_0(
+; CHECK-NEXT: ret <4 x i64> %v
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> zeroinitializer)
+ ret <4 x i64> %1
+}
+
+define <2 x i64> @avx2_psllv_q_128_var(<2 x i64> %v) {
+; CHECK-LABEL: @avx2_psllv_q_128_var(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> %v, <i64 0, i64 8>
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 8>)
+ ret <2 x i64> %1
+}
+
+define <4 x i64> @avx2_psllv_q_256_var(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psllv_q_256_var(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> %v, <i64 0, i64 8, i64 16, i64 31>
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 31>)
+ ret <4 x i64> %1
+}
+
+define <2 x i64> @avx2_psllv_q_128_big(<2 x i64> %v) {
+; CHECK-LABEL: @avx2_psllv_q_128_big(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>)
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 0, i64 128>)
+ ret <2 x i64> %1
+}
+
+define <4 x i64> @avx2_psllv_q_256_big(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psllv_q_256_big(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 0, i64 8, i64 16, i64 64>)
+ ret <4 x i64> %1
+}
+
+define <2 x i64> @avx2_psllv_q_128_allbig(<2 x i64> %v) {
+; CHECK-LABEL: @avx2_psllv_q_128_allbig(
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
+;
+ %1 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> <i64 128, i64 -64>)
+ ret <2 x i64> %1
+}
+
+define <4 x i64> @avx2_psllv_q_256_allbig(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psllv_q_256_allbig(
+; CHECK-NEXT: ret <4 x i64> <i64 0, i64 undef, i64 0, i64 0>
+;
+ %1 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> <i64 64, i64 undef, i64 -128, i64 -60>)
+ ret <4 x i64> %1
+}
+
+define <2 x i64> @avx2_psllv_q_128_undef(<2 x i64> %v) {
+; CHECK-LABEL: @avx2_psllv_q_128_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <2 x i64> %v, <i64 0, i64 undef>
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
+ %1 = insertelement <2 x i64> <i64 0, i64 8>, i64 undef, i64 1
+ %2 = tail call <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64> %v, <2 x i64> %1)
+ ret <2 x i64> %2
+}
+
+define <4 x i64> @avx2_psllv_q_256_undef(<4 x i64> %v) {
+; CHECK-LABEL: @avx2_psllv_q_256_undef(
+; CHECK-NEXT: [[TMP1:%.*]] = shl <4 x i64> %v, <i64 undef, i64 8, i64 16, i64 31>
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
+ %1 = insertelement <4 x i64> <i64 0, i64 8, i64 16, i64 31>, i64 undef, i64 0
+ %2 = tail call <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64> %v, <4 x i64> %1)
+ ret <4 x i64> %2
+}
+
+;
; Vector Demanded Bits
;
define <8 x i16> @sse2_psra_w_var(<8 x i16> %v, <8 x i16> %a) {
-; CHECK-LABEL: @sse2_psra_w_var
-; CHECK-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %a)
-; CHECK-NEXT: ret <8 x i16> %1
+; CHECK-LABEL: @sse2_psra_w_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
%1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
%2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1)
ret <8 x i16> %2
}
define <8 x i16> @sse2_psra_w_var_bc(<8 x i16> %v, <2 x i64> %a) {
-; CHECK-LABEL: @sse2_psra_w_var_bc
-; CHECK-NEXT: %1 = bitcast <2 x i64> %a to <8 x i16>
-; CHECK-NEXT: %2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %1)
-; CHECK-NEXT: ret <8 x i16> %2
+; CHECK-LABEL: @sse2_psra_w_var_bc(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> %a to <8 x i16>
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> [[TMP1]])
+; CHECK-NEXT: ret <8 x i16> [[TMP2]]
+;
%1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = bitcast <2 x i64> %1 to <8 x i16>
%3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %v, <8 x i16> %2)
@@ -850,19 +1397,21 @@ define <8 x i16> @sse2_psra_w_var_bc(<8 x i16> %v, <2 x i64> %a) {
}
define <4 x i32> @sse2_psra_d_var(<4 x i32> %v, <4 x i32> %a) {
-; CHECK-LABEL: @sse2_psra_d_var
-; CHECK-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %a)
-; CHECK-NEXT: ret <4 x i32> %1
+; CHECK-LABEL: @sse2_psra_d_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
%1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
%2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %1)
ret <4 x i32> %2
}
define <4 x i32> @sse2_psra_d_var_bc(<4 x i32> %v, <8 x i16> %a) {
-; CHECK-LABEL: @sse2_psra_d_var_bc
-; CHECK-NEXT: %1 = bitcast <8 x i16> %a to <4 x i32>
-; CHECK-NEXT: %2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %1)
-; CHECK-NEXT: ret <4 x i32> %2
+; CHECK-LABEL: @sse2_psra_d_var_bc(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <8 x i16> %a to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> [[TMP1]])
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
%1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
%2 = bitcast <8 x i16> %1 to <4 x i32>
%3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %v, <4 x i32> %2)
@@ -870,64 +1419,71 @@ define <4 x i32> @sse2_psra_d_var_bc(<4 x i32> %v, <8 x i16> %a) {
}
define <16 x i16> @avx2_psra_w_var(<16 x i16> %v, <8 x i16> %a) {
-; CHECK-LABEL: @avx2_psra_w_var
-; CHECK-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %a)
-; CHECK-NEXT: ret <16 x i16> %1
+; CHECK-LABEL: @avx2_psra_w_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
%1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
%2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %v, <8 x i16> %1)
ret <16 x i16> %2
}
define <8 x i32> @avx2_psra_d_var(<8 x i32> %v, <4 x i32> %a) {
-; CHECK-LABEL: @avx2_psra_d_var
-; CHECK-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %a)
-; CHECK-NEXT: ret <8 x i32> %1
+; CHECK-LABEL: @avx2_psra_d_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
%1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
%2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %v, <4 x i32> %1)
ret <8 x i32> %2
}
define <8 x i16> @sse2_psrl_w_var(<8 x i16> %v, <8 x i16> %a) {
-; CHECK-LABEL: @sse2_psrl_w_var
-; CHECK-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %a)
-; CHECK-NEXT: ret <8 x i16> %1
+; CHECK-LABEL: @sse2_psrl_w_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
%1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
%2 = tail call <8 x i16> @llvm.x86.sse2.psrl.w(<8 x i16> %v, <8 x i16> %1)
ret <8 x i16> %2
}
define <4 x i32> @sse2_psrl_d_var(<4 x i32> %v, <4 x i32> %a) {
-; CHECK-LABEL: @sse2_psrl_d_var
-; CHECK-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %a)
-; CHECK-NEXT: ret <4 x i32> %1
+; CHECK-LABEL: @sse2_psrl_d_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
%1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
%2 = tail call <4 x i32> @llvm.x86.sse2.psrl.d(<4 x i32> %v, <4 x i32> %1)
ret <4 x i32> %2
}
define <2 x i64> @sse2_psrl_q_var(<2 x i64> %v, <2 x i64> %a) {
-; CHECK-LABEL: @sse2_psrl_q_var
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %a)
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-LABEL: @sse2_psrl_q_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %a)
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
%1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = tail call <2 x i64> @llvm.x86.sse2.psrl.q(<2 x i64> %v, <2 x i64> %1)
ret <2 x i64> %2
}
define <16 x i16> @avx2_psrl_w_var(<16 x i16> %v, <8 x i16> %a) {
-; CHECK-LABEL: @avx2_psrl_w_var
-; CHECK-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %a)
-; CHECK-NEXT: ret <16 x i16> %1
+; CHECK-LABEL: @avx2_psrl_w_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
%1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
%2 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %1)
ret <16 x i16> %2
}
define <16 x i16> @avx2_psrl_w_var_bc(<16 x i16> %v, <16 x i8> %a) {
-; CHECK-LABEL: @avx2_psrl_w_var_bc
-; CHECK-NEXT: %1 = bitcast <16 x i8> %a to <8 x i16>
-; CHECK-NEXT: %2 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %1)
-; CHECK-NEXT: ret <16 x i16> %2
+; CHECK-LABEL: @avx2_psrl_w_var_bc(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <16 x i8> %a to <8 x i16>
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> [[TMP1]])
+; CHECK-NEXT: ret <16 x i16> [[TMP2]]
+;
%1 = shufflevector <16 x i8> %a, <16 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
%2 = bitcast <16 x i8> %1 to <8 x i16>
%3 = tail call <16 x i16> @llvm.x86.avx2.psrl.w(<16 x i16> %v, <8 x i16> %2)
@@ -935,19 +1491,21 @@ define <16 x i16> @avx2_psrl_w_var_bc(<16 x i16> %v, <16 x i8> %a) {
}
define <8 x i32> @avx2_psrl_d_var(<8 x i32> %v, <4 x i32> %a) {
-; CHECK-LABEL: @avx2_psrl_d_var
-; CHECK-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %a)
-; CHECK-NEXT: ret <8 x i32> %1
+; CHECK-LABEL: @avx2_psrl_d_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
%1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
%2 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %1)
ret <8 x i32> %2
}
define <8 x i32> @avx2_psrl_d_var_bc(<8 x i32> %v, <2 x i64> %a) {
-; CHECK-LABEL: @avx2_psrl_d_var_bc
-; CHECK-NEXT: %1 = bitcast <2 x i64> %a to <4 x i32>
-; CHECK-NEXT: %2 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %1)
-; CHECK-NEXT: ret <8 x i32> %2
+; CHECK-LABEL: @avx2_psrl_d_var_bc(
+; CHECK-NEXT: [[TMP1:%.*]] = bitcast <2 x i64> %a to <4 x i32>
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> [[TMP1]])
+; CHECK-NEXT: ret <8 x i32> [[TMP2]]
+;
%1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = bitcast <2 x i64> %1 to <4 x i32>
%3 = tail call <8 x i32> @llvm.x86.avx2.psrl.d(<8 x i32> %v, <4 x i32> %2)
@@ -955,63 +1513,70 @@ define <8 x i32> @avx2_psrl_d_var_bc(<8 x i32> %v, <2 x i64> %a) {
}
define <4 x i64> @avx2_psrl_q_var(<4 x i64> %v, <2 x i64> %a) {
-; CHECK-LABEL: @avx2_psrl_q_var
-; CHECK-NEXT: %1 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %a)
-; CHECK-NEXT: ret <4 x i64> %1
+; CHECK-LABEL: @avx2_psrl_q_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %a)
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
%1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = tail call <4 x i64> @llvm.x86.avx2.psrl.q(<4 x i64> %v, <2 x i64> %1)
ret <4 x i64> %2
}
define <8 x i16> @sse2_psll_w_var(<8 x i16> %v, <8 x i16> %a) {
-; CHECK-LABEL: @sse2_psll_w_var
-; CHECK-NEXT: %1 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %a)
-; CHECK-NEXT: ret <8 x i16> %1
+; CHECK-LABEL: @sse2_psll_w_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT: ret <8 x i16> [[TMP1]]
+;
%1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
%2 = tail call <8 x i16> @llvm.x86.sse2.psll.w(<8 x i16> %v, <8 x i16> %1)
ret <8 x i16> %2
}
define <4 x i32> @sse2_psll_d_var(<4 x i32> %v, <4 x i32> %a) {
-; CHECK-LABEL: @sse2_psll_d_var
-; CHECK-NEXT: %1 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %a)
-; CHECK-NEXT: ret <4 x i32> %1
+; CHECK-LABEL: @sse2_psll_d_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT: ret <4 x i32> [[TMP1]]
+;
%1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
%2 = tail call <4 x i32> @llvm.x86.sse2.psll.d(<4 x i32> %v, <4 x i32> %1)
ret <4 x i32> %2
}
define <2 x i64> @sse2_psll_q_var(<2 x i64> %v, <2 x i64> %a) {
-; CHECK-LABEL: @sse2_psll_q_var
-; CHECK-NEXT: %1 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %a)
-; CHECK-NEXT: ret <2 x i64> %1
+; CHECK-LABEL: @sse2_psll_q_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %a)
+; CHECK-NEXT: ret <2 x i64> [[TMP1]]
+;
%1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = tail call <2 x i64> @llvm.x86.sse2.psll.q(<2 x i64> %v, <2 x i64> %1)
ret <2 x i64> %2
}
define <16 x i16> @avx2_psll_w_var(<16 x i16> %v, <8 x i16> %a) {
-; CHECK-LABEL: @avx2_psll_w_var
-; CHECK-NEXT: %1 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %a)
-; CHECK-NEXT: ret <16 x i16> %1
+; CHECK-LABEL: @avx2_psll_w_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %a)
+; CHECK-NEXT: ret <16 x i16> [[TMP1]]
+;
%1 = shufflevector <8 x i16> %a, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
%2 = tail call <16 x i16> @llvm.x86.avx2.psll.w(<16 x i16> %v, <8 x i16> %1)
ret <16 x i16> %2
}
define <8 x i32> @avx2_psll_d_var(<8 x i32> %v, <4 x i32> %a) {
-; CHECK-LABEL: @avx2_psll_d_var
-; CHECK-NEXT: %1 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %a)
-; CHECK-NEXT: ret <8 x i32> %1
+; CHECK-LABEL: @avx2_psll_d_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %a)
+; CHECK-NEXT: ret <8 x i32> [[TMP1]]
+;
%1 = shufflevector <4 x i32> %a, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
%2 = tail call <8 x i32> @llvm.x86.avx2.psll.d(<8 x i32> %v, <4 x i32> %1)
ret <8 x i32> %2
}
define <4 x i64> @avx2_psll_q_var(<4 x i64> %v, <2 x i64> %a) {
-; CHECK-LABEL: @avx2_psll_q_var
-; CHECK-NEXT: %1 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %a)
-; CHECK-NEXT: ret <4 x i64> %1
+; CHECK-LABEL: @avx2_psll_q_var(
+; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %a)
+; CHECK-NEXT: ret <4 x i64> [[TMP1]]
+;
%1 = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
%2 = tail call <4 x i64> @llvm.x86.avx2.psll.q(<4 x i64> %v, <2 x i64> %1)
ret <4 x i64> %2
@@ -1022,8 +1587,9 @@ define <4 x i64> @avx2_psll_q_var(<4 x i64> %v, <2 x i64> %a) {
;
define <8 x i16> @test_sse2_psra_w_0(<8 x i16> %A) {
-; CHECK-LABEL: @test_sse2_psra_w_0
-; CHECK-NEXT: ret <8 x i16> %A
+; CHECK-LABEL: @test_sse2_psra_w_0(
+; CHECK-NEXT: ret <8 x i16> %A
+;
%1 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %A, i32 0)
%2 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
%3 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %2, i32 0)
@@ -1031,8 +1597,9 @@ define <8 x i16> @test_sse2_psra_w_0(<8 x i16> %A) {
}
define <8 x i16> @test_sse2_psra_w_8() {
-; CHECK-LABEL: @test_sse2_psra_w_8
-; CHECK-NEXT: ret <8 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
+; CHECK-LABEL: @test_sse2_psra_w_8(
+; CHECK-NEXT: ret <8 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
+;
%1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <8 x i16>
%2 = tail call <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16> %1, i32 3)
%3 = tail call <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
@@ -1041,8 +1608,9 @@ define <8 x i16> @test_sse2_psra_w_8() {
}
define <4 x i32> @test_sse2_psra_d_0(<4 x i32> %A) {
-; CHECK-LABEL: @test_sse2_psra_d_0
-; CHECK-NEXT: ret <4 x i32> %A
+; CHECK-LABEL: @test_sse2_psra_d_0(
+; CHECK-NEXT: ret <4 x i32> %A
+;
%1 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %A, i32 0)
%2 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
%3 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %1, i32 0)
@@ -1050,8 +1618,9 @@ define <4 x i32> @test_sse2_psra_d_0(<4 x i32> %A) {
}
define <4 x i32> @sse2_psra_d_8() {
-; CHECK-LABEL: @sse2_psra_d_8
-; CHECK-NEXT: ret <4 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608>
+; CHECK-LABEL: @sse2_psra_d_8(
+; CHECK-NEXT: ret <4 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608>
+;
%1 = bitcast <2 x i64> <i64 1152956690052710400, i64 1152956690052710400> to <4 x i32>
%2 = tail call <4 x i32> @llvm.x86.sse2.psrai.d(<4 x i32> %1, i32 3)
%3 = tail call <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
@@ -1060,8 +1629,9 @@ define <4 x i32> @sse2_psra_d_8() {
}
define <16 x i16> @test_avx2_psra_w_0(<16 x i16> %A) {
-; CHECK-LABEL: @test_avx2_psra_w_0
-; CHECK-NEXT: ret <16 x i16> %A
+; CHECK-LABEL: @test_avx2_psra_w_0(
+; CHECK-NEXT: ret <16 x i16> %A
+;
%1 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %A, i32 0)
%2 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %1, <8 x i16> <i16 0, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
%3 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %2, i32 0)
@@ -1069,8 +1639,9 @@ define <16 x i16> @test_avx2_psra_w_0(<16 x i16> %A) {
}
define <16 x i16> @test_avx2_psra_w_8(<16 x i16> %A) {
-; CHECK-LABEL: @test_avx2_psra_w_8
-; CHECK-NEXT: ret <16 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
+; CHECK-LABEL: @test_avx2_psra_w_8(
+; CHECK-NEXT: ret <16 x i16> <i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16, i16 -128, i16 64, i16 32, i16 16>
+;
%1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <16 x i16>
%2 = tail call <16 x i16> @llvm.x86.avx2.psrai.w(<16 x i16> %1, i32 3)
%3 = tail call <16 x i16> @llvm.x86.avx2.psra.w(<16 x i16> %2, <8 x i16> <i16 3, i16 0, i16 0, i16 0, i16 7, i16 0, i16 0, i16 0>)
@@ -1079,8 +1650,9 @@ define <16 x i16> @test_avx2_psra_w_8(<16 x i16> %A) {
}
define <8 x i32> @test_avx2_psra_d_0(<8 x i32> %A) {
-; CHECK-LABEL: @test_avx2_psra_d_0
-; CHECK-NEXT: ret <8 x i32> %A
+; CHECK-LABEL: @test_avx2_psra_d_0(
+; CHECK-NEXT: ret <8 x i32> %A
+;
%1 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %A, i32 0)
%2 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %1, <4 x i32> <i32 0, i32 0, i32 7, i32 0>)
%3 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %2, i32 0)
@@ -1088,8 +1660,9 @@ define <8 x i32> @test_avx2_psra_d_0(<8 x i32> %A) {
}
define <8 x i32> @test_avx2_psra_d_8() {
-; CHECK-LABEL: @test_avx2_psra_d_8
-; CHECK-NEXT: ret <8 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608>
+; CHECK-LABEL: @test_avx2_psra_d_8(
+; CHECK-NEXT: ret <8 x i32> <i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608, i32 4194432, i32 1048608>
+;
%1 = bitcast <4 x i64> <i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400, i64 1152956690052710400> to <8 x i32>
%2 = tail call <8 x i32> @llvm.x86.avx2.psrai.d(<8 x i32> %1, i32 3)
%3 = tail call <8 x i32> @llvm.x86.avx2.psra.d(<8 x i32> %2, <4 x i32> <i32 3, i32 0, i32 7, i32 0>)
@@ -1097,7 +1670,14 @@ define <8 x i32> @test_avx2_psra_d_8() {
ret <8 x i32> %4
}
+;
+; Old Tests
+;
+
define <2 x i64> @test_sse2_1() {
+; CHECK-LABEL: @test_sse2_1(
+; CHECK-NEXT: ret <2 x i64> <i64 72058418680037440, i64 144117112246370624>
+;
%S = bitcast i32 1 to i32
%1 = zext i32 %S to i64
%2 = insertelement <2 x i64> undef, i64 %1, i32 0
@@ -1116,11 +1696,12 @@ define <2 x i64> @test_sse2_1() {
%15 = bitcast <4 x i32> %14 to <2 x i64>
%16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
ret <2 x i64> %16
-; CHECK: test_sse2_1
-; CHECK: ret <2 x i64> <i64 72058418680037440, i64 144117112246370624>
}
define <4 x i64> @test_avx2_1() {
+; CHECK-LABEL: @test_avx2_1(
+; CHECK-NEXT: ret <4 x i64> <i64 64, i64 128, i64 192, i64 256>
+;
%S = bitcast i32 1 to i32
%1 = zext i32 %S to i64
%2 = insertelement <2 x i64> undef, i64 %1, i32 0
@@ -1139,11 +1720,12 @@ define <4 x i64> @test_avx2_1() {
%15 = bitcast <8 x i32> %14 to <4 x i64>
%16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
ret <4 x i64> %16
-; CHECK: test_avx2_1
-; CHECK: ret <4 x i64> <i64 64, i64 128, i64 192, i64 256>
}
define <2 x i64> @test_sse2_0() {
+; CHECK-LABEL: @test_sse2_0(
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
+;
%S = bitcast i32 128 to i32
%1 = zext i32 %S to i64
%2 = insertelement <2 x i64> undef, i64 %1, i32 0
@@ -1162,11 +1744,12 @@ define <2 x i64> @test_sse2_0() {
%15 = bitcast <4 x i32> %14 to <2 x i64>
%16 = tail call <2 x i64> @llvm.x86.sse2.pslli.q(<2 x i64> %15, i32 %S)
ret <2 x i64> %16
-; CHECK: test_sse2_0
-; CHECK: ret <2 x i64> zeroinitializer
}
define <4 x i64> @test_avx2_0() {
+; CHECK-LABEL: @test_avx2_0(
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
+;
%S = bitcast i32 128 to i32
%1 = zext i32 %S to i64
%2 = insertelement <2 x i64> undef, i64 %1, i32 0
@@ -1185,10 +1768,11 @@ define <4 x i64> @test_avx2_0() {
%15 = bitcast <8 x i32> %14 to <4 x i64>
%16 = tail call <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64> %15, i32 %S)
ret <4 x i64> %16
-; CHECK: test_avx2_0
-; CHECK: ret <4 x i64> zeroinitializer
}
define <2 x i64> @test_sse2_psrl_1() {
+; CHECK-LABEL: @test_sse2_psrl_1(
+; CHECK-NEXT: ret <2 x i64> <i64 562954248421376, i64 9007267974742020>
+;
%S = bitcast i32 1 to i32
%1 = zext i32 %S to i64
%2 = insertelement <2 x i64> undef, i64 %1, i32 0
@@ -1207,11 +1791,12 @@ define <2 x i64> @test_sse2_psrl_1() {
%15 = bitcast <4 x i32> %14 to <2 x i64>
%16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
ret <2 x i64> %16
-; CHECK: test_sse2_psrl_1
-; CHECK: ret <2 x i64> <i64 562954248421376, i64 9007267974742020>
}
define <4 x i64> @test_avx2_psrl_1() {
+; CHECK-LABEL: @test_avx2_psrl_1(
+; CHECK-NEXT: ret <4 x i64> <i64 16, i64 32, i64 64, i64 128>
+;
%S = bitcast i32 1 to i32
%1 = zext i32 %S to i64
%2 = insertelement <2 x i64> undef, i64 %1, i32 0
@@ -1230,11 +1815,12 @@ define <4 x i64> @test_avx2_psrl_1() {
%15 = bitcast <8 x i32> %14 to <4 x i64>
%16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
ret <4 x i64> %16
-; CHECK: test_avx2_psrl_1
-; CHECK: ret <4 x i64> <i64 16, i64 32, i64 64, i64 128>
}
define <2 x i64> @test_sse2_psrl_0() {
+; CHECK-LABEL: @test_sse2_psrl_0(
+; CHECK-NEXT: ret <2 x i64> zeroinitializer
+;
%S = bitcast i32 128 to i32
%1 = zext i32 %S to i64
%2 = insertelement <2 x i64> undef, i64 %1, i32 0
@@ -1253,11 +1839,12 @@ define <2 x i64> @test_sse2_psrl_0() {
%15 = bitcast <4 x i32> %14 to <2 x i64>
%16 = tail call <2 x i64> @llvm.x86.sse2.psrli.q(<2 x i64> %15, i32 %S)
ret <2 x i64> %16
-; CHECK: test_sse2_psrl_0
-; CHECK: ret <2 x i64> zeroinitializer
}
define <4 x i64> @test_avx2_psrl_0() {
+; CHECK-LABEL: @test_avx2_psrl_0(
+; CHECK-NEXT: ret <4 x i64> zeroinitializer
+;
%S = bitcast i32 128 to i32
%1 = zext i32 %S to i64
%2 = insertelement <2 x i64> undef, i64 %1, i32 0
@@ -1276,8 +1863,6 @@ define <4 x i64> @test_avx2_psrl_0() {
%15 = bitcast <8 x i32> %14 to <4 x i64>
%16 = tail call <4 x i64> @llvm.x86.avx2.psrli.q(<4 x i64> %15, i32 %S)
ret <4 x i64> %16
-; CHECK: test_avx2_psrl_0
-; CHECK: ret <4 x i64> zeroinitializer
}
declare <4 x i64> @llvm.x86.avx2.pslli.q(<4 x i64>, i32) #1
@@ -1315,4 +1900,17 @@ declare <8 x i16> @llvm.x86.sse2.psrai.w(<8 x i16>, i32) #1
declare <4 x i32> @llvm.x86.sse2.psra.d(<4 x i32>, <4 x i32>) #1
declare <8 x i16> @llvm.x86.sse2.psra.w(<8 x i16>, <8 x i16>) #1
+declare <4 x i32> @llvm.x86.avx2.psrav.d(<4 x i32>, <4 x i32>) #1
+declare <8 x i32> @llvm.x86.avx2.psrav.d.256(<8 x i32>, <8 x i32>) #1
+
+declare <4 x i32> @llvm.x86.avx2.psrlv.d(<4 x i32>, <4 x i32>) #1
+declare <8 x i32> @llvm.x86.avx2.psrlv.d.256(<8 x i32>, <8 x i32>) #1
+declare <2 x i64> @llvm.x86.avx2.psrlv.q(<2 x i64>, <2 x i64>) #1
+declare <4 x i64> @llvm.x86.avx2.psrlv.q.256(<4 x i64>, <4 x i64>) #1
+
+declare <4 x i32> @llvm.x86.avx2.psllv.d(<4 x i32>, <4 x i32>) #1
+declare <8 x i32> @llvm.x86.avx2.psllv.d.256(<8 x i32>, <8 x i32>) #1
+declare <2 x i64> @llvm.x86.avx2.psllv.q(<2 x i64>, <2 x i64>) #1
+declare <4 x i64> @llvm.x86.avx2.psllv.q.256(<4 x i64>, <4 x i64>) #1
+
attributes #1 = { nounwind readnone }
diff --git a/test/Transforms/InstCombine/x86-xop.ll b/test/Transforms/InstCombine/x86-xop.ll
index 176c504989df..015d511ac4d4 100644
--- a/test/Transforms/InstCombine/x86-xop.ll
+++ b/test/Transforms/InstCombine/x86-xop.ll
@@ -1,141 +1,215 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
+define double @test_vfrcz_sd_0(double %a) {
+; CHECK-LABEL: @test_vfrcz_sd_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[TMP2]], i32 0
+; CHECK-NEXT: ret double [[TMP3]]
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %2)
+ %4 = extractelement <2 x double> %3, i32 0
+ ret double %4
+}
+
+define double @test_vfrcz_sd_1(double %a) {
+; CHECK-LABEL: @test_vfrcz_sd_1(
+; CHECK-NEXT: ret double 1.000000e+00
+;
+ %1 = insertelement <2 x double> undef, double %a, i32 0
+ %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
+ %3 = tail call <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double> %2)
+ %4 = extractelement <2 x double> %3, i32 1
+ ret double %4
+}
+
+define float @test_vfrcz_ss_0(float %a) {
+; CHECK-LABEL: @test_vfrcz_ss_0(
+; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float %a, i32 0
+; CHECK-NEXT: [[TMP2:%.*]] = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> [[TMP1]])
+; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
+; CHECK-NEXT: ret float [[TMP3]]
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %4)
+ %6 = extractelement <4 x float> %5, i32 0
+ ret float %6
+}
+
+define float @test_vfrcz_ss_3(float %a) {
+; CHECK-LABEL: @test_vfrcz_ss_3(
+; CHECK-NEXT: ret float 3.000000e+00
+;
+ %1 = insertelement <4 x float> undef, float %a, i32 0
+ %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
+ %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
+ %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
+ %5 = tail call <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float> %4)
+ %6 = extractelement <4 x float> %5, i32 3
+ ret float %6
+}
+
define <2 x i64> @cmp_slt_v2i64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: @cmp_slt_v2i64
-; CHECK-NEXT: %1 = icmp slt <2 x i64> %a, %b
-; CHECK-NEXT: %2 = sext <2 x i1> %1 to <2 x i64>
-; CHECK-NEXT: ret <2 x i64> %2
+; CHECK-LABEL: @cmp_slt_v2i64(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp slt <2 x i64> %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[TMP2]]
+;
%1 = tail call <2 x i64> @llvm.x86.xop.vpcomltq(<2 x i64> %a, <2 x i64> %b)
ret <2 x i64> %1
}
define <2 x i64> @cmp_ult_v2i64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: @cmp_ult_v2i64
-; CHECK-NEXT: %1 = icmp ult <2 x i64> %a, %b
-; CHECK-NEXT: %2 = sext <2 x i1> %1 to <2 x i64>
-; CHECK-NEXT: ret <2 x i64> %2
+; CHECK-LABEL: @cmp_ult_v2i64(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult <2 x i64> %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[TMP2]]
+;
%1 = tail call <2 x i64> @llvm.x86.xop.vpcomltuq(<2 x i64> %a, <2 x i64> %b)
ret <2 x i64> %1
}
define <2 x i64> @cmp_sle_v2i64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: @cmp_sle_v2i64
-; CHECK-NEXT: %1 = icmp sle <2 x i64> %a, %b
-; CHECK-NEXT: %2 = sext <2 x i1> %1 to <2 x i64>
-; CHECK-NEXT: ret <2 x i64> %2
+; CHECK-LABEL: @cmp_sle_v2i64(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sle <2 x i64> %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[TMP2]]
+;
%1 = tail call <2 x i64> @llvm.x86.xop.vpcomleq(<2 x i64> %a, <2 x i64> %b)
ret <2 x i64> %1
}
define <2 x i64> @cmp_ule_v2i64(<2 x i64> %a, <2 x i64> %b) {
-; CHECK-LABEL: @cmp_ule_v2i64
-; CHECK-NEXT: %1 = icmp ule <2 x i64> %a, %b
-; CHECK-NEXT: %2 = sext <2 x i1> %1 to <2 x i64>
-; CHECK-NEXT: ret <2 x i64> %2
+; CHECK-LABEL: @cmp_ule_v2i64(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ule <2 x i64> %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = sext <2 x i1> [[TMP1]] to <2 x i64>
+; CHECK-NEXT: ret <2 x i64> [[TMP2]]
+;
%1 = tail call <2 x i64> @llvm.x86.xop.vpcomleuq(<2 x i64> %a, <2 x i64> %b)
ret <2 x i64> %1
}
define <4 x i32> @cmp_sgt_v4i32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: @cmp_sgt_v4i32
-; CHECK-NEXT: %1 = icmp sgt <4 x i32> %a, %b
-; CHECK-NEXT: %2 = sext <4 x i1> %1 to <4 x i32>
-; CHECK-NEXT: ret <4 x i32> %2
+; CHECK-LABEL: @cmp_sgt_v4i32(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt <4 x i32> %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
%1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtd(<4 x i32> %a, <4 x i32> %b)
ret <4 x i32> %1
}
define <4 x i32> @cmp_ugt_v4i32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: @cmp_ugt_v4i32
-; CHECK-NEXT: %1 = icmp ugt <4 x i32> %a, %b
-; CHECK-NEXT: %2 = sext <4 x i1> %1 to <4 x i32>
-; CHECK-NEXT: ret <4 x i32> %2
+; CHECK-LABEL: @cmp_ugt_v4i32(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ugt <4 x i32> %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
%1 = tail call <4 x i32> @llvm.x86.xop.vpcomgtud(<4 x i32> %a, <4 x i32> %b)
ret <4 x i32> %1
}
define <4 x i32> @cmp_sge_v4i32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: @cmp_sge_v4i32
-; CHECK-NEXT: %1 = icmp sge <4 x i32> %a, %b
-; CHECK-NEXT: %2 = sext <4 x i1> %1 to <4 x i32>
-; CHECK-NEXT: ret <4 x i32> %2
+; CHECK-LABEL: @cmp_sge_v4i32(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp sge <4 x i32> %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
%1 = tail call <4 x i32> @llvm.x86.xop.vpcomged(<4 x i32> %a, <4 x i32> %b)
ret <4 x i32> %1
}
define <4 x i32> @cmp_uge_v4i32(<4 x i32> %a, <4 x i32> %b) {
-; CHECK-LABEL: @cmp_uge_v4i32
-; CHECK-NEXT: %1 = icmp uge <4 x i32> %a, %b
-; CHECK-NEXT: %2 = sext <4 x i1> %1 to <4 x i32>
-; CHECK-NEXT: ret <4 x i32> %2
+; CHECK-LABEL: @cmp_uge_v4i32(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp uge <4 x i32> %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i1> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: ret <4 x i32> [[TMP2]]
+;
%1 = tail call <4 x i32> @llvm.x86.xop.vpcomgeud(<4 x i32> %a, <4 x i32> %b)
ret <4 x i32> %1
}
define <8 x i16> @cmp_seq_v8i16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: @cmp_seq_v8i16
-; CHECK-NEXT: %1 = icmp eq <8 x i16> %a, %b
-; CHECK-NEXT: %2 = sext <8 x i1> %1 to <8 x i16>
-; CHECK-NEXT: ret <8 x i16> %2
+; CHECK-LABEL: @cmp_seq_v8i16(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <8 x i16> %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
+; CHECK-NEXT: ret <8 x i16> [[TMP2]]
+;
%1 = tail call <8 x i16> @llvm.x86.xop.vpcomeqw(<8 x i16> %a, <8 x i16> %b)
ret <8 x i16> %1
}
define <8 x i16> @cmp_ueq_v8i16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: @cmp_ueq_v8i16
-; CHECK-NEXT: %1 = icmp eq <8 x i16> %a, %b
-; CHECK-NEXT: %2 = sext <8 x i1> %1 to <8 x i16>
-; CHECK-NEXT: ret <8 x i16> %2
+; CHECK-LABEL: @cmp_ueq_v8i16(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp eq <8 x i16> %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
+; CHECK-NEXT: ret <8 x i16> [[TMP2]]
+;
%1 = tail call <8 x i16> @llvm.x86.xop.vpcomequw(<8 x i16> %a, <8 x i16> %b)
ret <8 x i16> %1
}
define <8 x i16> @cmp_sne_v8i16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: @cmp_sne_v8i16
-; CHECK-NEXT: %1 = icmp ne <8 x i16> %a, %b
-; CHECK-NEXT: %2 = sext <8 x i1> %1 to <8 x i16>
-; CHECK-NEXT: ret <8 x i16> %2
+; CHECK-LABEL: @cmp_sne_v8i16(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <8 x i16> %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
+; CHECK-NEXT: ret <8 x i16> [[TMP2]]
+;
%1 = tail call <8 x i16> @llvm.x86.xop.vpcomnew(<8 x i16> %a, <8 x i16> %b)
ret <8 x i16> %1
}
define <8 x i16> @cmp_une_v8i16(<8 x i16> %a, <8 x i16> %b) {
-; CHECK-LABEL: @cmp_une_v8i16
-; CHECK-NEXT: %1 = icmp ne <8 x i16> %a, %b
-; CHECK-NEXT: %2 = sext <8 x i1> %1 to <8 x i16>
-; CHECK-NEXT: ret <8 x i16> %2
+; CHECK-LABEL: @cmp_une_v8i16(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne <8 x i16> %a, %b
+; CHECK-NEXT: [[TMP2:%.*]] = sext <8 x i1> [[TMP1]] to <8 x i16>
+; CHECK-NEXT: ret <8 x i16> [[TMP2]]
+;
%1 = tail call <8 x i16> @llvm.x86.xop.vpcomneuw(<8 x i16> %a, <8 x i16> %b)
ret <8 x i16> %1
}
define <16 x i8> @cmp_strue_v16i8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: @cmp_strue_v16i8
-; CHECK-NEXT: ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-LABEL: @cmp_strue_v16i8(
+; CHECK-NEXT: ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+;
%1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueb(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %1
}
define <16 x i8> @cmp_utrue_v16i8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: @cmp_utrue_v16i8
-; CHECK-NEXT: ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+; CHECK-LABEL: @cmp_utrue_v16i8(
+; CHECK-NEXT: ret <16 x i8> <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
+;
%1 = tail call <16 x i8> @llvm.x86.xop.vpcomtrueub(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %1
}
define <16 x i8> @cmp_sfalse_v16i8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: @cmp_sfalse_v16i8
-; CHECK-NEXT: ret <16 x i8> zeroinitializer
+; CHECK-LABEL: @cmp_sfalse_v16i8(
+; CHECK-NEXT: ret <16 x i8> zeroinitializer
+;
%1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseb(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %1
}
define <16 x i8> @cmp_ufalse_v16i8(<16 x i8> %a, <16 x i8> %b) {
-; CHECK-LABEL: @cmp_ufalse_v16i8
-; CHECK-NEXT: ret <16 x i8> zeroinitializer
+; CHECK-LABEL: @cmp_ufalse_v16i8(
+; CHECK-NEXT: ret <16 x i8> zeroinitializer
+;
%1 = tail call <16 x i8> @llvm.x86.xop.vpcomfalseub(<16 x i8> %a, <16 x i8> %b)
ret <16 x i8> %1
}
+declare <2 x double> @llvm.x86.xop.vfrcz.sd(<2 x double>) nounwind readnone
+declare <4 x float> @llvm.x86.xop.vfrcz.ss(<4 x float>) nounwind readnone
+
declare <16 x i8> @llvm.x86.xop.vpcomltb(<16 x i8>, <16 x i8>) nounwind readnone
declare <8 x i16> @llvm.x86.xop.vpcomltw(<8 x i16>, <8 x i16>) nounwind readnone
declare <4 x i32> @llvm.x86.xop.vpcomltd(<4 x i32>, <4 x i32>) nounwind readnone
diff --git a/test/Transforms/InstCombine/xor.ll b/test/Transforms/InstCombine/xor.ll
index c8debcbac226..951b44654d44 100644
--- a/test/Transforms/InstCombine/xor.ll
+++ b/test/Transforms/InstCombine/xor.ll
@@ -1,277 +1,334 @@
-; This test makes sure that these instructions are properly eliminated.
-;
-; RUN: opt < %s -instcombine -S | \
-; RUN: FileCheck %s
-; END.
-@G1 = global i32 0 ; <i32*> [#uses=1]
-@G2 = global i32 0 ; <i32*> [#uses=1]
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+@G1 = global i32 0
+@G2 = global i32 0
define i1 @test0(i1 %A) {
; CHECK-LABEL: @test0(
-; CHECK-NEXT: ret i1 %A
- %B = xor i1 %A, false ; <i1> [#uses=1]
- ret i1 %B
+; CHECK-NEXT: ret i1 %A
+;
+ %B = xor i1 %A, false
+ ret i1 %B
}
define i32 @test1(i32 %A) {
; CHECK-LABEL: @test1(
-; CHECK-NEXT: ret i32 %A
- %B = xor i32 %A, 0 ; <i32> [#uses=1]
- ret i32 %B
+; CHECK-NEXT: ret i32 %A
+;
+ %B = xor i32 %A, 0
+ ret i32 %B
}
define i1 @test2(i1 %A) {
; CHECK-LABEL: @test2(
-; CHECK-NEXT: ret i1 false
- %B = xor i1 %A, %A ; <i1> [#uses=1]
- ret i1 %B
+; CHECK-NEXT: ret i1 false
+;
+ %B = xor i1 %A, %A
+ ret i1 %B
}
define i32 @test3(i32 %A) {
; CHECK-LABEL: @test3(
-; CHECK-NEXT: ret i32 0
- %B = xor i32 %A, %A ; <i32> [#uses=1]
- ret i32 %B
+; CHECK-NEXT: ret i32 0
+;
+ %B = xor i32 %A, %A
+ ret i32 %B
}
define i32 @test4(i32 %A) {
; CHECK-LABEL: @test4(
-; CHECK-NEXT: ret i32 -1
- %NotA = xor i32 -1, %A ; <i32> [#uses=1]
- %B = xor i32 %A, %NotA ; <i32> [#uses=1]
- ret i32 %B
+; CHECK-NEXT: ret i32 -1
+;
+ %NotA = xor i32 -1, %A
+ %B = xor i32 %A, %NotA
+ ret i32 %B
}
define i32 @test5(i32 %A) {
; CHECK-LABEL: @test5(
-; CHECK-NEXT: %1 = and i32 %A, -124
-; CHECK-NEXT: ret i32 %1
- %t1 = or i32 %A, 123 ; <i32> [#uses=1]
- %r = xor i32 %t1, 123 ; <i32> [#uses=1]
- ret i32 %r
+; CHECK-NEXT: [[TMP1:%.*]] = and i32 %A, -124
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %t1 = or i32 %A, 123
+ %r = xor i32 %t1, 123
+ ret i32 %r
}
define i8 @test6(i8 %A) {
; CHECK-LABEL: @test6(
-; CHECK-NEXT: ret i8 %A
- %B = xor i8 %A, 17 ; <i8> [#uses=1]
- %C = xor i8 %B, 17 ; <i8> [#uses=1]
- ret i8 %C
+; CHECK-NEXT: ret i8 %A
+;
+ %B = xor i8 %A, 17
+ %C = xor i8 %B, 17
+ ret i8 %C
}
define i32 @test7(i32 %A, i32 %B) {
; CHECK-LABEL: @test7(
-; CHECK-NEXT: %A1 = and i32 %A, 7
-; CHECK-NEXT: %B1 = and i32 %B, 128
-; CHECK-NEXT: %C11 = or i32 %A1, %B1
-; CHECK-NEXT: ret i32 %C11
- %A1 = and i32 %A, 7 ; <i32> [#uses=1]
- %B1 = and i32 %B, 128 ; <i32> [#uses=1]
- %C1 = xor i32 %A1, %B1 ; <i32> [#uses=1]
- ret i32 %C1
+; CHECK-NEXT: [[A1:%.*]] = and i32 %A, 7
+; CHECK-NEXT: [[B1:%.*]] = and i32 %B, 128
+; CHECK-NEXT: [[C11:%.*]] = or i32 [[A1]], [[B1]]
+; CHECK-NEXT: ret i32 [[C11]]
+;
+ %A1 = and i32 %A, 7
+ %B1 = and i32 %B, 128
+ %C1 = xor i32 %A1, %B1
+ ret i32 %C1
}
define i8 @test8(i1 %c) {
; CHECK-LABEL: @test8(
-; CHECK: br i1 %c, label %False, label %True
- %d = xor i1 %c, true ; <i1> [#uses=1]
- br i1 %d, label %True, label %False
+; CHECK-NEXT: br i1 %c, label %False, label %True
+; CHECK: True:
+; CHECK-NEXT: ret i8 1
+; CHECK: False:
+; CHECK-NEXT: ret i8 3
+;
+ %d = xor i1 %c, true
+ br i1 %d, label %True, label %False
-True: ; preds = %0
- ret i8 1
+True:
+ ret i8 1
-False: ; preds = %0
- ret i8 3
+False:
+ ret i8 3
}
define i1 @test9(i8 %A) {
; CHECK-LABEL: @test9(
-; CHECK-NEXT: %C = icmp eq i8 %A, 89
-; CHECK-NEXT: ret i1 %C
- %B = xor i8 %A, 123 ; <i8> [#uses=1]
- %C = icmp eq i8 %B, 34 ; <i1> [#uses=1]
- ret i1 %C
+; CHECK-NEXT: [[C:%.*]] = icmp eq i8 %A, 89
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = xor i8 %A, 123
+ %C = icmp eq i8 %B, 34
+ ret i1 %C
}
define i8 @test10(i8 %A) {
; CHECK-LABEL: @test10(
-; CHECK-NEXT: %B = and i8 %A, 3
-; CHECK-NEXT: %C1 = or i8 %B, 4
-; CHECK-NEXT: ret i8 %C1
- %B = and i8 %A, 3 ; <i8> [#uses=1]
- %C = xor i8 %B, 4 ; <i8> [#uses=1]
- ret i8 %C
+; CHECK-NEXT: [[B:%.*]] = and i8 %A, 3
+; CHECK-NEXT: [[C1:%.*]] = or i8 [[B]], 4
+; CHECK-NEXT: ret i8 [[C1]]
+;
+ %B = and i8 %A, 3
+ %C = xor i8 %B, 4
+ ret i8 %C
}
define i8 @test11(i8 %A) {
; CHECK-LABEL: @test11(
-; CHECK-NEXT: %B = and i8 %A, -13
-; CHECK-NEXT: %1 = or i8 %B, 8
-; CHECK-NEXT: ret i8 %1
- %B = or i8 %A, 12 ; <i8> [#uses=1]
- %C = xor i8 %B, 4 ; <i8> [#uses=1]
- ret i8 %C
+; CHECK-NEXT: [[B:%.*]] = and i8 %A, -13
+; CHECK-NEXT: [[TMP1:%.*]] = or i8 [[B]], 8
+; CHECK-NEXT: ret i8 [[TMP1]]
+;
+ %B = or i8 %A, 12
+ %C = xor i8 %B, 4
+ ret i8 %C
}
define i1 @test12(i8 %A) {
; CHECK-LABEL: @test12(
-; CHECK-NEXT: %c = icmp ne i8 %A, 4
-; CHECK-NEXT: ret i1 %c
- %B = xor i8 %A, 4 ; <i8> [#uses=1]
- %c = icmp ne i8 %B, 0 ; <i1> [#uses=1]
- ret i1 %c
+; CHECK-NEXT: [[C:%.*]] = icmp ne i8 %A, 4
+; CHECK-NEXT: ret i1 [[C]]
+;
+ %B = xor i8 %A, 4
+ %c = icmp ne i8 %B, 0
+ ret i1 %c
}
define i1 @test13(i8 %A, i8 %B) {
; CHECK-LABEL: @test13(
-; CHECK-NEXT: %1 = icmp ne i8 %A, %B
-; CHECK-NEXT: ret i1 %1
- %C = icmp ult i8 %A, %B ; <i1> [#uses=1]
- %D = icmp ugt i8 %A, %B ; <i1> [#uses=1]
- %E = xor i1 %C, %D ; <i1> [#uses=1]
- ret i1 %E
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i8 %A, %B
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
+ %C = icmp ult i8 %A, %B
+ %D = icmp ugt i8 %A, %B
+ %E = xor i1 %C, %D
+ ret i1 %E
}
define i1 @test14(i8 %A, i8 %B) {
; CHECK-LABEL: @test14(
-; CHECK-NEXT: ret i1 true
- %C = icmp eq i8 %A, %B ; <i1> [#uses=1]
- %D = icmp ne i8 %B, %A ; <i1> [#uses=1]
- %E = xor i1 %C, %D ; <i1> [#uses=1]
- ret i1 %E
+; CHECK-NEXT: ret i1 true
+;
+ %C = icmp eq i8 %A, %B
+ %D = icmp ne i8 %B, %A
+ %E = xor i1 %C, %D
+ ret i1 %E
}
define i32 @test15(i32 %A) {
; CHECK-LABEL: @test15(
-; CHECK-NEXT: %C = sub i32 0, %A
-; CHECK-NEXT: ret i32 %C
- %B = add i32 %A, -1 ; <i32> [#uses=1]
- %C = xor i32 %B, -1 ; <i32> [#uses=1]
- ret i32 %C
+; CHECK-NEXT: [[C:%.*]] = sub i32 0, %A
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = add i32 %A, -1
+ %C = xor i32 %B, -1
+ ret i32 %C
}
define i32 @test16(i32 %A) {
; CHECK-LABEL: @test16(
-; CHECK-NEXT: %C = sub i32 -124, %A
-; CHECK-NEXT: ret i32 %C
- %B = add i32 %A, 123 ; <i32> [#uses=1]
- %C = xor i32 %B, -1 ; <i32> [#uses=1]
- ret i32 %C
+; CHECK-NEXT: [[C:%.*]] = sub i32 -124, %A
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = add i32 %A, 123
+ %C = xor i32 %B, -1
+ ret i32 %C
}
define i32 @test17(i32 %A) {
; CHECK-LABEL: @test17(
-; CHECK-NEXT: %C = add i32 %A, -124
-; CHECK-NEXT: ret i32 %C
- %B = sub i32 123, %A ; <i32> [#uses=1]
- %C = xor i32 %B, -1 ; <i32> [#uses=1]
- ret i32 %C
+; CHECK-NEXT: [[C:%.*]] = add i32 %A, -124
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = sub i32 123, %A
+ %C = xor i32 %B, -1
+ ret i32 %C
}
define i32 @test18(i32 %A) {
; CHECK-LABEL: @test18(
-; CHECK-NEXT: %C = add i32 %A, 124
-; CHECK-NEXT: ret i32 %C
- %B = xor i32 %A, -1 ; <i32> [#uses=1]
- %C = sub i32 123, %B ; <i32> [#uses=1]
- ret i32 %C
+; CHECK-NEXT: [[C:%.*]] = add i32 %A, 124
+; CHECK-NEXT: ret i32 [[C]]
+;
+ %B = xor i32 %A, -1
+ %C = sub i32 123, %B
+ ret i32 %C
}
define i32 @test19(i32 %A, i32 %B) {
; CHECK-LABEL: @test19(
-; CHECK-NEXT: ret i32 %B
- %C = xor i32 %A, %B ; <i32> [#uses=1]
- %D = xor i32 %C, %A ; <i32> [#uses=1]
- ret i32 %D
+; CHECK-NEXT: ret i32 %B
+;
+ %C = xor i32 %A, %B
+ %D = xor i32 %C, %A
+ ret i32 %D
}
define void @test20(i32 %A, i32 %B) {
; CHECK-LABEL: @test20(
-; CHECK-NEXT: store i32 %B, i32* @G1
-; CHECK-NEXT: store i32 %A, i32* @G2
-; CHECK-NEXT: ret void
- %tmp.2 = xor i32 %B, %A ; <i32> [#uses=2]
- %tmp.5 = xor i32 %tmp.2, %B ; <i32> [#uses=2]
- %tmp.8 = xor i32 %tmp.5, %tmp.2 ; <i32> [#uses=1]
- store i32 %tmp.8, i32* @G1
- store i32 %tmp.5, i32* @G2
- ret void
+; CHECK-NEXT: store i32 %B, i32* @G1, align 4
+; CHECK-NEXT: store i32 %A, i32* @G2, align 4
+; CHECK-NEXT: ret void
+;
+ %t2 = xor i32 %B, %A
+ %t5 = xor i32 %t2, %B
+ %t8 = xor i32 %t5, %t2
+ store i32 %t8, i32* @G1
+ store i32 %t5, i32* @G2
+ ret void
}
define i32 @test21(i1 %C, i32 %A, i32 %B) {
; CHECK-LABEL: @test21(
-; CHECK-NEXT: %D = select i1 %C, i32 %B, i32 %A
-; CHECK-NEXT: ret i32 %D
- %C2 = xor i1 %C, true ; <i1> [#uses=1]
- %D = select i1 %C2, i32 %A, i32 %B ; <i32> [#uses=1]
- ret i32 %D
+; CHECK-NEXT: [[D:%.*]] = select i1 %C, i32 %B, i32 %A
+; CHECK-NEXT: ret i32 [[D]]
+;
+ %C2 = xor i1 %C, true
+ %D = select i1 %C2, i32 %A, i32 %B
+ ret i32 %D
}
define i32 @test22(i1 %X) {
; CHECK-LABEL: @test22(
-; CHECK-NEXT: %1 = zext i1 %X to i32
-; CHECK-NEXT: ret i32 %1
- %Y = xor i1 %X, true ; <i1> [#uses=1]
- %Z = zext i1 %Y to i32 ; <i32> [#uses=1]
- %Q = xor i32 %Z, 1 ; <i32> [#uses=1]
- ret i32 %Q
+; CHECK-NEXT: [[TMP1:%.*]] = zext i1 %X to i32
+; CHECK-NEXT: ret i32 [[TMP1]]
+;
+ %Y = xor i1 %X, true
+ %Z = zext i1 %Y to i32
+ %Q = xor i32 %Z, 1
+ ret i32 %Q
+}
+
+; Look through a zext between xors.
+
+define i32 @fold_zext_xor_sandwich(i1 %X) {
+; CHECK-LABEL: @fold_zext_xor_sandwich(
+; CHECK-NEXT: [[TMP1:%.*]] = zext i1 %X to i32
+; CHECK-NEXT: [[Q:%.*]] = xor i32 [[TMP1]], 3
+; CHECK-NEXT: ret i32 [[Q]]
+;
+ %Y = xor i1 %X, true
+ %Z = zext i1 %Y to i32
+ %Q = xor i32 %Z, 2
+ ret i32 %Q
+}
+
+define <2 x i32> @fold_zext_xor_sandwich_vec(<2 x i1> %X) {
+; CHECK-LABEL: @fold_zext_xor_sandwich_vec(
+; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i1> %X to <2 x i32>
+; CHECK-NEXT: [[Q:%.*]] = xor <2 x i32> [[TMP1]], <i32 3, i32 3>
+; CHECK-NEXT: ret <2 x i32> [[Q]]
+;
+ %Y = xor <2 x i1> %X, <i1 true, i1 true>
+ %Z = zext <2 x i1> %Y to <2 x i32>
+ %Q = xor <2 x i32> %Z, <i32 2, i32 2>
+ ret <2 x i32> %Q
}
define i1 @test23(i32 %a, i32 %b) {
; CHECK-LABEL: @test23(
-; CHECK-NEXT: %tmp.4 = icmp eq i32 %b, 0
-; CHECK-NEXT: ret i1 %tmp.4
- %tmp.2 = xor i32 %b, %a ; <i32> [#uses=1]
- %tmp.4 = icmp eq i32 %tmp.2, %a ; <i1> [#uses=1]
- ret i1 %tmp.4
+; CHECK-NEXT: [[T4:%.*]] = icmp eq i32 %b, 0
+; CHECK-NEXT: ret i1 [[T4]]
+;
+ %t2 = xor i32 %b, %a
+ %t4 = icmp eq i32 %t2, %a
+ ret i1 %t4
}
define i1 @test24(i32 %c, i32 %d) {
; CHECK-LABEL: @test24(
-; CHECK-NEXT: %tmp.4 = icmp ne i32 %d, 0
-; CHECK-NEXT: ret i1 %tmp.4
- %tmp.2 = xor i32 %d, %c ; <i32> [#uses=1]
- %tmp.4 = icmp ne i32 %tmp.2, %c ; <i1> [#uses=1]
- ret i1 %tmp.4
+; CHECK-NEXT: [[T4:%.*]] = icmp ne i32 %d, 0
+; CHECK-NEXT: ret i1 [[T4]]
+;
+ %t2 = xor i32 %d, %c
+ %t4 = icmp ne i32 %t2, %c
+ ret i1 %t4
}
define i32 @test25(i32 %g, i32 %h) {
; CHECK-LABEL: @test25(
-; CHECK-NEXT: %tmp4 = and i32 %h, %g
-; CHECK-NEXT: ret i32 %tmp4
- %h2 = xor i32 %h, -1 ; <i32> [#uses=1]
- %tmp2 = and i32 %h2, %g ; <i32> [#uses=1]
- %tmp4 = xor i32 %tmp2, %g ; <i32> [#uses=1]
- ret i32 %tmp4
+; CHECK-NEXT: [[T4:%.*]] = and i32 %h, %g
+; CHECK-NEXT: ret i32 [[T4]]
+;
+ %h2 = xor i32 %h, -1
+ %t2 = and i32 %h2, %g
+ %t4 = xor i32 %t2, %g
+ ret i32 %t4
}
define i32 @test26(i32 %a, i32 %b) {
; CHECK-LABEL: @test26(
-; CHECK-NEXT: %tmp4 = and i32 %a, %b
-; CHECK-NEXT: ret i32 %tmp4
- %b2 = xor i32 %b, -1 ; <i32> [#uses=1]
- %tmp2 = xor i32 %a, %b2 ; <i32> [#uses=1]
- %tmp4 = and i32 %tmp2, %a ; <i32> [#uses=1]
- ret i32 %tmp4
+; CHECK-NEXT: [[T4:%.*]] = and i32 %a, %b
+; CHECK-NEXT: ret i32 [[T4]]
+;
+ %b2 = xor i32 %b, -1
+ %t2 = xor i32 %a, %b2
+ %t4 = and i32 %t2, %a
+ ret i32 %t4
}
define i32 @test27(i32 %b, i32 %c, i32 %d) {
; CHECK-LABEL: @test27(
-; CHECK-NEXT: %tmp = icmp eq i32 %b, %c
-; CHECK-NEXT: %tmp6 = zext i1 %tmp to i32
-; CHECK-NEXT: ret i32 %tmp6
- %tmp2 = xor i32 %d, %b ; <i32> [#uses=1]
- %tmp5 = xor i32 %d, %c ; <i32> [#uses=1]
- %tmp = icmp eq i32 %tmp2, %tmp5 ; <i1> [#uses=1]
- %tmp6 = zext i1 %tmp to i32 ; <i32> [#uses=1]
- ret i32 %tmp6
+; CHECK-NEXT: [[T6:%.*]] = icmp eq i32 %b, %c
+; CHECK-NEXT: [[T7:%.*]] = zext i1 [[T6]] to i32
+; CHECK-NEXT: ret i32 [[T7]]
+;
+ %t2 = xor i32 %d, %b
+ %t5 = xor i32 %d, %c
+ %t6 = icmp eq i32 %t2, %t5
+ %t7 = zext i1 %t6 to i32
+ ret i32 %t7
}
define i32 @test28(i32 %indvar) {
; CHECK-LABEL: @test28(
-; CHECK-NEXT: %tmp214 = add i32 %indvar, 1
-; CHECK-NEXT: ret i32 %tmp214
- %tmp7 = add i32 %indvar, -2147483647 ; <i32> [#uses=1]
- %tmp214 = xor i32 %tmp7, -2147483648 ; <i32> [#uses=1]
- ret i32 %tmp214
+; CHECK-NEXT: [[T214:%.*]] = add i32 %indvar, 1
+; CHECK-NEXT: ret i32 [[T214]]
+;
+ %t7 = add i32 %indvar, -2147483647
+ %t214 = xor i32 %t7, -2147483648
+ ret i32 %t214
}
diff --git a/test/Transforms/InstCombine/zero-point-zero-add.ll b/test/Transforms/InstCombine/zero-point-zero-add.ll
index d07a9f4b9de0..e466e8ad7429 100644
--- a/test/Transforms/InstCombine/zero-point-zero-add.ll
+++ b/test/Transforms/InstCombine/zero-point-zero-add.ll
@@ -1,15 +1,24 @@
-; RUN: opt < %s -instcombine -S | grep 0.0 | count 1
+; NOTE: Assertions have been autogenerated by update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
-declare double @abs(double)
+declare double @fabs(double) readonly
define double @test(double %X) {
+; CHECK-LABEL: @test(
+; CHECK-NEXT: [[Y:%.*]] = fadd double %X, 0.000000e+00
+; CHECK-NEXT: ret double [[Y]]
+;
%Y = fadd double %X, 0.0 ;; Should be a single add x, 0.0
%Z = fadd double %Y, 0.0
ret double %Z
}
define double @test1(double %X) {
- %Y = call double @abs(double %X)
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[Y:%.*]] = call double @fabs(double %X)
+; CHECK-NEXT: ret double [[Y]]
+;
+ %Y = call double @fabs(double %X)
%Z = fadd double %Y, 0.0
ret double %Z
}
diff --git a/test/Transforms/InstCombine/zeroext-and-reduce.ll b/test/Transforms/InstCombine/zeroext-and-reduce.ll
index 315033dd8882..48dd0fac5fd2 100644
--- a/test/Transforms/InstCombine/zeroext-and-reduce.ll
+++ b/test/Transforms/InstCombine/zeroext-and-reduce.ll
@@ -1,10 +1,15 @@
-; RUN: opt < %s -instcombine -S | \
-; RUN: grep "and i32 %Y, 8"
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
define i32 @test1(i8 %X) {
- %Y = zext i8 %X to i32 ; <i32> [#uses=1]
- %Z = and i32 %Y, 65544 ; <i32> [#uses=1]
- ret i32 %Z
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: [[Y:%.*]] = zext i8 %X to i32
+; CHECK-NEXT: [[Z:%.*]] = and i32 [[Y]], 8
+; CHECK-NEXT: ret i32 [[Z]]
+;
+ %Y = zext i8 %X to i32
+ %Z = and i32 %Y, 65544
+ ret i32 %Z
}
diff --git a/test/Transforms/InstCombine/zext-fold.ll b/test/Transforms/InstCombine/zext-fold.ll
index e5f316b8111c..12e49b3f946f 100644
--- a/test/Transforms/InstCombine/zext-fold.ll
+++ b/test/Transforms/InstCombine/zext-fold.ll
@@ -1,12 +1,18 @@
-; RUN: opt < %s -instcombine -S | grep "zext " | count 1
+; NOTE: Assertions have been autogenerated by update_test_checks.py
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
; PR1570
define i32 @test2(float %X, float %Y) {
-entry:
- %tmp3 = fcmp uno float %X, %Y ; <i1> [#uses=1]
- %tmp34 = zext i1 %tmp3 to i8 ; <i8> [#uses=1]
- %tmp = xor i8 %tmp34, 1 ; <i8> [#uses=1]
- %toBoolnot5 = zext i8 %tmp to i32 ; <i32> [#uses=1]
- ret i32 %toBoolnot5
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[TMP3:%.*]] = fcmp ord float %X, %Y
+; CHECK-NEXT: [[TOBOOLNOT5:%.*]] = zext i1 [[TMP3]] to i32
+; CHECK-NEXT: ret i32 [[TOBOOLNOT5]]
+;
+ %tmp3 = fcmp uno float %X, %Y
+ %tmp34 = zext i1 %tmp3 to i8
+ %tmp = xor i8 %tmp34, 1
+ %toBoolnot5 = zext i8 %tmp to i32
+ ret i32 %toBoolnot5
}
diff --git a/test/Transforms/InstCombine/zext-or-icmp.ll b/test/Transforms/InstCombine/zext-or-icmp.ll
index 3a27f9a124cb..8176e459d6f2 100644
--- a/test/Transforms/InstCombine/zext-or-icmp.ll
+++ b/test/Transforms/InstCombine/zext-or-icmp.ll
@@ -1,35 +1,21 @@
-; RUN: opt < %s -instcombine -S | grep icmp | count 1
+; RUN: opt < %s -instcombine -S | FileCheck %s
- %struct.FooBar = type <{ i8, i8, [2 x i8], i8, i8, i8, i8, i16, i16, [4 x i8], [8 x %struct.Rock] }>
- %struct.Rock = type { i16, i16 }
-@some_idx = internal constant [4 x i8] c"\0A\0B\0E\0F" ; <[4 x i8]*> [#uses=1]
+; Remove an icmp by using its operand in the subsequent logic directly.
-define zeroext i8 @t(%struct.FooBar* %up, i8 zeroext %intra_flag, i32 %blk_i) nounwind {
-entry:
- %tmp2 = lshr i32 %blk_i, 1 ; <i32> [#uses=1]
- %tmp3 = and i32 %tmp2, 2 ; <i32> [#uses=1]
- %tmp5 = and i32 %blk_i, 1 ; <i32> [#uses=1]
- %tmp6 = or i32 %tmp3, %tmp5 ; <i32> [#uses=1]
- %tmp8 = getelementptr %struct.FooBar, %struct.FooBar* %up, i32 0, i32 7 ; <i16*> [#uses=1]
- %tmp9 = load i16, i16* %tmp8, align 1 ; <i16> [#uses=1]
- %tmp910 = zext i16 %tmp9 to i32 ; <i32> [#uses=1]
- %tmp12 = getelementptr [4 x i8], [4 x i8]* @some_idx, i32 0, i32 %tmp6 ; <i8*> [#uses=1]
- %tmp13 = load i8, i8* %tmp12, align 1 ; <i8> [#uses=1]
- %tmp1314 = zext i8 %tmp13 to i32 ; <i32> [#uses=1]
- %tmp151 = lshr i32 %tmp910, %tmp1314 ; <i32> [#uses=1]
- %tmp1516 = trunc i32 %tmp151 to i8 ; <i8> [#uses=1]
- %tmp18 = getelementptr %struct.FooBar, %struct.FooBar* %up, i32 0, i32 0 ; <i8*> [#uses=1]
- %tmp19 = load i8, i8* %tmp18, align 1 ; <i8> [#uses=1]
- %tmp22 = and i8 %tmp1516, %tmp19 ; <i8> [#uses=1]
- %tmp24 = getelementptr %struct.FooBar, %struct.FooBar* %up, i32 0, i32 0 ; <i8*> [#uses=1]
- %tmp25 = load i8, i8* %tmp24, align 1 ; <i8> [#uses=1]
- %tmp26.mask = and i8 %tmp25, 1 ; <i8> [#uses=1]
- %toBool = icmp eq i8 %tmp26.mask, 0 ; <i1> [#uses=1]
- %toBool.not = xor i1 %toBool, true ; <i1> [#uses=1]
- %toBool33 = icmp eq i8 %intra_flag, 0 ; <i1> [#uses=1]
- %bothcond = or i1 %toBool.not, %toBool33 ; <i1> [#uses=1]
- %iftmp.1.0 = select i1 %bothcond, i8 0, i8 1 ; <i8> [#uses=1]
- %tmp40 = or i8 %tmp22, %iftmp.1.0 ; <i8> [#uses=1]
- %tmp432 = and i8 %tmp40, 1 ; <i8> [#uses=1]
- ret i8 %tmp432
+define i8 @zext_or_icmp_icmp(i8 %a, i8 %b) {
+ %mask = and i8 %a, 1
+ %toBool1 = icmp eq i8 %mask, 0
+ %toBool2 = icmp eq i8 %b, 0
+ %bothCond = or i1 %toBool1, %toBool2
+ %zext = zext i1 %bothCond to i8
+ ret i8 %zext
+
+; CHECK-LABEL: zext_or_icmp_icmp(
+; CHECK-NEXT: %mask = and i8 %a, 1
+; CHECK-NEXT: %toBool2 = icmp eq i8 %b, 0
+; CHECK-NEXT: %1 = xor i8 %mask, 1
+; CHECK-NEXT: %toBool22 = zext i1 %toBool2 to i8
+; CHECK-NEXT: %zext = or i8 %1, %toBool22
+; CHECK-NEXT: ret i8 %zext
}
+
diff --git a/test/Transforms/InstCombine/zext.ll b/test/Transforms/InstCombine/zext.ll
index b62c626c95c0..2420393bfb11 100644
--- a/test/Transforms/InstCombine/zext.ll
+++ b/test/Transforms/InstCombine/zext.ll
@@ -1,45 +1,75 @@
-; Tests to make sure elimination of casts is working correctly
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s
define i64 @test_sext_zext(i16 %A) {
- %c1 = zext i16 %A to i32 ; <i32> [#uses=1]
- %c2 = sext i32 %c1 to i64 ; <i64> [#uses=1]
- ret i64 %c2
-
-; CHECK-LABEL: @test_sext_zext
-; CHECK-NOT: %c1
-; CHECK: %c2 = zext i16 %A to i64
-; CHECK: ret i64 %c2
+; CHECK-LABEL: @test_sext_zext(
+; CHECK-NEXT: [[C2:%.*]] = zext i16 %A to i64
+; CHECK-NEXT: ret i64 [[C2]]
+;
+ %c1 = zext i16 %A to i32
+ %c2 = sext i32 %c1 to i64
+ ret i64 %c2
}
define <2 x i64> @test2(<2 x i1> %A) {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: [[TMP1:%.*]] = zext <2 x i1> %A to <2 x i64>
+; CHECK-NEXT: [[ZEXT:%.*]] = xor <2 x i64> [[TMP1]], <i64 1, i64 1>
+; CHECK-NEXT: ret <2 x i64> [[ZEXT]]
+;
%xor = xor <2 x i1> %A, <i1 true, i1 true>
%zext = zext <2 x i1> %xor to <2 x i64>
ret <2 x i64> %zext
-
-; CHECK-LABEL: @test2
-; CHECK-NEXT: zext <2 x i1> %A to <2 x i64>
-; CHECK-NEXT: xor <2 x i64> %1, <i64 1, i64 1>
}
define <2 x i64> @test3(<2 x i64> %A) {
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i64> %A, <i64 23, i64 42>
+; CHECK-NEXT: ret <2 x i64> [[AND]]
+;
%trunc = trunc <2 x i64> %A to <2 x i32>
%and = and <2 x i32> %trunc, <i32 23, i32 42>
%zext = zext <2 x i32> %and to <2 x i64>
ret <2 x i64> %zext
-
-; CHECK-LABEL: @test3
-; CHECK-NEXT: and <2 x i64> %A, <i64 23, i64 42>
}
define <2 x i64> @test4(<2 x i64> %A) {
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: [[TMP1:%.*]] = xor <2 x i64> %A, <i64 4294967295, i64 4294967295>
+; CHECK-NEXT: [[XOR:%.*]] = and <2 x i64> [[TMP1]], <i64 23, i64 42>
+; CHECK-NEXT: ret <2 x i64> [[XOR]]
+;
%trunc = trunc <2 x i64> %A to <2 x i32>
%and = and <2 x i32> %trunc, <i32 23, i32 42>
%xor = xor <2 x i32> %and, <i32 23, i32 42>
%zext = zext <2 x i32> %xor to <2 x i64>
ret <2 x i64> %zext
+}
+
+; FIXME: If the xor was done in the smaller type, the back-to-back zexts would get combined.
-; CHECK-LABEL: @test4
-; CHECK-NEXT: xor <2 x i64> %A, <i64 4294967295, i64 4294967295>
-; CHECK-NEXT: and <2 x i64> %1, <i64 23, i64 42>
+define i64 @fold_xor_zext_sandwich(i1 %a) {
+; CHECK-LABEL: @fold_xor_zext_sandwich(
+; CHECK-NEXT: [[ZEXT1:%.*]] = zext i1 %a to i32
+; CHECK-NEXT: [[XOR:%.*]] = xor i32 [[ZEXT1]], 1
+; CHECK-NEXT: [[ZEXT2:%.*]] = zext i32 [[XOR]] to i64
+; CHECK-NEXT: ret i64 [[ZEXT2]]
+;
+ %zext1 = zext i1 %a to i32
+ %xor = xor i32 %zext1, 1
+ %zext2 = zext i32 %xor to i64
+ ret i64 %zext2
}
+
+define <2 x i64> @fold_xor_zext_sandwich_vec(<2 x i1> %a) {
+; CHECK-LABEL: @fold_xor_zext_sandwich_vec(
+; CHECK-NEXT: [[ZEXT1:%.*]] = zext <2 x i1> %a to <2 x i64>
+; CHECK-NEXT: [[XOR:%.*]] = xor <2 x i64> [[ZEXT1]], <i64 1, i64 1>
+; CHECK-NEXT: ret <2 x i64> [[XOR]]
+;
+ %zext1 = zext <2 x i1> %a to <2 x i32>
+ %xor = xor <2 x i32> %zext1, <i32 1, i32 1>
+ %zext2 = zext <2 x i32> %xor to <2 x i64>
+ ret <2 x i64> %zext2
+}
+
diff --git a/test/Transforms/InstMerge/exceptions.ll b/test/Transforms/InstMerge/exceptions.ll
new file mode 100644
index 000000000000..54c39960f011
--- /dev/null
+++ b/test/Transforms/InstMerge/exceptions.ll
@@ -0,0 +1,61 @@
+; RUN: opt -basicaa -memdep -mldst-motion -S < %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<memdep>',mldst-motion \
+; RUN: -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@r = common global i32 0, align 4
+@s = common global i32 0, align 4
+
+; CHECK-LABEL: define void @test1(
+define void @test1(i1 %cmp, i32* noalias %p) {
+entry:
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ call void @may_exit() nounwind
+ %arrayidx = getelementptr inbounds i32, i32* %p, i64 1
+ %0 = load i32, i32* %arrayidx, align 4
+ store i32 %0, i32* @r, align 4
+ br label %if.end
+; CHECK: call void @may_exit()
+; CHECK-NEXT: %[[gep:.*]] = getelementptr inbounds i32, i32* %p, i64 1
+; CHECK-NEXT: %[[load:.*]] = load i32, i32* %[[gep]], align 4
+; CHECK-NEXT: store i32 %[[load]], i32* @r, align 4
+
+if.else: ; preds = %entry
+ %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 1
+ %1 = load i32, i32* %arrayidx1, align 4
+ store i32 %1, i32* @s, align 4
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ ret void
+}
+
+; CHECK-LABEL: define void @test2(
+define void @test2(i1 %cmp, i32* noalias %p) {
+entry:
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+ %arrayidx = getelementptr inbounds i32, i32* %p, i64 1
+ store i32 1, i32* %arrayidx, align 4
+ call void @may_throw()
+; CHECK: %[[gep:.*]] = getelementptr inbounds i32, i32* %p, i64 1
+; CHECK-NEXT: store i32 1, i32* %[[gep]], align 4
+; CHECK-NEXT: call void @may_throw()
+ br label %if.end
+
+if.else: ; preds = %entry
+ %arrayidx1 = getelementptr inbounds i32, i32* %p, i64 1
+ store i32 2, i32* %arrayidx1, align 4
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+ ret void
+}
+
+declare void @may_throw()
+declare void @may_exit() nounwind
diff --git a/test/Transforms/InstMerge/st_sink_bugfix_22613.ll b/test/Transforms/InstMerge/st_sink_bugfix_22613.ll
index 575f239efb23..48882eca44cc 100644
--- a/test/Transforms/InstMerge/st_sink_bugfix_22613.ll
+++ b/test/Transforms/InstMerge/st_sink_bugfix_22613.ll
@@ -4,7 +4,7 @@ target triple = "x86_64-unknown-linux-gnu"
; RUN: opt -O2 -S < %s | FileCheck %s
-; CHECK_LABEL: main
+; CHECK-LABEL: main
; CHECK: if.end
; CHECK: store
; CHECK: memset
diff --git a/test/Transforms/InstMerge/st_sink_no_barrier_call.ll b/test/Transforms/InstMerge/st_sink_no_barrier_call.ll
index 0ad90f8581eb..c2da0f3d0ecf 100644
--- a/test/Transforms/InstMerge/st_sink_no_barrier_call.ll
+++ b/test/Transforms/InstMerge/st_sink_no_barrier_call.ll
@@ -33,7 +33,7 @@ if.else: ; preds = %entry
%p3 = getelementptr inbounds %struct.node, %struct.node* %node.017, i32 0, i32 6
; CHECK-NOT: store i32
store i32 %add, i32* %p3, align 4
- call i32 @foo(i32 5) ;not a barrier
+ call i32 @foo(i32 5) nounwind ;not a barrier
br label %if.end
; CHECK: if.end
diff --git a/test/Transforms/InstSimplify/2010-12-20-Boolean.ll b/test/Transforms/InstSimplify/2010-12-20-Boolean.ll
index 28c25c0e77ee..33f2176f7c91 100644
--- a/test/Transforms/InstSimplify/2010-12-20-Boolean.ll
+++ b/test/Transforms/InstSimplify/2010-12-20-Boolean.ll
@@ -1,29 +1,34 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
; RUN: opt < %s -instsimplify -S | FileCheck %s
define i1 @add(i1 %x) {
; CHECK-LABEL: @add(
+; CHECK: ret i1 false
+;
%z = add i1 %x, %x
ret i1 %z
-; CHECK: ret i1 false
}
define i1 @sub(i1 %x) {
; CHECK-LABEL: @sub(
+; CHECK: ret i1 %x
+;
%z = sub i1 false, %x
ret i1 %z
-; CHECK: ret i1 %x
}
define i1 @mul(i1 %x) {
; CHECK-LABEL: @mul(
+; CHECK: ret i1 %x
+;
%z = mul i1 %x, %x
ret i1 %z
-; CHECK: ret i1 %x
}
define i1 @ne(i1 %x) {
; CHECK-LABEL: @ne(
+; CHECK: ret i1 %x
+;
%z = icmp ne i1 %x, 0
ret i1 %z
-; CHECK: ret i1 %x
}
diff --git a/test/Transforms/InstSimplify/AndOrXor.ll b/test/Transforms/InstSimplify/AndOrXor.ll
index ce3c2aa6af22..2442e684246f 100644
--- a/test/Transforms/InstSimplify/AndOrXor.ll
+++ b/test/Transforms/InstSimplify/AndOrXor.ll
@@ -1,205 +1,345 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instsimplify -S | FileCheck %s
define i64 @pow2(i32 %x) {
; CHECK-LABEL: @pow2(
+; CHECK-NEXT: [[NEGX:%.*]] = sub i32 0, %x
+; CHECK-NEXT: [[X2:%.*]] = and i32 %x, [[NEGX]]
+; CHECK-NEXT: [[E:%.*]] = zext i32 [[X2]] to i64
+; CHECK-NEXT: ret i64 [[E]]
+;
%negx = sub i32 0, %x
%x2 = and i32 %x, %negx
%e = zext i32 %x2 to i64
%nege = sub i64 0, %e
%e2 = and i64 %e, %nege
ret i64 %e2
-; CHECK: ret i64 %e
}
define i64 @pow2b(i32 %x) {
; CHECK-LABEL: @pow2b(
+; CHECK-NEXT: [[SH:%.*]] = shl i32 2, %x
+; CHECK-NEXT: [[E:%.*]] = zext i32 [[SH]] to i64
+; CHECK-NEXT: ret i64 [[E]]
+;
%sh = shl i32 2, %x
%e = zext i32 %sh to i64
%nege = sub i64 0, %e
%e2 = and i64 %e, %nege
ret i64 %e2
-; CHECK: ret i64 %e
}
define i32 @sub_neg_nuw(i32 %x, i32 %y) {
; CHECK-LABEL: @sub_neg_nuw(
+; CHECK-NEXT: ret i32 %x
+;
%neg = sub nuw i32 0, %y
%sub = sub i32 %x, %neg
ret i32 %sub
-; CHECK: ret i32 %x
}
define i1 @and_of_icmps0(i32 %b) {
; CHECK-LABEL: @and_of_icmps0(
+; CHECK-NEXT: ret i1 false
+;
%1 = add i32 %b, 2
%2 = icmp ult i32 %1, 4
%cmp3 = icmp sgt i32 %b, 2
%cmp = and i1 %2, %cmp3
ret i1 %cmp
-; CHECK: ret i1 false
}
define i1 @and_of_icmps1(i32 %b) {
; CHECK-LABEL: @and_of_icmps1(
+; CHECK-NEXT: ret i1 false
+;
%1 = add nsw i32 %b, 2
%2 = icmp slt i32 %1, 4
%cmp3 = icmp sgt i32 %b, 2
%cmp = and i1 %2, %cmp3
ret i1 %cmp
-; CHECK: ret i1 false
}
define i1 @and_of_icmps2(i32 %b) {
; CHECK-LABEL: @and_of_icmps2(
+; CHECK-NEXT: ret i1 false
+;
%1 = add i32 %b, 2
%2 = icmp ule i32 %1, 3
%cmp3 = icmp sgt i32 %b, 2
%cmp = and i1 %2, %cmp3
ret i1 %cmp
-; CHECK: ret i1 false
}
define i1 @and_of_icmps3(i32 %b) {
; CHECK-LABEL: @and_of_icmps3(
+; CHECK-NEXT: ret i1 false
+;
%1 = add nsw i32 %b, 2
%2 = icmp sle i32 %1, 3
%cmp3 = icmp sgt i32 %b, 2
%cmp = and i1 %2, %cmp3
ret i1 %cmp
-; CHECK: ret i1 false
}
define i1 @and_of_icmps4(i32 %b) {
; CHECK-LABEL: @and_of_icmps4(
+; CHECK-NEXT: ret i1 false
+;
%1 = add nuw i32 %b, 2
%2 = icmp ult i32 %1, 4
%cmp3 = icmp ugt i32 %b, 2
%cmp = and i1 %2, %cmp3
ret i1 %cmp
-; CHECK: ret i1 false
}
define i1 @and_of_icmps5(i32 %b) {
; CHECK-LABEL: @and_of_icmps5(
+; CHECK-NEXT: ret i1 false
+;
%1 = add nuw i32 %b, 2
%2 = icmp ule i32 %1, 3
%cmp3 = icmp ugt i32 %b, 2
%cmp = and i1 %2, %cmp3
ret i1 %cmp
-; CHECK: ret i1 false
}
define i1 @or_of_icmps0(i32 %b) {
; CHECK-LABEL: @or_of_icmps0(
+; CHECK-NEXT: ret i1 true
+;
%1 = add i32 %b, 2
%2 = icmp uge i32 %1, 4
%cmp3 = icmp sle i32 %b, 2
%cmp = or i1 %2, %cmp3
ret i1 %cmp
-; CHECK: ret i1 true
}
define i1 @or_of_icmps1(i32 %b) {
; CHECK-LABEL: @or_of_icmps1(
+; CHECK-NEXT: ret i1 true
+;
%1 = add nsw i32 %b, 2
%2 = icmp sge i32 %1, 4
%cmp3 = icmp sle i32 %b, 2
%cmp = or i1 %2, %cmp3
ret i1 %cmp
-; CHECK: ret i1 true
}
define i1 @or_of_icmps2(i32 %b) {
; CHECK-LABEL: @or_of_icmps2(
+; CHECK-NEXT: ret i1 true
+;
%1 = add i32 %b, 2
%2 = icmp ugt i32 %1, 3
%cmp3 = icmp sle i32 %b, 2
%cmp = or i1 %2, %cmp3
ret i1 %cmp
-; CHECK: ret i1 true
}
define i1 @or_of_icmps3(i32 %b) {
; CHECK-LABEL: @or_of_icmps3(
+; CHECK-NEXT: ret i1 true
+;
%1 = add nsw i32 %b, 2
%2 = icmp sgt i32 %1, 3
%cmp3 = icmp sle i32 %b, 2
%cmp = or i1 %2, %cmp3
ret i1 %cmp
-; CHECK: ret i1 true
}
define i1 @or_of_icmps4(i32 %b) {
; CHECK-LABEL: @or_of_icmps4(
+; CHECK-NEXT: ret i1 true
+;
%1 = add nuw i32 %b, 2
%2 = icmp uge i32 %1, 4
%cmp3 = icmp ule i32 %b, 2
%cmp = or i1 %2, %cmp3
ret i1 %cmp
-; CHECK: ret i1 true
}
define i1 @or_of_icmps5(i32 %b) {
; CHECK-LABEL: @or_of_icmps5(
+; CHECK-NEXT: ret i1 true
+;
%1 = add nuw i32 %b, 2
%2 = icmp ugt i32 %1, 3
%cmp3 = icmp ule i32 %b, 2
%cmp = or i1 %2, %cmp3
ret i1 %cmp
-; CHECK: ret i1 true
}
define i32 @neg_nuw(i32 %x) {
; CHECK-LABEL: @neg_nuw(
+; CHECK-NEXT: ret i32 0
+;
%neg = sub nuw i32 0, %x
ret i32 %neg
-; CHECK: ret i32 0
}
define i1 @and_icmp1(i32 %x, i32 %y) {
+; CHECK-LABEL: @and_icmp1(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 %x, %y
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
%1 = icmp ult i32 %x, %y
%2 = icmp ne i32 %y, 0
%3 = and i1 %1, %2
ret i1 %3
}
-; CHECK-LABEL: @and_icmp1(
-; CHECK: %[[cmp:.*]] = icmp ult i32 %x, %y
-; CHECK: ret i1 %[[cmp]]
define i1 @and_icmp2(i32 %x, i32 %y) {
+; CHECK-LABEL: @and_icmp2(
+; CHECK-NEXT: ret i1 false
+;
%1 = icmp ult i32 %x, %y
%2 = icmp eq i32 %y, 0
%3 = and i1 %1, %2
ret i1 %3
}
-; CHECK-LABEL: @and_icmp2(
-; CHECK: ret i1 false
define i1 @or_icmp1(i32 %x, i32 %y) {
+; CHECK-LABEL: @or_icmp1(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp ne i32 %y, 0
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
%1 = icmp ult i32 %x, %y
%2 = icmp ne i32 %y, 0
%3 = or i1 %1, %2
ret i1 %3
}
-; CHECK-LABEL: @or_icmp1(
-; CHECK: %[[cmp:.*]] = icmp ne i32 %y, 0
-; CHECK: ret i1 %[[cmp]]
define i1 @or_icmp2(i32 %x, i32 %y) {
+; CHECK-LABEL: @or_icmp2(
+; CHECK-NEXT: ret i1 true
+;
%1 = icmp uge i32 %x, %y
%2 = icmp ne i32 %y, 0
%3 = or i1 %1, %2
ret i1 %3
}
-; CHECK-LABEL: @or_icmp2(
-; CHECK: ret i1 true
define i1 @or_icmp3(i32 %x, i32 %y) {
+; CHECK-LABEL: @or_icmp3(
+; CHECK-NEXT: [[TMP1:%.*]] = icmp uge i32 %x, %y
+; CHECK-NEXT: ret i1 [[TMP1]]
+;
%1 = icmp uge i32 %x, %y
%2 = icmp eq i32 %y, 0
%3 = or i1 %1, %2
ret i1 %3
}
-; CHECK-LABEL: @or_icmp3(
-; CHECK: %[[cmp:.*]] = icmp uge i32 %x, %y
-; CHECK: ret i1 %[[cmp]]
+
+define i1 @disjoint_cmps(i32 %A) {
+; CHECK-LABEL: @disjoint_cmps(
+; CHECK-NEXT: ret i1 false
+;
+ %B = icmp eq i32 %A, 1
+ %C = icmp sge i32 %A, 3
+ %D = and i1 %B, %C
+ ret i1 %D
+}
+
+define i1 @disjoint_cmps2(i32 %X) {
+; CHECK-LABEL: @disjoint_cmps2(
+; CHECK-NEXT: ret i1 false
+;
+ %a = icmp ult i32 %X, 31
+ %b = icmp slt i32 %X, 0
+ %c = and i1 %a, %b
+ ret i1 %c
+}
+
+; PR27869 - Look through casts to eliminate cmps and bitwise logic.
+
+define i32 @and_of_zexted_icmps(i32 %i) {
+; CHECK-LABEL: @and_of_zexted_icmps(
+; CHECK-NEXT: ret i32 0
+;
+ %cmp0 = icmp eq i32 %i, 0
+ %conv0 = zext i1 %cmp0 to i32
+ %cmp1 = icmp ugt i32 %i, 4
+ %conv1 = zext i1 %cmp1 to i32
+ %and = and i32 %conv0, %conv1
+ ret i32 %and
+}
+
+; Make sure vectors work too.
+
+define <4 x i32> @and_of_zexted_icmps_vec(<4 x i32> %i) {
+; CHECK-LABEL: @and_of_zexted_icmps_vec(
+; CHECK-NEXT: ret <4 x i32> zeroinitializer
+;
+ %cmp0 = icmp eq <4 x i32> %i, zeroinitializer
+ %conv0 = zext <4 x i1> %cmp0 to <4 x i32>
+ %cmp1 = icmp slt <4 x i32> %i, zeroinitializer
+ %conv1 = zext <4 x i1> %cmp1 to <4 x i32>
+ %and = and <4 x i32> %conv0, %conv1
+ ret <4 x i32> %and
+}
+
+; Try a different cast and weird types.
+
+define i5 @and_of_sexted_icmps(i3 %i) {
+; CHECK-LABEL: @and_of_sexted_icmps(
+; CHECK-NEXT: ret i5 0
+;
+ %cmp0 = icmp eq i3 %i, 0
+ %conv0 = sext i1 %cmp0 to i5
+ %cmp1 = icmp ugt i3 %i, 1
+ %conv1 = sext i1 %cmp1 to i5
+ %and = and i5 %conv0, %conv1
+ ret i5 %and
+}
+
+; Try a different cast and weird vector types.
+
+define i3 @and_of_bitcast_icmps_vec(<3 x i65> %i) {
+; CHECK-LABEL: @and_of_bitcast_icmps_vec(
+; CHECK-NEXT: ret i3 0
+;
+ %cmp0 = icmp sgt <3 x i65> %i, zeroinitializer
+ %conv0 = bitcast <3 x i1> %cmp0 to i3
+ %cmp1 = icmp slt <3 x i65> %i, zeroinitializer
+ %conv1 = bitcast <3 x i1> %cmp1 to i3
+ %and = and i3 %conv0, %conv1
+ ret i3 %and
+}
+
+; We can't do this if the casts are different.
+
+define i16 @and_of_different_cast_icmps(i8 %i) {
+; CHECK-LABEL: @and_of_different_cast_icmps(
+; CHECK-NEXT: [[CMP0:%.*]] = icmp eq i8 %i, 0
+; CHECK-NEXT: [[CONV0:%.*]] = zext i1 [[CMP0]] to i16
+; CHECK-NEXT: [[CMP1:%.*]] = icmp eq i8 %i, 1
+; CHECK-NEXT: [[CONV1:%.*]] = sext i1 [[CMP1]] to i16
+; CHECK-NEXT: [[AND:%.*]] = and i16 [[CONV0]], [[CONV1]]
+; CHECK-NEXT: ret i16 [[AND]]
+;
+ %cmp0 = icmp eq i8 %i, 0
+ %conv0 = zext i1 %cmp0 to i16
+ %cmp1 = icmp eq i8 %i, 1
+ %conv1 = sext i1 %cmp1 to i16
+ %and = and i16 %conv0, %conv1
+ ret i16 %and
+}
+
+define <2 x i3> @and_of_different_cast_icmps_vec(<2 x i8> %i, <2 x i16> %j) {
+; CHECK-LABEL: @and_of_different_cast_icmps_vec(
+; CHECK-NEXT: [[CMP0:%.*]] = icmp eq <2 x i8> %i, zeroinitializer
+; CHECK-NEXT: [[CONV0:%.*]] = zext <2 x i1> [[CMP0]] to <2 x i3>
+; CHECK-NEXT: [[CMP1:%.*]] = icmp ugt <2 x i16> %j, <i16 1, i16 1>
+; CHECK-NEXT: [[CONV1:%.*]] = zext <2 x i1> [[CMP1]] to <2 x i3>
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i3> [[CONV0]], [[CONV1]]
+; CHECK-NEXT: ret <2 x i3> [[AND]]
+;
+ %cmp0 = icmp eq <2 x i8> %i, zeroinitializer
+ %conv0 = zext <2 x i1> %cmp0 to <2 x i3>
+ %cmp1 = icmp ugt <2 x i16> %j, <i16 1, i16 1>
+ %conv1 = zext <2 x i1> %cmp1 to <2 x i3>
+ %and = and <2 x i3> %conv0, %conv1
+ ret <2 x i3> %and
+}
+
diff --git a/test/Transforms/InstSimplify/add-mask.ll b/test/Transforms/InstSimplify/add-mask.ll
index 1e53cc5bc7fa..e30a35f53127 100644
--- a/test/Transforms/InstSimplify/add-mask.ll
+++ b/test/Transforms/InstSimplify/add-mask.ll
@@ -1,8 +1,10 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
; RUN: opt -S -instsimplify < %s | FileCheck %s
define i1 @test(i32 %a) {
-; CHECK-LABEL: @test
-; CHECK: ret i1 false
+; CHECK-LABEL: @test(
+; CHECK: ret i1 false
+;
%rhs = add i32 %a, -1
%and = and i32 %a, %rhs
%res = icmp eq i32 %and, 1
@@ -10,8 +12,9 @@ define i1 @test(i32 %a) {
}
define i1 @test2(i32 %a) {
-; CHECK-LABEL: @test2
-; CHECK: ret i1 false
+; CHECK-LABEL: @test2(
+; CHECK: ret i1 false
+;
%rhs = add i32 %a, 1
%and = and i32 %a, %rhs
%res = icmp eq i32 %and, 1
@@ -19,8 +22,9 @@ define i1 @test2(i32 %a) {
}
define i1 @test3(i32 %a) {
-; CHECK-LABEL: @test3
-; CHECK: ret i1 false
+; CHECK-LABEL: @test3(
+; CHECK: ret i1 false
+;
%rhs = add i32 %a, 7
%and = and i32 %a, %rhs
%res = icmp eq i32 %and, 1
@@ -32,8 +36,13 @@ declare void @llvm.assume(i1)
; Known bits without a constant
define i1 @test4(i32 %a) {
-; CHECK-LABEL: @test4
-; CHECK: ret i1 false
+; CHECK-LABEL: @test4(
+; CHECK: [[B:%.*]] = load i32, i32* @B
+; CHECK-NEXT: [[B_AND:%.*]] = and i32 [[B]], 1
+; CHECK-NEXT: [[B_CND:%.*]] = icmp eq i32 [[B_AND]], 1
+; CHECK-NEXT: call void @llvm.assume(i1 [[B_CND]])
+; CHECK-NEXT: ret i1 false
+;
%b = load i32, i32* @B
%b.and = and i32 %b, 1
%b.cnd = icmp eq i32 %b.and, 1
@@ -47,8 +56,12 @@ define i1 @test4(i32 %a) {
; Negative test - even number
define i1 @test5(i32 %a) {
-; CHECK-LABEL: @test5
-; CHECK: ret i1 %res
+; CHECK-LABEL: @test5(
+; CHECK: [[RHS:%.*]] = add i32 %a, 2
+; CHECK-NEXT: [[AND:%.*]] = and i32 %a, [[RHS]]
+; CHECK-NEXT: [[RES:%.*]] = icmp eq i32 [[AND]], 1
+; CHECK-NEXT: ret i1 [[RES]]
+;
%rhs = add i32 %a, 2
%and = and i32 %a, %rhs
%res = icmp eq i32 %and, 1
@@ -56,8 +69,9 @@ define i1 @test5(i32 %a) {
}
define i1 @test6(i32 %a) {
-; CHECK-LABEL: @test6
-; CHECK: ret i1 false
+; CHECK-LABEL: @test6(
+; CHECK: ret i1 false
+;
%lhs = add i32 %a, -1
%and = and i32 %lhs, %a
%res = icmp eq i32 %and, 1
diff --git a/test/Transforms/InstSimplify/apint-or.ll b/test/Transforms/InstSimplify/apint-or.ll
index 36844289aaf0..e3dc2c48fb40 100644
--- a/test/Transforms/InstSimplify/apint-or.ll
+++ b/test/Transforms/InstSimplify/apint-or.ll
@@ -1,7 +1,13 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
; RUN: opt < %s -instsimplify -S | FileCheck %s
; Test the case where integer BitWidth <= 64 && BitWidth % 2 != 0.
define i39 @test1(i39 %V, i39 %M) {
+; CHECK-LABEL: @test1(
+; CHECK: [[N:%.*]] = and i39 %M, -274877906944
+; CHECK-NEXT: [[A:%.*]] = add i39 %V, [[N]]
+; CHECK-NEXT: ret i39 [[A]]
+;
;; If we have: ((V + N) & C1) | (V & C2)
;; .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
;; replace with V+N.
@@ -12,28 +18,31 @@ define i39 @test1(i39 %V, i39 %M) {
%D = and i39 %V, 274877906943
%R = or i39 %B, %D
ret i39 %R
-; CHECK-LABEL: @test1
-; CHECK-NEXT: and {{.*}}, -274877906944
-; CHECK-NEXT: add
-; CHECK-NEXT: ret
}
define i7 @test2(i7 %X) {
+; CHECK-LABEL: @test2(
+; CHECK: ret i7 %X
+;
%Y = or i7 %X, 0
ret i7 %Y
-; CHECK-LABEL: @test2
-; CHECK-NEXT: ret i7 %X
}
define i17 @test3(i17 %X) {
+; CHECK-LABEL: @test3(
+; CHECK: ret i17 -1
+;
%Y = or i17 %X, -1
ret i17 %Y
-; CHECK-LABEL: @test3
-; CHECK-NEXT: ret i17 -1
}
-; Test the case where Integer BitWidth > 64 && BitWidth <= 1024.
+; Test the case where Integer BitWidth > 64 && BitWidth <= 1024.
define i399 @test4(i399 %V, i399 %M) {
+; CHECK-LABEL: @test4(
+; CHECK: [[N:%.*]] = and i399 %M, 18446742974197923840
+; CHECK-NEXT: [[A:%.*]] = add i399 %V, [[N]]
+; CHECK-NEXT: ret i399 [[A]]
+;
;; If we have: ((V + N) & C1) | (V & C2)
;; .. and C2 = ~C1 and C2 is 0+1+ and (N & C2) == 0
;; replace with V+N.
@@ -44,22 +53,20 @@ define i399 @test4(i399 %V, i399 %M) {
%D = and i399 %V, 274877906943
%R = or i399 %B, %D
ret i399 %R
-; CHECK-LABEL: @test4
-; CHECK-NEXT: and {{.*}}, 18446742974197923840
-; CHECK-NEXT: add
-; CHECK-NEXT: ret
}
define i777 @test5(i777 %X) {
+; CHECK-LABEL: @test5(
+; CHECK: ret i777 %X
+;
%Y = or i777 %X, 0
ret i777 %Y
-; CHECK-LABEL: @test5
-; CHECK-NEXT: ret i777 %X
}
define i117 @test6(i117 %X) {
+; CHECK-LABEL: @test6(
+; CHECK: ret i117 -1
+;
%Y = or i117 %X, -1
ret i117 %Y
-; CHECK-LABEL: @test6
-; CHECK-NEXT: ret i117 -1
}
diff --git a/test/Transforms/InstSimplify/assume.ll b/test/Transforms/InstSimplify/assume.ll
index 4dd0a8f4a82d..2487a9c8bb15 100644
--- a/test/Transforms/InstSimplify/assume.ll
+++ b/test/Transforms/InstSimplify/assume.ll
@@ -1,12 +1,13 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
; RUN: opt -instsimplify -S < %s | FileCheck %s
define void @test1() {
+; CHECK-LABEL: @test1(
+; CHECK: ret void
+;
call void @llvm.assume(i1 1)
ret void
-; CHECK-LABEL: @test1
-; CHECK-NOT: llvm.assume
-; CHECK: ret void
}
declare void @llvm.assume(i1) nounwind
diff --git a/test/Transforms/InstSimplify/bswap.ll b/test/Transforms/InstSimplify/bswap.ll
index 7bc3af9e307f..5c67aa0a7643 100644
--- a/test/Transforms/InstSimplify/bswap.ll
+++ b/test/Transforms/InstSimplify/bswap.ll
@@ -1,10 +1,12 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
; RUN: opt < %s -S -instsimplify | FileCheck %s
declare i16 @llvm.bswap.i16(i16)
define i1 @test1(i16 %arg) {
-; CHECK-LABEL: @test1
-; CHECK: ret i1 false
+; CHECK-LABEL: @test1(
+; CHECK: ret i1 false
+;
%a = or i16 %arg, 1
%b = call i16 @llvm.bswap.i16(i16 %a)
%res = icmp eq i16 %b, 0
@@ -12,8 +14,9 @@ define i1 @test1(i16 %arg) {
}
define i1 @test2(i16 %arg) {
-; CHECK-LABEL: @test2
-; CHECK: ret i1 false
+; CHECK-LABEL: @test2(
+; CHECK: ret i1 false
+;
%a = or i16 %arg, 1024
%b = call i16 @llvm.bswap.i16(i16 %a)
%res = icmp eq i16 %b, 0
@@ -21,8 +24,9 @@ define i1 @test2(i16 %arg) {
}
define i1 @test3(i16 %arg) {
-; CHECK-LABEL: @test3
-; CHECK: ret i1 false
+; CHECK-LABEL: @test3(
+; CHECK: ret i1 false
+;
%a = and i16 %arg, 1
%b = call i16 @llvm.bswap.i16(i16 %a)
%and = and i16 %b, 1
@@ -31,8 +35,9 @@ define i1 @test3(i16 %arg) {
}
define i1 @test4(i16 %arg) {
-; CHECK-LABEL: @test4
-; CHECK: ret i1 false
+; CHECK-LABEL: @test4(
+; CHECK: ret i1 false
+;
%a = and i16 %arg, 511
%b = call i16 @llvm.bswap.i16(i16 %a)
%and = and i16 %b, 256
diff --git a/test/Transforms/InstSimplify/call.ll b/test/Transforms/InstSimplify/call.ll
index b360ecb84342..988ec2b71c50 100644
--- a/test/Transforms/InstSimplify/call.ll
+++ b/test/Transforms/InstSimplify/call.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -instsimplify -S | FileCheck %s
+; RUN: opt < %s -passes=instsimplify -S | FileCheck %s
declare {i8, i1} @llvm.uadd.with.overflow.i8(i8 %a, i8 %b)
declare {i8, i1} @llvm.usub.with.overflow.i8(i8 %a, i8 %b)
@@ -187,4 +188,38 @@ cast.end: ; preds = %cast.notnull, %entr
; CHECK: br i1 %cmp, label %cast.end, label %cast.notnull
}
+define i32 @call_null() {
+entry:
+ %call = call i32 null()
+ ret i32 %call
+}
+; CHECK-LABEL: define i32 @call_null(
+; CHECK: ret i32 undef
+
+define i32 @call_undef() {
+entry:
+ %call = call i32 undef()
+ ret i32 %call
+}
+; CHECK-LABEL: define i32 @call_undef(
+; CHECK: ret i32 undef
+
+@GV = private constant [8 x i32] [i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49]
+
+define <8 x i32> @partial_masked_load() {
+; CHECK-LABEL: @partial_masked_load(
+; CHECK: ret <8 x i32> <i32 undef, i32 undef, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47>
+ %masked.load = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* bitcast (i32* getelementptr ([8 x i32], [8 x i32]* @GV, i64 0, i64 -2) to <8 x i32>*), i32 4, <8 x i1> <i1 false, i1 false, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>, <8 x i32> undef)
+ ret <8 x i32> %masked.load
+}
+
+define <8 x i32> @masked_load_undef_mask(<8 x i32>* %V) {
+; CHECK-LABEL: @masked_load_undef_mask(
+; CHECK: ret <8 x i32> <i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0>
+ %masked.load = call <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>* %V, i32 4, <8 x i1> undef, <8 x i32> <i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0>)
+ ret <8 x i32> %masked.load
+}
+
declare noalias i8* @malloc(i64)
+
+declare <8 x i32> @llvm.masked.load.v8i32.p0v8i32(<8 x i32>*, i32, <8 x i1>, <8 x i32>)
diff --git a/test/Transforms/InstSimplify/compare.ll b/test/Transforms/InstSimplify/compare.ll
index 6e66fbfede9f..9d6fd74ae56f 100644
--- a/test/Transforms/InstSimplify/compare.ll
+++ b/test/Transforms/InstSimplify/compare.ll
@@ -333,6 +333,21 @@ define i1 @or(i32 %x) {
; CHECK: ret i1 false
}
+; Do not simplify if we cannot guarantee that the ConstantExpr is a non-zero
+; constant.
+@GV = common global i32* null
+define i1 @or_constexp(i32 %x) {
+; CHECK-LABEL: @or_constexp(
+entry:
+ %0 = and i32 ptrtoint (i32** @GV to i32), 32
+ %o = or i32 %x, %0
+ %c = icmp eq i32 %o, 0
+ ret i1 %c
+; CHECK: or
+; CHECK-NEXT: icmp eq
+; CHECK-NOT: ret i1 false
+}
+
define i1 @shl1(i32 %x) {
; CHECK-LABEL: @shl1(
%s = shl i32 1, %x
@@ -397,6 +412,22 @@ define i1 @lshr3(i32 %x) {
; CHECK: ret i1 true
}
+define i1 @lshr4(i32 %X, i32 %Y) {
+; CHECK-LABEL: @lshr4(
+ %A = lshr i32 %X, %Y
+ %C = icmp ule i32 %A, %X
+ ret i1 %C
+; CHECK: ret i1 true
+}
+
+define i1 @lshr5(i32 %X, i32 %Y) {
+; CHECK-LABEL: @lshr5(
+ %A = lshr i32 %X, %Y
+ %C = icmp ugt i32 %A, %X
+ ret i1 %C
+; CHECK: ret i1 false
+}
+
define i1 @ashr1(i32 %x) {
; CHECK-LABEL: @ashr1(
%s = ashr i32 -1, %x
@@ -1172,3 +1203,140 @@ define i1 @tautological9(i32 %x) {
; CHECK-LABEL: @tautological9(
; CHECK: ret i1 true
}
+
+declare void @helper_i1(i1)
+; Series of tests for icmp s[lt|ge] (or A, B), A and icmp s[gt|le] A, (or A, B)
+define void @icmp_slt_sge_or(i32 %Ax, i32 %Bx) {
+; 'p' for positive, 'n' for negative, 'x' for potentially either.
+; %D is 'icmp slt (or A, B), A'
+; %E is 'icmp sge (or A, B), A' making it the not of %D
+; %F is 'icmp sgt A, (or A, B)' making it the same as %D
+; %G is 'icmp sle A, (or A, B)' making it the not of %D
+ %Aneg = or i32 %Ax, 2147483648
+ %Apos = and i32 %Ax, 2147483647
+ %Bneg = or i32 %Bx, 2147483648
+ %Bpos = and i32 %Bx, 2147483647
+
+ %Cpp = or i32 %Apos, %Bpos
+ %Dpp = icmp slt i32 %Cpp, %Apos
+ %Epp = icmp sge i32 %Cpp, %Apos
+ %Fpp = icmp sgt i32 %Apos, %Cpp
+ %Gpp = icmp sle i32 %Apos, %Cpp
+ %Cpx = or i32 %Apos, %Bx
+ %Dpx = icmp slt i32 %Cpx, %Apos
+ %Epx = icmp sge i32 %Cpx, %Apos
+ %Fpx = icmp sgt i32 %Apos, %Cpx
+ %Gpx = icmp sle i32 %Apos, %Cpx
+ %Cpn = or i32 %Apos, %Bneg
+ %Dpn = icmp slt i32 %Cpn, %Apos
+ %Epn = icmp sge i32 %Cpn, %Apos
+ %Fpn = icmp sgt i32 %Apos, %Cpn
+ %Gpn = icmp sle i32 %Apos, %Cpn
+
+ %Cxp = or i32 %Ax, %Bpos
+ %Dxp = icmp slt i32 %Cxp, %Ax
+ %Exp = icmp sge i32 %Cxp, %Ax
+ %Fxp = icmp sgt i32 %Ax, %Cxp
+ %Gxp = icmp sle i32 %Ax, %Cxp
+ %Cxx = or i32 %Ax, %Bx
+ %Dxx = icmp slt i32 %Cxx, %Ax
+ %Exx = icmp sge i32 %Cxx, %Ax
+ %Fxx = icmp sgt i32 %Ax, %Cxx
+ %Gxx = icmp sle i32 %Ax, %Cxx
+ %Cxn = or i32 %Ax, %Bneg
+ %Dxn = icmp slt i32 %Cxn, %Ax
+ %Exn = icmp sge i32 %Cxn, %Ax
+ %Fxn = icmp sgt i32 %Ax, %Cxn
+ %Gxn = icmp sle i32 %Ax, %Cxn
+
+ %Cnp = or i32 %Aneg, %Bpos
+ %Dnp = icmp slt i32 %Cnp, %Aneg
+ %Enp = icmp sge i32 %Cnp, %Aneg
+ %Fnp = icmp sgt i32 %Aneg, %Cnp
+ %Gnp = icmp sle i32 %Aneg, %Cnp
+ %Cnx = or i32 %Aneg, %Bx
+ %Dnx = icmp slt i32 %Cnx, %Aneg
+ %Enx = icmp sge i32 %Cnx, %Aneg
+ %Fnx = icmp sgt i32 %Aneg, %Cnx
+ %Gnx = icmp sle i32 %Aneg, %Cnx
+ %Cnn = or i32 %Aneg, %Bneg
+ %Dnn = icmp slt i32 %Cnn, %Aneg
+ %Enn = icmp sge i32 %Cnn, %Aneg
+ %Fnn = icmp sgt i32 %Aneg, %Cnn
+ %Gnn = icmp sle i32 %Aneg, %Cnn
+
+ call void @helper_i1(i1 %Dpp)
+ call void @helper_i1(i1 %Epp)
+ call void @helper_i1(i1 %Fpp)
+ call void @helper_i1(i1 %Gpp)
+ call void @helper_i1(i1 %Dpx)
+ call void @helper_i1(i1 %Epx)
+ call void @helper_i1(i1 %Fpx)
+ call void @helper_i1(i1 %Gpx)
+ call void @helper_i1(i1 %Dpn)
+ call void @helper_i1(i1 %Epn)
+ call void @helper_i1(i1 %Fpn)
+ call void @helper_i1(i1 %Gpn)
+ call void @helper_i1(i1 %Dxp)
+ call void @helper_i1(i1 %Exp)
+ call void @helper_i1(i1 %Fxp)
+ call void @helper_i1(i1 %Gxp)
+ call void @helper_i1(i1 %Dxx)
+ call void @helper_i1(i1 %Exx)
+ call void @helper_i1(i1 %Fxx)
+ call void @helper_i1(i1 %Gxx)
+ call void @helper_i1(i1 %Dxn)
+ call void @helper_i1(i1 %Exn)
+ call void @helper_i1(i1 %Fxn)
+ call void @helper_i1(i1 %Gxn)
+ call void @helper_i1(i1 %Dnp)
+ call void @helper_i1(i1 %Enp)
+ call void @helper_i1(i1 %Fnp)
+ call void @helper_i1(i1 %Gnp)
+ call void @helper_i1(i1 %Dnx)
+ call void @helper_i1(i1 %Enx)
+ call void @helper_i1(i1 %Fnx)
+ call void @helper_i1(i1 %Gnx)
+ call void @helper_i1(i1 %Dnn)
+ call void @helper_i1(i1 %Enn)
+ call void @helper_i1(i1 %Fnn)
+ call void @helper_i1(i1 %Gnn)
+; CHECK-LABEL: @icmp_slt_sge_or
+; CHECK: call void @helper_i1(i1 false)
+; CHECK: call void @helper_i1(i1 true)
+; CHECK: call void @helper_i1(i1 false)
+; CHECK: call void @helper_i1(i1 true)
+; CHECK: call void @helper_i1(i1 %Dpx)
+; CHECK: call void @helper_i1(i1 %Epx)
+; CHECK: call void @helper_i1(i1 %Fpx)
+; CHECK: call void @helper_i1(i1 %Gpx)
+; CHECK: call void @helper_i1(i1 true)
+; CHECK: call void @helper_i1(i1 false)
+; CHECK: call void @helper_i1(i1 true)
+; CHECK: call void @helper_i1(i1 false)
+; CHECK: call void @helper_i1(i1 false)
+; CHECK: call void @helper_i1(i1 true)
+; CHECK: call void @helper_i1(i1 false)
+; CHECK: call void @helper_i1(i1 true)
+; CHECK: call void @helper_i1(i1 %Dxx)
+; CHECK: call void @helper_i1(i1 %Exx)
+; CHECK: call void @helper_i1(i1 %Fxx)
+; CHECK: call void @helper_i1(i1 %Gxx)
+; CHECK: call void @helper_i1(i1 %Dxn)
+; CHECK: call void @helper_i1(i1 %Exn)
+; CHECK: call void @helper_i1(i1 %Fxn)
+; CHECK: call void @helper_i1(i1 %Gxn)
+; CHECK: call void @helper_i1(i1 false)
+; CHECK: call void @helper_i1(i1 true)
+; CHECK: call void @helper_i1(i1 false)
+; CHECK: call void @helper_i1(i1 true)
+; CHECK: call void @helper_i1(i1 false)
+; CHECK: call void @helper_i1(i1 true)
+; CHECK: call void @helper_i1(i1 false)
+; CHECK: call void @helper_i1(i1 true)
+; CHECK: call void @helper_i1(i1 false)
+; CHECK: call void @helper_i1(i1 true)
+; CHECK: call void @helper_i1(i1 false)
+; CHECK: call void @helper_i1(i1 true)
+ ret void
+}
diff --git a/test/Transforms/InstSimplify/fast-math.ll b/test/Transforms/InstSimplify/fast-math.ll
index 90532fa5db84..f4f31236e5cc 100644
--- a/test/Transforms/InstSimplify/fast-math.ll
+++ b/test/Transforms/InstSimplify/fast-math.ll
@@ -1,44 +1,60 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
; RUN: opt < %s -instsimplify -S | FileCheck %s
;; x * 0 ==> 0 when no-nans and no-signed-zero
-; CHECK: mul_zero_1
define float @mul_zero_1(float %a) {
+; CHECK-LABEL: @mul_zero_1(
+; CHECK: ret float 0.000000e+00
+;
%b = fmul nsz nnan float %a, 0.0
-; CHECK: ret float 0.0
ret float %b
}
-; CHECK: mul_zero_2
+
define float @mul_zero_2(float %a) {
+; CHECK-LABEL: @mul_zero_2(
+; CHECK: ret float 0.000000e+00
+;
%b = fmul fast float 0.0, %a
-; CHECK: ret float 0.0
ret float %b
}
;; x * 0 =/=> 0 when there could be nans or -0
-; CHECK: no_mul_zero_1
define float @no_mul_zero_1(float %a) {
+; CHECK-LABEL: @no_mul_zero_1(
+; CHECK: [[B:%.*]] = fmul nsz float %a, 0.000000e+00
+; CHECK-NEXT: ret float [[B]]
+;
%b = fmul nsz float %a, 0.0
-; CHECK: ret float %b
ret float %b
}
-; CHECK: no_mul_zero_2
+
define float @no_mul_zero_2(float %a) {
+; CHECK-LABEL: @no_mul_zero_2(
+; CHECK: [[B:%.*]] = fmul nnan float %a, 0.000000e+00
+; CHECK-NEXT: ret float [[B]]
+;
%b = fmul nnan float %a, 0.0
-; CHECK: ret float %b
ret float %b
}
-; CHECK: no_mul_zero_3
+
define float @no_mul_zero_3(float %a) {
+; CHECK-LABEL: @no_mul_zero_3(
+; CHECK: [[B:%.*]] = fmul float %a, 0.000000e+00
+; CHECK-NEXT: ret float [[B]]
+;
%b = fmul float %a, 0.0
-; CHECK: ret float %b
ret float %b
}
; fadd [nnan ninf] X, (fsub [nnan ninf] 0, X) ==> 0
; where nnan and ninf have to occur at least once somewhere in this
; expression
-; CHECK: fadd_fsub_0
define float @fadd_fsub_0(float %a) {
+; CHECK-LABEL: @fadd_fsub_0(
+; CHECK: [[NOFOLD:%.*]] = fsub float 0.000000e+00, %a
+; CHECK-NEXT: [[NO_ZERO:%.*]] = fadd nnan float [[NOFOLD]], %a
+; CHECK-NEXT: ret float [[NO_ZERO]]
+;
; X + -X ==> 0
%t1 = fsub nnan ninf float 0.0, %a
%zero1 = fadd nnan ninf float %t1, %a
@@ -53,9 +69,7 @@ define float @fadd_fsub_0(float %a) {
%zero4 = fadd nnan ninf float %t4, %a
; Dont fold this
-; CHECK: %nofold = fsub float 0.0
%nofold = fsub float 0.0, %a
-; CHECK: %no_zero = fadd nnan float %nofold, %a
%no_zero = fadd nnan float %nofold, %a
; Coalesce the folded zeros
@@ -66,108 +80,126 @@ define float @fadd_fsub_0(float %a) {
; Should get folded
%ret = fadd nsz float %no_zero, %zero7
-; CHECK: ret float %no_zero
ret float %ret
}
; fsub nnan x, x ==> 0.0
-; CHECK-LABEL: @fsub_x_x(
define float @fsub_x_x(float %a) {
+; CHECK-LABEL: @fsub_x_x(
+; CHECK: [[NO_ZERO1:%.*]] = fsub ninf float %a, %a
+; CHECK-NEXT: [[NO_ZERO2:%.*]] = fsub float %a, %a
+; CHECK-NEXT: [[NO_ZERO:%.*]] = fadd float [[NO_ZERO1:%.*]], [[NO_ZERO2:%.*]]
+; CHECK-NEXT: ret float [[NO_ZERO]]
+;
; X - X ==> 0
%zero1 = fsub nnan float %a, %a
; Dont fold
-; CHECK: %no_zero1 = fsub
%no_zero1 = fsub ninf float %a, %a
-; CHECK: %no_zero2 = fsub
%no_zero2 = fsub float %a, %a
-; CHECK: %no_zero = fadd
%no_zero = fadd float %no_zero1, %no_zero2
; Should get folded
%ret = fadd nsz float %no_zero, %zero1
-; CHECK: ret float %no_zero
+ ret float %ret
+}
+
+; fsub nsz 0.0, (fsub 0.0, X) ==> X
+define float @fsub_0_0_x(float %a) {
+; CHECK-LABEL: @fsub_0_0_x(
+; CHECK: ret float %a
+;
+ %t1 = fsub float 0.0, %a
+ %ret = fsub nsz float 0.0, %t1
ret float %ret
}
; fadd nsz X, 0 ==> X
-; CHECK-LABEL: @nofold_fadd_x_0(
define float @nofold_fadd_x_0(float %a) {
+; CHECK-LABEL: @nofold_fadd_x_0(
+; CHECK: [[NO_ZERO1:%.*]] = fadd ninf float %a, 0.000000e+00
+; CHECK-NEXT: [[NO_ZERO2:%.*]] = fadd nnan float %a, 0.000000e+00
+; CHECK-NEXT: [[NO_ZERO:%.*]] = fadd float [[NO_ZERO1:%.*]], [[NO_ZERO2:%.*]]
+; CHECK-NEXT: ret float [[NO_ZERO]]
+;
; Dont fold
-; CHECK: %no_zero1 = fadd
%no_zero1 = fadd ninf float %a, 0.0
-; CHECK: %no_zero2 = fadd
%no_zero2 = fadd nnan float %a, 0.0
-; CHECK: %no_zero = fadd
%no_zero = fadd float %no_zero1, %no_zero2
-
-; CHECK: ret float %no_zero
ret float %no_zero
}
; fdiv nsz nnan 0, X ==> 0
define double @fdiv_zero_by_x(double %X) {
; CHECK-LABEL: @fdiv_zero_by_x(
+; CHECK: ret double 0.000000e+00
+;
; 0 / X -> 0
%r = fdiv nnan nsz double 0.0, %X
ret double %r
-; CHECK: ret double 0
}
define float @fdiv_self(float %f) {
+; CHECK-LABEL: @fdiv_self(
+; CHECK: ret float 1.000000e+00
+;
%div = fdiv nnan float %f, %f
ret float %div
-; CHECK-LABEL: fdiv_self
-; CHECK: ret float 1.000000e+00
}
define float @fdiv_self_invalid(float %f) {
+; CHECK-LABEL: @fdiv_self_invalid(
+; CHECK: [[DIV:%.*]] = fdiv float %f, %f
+; CHECK-NEXT: ret float [[DIV]]
+;
%div = fdiv float %f, %f
ret float %div
-; CHECK-LABEL: fdiv_self_invalid
-; CHECK: %div = fdiv float %f, %f
-; CHECK-NEXT: ret float %div
}
define float @fdiv_neg1(float %f) {
+; CHECK-LABEL: @fdiv_neg1(
+; CHECK: ret float -1.000000e+00
+;
%neg = fsub fast float -0.000000e+00, %f
%div = fdiv nnan float %neg, %f
ret float %div
-; CHECK-LABEL: fdiv_neg1
-; CHECK: ret float -1.000000e+00
}
define float @fdiv_neg2(float %f) {
+; CHECK-LABEL: @fdiv_neg2(
+; CHECK: ret float -1.000000e+00
+;
%neg = fsub fast float 0.000000e+00, %f
%div = fdiv nnan float %neg, %f
ret float %div
-; CHECK-LABEL: fdiv_neg2
-; CHECK: ret float -1.000000e+00
}
define float @fdiv_neg_invalid(float %f) {
+; CHECK-LABEL: @fdiv_neg_invalid(
+; CHECK: [[NEG:%.*]] = fsub fast float -0.000000e+00, %f
+; CHECK-NEXT: [[DIV:%.*]] = fdiv float [[NEG]], %f
+; CHECK-NEXT: ret float [[DIV]]
+;
%neg = fsub fast float -0.000000e+00, %f
%div = fdiv float %neg, %f
ret float %div
-; CHECK-LABEL: fdiv_neg_invalid
-; CHECK: %neg = fsub fast float -0.000000e+00, %f
-; CHECK-NEXT: %div = fdiv float %neg, %f
-; CHECK-NEXT: ret float %div
}
define float @fdiv_neg_swapped1(float %f) {
+; CHECK-LABEL: @fdiv_neg_swapped1(
+; CHECK: ret float -1.000000e+00
+;
%neg = fsub float -0.000000e+00, %f
%div = fdiv nnan float %f, %neg
ret float %div
-; CHECK-LABEL: fdiv_neg_swapped1
-; CHECK: ret float -1.000000e+00
}
define float @fdiv_neg_swapped2(float %f) {
+; CHECK-LABEL: @fdiv_neg_swapped2(
+; CHECK: ret float -1.000000e+00
+;
%neg = fsub float 0.000000e+00, %f
%div = fdiv nnan float %f, %neg
ret float %div
-; CHECK-LABEL: fdiv_neg_swapped2
-; CHECK: ret float -1.000000e+00
}
diff --git a/test/Transforms/InstSimplify/fdiv.ll b/test/Transforms/InstSimplify/fdiv.ll
index 53ad25d07476..bb7f443f4238 100644
--- a/test/Transforms/InstSimplify/fdiv.ll
+++ b/test/Transforms/InstSimplify/fdiv.ll
@@ -1,17 +1,20 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
; RUN: opt < %s -instsimplify -S | FileCheck %s
define double @fdiv_of_undef(double %X) {
; CHECK-LABEL: @fdiv_of_undef(
+; CHECK: ret double undef
+;
; undef / X -> undef
%r = fdiv double undef, %X
ret double %r
-; CHECK: ret double undef
}
define double @fdiv_by_undef(double %X) {
; CHECK-LABEL: @fdiv_by_undef(
+; CHECK: ret double undef
+;
; X / undef -> undef
%r = fdiv double %X, undef
ret double %r
-; CHECK: ret double undef
}
diff --git a/test/Transforms/InstSimplify/floating-point-arithmetic.ll b/test/Transforms/InstSimplify/floating-point-arithmetic.ll
index b0957a817739..b825ac80b6d6 100644
--- a/test/Transforms/InstSimplify/floating-point-arithmetic.ll
+++ b/test/Transforms/InstSimplify/floating-point-arithmetic.ll
@@ -1,51 +1,83 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
; RUN: opt < %s -instsimplify -S | FileCheck %s
-; fsub 0, (fsub 0, X) ==> X
-; CHECK-LABEL: @fsub_0_0_x(
-define float @fsub_0_0_x(float %a) {
+; fsub -0.0, (fsub -0.0, X) ==> X
+define float @fsub_-0_-0_x(float %a) {
+; CHECK-LABEL: @fsub_-0_-0_x(
+; CHECK: ret float %a
+;
%t1 = fsub float -0.0, %a
%ret = fsub float -0.0, %t1
+ ret float %ret
+}
+
+; fsub 0.0, (fsub -0.0, X) != X
+define float @fsub_0_-0_x(float %a) {
+; CHECK-LABEL: @fsub_0_-0_x(
+; CHECK: [[T1:%.*]] = fsub float 0.000000e+00, %a
+; CHECK-NEXT: [[RET:%.*]] = fsub float -0.000000e+00, [[T1]]
+; CHECK-NEXT: ret float [[RET]]
+;
+ %t1 = fsub float 0.0, %a
+ %ret = fsub float -0.0, %t1
+ ret float %ret
+}
-; CHECK: ret float %a
+; fsub -0.0, (fsub 0.0, X) != X
+define float @fsub_-0_0_x(float %a) {
+; CHECK-LABEL: @fsub_-0_0_x(
+; CHECK: [[T1:%.*]] = fsub float -0.000000e+00, %a
+; CHECK-NEXT: [[RET:%.*]] = fsub float 0.000000e+00, [[T1]]
+; CHECK-NEXT: ret float [[RET]]
+;
+ %t1 = fsub float -0.0, %a
+ %ret = fsub float 0.0, %t1
ret float %ret
}
; fsub X, 0 ==> X
-; CHECK-LABEL: @fsub_x_0(
define float @fsub_x_0(float %a) {
+; CHECK-LABEL: @fsub_x_0(
+; CHECK: ret float %a
+;
%ret = fsub float %a, 0.0
-; CHECK: ret float %a
ret float %ret
}
; fadd X, -0 ==> X
-; CHECK-LABEL: @fadd_x_n0(
define float @fadd_x_n0(float %a) {
+; CHECK-LABEL: @fadd_x_n0(
+; CHECK: ret float %a
+;
%ret = fadd float %a, -0.0
-; CHECK: ret float %a
ret float %ret
}
; fmul X, 1.0 ==> X
-; CHECK-LABEL: @fmul_X_1(
define double @fmul_X_1(double %a) {
- %b = fmul double 1.000000e+00, %a ; <double> [#uses=1]
- ; CHECK: ret double %a
+; CHECK-LABEL: @fmul_X_1(
+; CHECK: ret double %a
+;
+ %b = fmul double 1.000000e+00, %a
ret double %b
}
; We can't optimize away the fadd in this test because the input
-; value to the function and subsequently to the fadd may be -0.0.
+; value to the function and subsequently to the fadd may be -0.0.
; In that one special case, the result of the fadd should be +0.0
; rather than the first parameter of the fadd.
-; Fragile test warning: We need 6 sqrt calls to trigger the bug
-; because the internal logic has a magic recursion limit of 6.
+; Fragile test warning: We need 6 sqrt calls to trigger the bug
+; because the internal logic has a magic recursion limit of 6.
; This is presented without any explanation or ability to customize.
declare float @sqrtf(float)
define float @PR22688(float %x) {
+; CHECK-LABEL: @PR22688(
+; CHECK: [[TMP7:%.*]] = fadd float {{%.*}}, 0.000000e+00
+; CHECK-NEXT: ret float [[TMP7]]
+;
%1 = call float @sqrtf(float %x)
%2 = call float @sqrtf(float %1)
%3 = call float @sqrtf(float %2)
@@ -54,8 +86,5 @@ define float @PR22688(float %x) {
%6 = call float @sqrtf(float %5)
%7 = fadd float %6, 0.0
ret float %7
-
-; CHECK-LABEL: @PR22688(
-; CHECK: fadd float %6, 0.0
}
diff --git a/test/Transforms/InstSimplify/floating-point-compare.ll b/test/Transforms/InstSimplify/floating-point-compare.ll
index b148d9961d33..7c67ffb4be0d 100644
--- a/test/Transforms/InstSimplify/floating-point-compare.ll
+++ b/test/Transforms/InstSimplify/floating-point-compare.ll
@@ -1,7 +1,8 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
; RUN: opt < %s -instsimplify -S | FileCheck %s
; These tests choose arbitrarily between float and double,
-; and between uge and olt, to give reasonble coverage
+; and between uge and olt, to give reasonble coverage
; without combinatorial explosion.
declare float @llvm.fabs.f32(float)
@@ -15,8 +16,10 @@ declare float @llvm.fma.f32(float,float,float)
declare void @expect_equal(i1,i1)
-; CHECK-LABEL: @orderedLessZeroTree(
define i1 @orderedLessZeroTree(float,float,float,float) {
+; CHECK-LABEL: @orderedLessZeroTree(
+; CHECK: ret i1 true
+;
%square = fmul float %0, %0
%abs = call float @llvm.fabs.f32(float %1)
%sqrt = call float @llvm.sqrt.f32(float %2)
@@ -25,30 +28,33 @@ define i1 @orderedLessZeroTree(float,float,float,float) {
%rem = frem float %sqrt, %fma
%add = fadd float %div, %rem
%uge = fcmp uge float %add, 0.000000e+00
-; CHECK: ret i1 true
ret i1 %uge
}
-; CHECK-LABEL: @orderedLessZeroExpExt(
define i1 @orderedLessZeroExpExt(float) {
+; CHECK-LABEL: @orderedLessZeroExpExt(
+; CHECK: ret i1 true
+;
%a = call float @llvm.exp.f32(float %0)
%b = fpext float %a to double
%uge = fcmp uge double %b, 0.000000e+00
-; CHECK: ret i1 true
ret i1 %uge
}
-; CHECK-LABEL: @orderedLessZeroExp2Trunc(
define i1 @orderedLessZeroExp2Trunc(double) {
+; CHECK-LABEL: @orderedLessZeroExp2Trunc(
+; CHECK: ret i1 false
+;
%a = call double @llvm.exp2.f64(double %0)
%b = fptrunc double %a to float
%olt = fcmp olt float %b, 0.000000e+00
-; CHECK: ret i1 false
ret i1 %olt
}
-; CHECK-LABEL: @orderedLessZeroPowi(
define i1 @orderedLessZeroPowi(double,double) {
+; CHECK-LABEL: @orderedLessZeroPowi(
+; CHECK: ret i1 false
+;
; Even constant exponent
%a = call double @llvm.powi.f64(double %0, i32 2)
%square = fmul double %1, %1
@@ -56,61 +62,64 @@ define i1 @orderedLessZeroPowi(double,double) {
%b = call double @llvm.powi.f64(double %square, i32 3)
%c = fadd double %a, %b
%olt = fcmp olt double %b, 0.000000e+00
-; CHECK: ret i1 false
ret i1 %olt
}
-; CHECK-LABEL: @orderedLessZeroUIToFP(
define i1 @orderedLessZeroUIToFP(i32) {
+; CHECK-LABEL: @orderedLessZeroUIToFP(
+; CHECK: ret i1 true
+;
%a = uitofp i32 %0 to float
%uge = fcmp uge float %a, 0.000000e+00
-; CHECK: ret i1 true
ret i1 %uge
}
-; CHECK-LABEL: @orderedLessZeroSelect(
define i1 @orderedLessZeroSelect(float, float) {
+; CHECK-LABEL: @orderedLessZeroSelect(
+; CHECK: ret i1 true
+;
%a = call float @llvm.exp.f32(float %0)
%b = call float @llvm.fabs.f32(float %1)
%c = fcmp olt float %0, %1
%d = select i1 %c, float %a, float %b
%e = fadd float %d, 1.0
%uge = fcmp uge float %e, 0.000000e+00
-; CHECK: ret i1 true
ret i1 %uge
}
-; CHECK-LABEL: @orderedLessZeroMinNum(
define i1 @orderedLessZeroMinNum(float, float) {
+; CHECK-LABEL: @orderedLessZeroMinNum(
+; CHECK: ret i1 true
+;
%a = call float @llvm.exp.f32(float %0)
%b = call float @llvm.fabs.f32(float %1)
%c = call float @llvm.minnum.f32(float %a, float %b)
%uge = fcmp uge float %c, 0.000000e+00
-; CHECK: ret i1 true
ret i1 %uge
}
-; CHECK-LABEL: @orderedLessZeroMaxNum(
define i1 @orderedLessZeroMaxNum(float, float) {
+; CHECK-LABEL: @orderedLessZeroMaxNum(
+; CHECK: ret i1 true
+;
%a = call float @llvm.exp.f32(float %0)
%b = call float @llvm.maxnum.f32(float %a, float %1)
%uge = fcmp uge float %b, 0.000000e+00
-; CHECK: ret i1 true
ret i1 %uge
}
define i1 @nonans1(double %in1, double %in2) {
+; CHECK-LABEL: @nonans1(
+; CHECK: ret i1 false
+;
%cmp = fcmp nnan uno double %in1, %in2
ret i1 %cmp
-
-; CHECK-LABEL: @nonans1
-; CHECK-NEXT: ret i1 false
}
define i1 @nonans2(double %in1, double %in2) {
+; CHECK-LABEL: @nonans2(
+; CHECK: ret i1 true
+;
%cmp = fcmp nnan ord double %in1, %in2
ret i1 %cmp
-
-; CHECK-LABEL: @nonans2
-; CHECK-NEXT: ret i1 true
}
diff --git a/test/Transforms/InstSimplify/implies.ll b/test/Transforms/InstSimplify/implies.ll
index 2e3c9591b079..56e1e6a29c49 100644
--- a/test/Transforms/InstSimplify/implies.ll
+++ b/test/Transforms/InstSimplify/implies.ll
@@ -1,9 +1,11 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
; RUN: opt -S %s -instsimplify | FileCheck %s
; A ==> A -> true
define i1 @test(i32 %length.i, i32 %i) {
-; CHECK-LABEL: @test
-; CHECK: ret i1 true
+; CHECK-LABEL: @test(
+; CHECK: ret i1 true
+;
%var29 = icmp slt i32 %i, %length.i
%res = icmp uge i1 %var29, %var29
ret i1 %res
@@ -11,8 +13,9 @@ define i1 @test(i32 %length.i, i32 %i) {
; i +_{nsw} C_{>0} <s L ==> i <s L -> true
define i1 @test2(i32 %length.i, i32 %i) {
-; CHECK-LABEL: @test2
-; CHECK: ret i1 true
+; CHECK-LABEL: @test2(
+; CHECK: ret i1 true
+;
%iplus1 = add nsw i32 %i, 1
%var29 = icmp slt i32 %i, %length.i
%var30 = icmp slt i32 %iplus1, %length.i
@@ -22,8 +25,13 @@ define i1 @test2(i32 %length.i, i32 %i) {
; i + C_{>0} <s L ==> i <s L -> unknown without the nsw
define i1 @test2_neg(i32 %length.i, i32 %i) {
-; CHECK-LABEL: @test2_neg
-; CHECK: ret i1 %res
+; CHECK-LABEL: @test2_neg(
+; CHECK: [[IPLUS1:%.*]] = add i32 %i, 1
+; CHECK-NEXT: [[VAR29:%.*]] = icmp slt i32 %i, %length.i
+; CHECK-NEXT: [[VAR30:%.*]] = icmp slt i32 [[IPLUS1]], %length.i
+; CHECK-NEXT: [[RES:%.*]] = icmp ule i1 [[VAR30]], [[VAR29]]
+; CHECK-NEXT: ret i1 [[RES]]
+;
%iplus1 = add i32 %i, 1
%var29 = icmp slt i32 %i, %length.i
%var30 = icmp slt i32 %iplus1, %length.i
@@ -33,8 +41,13 @@ define i1 @test2_neg(i32 %length.i, i32 %i) {
; sle is not implication
define i1 @test2_neg2(i32 %length.i, i32 %i) {
-; CHECK-LABEL: @test2_neg2
-; CHECK: ret i1 %res
+; CHECK-LABEL: @test2_neg2(
+; CHECK: [[IPLUS1:%.*]] = add i32 %i, 1
+; CHECK-NEXT: [[VAR29:%.*]] = icmp slt i32 %i, %length.i
+; CHECK-NEXT: [[VAR30:%.*]] = icmp slt i32 [[IPLUS1]], %length.i
+; CHECK-NEXT: [[RES:%.*]] = icmp sle i1 [[VAR30]], [[VAR29]]
+; CHECK-NEXT: ret i1 [[RES]]
+;
%iplus1 = add i32 %i, 1
%var29 = icmp slt i32 %i, %length.i
%var30 = icmp slt i32 %iplus1, %length.i
@@ -44,8 +57,13 @@ define i1 @test2_neg2(i32 %length.i, i32 %i) {
; The binary operator has to be an add
define i1 @test2_neg3(i32 %length.i, i32 %i) {
-; CHECK-LABEL: @test2_neg3
-; CHECK: ret i1 %res
+; CHECK-LABEL: @test2_neg3(
+; CHECK: [[IPLUS1:%.*]] = sub nsw i32 %i, 1
+; CHECK-NEXT: [[VAR29:%.*]] = icmp slt i32 %i, %length.i
+; CHECK-NEXT: [[VAR30:%.*]] = icmp slt i32 [[IPLUS1]], %length.i
+; CHECK-NEXT: [[RES:%.*]] = icmp ule i1 [[VAR30]], [[VAR29]]
+; CHECK-NEXT: ret i1 [[RES]]
+;
%iplus1 = sub nsw i32 %i, 1
%var29 = icmp slt i32 %i, %length.i
%var30 = icmp slt i32 %iplus1, %length.i
@@ -56,8 +74,9 @@ define i1 @test2_neg3(i32 %length.i, i32 %i) {
; i +_{nsw} C_{>0} <s L ==> i <s L -> true
; With an inverted conditional (ule B A rather than canonical ugt A B
define i1 @test3(i32 %length.i, i32 %i) {
-; CHECK-LABEL: @test3
-; CHECK: ret i1 true
+; CHECK-LABEL: @test3(
+; CHECK: ret i1 true
+;
%iplus1 = add nsw i32 %i, 1
%var29 = icmp slt i32 %i, %length.i
%var30 = icmp slt i32 %iplus1, %length.i
@@ -67,8 +86,9 @@ define i1 @test3(i32 %length.i, i32 %i) {
; i +_{nuw} C <u L ==> i <u L
define i1 @test4(i32 %length.i, i32 %i) {
-; CHECK-LABEL: @test4
-; CHECK: ret i1 true
+; CHECK-LABEL: @test4(
+; CHECK: ret i1 true
+;
%iplus1 = add nuw i32 %i, 1
%var29 = icmp ult i32 %i, %length.i
%var30 = icmp ult i32 %iplus1, %length.i
@@ -78,16 +98,19 @@ define i1 @test4(i32 %length.i, i32 %i) {
; A ==> A for vectors
define <4 x i1> @test5(<4 x i1> %vec) {
-; CHECK-LABEL: @test5
-; CHECK: ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
+; CHECK-LABEL: @test5(
+; CHECK: ret <4 x i1> <i1 true, i1 true, i1 true, i1 true>
+;
%res = icmp ule <4 x i1> %vec, %vec
ret <4 x i1> %res
}
; Don't crash on vector inputs - pr25040
define <4 x i1> @test6(<4 x i1> %a, <4 x i1> %b) {
-; CHECK-LABEL: @test6
-; CHECK: ret <4 x i1> %res
+; CHECK-LABEL: @test6(
+; CHECK: [[RES:%.*]] = icmp ule <4 x i1> %a, %b
+; CHECK-NEXT: ret <4 x i1> [[RES]]
+;
%res = icmp ule <4 x i1> %a, %b
ret <4 x i1> %res
}
@@ -95,7 +118,8 @@ define <4 x i1> @test6(<4 x i1> %a, <4 x i1> %b) {
; i +_{nsw} 1 <s L ==> i < L +_{nsw} 1
define i1 @test7(i32 %length.i, i32 %i) {
; CHECK-LABEL: @test7(
-; CHECK: ret i1 true
+; CHECK: ret i1 true
+;
%iplus1 = add nsw i32 %i, 1
%len.plus.one = add nsw i32 %length.i, 1
%var29 = icmp slt i32 %i, %len.plus.one
@@ -104,10 +128,11 @@ define i1 @test7(i32 %length.i, i32 %i) {
ret i1 %res
}
-; i +_{nuw} 1 <s L ==> i < L +_{nuw} 1
+; i +_{nuw} 1 <u L ==> i < L +_{nuw} 1
define i1 @test8(i32 %length.i, i32 %i) {
; CHECK-LABEL: @test8(
-; CHECK: ret i1 true
+; CHECK: ret i1 true
+;
%iplus1 = add nuw i32 %i, 1
%len.plus.one = add nuw i32 %length.i, 1
%var29 = icmp ult i32 %i, %len.plus.one
@@ -116,10 +141,11 @@ define i1 @test8(i32 %length.i, i32 %i) {
ret i1 %res
}
-; i +_{nuw} C <s L ==> i < L, even if C is negative
+; i +_{nuw} C <u L ==> i < L, even if C is negative
define i1 @test9(i32 %length.i, i32 %i) {
; CHECK-LABEL: @test9(
-; CHECK: ret i1 true
+; CHECK: ret i1 true
+;
%iplus1 = add nuw i32 %i, -100
%var29 = icmp ult i32 %i, %length.i
%var30 = icmp ult i32 %iplus1, %length.i
@@ -129,8 +155,8 @@ define i1 @test9(i32 %length.i, i32 %i) {
define i1 @test10(i32 %length.i, i32 %x.full) {
; CHECK-LABEL: @test10(
-; CHECK: ret i1 true
-
+; CHECK: ret i1 true
+;
%x = and i32 %x.full, 4294901760 ;; 4294901760 == 0xffff0000
%large = or i32 %x, 100
%small = or i32 %x, 90
@@ -142,9 +168,13 @@ define i1 @test10(i32 %length.i, i32 %x.full) {
define i1 @test11(i32 %length.i, i32 %x) {
; CHECK-LABEL: @test11(
-; CHECK: %res = icmp ule i1 %known, %to.prove
-; CHECK: ret i1 %res
-
+; CHECK: [[LARGE:%.*]] = or i32 %x, 100
+; CHECK-NEXT: [[SMALL:%.*]] = or i32 %x, 90
+; CHECK-NEXT: [[KNOWN:%.*]] = icmp ult i32 [[LARGE]], %length.i
+; CHECK-NEXT: [[TO_PROVE:%.*]] = icmp ult i32 [[SMALL]], %length.i
+; CHECK-NEXT: [[RES:%.*]] = icmp ule i1 [[KNOWN]], [[TO_PROVE]]
+; CHECK-NEXT: ret i1 [[RES]]
+;
%large = or i32 %x, 100
%small = or i32 %x, 90
%known = icmp ult i32 %large, %length.i
@@ -155,9 +185,14 @@ define i1 @test11(i32 %length.i, i32 %x) {
define i1 @test12(i32 %length.i, i32 %x.full) {
; CHECK-LABEL: @test12(
-; CHECK: %res = icmp ule i1 %known, %to.prove
-; CHECK: ret i1 %res
-
+; CHECK: [[X:%.*]] = and i32 [[X:%.*]].full, -65536
+; CHECK-NEXT: [[LARGE:%.*]] = or i32 [[X]], 65536
+; CHECK-NEXT: [[SMALL:%.*]] = or i32 [[X]], 90
+; CHECK-NEXT: [[KNOWN:%.*]] = icmp ult i32 [[LARGE]], %length.i
+; CHECK-NEXT: [[TO_PROVE:%.*]] = icmp ult i32 [[SMALL]], %length.i
+; CHECK-NEXT: [[RES:%.*]] = icmp ule i1 [[KNOWN]], [[TO_PROVE]]
+; CHECK-NEXT: ret i1 [[RES]]
+;
%x = and i32 %x.full, 4294901760 ;; 4294901760 == 0xffff0000
%large = or i32 %x, 65536 ;; 65536 == 0x00010000
%small = or i32 %x, 90
@@ -169,8 +204,8 @@ define i1 @test12(i32 %length.i, i32 %x.full) {
define i1 @test13(i32 %length.i, i32 %x) {
; CHECK-LABEL: @test13(
-; CHECK: ret i1 true
-
+; CHECK: ret i1 true
+;
%large = add nuw i32 %x, 100
%small = add nuw i32 %x, 90
%known = icmp ult i32 %large, %length.i
@@ -181,8 +216,8 @@ define i1 @test13(i32 %length.i, i32 %x) {
define i1 @test14(i32 %length.i, i32 %x.full) {
; CHECK-LABEL: @test14(
-; CHECK: ret i1 true
-
+; CHECK: ret i1 true
+;
%x = and i32 %x.full, 4294905615 ;; 4294905615 == 0xffff0f0f
%large = or i32 %x, 8224 ;; == 0x2020
%small = or i32 %x, 4112 ;; == 0x1010
@@ -194,9 +229,13 @@ define i1 @test14(i32 %length.i, i32 %x.full) {
define i1 @test15(i32 %length.i, i32 %x) {
; CHECK-LABEL: @test15(
-; CHECK: %res = icmp ule i1 %known, %to.prove
-; CHECK: ret i1 %res
-
+; CHECK: [[LARGE:%.*]] = add nuw i32 %x, 100
+; CHECK-NEXT: [[SMALL:%.*]] = add nuw i32 %x, 110
+; CHECK-NEXT: [[KNOWN:%.*]] = icmp ult i32 [[LARGE]], %length.i
+; CHECK-NEXT: [[TO_PROVE:%.*]] = icmp ult i32 [[SMALL]], %length.i
+; CHECK-NEXT: [[RES:%.*]] = icmp ule i1 [[KNOWN]], [[TO_PROVE]]
+; CHECK-NEXT: ret i1 [[RES]]
+;
%large = add nuw i32 %x, 100
%small = add nuw i32 %x, 110
%known = icmp ult i32 %large, %length.i
@@ -207,8 +246,9 @@ define i1 @test15(i32 %length.i, i32 %x) {
; X >=(s) Y == X ==> Y (i1 1 becomes -1 for reasoning)
define i1 @test_sge(i32 %length.i, i32 %i) {
-; CHECK-LABEL: @test_sge
-; CHECK: ret i1 true
+; CHECK-LABEL: @test_sge(
+; CHECK: ret i1 true
+;
%iplus1 = add nsw nuw i32 %i, 1
%var29 = icmp ult i32 %i, %length.i
%var30 = icmp ult i32 %iplus1, %length.i
diff --git a/test/Transforms/InstSimplify/load-relative-32.ll b/test/Transforms/InstSimplify/load-relative-32.ll
new file mode 100644
index 000000000000..a38de8549dbd
--- /dev/null
+++ b/test/Transforms/InstSimplify/load-relative-32.ll
@@ -0,0 +1,19 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
+target triple = "i386-unknown-linux-gnu"
+
+@a = external global i8
+
+@c1 = constant [3 x i32] [i32 0, i32 0,
+i32 sub (i32 ptrtoint (i8* @a to i32), i32 ptrtoint (i32* getelementptr ([3 x i32], [3 x i32]* @c1, i32 0, i32 2) to i32))
+]
+
+; CHECK: @f1
+define i8* @f1() {
+ ; CHECK: ret i8* @a
+ %l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* getelementptr ([3 x i32], [3 x i32]* @c1, i32 0, i32 2) to i8*), i32 0)
+ ret i8* %l
+}
+
+declare i8* @llvm.load.relative.i32(i8*, i32)
diff --git a/test/Transforms/InstSimplify/load-relative.ll b/test/Transforms/InstSimplify/load-relative.ll
new file mode 100644
index 000000000000..3074ede2b697
--- /dev/null
+++ b/test/Transforms/InstSimplify/load-relative.ll
@@ -0,0 +1,75 @@
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = external global i8
+@b = external global i8
+
+@c1 = constant i32 trunc (i64 sub (i64 ptrtoint (i8* @a to i64), i64 ptrtoint (i32* @c1 to i64)) to i32)
+@c2 = constant [7 x i32] [i32 0, i32 0,
+i32 trunc (i64 sub (i64 ptrtoint (i8* @a to i64), i64 ptrtoint (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i32 0, i32 2) to i64)) to i32),
+i32 trunc (i64 sub (i64 ptrtoint (i8* @b to i64), i64 ptrtoint (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i32 0, i32 2) to i64)) to i32),
+i32 trunc (i64 add (i64 ptrtoint (i8* @b to i64), i64 ptrtoint (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i32 0, i32 2) to i64)) to i32),
+i32 trunc (i64 sub (i64 ptrtoint (i8* @b to i64), i64 1) to i32),
+i32 trunc (i64 sub (i64 0, i64 ptrtoint (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i32 0, i32 2) to i64)) to i32)
+]
+
+; CHECK: @f1
+define i8* @f1() {
+ ; CHECK: ret i8* @a
+ %l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* @c1 to i8*), i32 0)
+ ret i8* %l
+}
+
+; CHECK: @f2
+define i8* @f2() {
+ ; CHECK: ret i8* @a
+ %l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i64 0, i64 2) to i8*), i32 0)
+ ret i8* %l
+}
+
+; CHECK: @f3
+define i8* @f3() {
+ ; CHECK: ret i8* @b
+ %l = call i8* @llvm.load.relative.i64(i8* bitcast (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i64 0, i64 2) to i8*), i64 4)
+ ret i8* %l
+}
+
+; CHECK: @f4
+define i8* @f4() {
+ ; CHECK: ret i8* %
+ %l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i64 0, i64 2) to i8*), i32 1)
+ ret i8* %l
+}
+
+; CHECK: @f5
+define i8* @f5() {
+ ; CHECK: ret i8* %
+ %l = call i8* @llvm.load.relative.i32(i8* zeroinitializer, i32 0)
+ ret i8* %l
+}
+
+; CHECK: @f6
+define i8* @f6() {
+ ; CHECK: ret i8* %
+ %l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i64 0, i64 2) to i8*), i32 8)
+ ret i8* %l
+}
+
+; CHECK: @f7
+define i8* @f7() {
+ ; CHECK: ret i8* %
+ %l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i64 0, i64 2) to i8*), i32 12)
+ ret i8* %l
+}
+
+; CHECK: @f8
+define i8* @f8() {
+ ; CHECK: ret i8* %
+ %l = call i8* @llvm.load.relative.i32(i8* bitcast (i32* getelementptr ([7 x i32], [7 x i32]* @c2, i64 0, i64 2) to i8*), i32 16)
+ ret i8* %l
+}
+
+declare i8* @llvm.load.relative.i32(i8*, i32)
+declare i8* @llvm.load.relative.i64(i8*, i64)
diff --git a/test/Transforms/InstSimplify/load.ll b/test/Transforms/InstSimplify/load.ll
index ab87d4b9c535..8b2b5a17660f 100644
--- a/test/Transforms/InstSimplify/load.ll
+++ b/test/Transforms/InstSimplify/load.ll
@@ -1,19 +1,30 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
; RUN: opt < %s -instsimplify -S | FileCheck %s
@zeroinit = constant {} zeroinitializer
@undef = constant {} undef
define i32 @crash_on_zeroinit() {
-; CHECK-LABEL: @crash_on_zeroinit
-; CHECK: ret i32 0
+; CHECK-LABEL: @crash_on_zeroinit(
+; CHECK: ret i32 0
+;
%load = load i32, i32* bitcast ({}* @zeroinit to i32*)
ret i32 %load
}
define i32 @crash_on_undef() {
-; CHECK-LABEL: @crash_on_undef
-; CHECK: ret i32 undef
+; CHECK-LABEL: @crash_on_undef(
+; CHECK: ret i32 undef
+;
%load = load i32, i32* bitcast ({}* @undef to i32*)
ret i32 %load
}
+@GV = private constant [8 x i32] [i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49]
+
+define <8 x i32> @partial_load() {
+; CHECK-LABEL: @partial_load(
+; CHECK: ret <8 x i32> <i32 0, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48>
+ %load = load <8 x i32>, <8 x i32>* bitcast (i32* getelementptr ([8 x i32], [8 x i32]* @GV, i64 0, i64 -1) to <8 x i32>*)
+ ret <8 x i32> %load
+}
diff --git a/test/Transforms/InstSimplify/maxmin.ll b/test/Transforms/InstSimplify/maxmin.ll
index 3c643ed3e03e..3fcbfec2f63a 100644
--- a/test/Transforms/InstSimplify/maxmin.ll
+++ b/test/Transforms/InstSimplify/maxmin.ll
@@ -1,269 +1,302 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
; RUN: opt < %s -instsimplify -S | FileCheck %s
define i1 @max1(i32 %x, i32 %y) {
; CHECK-LABEL: @max1(
+; CHECK: ret i1 false
+;
%c = icmp sgt i32 %x, %y
%m = select i1 %c, i32 %x, i32 %y
%r = icmp slt i32 %m, %x
ret i1 %r
-; CHECK: ret i1 false
}
define i1 @max2(i32 %x, i32 %y) {
; CHECK-LABEL: @max2(
+; CHECK: ret i1 true
+;
%c = icmp sge i32 %x, %y
%m = select i1 %c, i32 %x, i32 %y
%r = icmp sge i32 %m, %x
ret i1 %r
-; CHECK: ret i1 true
}
define i1 @max3(i32 %x, i32 %y) {
; CHECK-LABEL: @max3(
+; CHECK: ret i1 false
+;
%c = icmp ugt i32 %x, %y
%m = select i1 %c, i32 %x, i32 %y
%r = icmp ult i32 %m, %x
ret i1 %r
-; CHECK: ret i1 false
}
define i1 @max4(i32 %x, i32 %y) {
; CHECK-LABEL: @max4(
+; CHECK: ret i1 true
+;
%c = icmp uge i32 %x, %y
%m = select i1 %c, i32 %x, i32 %y
%r = icmp uge i32 %m, %x
ret i1 %r
-; CHECK: ret i1 true
}
define i1 @max5(i32 %x, i32 %y) {
; CHECK-LABEL: @max5(
+; CHECK: ret i1 false
+;
%c = icmp sgt i32 %x, %y
%m = select i1 %c, i32 %x, i32 %y
%r = icmp sgt i32 %x, %m
ret i1 %r
-; CHECK: ret i1 false
}
define i1 @max6(i32 %x, i32 %y) {
; CHECK-LABEL: @max6(
+; CHECK: ret i1 true
+;
%c = icmp sge i32 %x, %y
%m = select i1 %c, i32 %x, i32 %y
%r = icmp sle i32 %x, %m
ret i1 %r
-; CHECK: ret i1 true
}
define i1 @max7(i32 %x, i32 %y) {
; CHECK-LABEL: @max7(
+; CHECK: ret i1 false
+;
%c = icmp ugt i32 %x, %y
%m = select i1 %c, i32 %x, i32 %y
%r = icmp ugt i32 %x, %m
ret i1 %r
-; CHECK: ret i1 false
}
define i1 @max8(i32 %x, i32 %y) {
; CHECK-LABEL: @max8(
+; CHECK: ret i1 true
+;
%c = icmp uge i32 %x, %y
%m = select i1 %c, i32 %x, i32 %y
%r = icmp ule i32 %x, %m
ret i1 %r
-; CHECK: ret i1 true
}
define i1 @min1(i32 %x, i32 %y) {
; CHECK-LABEL: @min1(
+; CHECK: ret i1 false
+;
%c = icmp sgt i32 %x, %y
%m = select i1 %c, i32 %y, i32 %x
%r = icmp sgt i32 %m, %x
ret i1 %r
-; CHECK: ret i1 false
}
define i1 @min2(i32 %x, i32 %y) {
; CHECK-LABEL: @min2(
+; CHECK: ret i1 true
+;
%c = icmp sge i32 %x, %y
%m = select i1 %c, i32 %y, i32 %x
%r = icmp sle i32 %m, %x
ret i1 %r
-; CHECK: ret i1 true
}
define i1 @min3(i32 %x, i32 %y) {
; CHECK-LABEL: @min3(
+; CHECK: ret i1 false
+;
%c = icmp ugt i32 %x, %y
%m = select i1 %c, i32 %y, i32 %x
%r = icmp ugt i32 %m, %x
ret i1 %r
-; CHECK: ret i1 false
}
define i1 @min4(i32 %x, i32 %y) {
; CHECK-LABEL: @min4(
+; CHECK: ret i1 true
+;
%c = icmp uge i32 %x, %y
%m = select i1 %c, i32 %y, i32 %x
%r = icmp ule i32 %m, %x
ret i1 %r
-; CHECK: ret i1 true
}
define i1 @min5(i32 %x, i32 %y) {
; CHECK-LABEL: @min5(
+; CHECK: ret i1 false
+;
%c = icmp sgt i32 %x, %y
%m = select i1 %c, i32 %y, i32 %x
%r = icmp slt i32 %x, %m
ret i1 %r
-; CHECK: ret i1 false
}
define i1 @min6(i32 %x, i32 %y) {
; CHECK-LABEL: @min6(
+; CHECK: ret i1 true
+;
%c = icmp sge i32 %x, %y
%m = select i1 %c, i32 %y, i32 %x
%r = icmp sge i32 %x, %m
ret i1 %r
-; CHECK: ret i1 true
}
define i1 @min7(i32 %x, i32 %y) {
; CHECK-LABEL: @min7(
+; CHECK: ret i1 false
+;
%c = icmp ugt i32 %x, %y
%m = select i1 %c, i32 %y, i32 %x
%r = icmp ult i32 %x, %m
ret i1 %r
-; CHECK: ret i1 false
}
define i1 @min8(i32 %x, i32 %y) {
; CHECK-LABEL: @min8(
+; CHECK: ret i1 true
+;
%c = icmp uge i32 %x, %y
%m = select i1 %c, i32 %y, i32 %x
%r = icmp uge i32 %x, %m
ret i1 %r
-; CHECK: ret i1 true
}
define i1 @maxmin1(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @maxmin1(
+; CHECK: ret i1 true
+;
%c1 = icmp sge i32 %x, %y
%max = select i1 %c1, i32 %x, i32 %y
%c2 = icmp sge i32 %x, %z
%min = select i1 %c2, i32 %z, i32 %x
%c = icmp sge i32 %max, %min
ret i1 %c
-; CHECK: ret i1 true
}
define i1 @maxmin2(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @maxmin2(
+; CHECK: ret i1 false
+;
%c1 = icmp sge i32 %x, %y
%max = select i1 %c1, i32 %x, i32 %y
%c2 = icmp sge i32 %x, %z
%min = select i1 %c2, i32 %z, i32 %x
%c = icmp sgt i32 %min, %max
ret i1 %c
-; CHECK: ret i1 false
}
define i1 @maxmin3(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @maxmin3(
+; CHECK: ret i1 true
+;
%c1 = icmp sge i32 %x, %y
%max = select i1 %c1, i32 %x, i32 %y
%c2 = icmp sge i32 %x, %z
%min = select i1 %c2, i32 %z, i32 %x
%c = icmp sle i32 %min, %max
ret i1 %c
-; CHECK: ret i1 true
}
define i1 @maxmin4(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @maxmin4(
+; CHECK: ret i1 false
+;
%c1 = icmp sge i32 %x, %y
%max = select i1 %c1, i32 %x, i32 %y
%c2 = icmp sge i32 %x, %z
%min = select i1 %c2, i32 %z, i32 %x
%c = icmp slt i32 %max, %min
ret i1 %c
-; CHECK: ret i1 false
}
define i1 @maxmin5(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @maxmin5(
+; CHECK: ret i1 true
+;
%c1 = icmp uge i32 %x, %y
%max = select i1 %c1, i32 %x, i32 %y
%c2 = icmp uge i32 %x, %z
%min = select i1 %c2, i32 %z, i32 %x
%c = icmp uge i32 %max, %min
ret i1 %c
-; CHECK: ret i1 true
}
define i1 @maxmin6(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @maxmin6(
+; CHECK: ret i1 false
+;
%c1 = icmp uge i32 %x, %y
%max = select i1 %c1, i32 %x, i32 %y
%c2 = icmp uge i32 %x, %z
%min = select i1 %c2, i32 %z, i32 %x
%c = icmp ugt i32 %min, %max
ret i1 %c
-; CHECK: ret i1 false
}
define i1 @maxmin7(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @maxmin7(
+; CHECK: ret i1 true
+;
%c1 = icmp uge i32 %x, %y
%max = select i1 %c1, i32 %x, i32 %y
%c2 = icmp uge i32 %x, %z
%min = select i1 %c2, i32 %z, i32 %x
%c = icmp ule i32 %min, %max
ret i1 %c
-; CHECK: ret i1 true
}
define i1 @maxmin8(i32 %x, i32 %y, i32 %z) {
; CHECK-LABEL: @maxmin8(
+; CHECK: ret i1 false
+;
%c1 = icmp uge i32 %x, %y
%max = select i1 %c1, i32 %x, i32 %y
%c2 = icmp uge i32 %x, %z
%min = select i1 %c2, i32 %z, i32 %x
%c = icmp ult i32 %max, %min
ret i1 %c
-; CHECK: ret i1 false
}
define i1 @eqcmp1(i32 %x, i32 %y) {
; CHECK-LABEL: @eqcmp1(
+; CHECK: [[C:%.*]] = icmp sge i32 %x, %y
+; CHECK-NEXT: ret i1 [[C]]
+;
%c = icmp sge i32 %x, %y
%max = select i1 %c, i32 %x, i32 %y
%r = icmp eq i32 %max, %x
ret i1 %r
-; CHECK: ret i1 %c
}
define i1 @eqcmp2(i32 %x, i32 %y) {
; CHECK-LABEL: @eqcmp2(
+; CHECK: [[C:%.*]] = icmp sge i32 %x, %y
+; CHECK-NEXT: ret i1 [[C]]
+;
%c = icmp sge i32 %x, %y
%max = select i1 %c, i32 %x, i32 %y
%r = icmp eq i32 %x, %max
ret i1 %r
-; CHECK: ret i1 %c
}
define i1 @eqcmp3(i32 %x, i32 %y) {
; CHECK-LABEL: @eqcmp3(
+; CHECK: [[C:%.*]] = icmp uge i32 %x, %y
+; CHECK-NEXT: ret i1 [[C]]
+;
%c = icmp uge i32 %x, %y
%max = select i1 %c, i32 %x, i32 %y
%r = icmp eq i32 %max, %x
ret i1 %r
-; CHECK: ret i1 %c
}
define i1 @eqcmp4(i32 %x, i32 %y) {
; CHECK-LABEL: @eqcmp4(
+; CHECK: [[C:%.*]] = icmp uge i32 %x, %y
+; CHECK-NEXT: ret i1 [[C]]
+;
%c = icmp uge i32 %x, %y
%max = select i1 %c, i32 %x, i32 %y
%r = icmp eq i32 %x, %max
ret i1 %r
-; CHECK: ret i1 %c
}
diff --git a/test/Transforms/InstSimplify/past-the-end.ll b/test/Transforms/InstSimplify/past-the-end.ll
index b676e9db2c77..b47db7defcbd 100644
--- a/test/Transforms/InstSimplify/past-the-end.ll
+++ b/test/Transforms/InstSimplify/past-the-end.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
; RUN: opt < %s -instsimplify -S | FileCheck %s
target datalayout = "p:32:32"
@@ -9,21 +10,23 @@ target datalayout = "p:32:32"
; Comparing base addresses of two distinct globals. Never equal.
define zeroext i1 @no_offsets() {
+; CHECK-LABEL: @no_offsets(
+; CHECK: ret i1 false
+;
%t = icmp eq i32* @opte_a, @opte_b
ret i1 %t
- ; CHECK: no_offsets(
- ; CHECK: ret i1 false
}
; Comparing past-the-end addresses of two distinct globals. Never equal.
define zeroext i1 @both_past_the_end() {
+; CHECK-LABEL: @both_past_the_end(
+; CHECK: ret i1 icmp eq (i32* getelementptr inbounds (i32, i32* @opte_a, i32 1), i32* getelementptr inbounds (i32, i32* @opte_b, i32 1))
+;
%x = getelementptr i32, i32* @opte_a, i32 1
%y = getelementptr i32, i32* @opte_b, i32 1
%t = icmp eq i32* %x, %y
ret i1 %t
- ; CHECK: both_past_the_end(
- ; CHECK-NOT: ret i1 true
; TODO: refine this
}
@@ -31,35 +34,43 @@ define zeroext i1 @both_past_the_end() {
; of another. Can't fold this.
define zeroext i1 @just_one_past_the_end() {
+; CHECK-LABEL: @just_one_past_the_end(
+; CHECK: ret i1 icmp eq (i32* getelementptr inbounds (i32, i32* @opte_a, i32 1), i32* @opte_b)
+;
%x = getelementptr i32, i32* @opte_a, i32 1
%t = icmp eq i32* %x, @opte_b
ret i1 %t
- ; CHECK: just_one_past_the_end(
- ; CHECK: ret i1 icmp eq (i32* getelementptr inbounds (i32, i32* @opte_a, i32 1), i32* @opte_b)
}
; Comparing base addresses of two distinct allocas. Never equal.
define zeroext i1 @no_alloca_offsets() {
+; CHECK-LABEL: @no_alloca_offsets(
+; CHECK: ret i1 false
+;
%m = alloca i32
%n = alloca i32
%t = icmp eq i32* %m, %n
ret i1 %t
- ; CHECK: no_alloca_offsets(
- ; CHECK: ret i1 false
}
; Comparing past-the-end addresses of two distinct allocas. Never equal.
define zeroext i1 @both_past_the_end_alloca() {
+; CHECK-LABEL: @both_past_the_end_alloca(
+; CHECK: [[M:%.*]] = alloca i32
+; CHECK-NEXT: [[N:%.*]] = alloca i32
+; CHECK-NEXT: [[X:%.*]] = getelementptr i32, i32* [[M]], i32 1
+; CHECK-NEXT: [[Y:%.*]] = getelementptr i32, i32* [[N]], i32 1
+; CHECK-NEXT: [[T:%.*]] = icmp eq i32* [[X]], [[Y]]
+; CHECK-NEXT: ret i1 [[T]]
+;
%m = alloca i32
%n = alloca i32
%x = getelementptr i32, i32* %m, i32 1
%y = getelementptr i32, i32* %n, i32 1
%t = icmp eq i32* %x, %y
ret i1 %t
- ; CHECK: both_past_the_end_alloca(
- ; CHECK-NOT: ret i1 true
; TODO: refine this
}
@@ -67,11 +78,16 @@ define zeroext i1 @both_past_the_end_alloca() {
; of another. Can't fold this.
define zeroext i1 @just_one_past_the_end_alloca() {
+; CHECK-LABEL: @just_one_past_the_end_alloca(
+; CHECK: [[M:%.*]] = alloca i32
+; CHECK-NEXT: [[N:%.*]] = alloca i32
+; CHECK-NEXT: [[X:%.*]] = getelementptr i32, i32* [[M]], i32 1
+; CHECK-NEXT: [[T:%.*]] = icmp eq i32* [[X]], [[N]]
+; CHECK-NEXT: ret i1 [[T]]
+;
%m = alloca i32
%n = alloca i32
%x = getelementptr i32, i32* %m, i32 1
%t = icmp eq i32* %x, %n
ret i1 %t
- ; CHECK: just_one_past_the_end_alloca(
- ; CHECK: ret i1 %t
}
diff --git a/test/Transforms/InstSimplify/phi.ll b/test/Transforms/InstSimplify/phi.ll
index 5b7aaa93caf1..b0040ffeb033 100644
--- a/test/Transforms/InstSimplify/phi.ll
+++ b/test/Transforms/InstSimplify/phi.ll
@@ -1,8 +1,11 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
; RUN: opt < %s -instsimplify -S | FileCheck %s
; PR12189
define i1 @test1(i32 %x) {
; CHECK-LABEL: @test1(
+; CHECK: ret i1 %e
+;
br i1 true, label %a, label %b
a:
@@ -18,5 +21,4 @@ c:
%d = urem i32 %cc, 2
%e = icmp eq i32 %d, 0
ret i1 %e
-; CHECK: ret i1 %e
}
diff --git a/test/Transforms/InstSimplify/ptr_diff.ll b/test/Transforms/InstSimplify/ptr_diff.ll
index 6c27e6f44678..c57fab997866 100644
--- a/test/Transforms/InstSimplify/ptr_diff.ll
+++ b/test/Transforms/InstSimplify/ptr_diff.ll
@@ -1,11 +1,12 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
; RUN: opt < %s -instsimplify -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
define i64 @ptrdiff1(i8* %ptr) {
; CHECK-LABEL: @ptrdiff1(
-; CHECK-NEXT: ret i64 42
-
+; CHECK: ret i64 42
+;
%first = getelementptr inbounds i8, i8* %ptr, i32 0
%last = getelementptr inbounds i8, i8* %ptr, i32 42
%first.int = ptrtoint i8* %first to i64
@@ -16,8 +17,8 @@ define i64 @ptrdiff1(i8* %ptr) {
define i64 @ptrdiff2(i8* %ptr) {
; CHECK-LABEL: @ptrdiff2(
-; CHECK-NEXT: ret i64 42
-
+; CHECK: ret i64 42
+;
%first1 = getelementptr inbounds i8, i8* %ptr, i32 0
%first2 = getelementptr inbounds i8, i8* %first1, i32 1
%first3 = getelementptr inbounds i8, i8* %first2, i32 2
@@ -35,10 +36,12 @@ define i64 @ptrdiff2(i8* %ptr) {
define i64 @ptrdiff3(i8* %ptr) {
; Don't bother with non-inbounds GEPs.
; CHECK-LABEL: @ptrdiff3(
-; CHECK: getelementptr
-; CHECK: sub
-; CHECK: ret
-
+; CHECK: [[LAST:%.*]] = getelementptr i8, i8* %ptr, i32 42
+; CHECK-NEXT: [[FIRST_INT:%.*]] = ptrtoint i8* %ptr to i64
+; CHECK-NEXT: [[LAST_INT:%.*]] = ptrtoint i8* [[LAST]] to i64
+; CHECK-NEXT: [[DIFF:%.*]] = sub i64 [[LAST_INT]], [[FIRST_INT]]
+; CHECK-NEXT: ret i64 [[DIFF]]
+;
%first = getelementptr i8, i8* %ptr, i32 0
%last = getelementptr i8, i8* %ptr, i32 42
%first.int = ptrtoint i8* %first to i64
@@ -50,7 +53,8 @@ define i64 @ptrdiff3(i8* %ptr) {
define <4 x i32> @ptrdiff4(<4 x i8*> %arg) nounwind {
; Handle simple cases of vectors of pointers.
; CHECK-LABEL: @ptrdiff4(
-; CHECK: ret <4 x i32> zeroinitializer
+; CHECK: ret <4 x i32> zeroinitializer
+;
%p1 = ptrtoint <4 x i8*> %arg to <4 x i32>
%bc = bitcast <4 x i8*> %arg to <4 x i32*>
%p2 = ptrtoint <4 x i32*> %bc to <4 x i32>
@@ -63,6 +67,10 @@ define <4 x i32> @ptrdiff4(<4 x i8*> %arg) nounwind {
@global = internal global %struct.ham zeroinitializer, align 4
define i32 @ptrdiff5() nounwind {
+; CHECK-LABEL: @ptrdiff5(
+; CHECK: bb:
+; CHECK-NEXT: ret i32 0
+;
bb:
%tmp = getelementptr inbounds %struct.ham, %struct.ham* @global, i32 0, i32 1
%tmp1 = getelementptr inbounds [2 x [2 x i32]], [2 x [2 x i32]]* %tmp, i32 0, i32 0
@@ -73,6 +81,4 @@ bb:
%tmp6 = ptrtoint [2 x i32]* %tmp5 to i32
%tmp7 = sub i32 %tmp3, %tmp6
ret i32 %tmp7
-; CHECK-LABEL: @ptrdiff5(
-; CHECK: ret i32 0
}
diff --git a/test/Transforms/InstSimplify/reassociate.ll b/test/Transforms/InstSimplify/reassociate.ll
index d44f7155ffda..335df32a5054 100644
--- a/test/Transforms/InstSimplify/reassociate.ll
+++ b/test/Transforms/InstSimplify/reassociate.ll
@@ -1,195 +1,223 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
; RUN: opt < %s -instsimplify -S | FileCheck %s
define i32 @add1(i32 %x) {
; CHECK-LABEL: @add1(
+; CHECK: ret i32 %x
+;
; (X + -1) + 1 -> X
%l = add i32 %x, -1
%r = add i32 %l, 1
ret i32 %r
-; CHECK: ret i32 %x
}
define i32 @and1(i32 %x, i32 %y) {
; CHECK-LABEL: @and1(
+; CHECK: [[L:%.*]] = and i32 %x, %y
+; CHECK-NEXT: ret i32 [[L]]
+;
; (X & Y) & X -> X & Y
%l = and i32 %x, %y
%r = and i32 %l, %x
ret i32 %r
-; CHECK: ret i32 %l
}
define i32 @and2(i32 %x, i32 %y) {
; CHECK-LABEL: @and2(
+; CHECK: [[R:%.*]] = and i32 %x, %y
+; CHECK-NEXT: ret i32 [[R]]
+;
; X & (X & Y) -> X & Y
%r = and i32 %x, %y
%l = and i32 %x, %r
ret i32 %l
-; CHECK: ret i32 %r
}
define i32 @or1(i32 %x, i32 %y) {
; CHECK-LABEL: @or1(
+; CHECK: [[L:%.*]] = or i32 %x, %y
+; CHECK-NEXT: ret i32 [[L]]
+;
; (X | Y) | X -> X | Y
%l = or i32 %x, %y
%r = or i32 %l, %x
ret i32 %r
-; CHECK: ret i32 %l
}
define i32 @or2(i32 %x, i32 %y) {
; CHECK-LABEL: @or2(
+; CHECK: [[R:%.*]] = or i32 %x, %y
+; CHECK-NEXT: ret i32 [[R]]
+;
; X | (X | Y) -> X | Y
%r = or i32 %x, %y
%l = or i32 %x, %r
ret i32 %l
-; CHECK: ret i32 %r
}
define i32 @xor1(i32 %x, i32 %y) {
; CHECK-LABEL: @xor1(
+; CHECK: ret i32 %y
+;
; (X ^ Y) ^ X = Y
%l = xor i32 %x, %y
%r = xor i32 %l, %x
ret i32 %r
-; CHECK: ret i32 %y
}
define i32 @xor2(i32 %x, i32 %y) {
; CHECK-LABEL: @xor2(
+; CHECK: ret i32 %y
+;
; X ^ (X ^ Y) = Y
%r = xor i32 %x, %y
%l = xor i32 %x, %r
ret i32 %l
-; CHECK: ret i32 %y
}
define i32 @sub1(i32 %x, i32 %y) {
; CHECK-LABEL: @sub1(
+; CHECK: ret i32 %y
+;
%d = sub i32 %x, %y
%r = sub i32 %x, %d
ret i32 %r
-; CHECK: ret i32 %y
}
define i32 @sub2(i32 %x) {
; CHECK-LABEL: @sub2(
+; CHECK: ret i32 -1
+;
; X - (X + 1) -> -1
%xp1 = add i32 %x, 1
%r = sub i32 %x, %xp1
ret i32 %r
-; CHECK: ret i32 -1
}
define i32 @sub3(i32 %x, i32 %y) {
; CHECK-LABEL: @sub3(
+; CHECK: ret i32 %x
+;
; ((X + 1) + Y) - (Y + 1) -> X
%xp1 = add i32 %x, 1
%lhs = add i32 %xp1, %y
%rhs = add i32 %y, 1
%r = sub i32 %lhs, %rhs
ret i32 %r
-; CHECK: ret i32 %x
}
define i32 @sdiv1(i32 %x, i32 %y) {
; CHECK-LABEL: @sdiv1(
+; CHECK: ret i32 %x
+;
; (no overflow X * Y) / Y -> X
%mul = mul nsw i32 %x, %y
%r = sdiv i32 %mul, %y
ret i32 %r
-; CHECK: ret i32 %x
}
define i32 @sdiv2(i32 %x, i32 %y) {
; CHECK-LABEL: @sdiv2(
+; CHECK: [[DIV:%.*]] = sdiv i32 %x, %y
+; CHECK-NEXT: ret i32 [[DIV]]
+;
; (((X / Y) * Y) / Y) -> X / Y
%div = sdiv i32 %x, %y
%mul = mul i32 %div, %y
%r = sdiv i32 %mul, %y
ret i32 %r
-; CHECK: ret i32 %div
}
define i32 @sdiv3(i32 %x, i32 %y) {
; CHECK-LABEL: @sdiv3(
+; CHECK: ret i32 0
+;
; (X rem Y) / Y -> 0
%rem = srem i32 %x, %y
%div = sdiv i32 %rem, %y
ret i32 %div
-; CHECK: ret i32 0
}
define i32 @sdiv4(i32 %x, i32 %y) {
; CHECK-LABEL: @sdiv4(
+; CHECK: ret i32 %x
+;
; (X / Y) * Y -> X if the division is exact
%div = sdiv exact i32 %x, %y
%mul = mul i32 %div, %y
ret i32 %mul
-; CHECK: ret i32 %x
}
define i32 @sdiv5(i32 %x, i32 %y) {
; CHECK-LABEL: @sdiv5(
+; CHECK: ret i32 %x
+;
; Y * (X / Y) -> X if the division is exact
%div = sdiv exact i32 %x, %y
%mul = mul i32 %y, %div
ret i32 %mul
-; CHECK: ret i32 %x
}
define i32 @udiv1(i32 %x, i32 %y) {
; CHECK-LABEL: @udiv1(
+; CHECK: ret i32 %x
+;
; (no overflow X * Y) / Y -> X
%mul = mul nuw i32 %x, %y
%r = udiv i32 %mul, %y
ret i32 %r
-; CHECK: ret i32 %x
}
define i32 @udiv2(i32 %x, i32 %y) {
; CHECK-LABEL: @udiv2(
+; CHECK: [[DIV:%.*]] = udiv i32 %x, %y
+; CHECK-NEXT: ret i32 [[DIV]]
+;
; (((X / Y) * Y) / Y) -> X / Y
%div = udiv i32 %x, %y
%mul = mul i32 %div, %y
%r = udiv i32 %mul, %y
ret i32 %r
-; CHECK: ret i32 %div
}
define i32 @udiv3(i32 %x, i32 %y) {
; CHECK-LABEL: @udiv3(
+; CHECK: ret i32 0
+;
; (X rem Y) / Y -> 0
%rem = urem i32 %x, %y
%div = udiv i32 %rem, %y
ret i32 %div
-; CHECK: ret i32 0
}
define i32 @udiv4(i32 %x, i32 %y) {
; CHECK-LABEL: @udiv4(
+; CHECK: ret i32 %x
+;
; (X / Y) * Y -> X if the division is exact
%div = udiv exact i32 %x, %y
%mul = mul i32 %div, %y
ret i32 %mul
-; CHECK: ret i32 %x
}
define i32 @udiv5(i32 %x, i32 %y) {
; CHECK-LABEL: @udiv5(
+; CHECK: ret i32 %x
+;
; Y * (X / Y) -> X if the division is exact
%div = udiv exact i32 %x, %y
%mul = mul i32 %y, %div
ret i32 %mul
-; CHECK: ret i32 %x
}
define i16 @trunc1(i32 %x) {
; CHECK-LABEL: @trunc1(
+; CHECK: ret i16 1
+;
%y = add i32 %x, 1
%tx = trunc i32 %x to i16
%ty = trunc i32 %y to i16
%d = sub i16 %ty, %tx
ret i16 %d
-; CHECK: ret i16 1
}
diff --git a/test/Transforms/InstSimplify/rem.ll b/test/Transforms/InstSimplify/rem.ll
index f5ea45107e2a..df3f659b782e 100644
--- a/test/Transforms/InstSimplify/rem.ll
+++ b/test/Transforms/InstSimplify/rem.ll
@@ -1,25 +1,29 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
; RUN: opt < %s -instsimplify -S | FileCheck %s
define i32 @select1(i32 %x, i1 %b) {
; CHECK-LABEL: @select1(
+; CHECK: ret i32 0
+;
%rhs = select i1 %b, i32 %x, i32 1
%rem = srem i32 %x, %rhs
ret i32 %rem
-; CHECK: ret i32 0
}
define i32 @select2(i32 %x, i1 %b) {
; CHECK-LABEL: @select2(
+; CHECK: ret i32 0
+;
%rhs = select i1 %b, i32 %x, i32 1
%rem = urem i32 %x, %rhs
ret i32 %rem
-; CHECK: ret i32 0
}
define i32 @rem1(i32 %x, i32 %n) {
; CHECK-LABEL: @rem1(
-; CHECK-NEXT: %mod = srem i32 %x, %n
-; CHECK-NEXT: ret i32 %mod
+; CHECK: [[MOD:%.*]] = srem i32 %x, %n
+; CHECK-NEXT: ret i32 [[MOD]]
+;
%mod = srem i32 %x, %n
%mod1 = srem i32 %mod, %n
ret i32 %mod1
@@ -27,8 +31,9 @@ define i32 @rem1(i32 %x, i32 %n) {
define i32 @rem2(i32 %x, i32 %n) {
; CHECK-LABEL: @rem2(
-; CHECK-NEXT: %mod = urem i32 %x, %n
-; CHECK-NEXT: ret i32 %mod
+; CHECK: [[MOD:%.*]] = urem i32 %x, %n
+; CHECK-NEXT: ret i32 [[MOD]]
+;
%mod = urem i32 %x, %n
%mod1 = urem i32 %mod, %n
ret i32 %mod1
@@ -36,9 +41,10 @@ define i32 @rem2(i32 %x, i32 %n) {
define i32 @rem3(i32 %x, i32 %n) {
; CHECK-LABEL: @rem3(
-; CHECK-NEXT: %[[srem:.*]] = srem i32 %x, %n
-; CHECK-NEXT: %[[urem:.*]] = urem i32 %[[srem]], %n
-; CHECK-NEXT: ret i32 %[[urem]]
+; CHECK: [[MOD:%.*]] = srem i32 %x, %n
+; CHECK-NEXT: [[MOD1:%.*]] = urem i32 [[MOD]], %n
+; CHECK-NEXT: ret i32 [[MOD1]]
+;
%mod = srem i32 %x, %n
%mod1 = urem i32 %mod, %n
ret i32 %mod1
diff --git a/test/Transforms/InstSimplify/returned.ll b/test/Transforms/InstSimplify/returned.ll
new file mode 100644
index 000000000000..0e89e91085dc
--- /dev/null
+++ b/test/Transforms/InstSimplify/returned.ll
@@ -0,0 +1,30 @@
+; RUN: opt -instsimplify -S < %s | FileCheck %s
+
+define i1 @bitcast() {
+; CHECK-LABEL: @bitcast(
+ %a = alloca i32
+ %b = alloca i64
+ %x = bitcast i32* %a to i8*
+ %z = bitcast i64* %b to i8*
+ %y = call i8* @func1(i8* %z)
+ %cmp = icmp eq i8* %x, %y
+ ret i1 %cmp
+; CHECK-NEXT: ret i1 false
+}
+
+%gept = type { i32, i32 }
+
+define i1 @gep3() {
+; CHECK-LABEL: @gep3(
+ %x = alloca %gept, align 8
+ %a = getelementptr %gept, %gept* %x, i64 0, i32 0
+ %y = call %gept* @func2(%gept* %x)
+ %b = getelementptr %gept, %gept* %y, i64 0, i32 1
+ %equal = icmp eq i32* %a, %b
+ ret i1 %equal
+; CHECK-NEXT: ret i1 false
+}
+
+declare i8* @func1(i8* returned) nounwind readnone
+declare %gept* @func2(%gept* returned) nounwind readnone
+
diff --git a/test/Transforms/InstSimplify/shift-128-kb.ll b/test/Transforms/InstSimplify/shift-128-kb.ll
index 3f69ecccaf5b..76f1da57bbf9 100644
--- a/test/Transforms/InstSimplify/shift-128-kb.ll
+++ b/test/Transforms/InstSimplify/shift-128-kb.ll
@@ -1,9 +1,21 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
; RUN: opt -S -instsimplify < %s | FileCheck %s
target datalayout = "E-m:e-i64:64-n32:64"
target triple = "powerpc64-unknown-linux-gnu"
define zeroext i1 @_Z10isNegativemj(i64 %Val, i32 zeroext %IntegerBitWidth) {
+; CHECK-LABEL: @_Z10isNegativemj(
+; CHECK: [[CONV:%.*]] = zext i32 %IntegerBitWidth to i64
+; CHECK-NEXT: [[SUB:%.*]] = sub i64 128, [[CONV]]
+; CHECK-NEXT: [[CONV1:%.*]] = trunc i64 [[SUB]] to i32
+; CHECK-NEXT: [[CONV2:%.*]] = zext i64 %Val to i128
+; CHECK-NEXT: [[SH_PROM:%.*]] = zext i32 [[CONV1]] to i128
+; CHECK-NEXT: [[SHL:%.*]] = shl i128 [[CONV2]], [[SH_PROM]]
+; CHECK-NEXT: [[SHR:%.*]] = ashr i128 [[SHL]], [[SH_PROM]]
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i128 [[SHR]], 0
+; CHECK-NEXT: ret i1 [[CMP]]
+;
entry:
%conv = zext i32 %IntegerBitWidth to i64
%sub = sub i64 128, %conv
@@ -16,7 +28,3 @@ entry:
ret i1 %cmp
}
-; CHECK-LABEL: @_Z10isNegativemj
-; CHECK-NOT: ret i1 false
-; CHECK: ret i1 %cmp
-
diff --git a/test/Transforms/InstSimplify/shift-knownbits.ll b/test/Transforms/InstSimplify/shift-knownbits.ll
new file mode 100644
index 000000000000..f50ea0582c6c
--- /dev/null
+++ b/test/Transforms/InstSimplify/shift-knownbits.ll
@@ -0,0 +1,147 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+; If any bits of the shift amount are known to make it exceed or equal
+; the number of bits in the type, the shift causes undefined behavior.
+
+define i32 @shl_amount_is_known_bogus(i32 %a, i32 %b) {
+; CHECK-LABEL: @shl_amount_is_known_bogus(
+; CHECK-NEXT: ret i32 undef
+;
+ %or = or i32 %b, 32
+ %shl = shl i32 %a, %or
+ ret i32 %shl
+}
+
+; Check some weird types and the other shift ops.
+
+define i31 @lshr_amount_is_known_bogus(i31 %a, i31 %b) {
+; CHECK-LABEL: @lshr_amount_is_known_bogus(
+; CHECK-NEXT: ret i31 undef
+;
+ %or = or i31 %b, 31
+ %shr = lshr i31 %a, %or
+ ret i31 %shr
+}
+
+define i33 @ashr_amount_is_known_bogus(i33 %a, i33 %b) {
+; CHECK-LABEL: @ashr_amount_is_known_bogus(
+; CHECK-NEXT: ret i33 undef
+;
+ %or = or i33 %b, 33
+ %shr = ashr i33 %a, %or
+ ret i33 %shr
+}
+
+
+; If all valid bits of the shift amount are known 0, there's no shift.
+; It doesn't matter if high bits are set because that would be undefined.
+; Therefore, the only possible valid result of these shifts is %a.
+
+define i16 @ashr_amount_is_zero(i16 %a, i16 %b) {
+; CHECK-LABEL: @ashr_amount_is_zero(
+; CHECK-NEXT: ret i16 %a
+;
+ %and = and i16 %b, 65520 ; 0xfff0
+ %shr = ashr i16 %a, %and
+ ret i16 %shr
+}
+
+define i300 @lshr_amount_is_zero(i300 %a, i300 %b) {
+; CHECK-LABEL: @lshr_amount_is_zero(
+; CHECK-NEXT: ret i300 %a
+;
+ %and = and i300 %b, 2048
+ %shr = lshr i300 %a, %and
+ ret i300 %shr
+}
+
+define i9 @shl_amount_is_zero(i9 %a, i9 %b) {
+; CHECK-LABEL: @shl_amount_is_zero(
+; CHECK-NEXT: ret i9 %a
+;
+ %and = and i9 %b, 496 ; 0x1f0
+ %shl = shl i9 %a, %and
+ ret i9 %shl
+}
+
+
+; Verify that we've calculated the log2 boundary of valid bits correctly for a weird type.
+
+define i9 @shl_amount_is_not_known_zero(i9 %a, i9 %b) {
+; CHECK-LABEL: @shl_amount_is_not_known_zero(
+; CHECK-NEXT: [[AND:%.*]] = and i9 %b, -8
+; CHECK-NEXT: [[SHL:%.*]] = shl i9 %a, [[AND]]
+; CHECK-NEXT: ret i9 [[SHL]]
+;
+ %and = and i9 %b, 504 ; 0x1f8
+ %shl = shl i9 %a, %and
+ ret i9 %shl
+}
+
+
+; For vectors, we need all scalar elements to meet the requirements to optimize.
+
+define <2 x i32> @ashr_vector_bogus(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: @ashr_vector_bogus(
+; CHECK-NEXT: ret <2 x i32> undef
+;
+ %or = or <2 x i32> %b, <i32 32, i32 32>
+ %shr = ashr <2 x i32> %a, %or
+ ret <2 x i32> %shr
+}
+
+; FIXME: This is undef, but computeKnownBits doesn't handle the union.
+define <2 x i32> @shl_vector_bogus(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: @shl_vector_bogus(
+; CHECK-NEXT: [[OR:%.*]] = or <2 x i32> %b, <i32 32, i32 64>
+; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> %a, [[OR]]
+; CHECK-NEXT: ret <2 x i32> [[SHL]]
+;
+ %or = or <2 x i32> %b, <i32 32, i32 64>
+ %shl = shl <2 x i32> %a, %or
+ ret <2 x i32> %shl
+}
+
+define <2 x i32> @lshr_vector_zero(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: @lshr_vector_zero(
+; CHECK-NEXT: ret <2 x i32> %a
+;
+ %and = and <2 x i32> %b, <i32 64, i32 256>
+ %shr = lshr <2 x i32> %a, %and
+ ret <2 x i32> %shr
+}
+
+; Make sure that weird vector types work too.
+define <2 x i15> @shl_vector_zero(<2 x i15> %a, <2 x i15> %b) {
+; CHECK-LABEL: @shl_vector_zero(
+; CHECK-NEXT: ret <2 x i15> %a
+;
+ %and = and <2 x i15> %b, <i15 1024, i15 1024>
+ %shl = shl <2 x i15> %a, %and
+ ret <2 x i15> %shl
+}
+
+define <2 x i32> @shl_vector_for_real(<2 x i32> %a, <2 x i32> %b) {
+; CHECK-LABEL: @shl_vector_for_real(
+; CHECK-NEXT: [[AND:%.*]] = and <2 x i32> %b, <i32 3, i32 3>
+; CHECK-NEXT: [[SHL:%.*]] = shl <2 x i32> %a, [[AND]]
+; CHECK-NEXT: ret <2 x i32> [[SHL]]
+;
+ %and = and <2 x i32> %b, <i32 3, i32 3> ; a necessary mask op
+ %shl = shl <2 x i32> %a, %and
+ ret <2 x i32> %shl
+}
+
+
+; We calculate the valid bits of the shift using log2, and log2 of 1 (the type width) is 0.
+; That should be ok. Either the shift amount is 0 or invalid (1), so we can always return %a.
+
+define i1 @shl_i1(i1 %a, i1 %b) {
+; CHECK-LABEL: @shl_i1(
+; CHECK-NEXT: ret i1 %a
+;
+ %shl = shl i1 %a, %b
+ ret i1 %shl
+}
+
diff --git a/test/Transforms/InstSimplify/shr-nop.ll b/test/Transforms/InstSimplify/shr-nop.ll
index edabcc314ea6..9b0f4e9fe501 100644
--- a/test/Transforms/InstSimplify/shr-nop.ll
+++ b/test/Transforms/InstSimplify/shr-nop.ll
@@ -1,346 +1,431 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instsimplify -S | FileCheck %s
-; CHECK-LABEL: @foo
-; CHECK: %[[and:.*]] = and i32 %x, 1
-; CHECK-NEXT: %[[add:.*]] = add i32 %[[and]], -1
-; CHECK-NEXT: ret i32 %[[add]]
define i32 @foo(i32 %x) {
- %o = and i32 %x, 1
- %n = add i32 %o, -1
- %t = ashr i32 %n, 17
- ret i32 %t
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: [[O:%.*]] = and i32 %x, 1
+; CHECK-NEXT: [[N:%.*]] = add i32 [[O]], -1
+; CHECK-NEXT: ret i32 [[N]]
+;
+ %o = and i32 %x, 1
+ %n = add i32 %o, -1
+ %t = ashr i32 %n, 17
+ ret i32 %t
}
-; CHECK-LABEL: @exact_lshr_eq_both_zero
-; CHECK-NEXT: ret i1 true
define i1 @exact_lshr_eq_both_zero(i8 %a) {
- %shr = lshr exact i8 0, %a
- %cmp = icmp eq i8 %shr, 0
- ret i1 %cmp
+; CHECK-LABEL: @exact_lshr_eq_both_zero(
+; CHECK-NEXT: ret i1 true
+;
+ %shr = lshr exact i8 0, %a
+ %cmp = icmp eq i8 %shr, 0
+ ret i1 %cmp
}
-; CHECK-LABEL: @exact_ashr_eq_both_zero
-; CHECK-NEXT: ret i1 true
define i1 @exact_ashr_eq_both_zero(i8 %a) {
- %shr = ashr exact i8 0, %a
- %cmp = icmp eq i8 %shr, 0
- ret i1 %cmp
+; CHECK-LABEL: @exact_ashr_eq_both_zero(
+; CHECK-NEXT: ret i1 true
+;
+ %shr = ashr exact i8 0, %a
+ %cmp = icmp eq i8 %shr, 0
+ ret i1 %cmp
}
-; CHECK-LABEL: @nonexact_ashr_eq_both_zero
-; CHECK-NEXT: ret i1 true
define i1 @nonexact_ashr_eq_both_zero(i8 %a) {
- %shr = ashr i8 0, %a
- %cmp = icmp eq i8 %shr, 0
- ret i1 %cmp
+; CHECK-LABEL: @nonexact_ashr_eq_both_zero(
+; CHECK-NEXT: ret i1 true
+;
+ %shr = ashr i8 0, %a
+ %cmp = icmp eq i8 %shr, 0
+ ret i1 %cmp
}
-; CHECK-LABEL: @exact_lshr_ne_both_zero
-; CHECK-NEXT: ret i1 false
define i1 @exact_lshr_ne_both_zero(i8 %a) {
- %shr = lshr exact i8 0, %a
- %cmp = icmp ne i8 %shr, 0
- ret i1 %cmp
+; CHECK-LABEL: @exact_lshr_ne_both_zero(
+; CHECK-NEXT: ret i1 false
+;
+ %shr = lshr exact i8 0, %a
+ %cmp = icmp ne i8 %shr, 0
+ ret i1 %cmp
}
-; CHECK-LABEL: @exact_ashr_ne_both_zero
-; CHECK-NEXT: ret i1 false
define i1 @exact_ashr_ne_both_zero(i8 %a) {
- %shr = ashr exact i8 0, %a
- %cmp = icmp ne i8 %shr, 0
- ret i1 %cmp
+; CHECK-LABEL: @exact_ashr_ne_both_zero(
+; CHECK-NEXT: ret i1 false
+;
+ %shr = ashr exact i8 0, %a
+ %cmp = icmp ne i8 %shr, 0
+ ret i1 %cmp
}
-; CHECK-LABEL: @nonexact_lshr_ne_both_zero
-; CHECK-NEXT: ret i1 false
define i1 @nonexact_lshr_ne_both_zero(i8 %a) {
- %shr = lshr i8 0, %a
- %cmp = icmp ne i8 %shr, 0
- ret i1 %cmp
+; CHECK-LABEL: @nonexact_lshr_ne_both_zero(
+; CHECK-NEXT: ret i1 false
+;
+ %shr = lshr i8 0, %a
+ %cmp = icmp ne i8 %shr, 0
+ ret i1 %cmp
}
-; CHECK-LABEL: @nonexact_ashr_ne_both_zero
-; CHECK-NEXT: ret i1 false
define i1 @nonexact_ashr_ne_both_zero(i8 %a) {
- %shr = ashr i8 0, %a
- %cmp = icmp ne i8 %shr, 0
- ret i1 %cmp
+; CHECK-LABEL: @nonexact_ashr_ne_both_zero(
+; CHECK-NEXT: ret i1 false
+;
+ %shr = ashr i8 0, %a
+ %cmp = icmp ne i8 %shr, 0
+ ret i1 %cmp
}
-; CHECK-LABEL: @exact_lshr_eq_last_zero
-; CHECK-NEXT: ret i1 false
define i1 @exact_lshr_eq_last_zero(i8 %a) {
- %shr = lshr exact i8 128, %a
- %cmp = icmp eq i8 %shr, 0
- ret i1 %cmp
+; CHECK-LABEL: @exact_lshr_eq_last_zero(
+; CHECK-NEXT: ret i1 false
+;
+ %shr = lshr exact i8 128, %a
+ %cmp = icmp eq i8 %shr, 0
+ ret i1 %cmp
}
-; CHECK-LABEL: @exact_ashr_eq_last_zero
-; CHECK-NEXT: ret i1 false
define i1 @exact_ashr_eq_last_zero(i8 %a) {
- %shr = ashr exact i8 -128, %a
- %cmp = icmp eq i8 %shr, 0
- ret i1 %cmp
+; CHECK-LABEL: @exact_ashr_eq_last_zero(
+; CHECK-NEXT: ret i1 false
+;
+ %shr = ashr exact i8 -128, %a
+ %cmp = icmp eq i8 %shr, 0
+ ret i1 %cmp
}
-; CHECK-LABEL: @nonexact_lshr_eq_both_zero
-; CHECK-NEXT: ret i1 true
define i1 @nonexact_lshr_eq_both_zero(i8 %a) {
- %shr = lshr i8 0, %a
- %cmp = icmp eq i8 %shr, 0
- ret i1 %cmp
+; CHECK-LABEL: @nonexact_lshr_eq_both_zero(
+; CHECK-NEXT: ret i1 true
+;
+ %shr = lshr i8 0, %a
+ %cmp = icmp eq i8 %shr, 0
+ ret i1 %cmp
}
-; CHECK-LABEL: @exact_lshr_ne_last_zero
-; CHECK-NEXT: ret i1 true
define i1 @exact_lshr_ne_last_zero(i8 %a) {
- %shr = lshr exact i8 128, %a
- %cmp = icmp ne i8 %shr, 0
- ret i1 %cmp
+; CHECK-LABEL: @exact_lshr_ne_last_zero(
+; CHECK-NEXT: ret i1 true
+;
+ %shr = lshr exact i8 128, %a
+ %cmp = icmp ne i8 %shr, 0
+ ret i1 %cmp
}
-; CHECK-LABEL: @exact_ashr_ne_last_zero
-; CHECK-NEXT: ret i1 true
define i1 @exact_ashr_ne_last_zero(i8 %a) {
- %shr = ashr exact i8 -128, %a
- %cmp = icmp ne i8 %shr, 0
- ret i1 %cmp
+; CHECK-LABEL: @exact_ashr_ne_last_zero(
+; CHECK-NEXT: ret i1 true
+;
+ %shr = ashr exact i8 -128, %a
+ %cmp = icmp ne i8 %shr, 0
+ ret i1 %cmp
}
-; CHECK-LABEL: @nonexact_lshr_eq_last_zero
-; CHECK-NEXT: ret i1 false
define i1 @nonexact_lshr_eq_last_zero(i8 %a) {
- %shr = lshr i8 128, %a
- %cmp = icmp eq i8 %shr, 0
- ret i1 %cmp
+; CHECK-LABEL: @nonexact_lshr_eq_last_zero(
+; CHECK-NEXT: ret i1 false
+;
+ %shr = lshr i8 128, %a
+ %cmp = icmp eq i8 %shr, 0
+ ret i1 %cmp
}
-; CHECK-LABEL: @nonexact_ashr_eq_last_zero
-; CHECK-NEXT: ret i1 false
define i1 @nonexact_ashr_eq_last_zero(i8 %a) {
- %shr = ashr i8 -128, %a
- %cmp = icmp eq i8 %shr, 0
- ret i1 %cmp
+; CHECK-LABEL: @nonexact_ashr_eq_last_zero(
+; CHECK-NEXT: ret i1 false
+;
+ %shr = ashr i8 -128, %a
+ %cmp = icmp eq i8 %shr, 0
+ ret i1 %cmp
}
-; CHECK-LABEL: @nonexact_lshr_ne_last_zero
-; CHECK-NEXT: ret i1 true
define i1 @nonexact_lshr_ne_last_zero(i8 %a) {
- %shr = lshr i8 128, %a
- %cmp = icmp ne i8 %shr, 0
- ret i1 %cmp
+; CHECK-LABEL: @nonexact_lshr_ne_last_zero(
+; CHECK-NEXT: ret i1 true
+;
+ %shr = lshr i8 128, %a
+ %cmp = icmp ne i8 %shr, 0
+ ret i1 %cmp
}
-; CHECK-LABEL: @nonexact_ashr_ne_last_zero
-; CHECK-NEXT: ret i1 true
define i1 @nonexact_ashr_ne_last_zero(i8 %a) {
- %shr = ashr i8 -128, %a
- %cmp = icmp ne i8 %shr, 0
- ret i1 %cmp
+; CHECK-LABEL: @nonexact_ashr_ne_last_zero(
+; CHECK-NEXT: ret i1 true
+;
+ %shr = ashr i8 -128, %a
+ %cmp = icmp ne i8 %shr, 0
+ ret i1 %cmp
}
-; CHECK-LABEL: @lshr_eq_first_zero
-; CHECK-NEXT: ret i1 false
define i1 @lshr_eq_first_zero(i8 %a) {
- %shr = lshr i8 0, %a
- %cmp = icmp eq i8 %shr, 2
- ret i1 %cmp
+; CHECK-LABEL: @lshr_eq_first_zero(
+; CHECK-NEXT: ret i1 false
+;
+ %shr = lshr i8 0, %a
+ %cmp = icmp eq i8 %shr, 2
+ ret i1 %cmp
}
-; CHECK-LABEL: @ashr_eq_first_zero
-; CHECK-NEXT: ret i1 false
define i1 @ashr_eq_first_zero(i8 %a) {
- %shr = ashr i8 0, %a
- %cmp = icmp eq i8 %shr, 2
- ret i1 %cmp
+; CHECK-LABEL: @ashr_eq_first_zero(
+; CHECK-NEXT: ret i1 false
+;
+ %shr = ashr i8 0, %a
+ %cmp = icmp eq i8 %shr, 2
+ ret i1 %cmp
}
-; CHECK-LABEL: @lshr_ne_first_zero
-; CHECK-NEXT: ret i1 true
define i1 @lshr_ne_first_zero(i8 %a) {
- %shr = lshr i8 0, %a
- %cmp = icmp ne i8 %shr, 2
- ret i1 %cmp
+; CHECK-LABEL: @lshr_ne_first_zero(
+; CHECK-NEXT: ret i1 true
+;
+ %shr = lshr i8 0, %a
+ %cmp = icmp ne i8 %shr, 2
+ ret i1 %cmp
}
-; CHECK-LABEL: @ashr_ne_first_zero
-; CHECK-NEXT: ret i1 true
define i1 @ashr_ne_first_zero(i8 %a) {
- %shr = ashr i8 0, %a
- %cmp = icmp ne i8 %shr, 2
- ret i1 %cmp
+; CHECK-LABEL: @ashr_ne_first_zero(
+; CHECK-NEXT: ret i1 true
+;
+ %shr = ashr i8 0, %a
+ %cmp = icmp ne i8 %shr, 2
+ ret i1 %cmp
}
-; CHECK-LABEL: @ashr_eq_both_minus1
-; CHECK-NEXT: ret i1 true
define i1 @ashr_eq_both_minus1(i8 %a) {
- %shr = ashr i8 -1, %a
- %cmp = icmp eq i8 %shr, -1
- ret i1 %cmp
+; CHECK-LABEL: @ashr_eq_both_minus1(
+; CHECK-NEXT: ret i1 true
+;
+ %shr = ashr i8 -1, %a
+ %cmp = icmp eq i8 %shr, -1
+ ret i1 %cmp
}
-; CHECK-LABEL: @ashr_ne_both_minus1
-; CHECK-NEXT: ret i1 false
define i1 @ashr_ne_both_minus1(i8 %a) {
- %shr = ashr i8 -1, %a
- %cmp = icmp ne i8 %shr, -1
- ret i1 %cmp
+; CHECK-LABEL: @ashr_ne_both_minus1(
+; CHECK-NEXT: ret i1 false
+;
+ %shr = ashr i8 -1, %a
+ %cmp = icmp ne i8 %shr, -1
+ ret i1 %cmp
}
-; CHECK-LABEL: @exact_ashr_eq_both_minus1
-; CHECK-NEXT: ret i1 true
define i1 @exact_ashr_eq_both_minus1(i8 %a) {
- %shr = ashr exact i8 -1, %a
- %cmp = icmp eq i8 %shr, -1
- ret i1 %cmp
+; CHECK-LABEL: @exact_ashr_eq_both_minus1(
+; CHECK-NEXT: ret i1 true
+;
+ %shr = ashr exact i8 -1, %a
+ %cmp = icmp eq i8 %shr, -1
+ ret i1 %cmp
}
-; CHECK-LABEL: @exact_ashr_ne_both_minus1
-; CHECK-NEXT: ret i1 false
define i1 @exact_ashr_ne_both_minus1(i8 %a) {
- %shr = ashr exact i8 -1, %a
- %cmp = icmp ne i8 %shr, -1
- ret i1 %cmp
+; CHECK-LABEL: @exact_ashr_ne_both_minus1(
+; CHECK-NEXT: ret i1 false
+;
+ %shr = ashr exact i8 -1, %a
+ %cmp = icmp ne i8 %shr, -1
+ ret i1 %cmp
}
-; CHECK-LABEL: @exact_ashr_eq_opposite_msb
-; CHECK-NEXT: ret i1 false
define i1 @exact_ashr_eq_opposite_msb(i8 %a) {
- %shr = ashr exact i8 -128, %a
- %cmp = icmp eq i8 %shr, 1
- ret i1 %cmp
+; CHECK-LABEL: @exact_ashr_eq_opposite_msb(
+; CHECK-NEXT: ret i1 false
+;
+ %shr = ashr exact i8 -128, %a
+ %cmp = icmp eq i8 %shr, 1
+ ret i1 %cmp
}
-; CHECK-LABEL: @exact_ashr_eq_noexactlog
-; CHECK-NEXT: ret i1 false
define i1 @exact_ashr_eq_noexactlog(i8 %a) {
- %shr = ashr exact i8 -90, %a
- %cmp = icmp eq i8 %shr, -30
- ret i1 %cmp
+; CHECK-LABEL: @exact_ashr_eq_noexactlog(
+; CHECK-NEXT: ret i1 false
+;
+ %shr = ashr exact i8 -90, %a
+ %cmp = icmp eq i8 %shr, -30
+ ret i1 %cmp
}
-; CHECK-LABEL: @exact_ashr_ne_opposite_msb
-; CHECK-NEXT: ret i1 true
define i1 @exact_ashr_ne_opposite_msb(i8 %a) {
- %shr = ashr exact i8 -128, %a
- %cmp = icmp ne i8 %shr, 1
- ret i1 %cmp
+; CHECK-LABEL: @exact_ashr_ne_opposite_msb(
+; CHECK-NEXT: ret i1 true
+;
+ %shr = ashr exact i8 -128, %a
+ %cmp = icmp ne i8 %shr, 1
+ ret i1 %cmp
}
-; CHECK-LABEL: @ashr_eq_opposite_msb
-; CHECK-NEXT: ret i1 false
define i1 @ashr_eq_opposite_msb(i8 %a) {
- %shr = ashr i8 -128, %a
- %cmp = icmp eq i8 %shr, 1
- ret i1 %cmp
+; CHECK-LABEL: @ashr_eq_opposite_msb(
+; CHECK-NEXT: ret i1 false
+;
+ %shr = ashr i8 -128, %a
+ %cmp = icmp eq i8 %shr, 1
+ ret i1 %cmp
}
-; CHECK-LABEL: @ashr_ne_opposite_msb
-; CHECK-NEXT: ret i1 true
define i1 @ashr_ne_opposite_msb(i8 %a) {
- %shr = ashr i8 -128, %a
- %cmp = icmp ne i8 %shr, 1
- ret i1 %cmp
+; CHECK-LABEL: @ashr_ne_opposite_msb(
+; CHECK-NEXT: ret i1 true
+;
+ %shr = ashr i8 -128, %a
+ %cmp = icmp ne i8 %shr, 1
+ ret i1 %cmp
}
-; CHECK-LABEL: @exact_ashr_eq_shift_gt
-; CHECK-NEXT: ret i1 false
define i1 @exact_ashr_eq_shift_gt(i8 %a) {
- %shr = ashr exact i8 -2, %a
- %cmp = icmp eq i8 %shr, -8
- ret i1 %cmp
+; CHECK-LABEL: @exact_ashr_eq_shift_gt(
+; CHECK-NEXT: ret i1 false
+;
+ %shr = ashr exact i8 -2, %a
+ %cmp = icmp eq i8 %shr, -8
+ ret i1 %cmp
}
-; CHECK-LABEL: @exact_ashr_ne_shift_gt
-; CHECK-NEXT: ret i1 true
define i1 @exact_ashr_ne_shift_gt(i8 %a) {
- %shr = ashr exact i8 -2, %a
- %cmp = icmp ne i8 %shr, -8
- ret i1 %cmp
+; CHECK-LABEL: @exact_ashr_ne_shift_gt(
+; CHECK-NEXT: ret i1 true
+;
+ %shr = ashr exact i8 -2, %a
+ %cmp = icmp ne i8 %shr, -8
+ ret i1 %cmp
}
-; CHECK-LABEL: @nonexact_ashr_eq_shift_gt
-; CHECK-NEXT: ret i1 false
define i1 @nonexact_ashr_eq_shift_gt(i8 %a) {
- %shr = ashr i8 -2, %a
- %cmp = icmp eq i8 %shr, -8
- ret i1 %cmp
+; CHECK-LABEL: @nonexact_ashr_eq_shift_gt(
+; CHECK-NEXT: ret i1 false
+;
+ %shr = ashr i8 -2, %a
+ %cmp = icmp eq i8 %shr, -8
+ ret i1 %cmp
}
-; CHECK-LABEL: @nonexact_ashr_ne_shift_gt
-; CHECK-NEXT: ret i1 true
define i1 @nonexact_ashr_ne_shift_gt(i8 %a) {
- %shr = ashr i8 -2, %a
- %cmp = icmp ne i8 %shr, -8
- ret i1 %cmp
+; CHECK-LABEL: @nonexact_ashr_ne_shift_gt(
+; CHECK-NEXT: ret i1 true
+;
+ %shr = ashr i8 -2, %a
+ %cmp = icmp ne i8 %shr, -8
+ ret i1 %cmp
}
-; CHECK-LABEL: @exact_lshr_eq_shift_gt
-; CHECK-NEXT: ret i1 false
define i1 @exact_lshr_eq_shift_gt(i8 %a) {
- %shr = lshr exact i8 2, %a
- %cmp = icmp eq i8 %shr, 8
- ret i1 %cmp
+; CHECK-LABEL: @exact_lshr_eq_shift_gt(
+; CHECK-NEXT: ret i1 false
+;
+ %shr = lshr exact i8 2, %a
+ %cmp = icmp eq i8 %shr, 8
+ ret i1 %cmp
}
-; CHECK-LABEL: @exact_lshr_ne_shift_gt
-; CHECK-NEXT: ret i1 true
define i1 @exact_lshr_ne_shift_gt(i8 %a) {
- %shr = lshr exact i8 2, %a
- %cmp = icmp ne i8 %shr, 8
- ret i1 %cmp
+; CHECK-LABEL: @exact_lshr_ne_shift_gt(
+; CHECK-NEXT: ret i1 true
+;
+ %shr = lshr exact i8 2, %a
+ %cmp = icmp ne i8 %shr, 8
+ ret i1 %cmp
}
-; CHECK-LABEL: @nonexact_lshr_eq_shift_gt
-; CHECK-NEXT: ret i1 false
define i1 @nonexact_lshr_eq_shift_gt(i8 %a) {
- %shr = lshr i8 2, %a
- %cmp = icmp eq i8 %shr, 8
- ret i1 %cmp
+; CHECK-LABEL: @nonexact_lshr_eq_shift_gt(
+; CHECK-NEXT: ret i1 false
+;
+ %shr = lshr i8 2, %a
+ %cmp = icmp eq i8 %shr, 8
+ ret i1 %cmp
}
-; CHECK-LABEL: @nonexact_lshr_ne_shift_gt
-; CHECK-NEXT: ret i1 true
define i1 @nonexact_lshr_ne_shift_gt(i8 %a) {
- %shr = ashr i8 2, %a
- %cmp = icmp ne i8 %shr, 8
- ret i1 %cmp
+; CHECK-LABEL: @nonexact_lshr_ne_shift_gt(
+; CHECK-NEXT: ret i1 true
+;
+ %shr = ashr i8 2, %a
+ %cmp = icmp ne i8 %shr, 8
+ ret i1 %cmp
}
-; CHECK-LABEL: @exact_ashr_ne_noexactlog
-; CHECK-NEXT: ret i1 true
define i1 @exact_ashr_ne_noexactlog(i8 %a) {
- %shr = ashr exact i8 -90, %a
- %cmp = icmp ne i8 %shr, -30
- ret i1 %cmp
+; CHECK-LABEL: @exact_ashr_ne_noexactlog(
+; CHECK-NEXT: ret i1 true
+;
+ %shr = ashr exact i8 -90, %a
+ %cmp = icmp ne i8 %shr, -30
+ ret i1 %cmp
}
-; CHECK-LABEL: @exact_lshr_eq_noexactlog
-; CHECK-NEXT: ret i1 false
define i1 @exact_lshr_eq_noexactlog(i8 %a) {
- %shr = lshr exact i8 90, %a
- %cmp = icmp eq i8 %shr, 30
- ret i1 %cmp
+; CHECK-LABEL: @exact_lshr_eq_noexactlog(
+; CHECK-NEXT: ret i1 false
+;
+ %shr = lshr exact i8 90, %a
+ %cmp = icmp eq i8 %shr, 30
+ ret i1 %cmp
}
-; CHECK-LABEL: @exact_lshr_ne_noexactlog
-; CHECK-NEXT: ret i1 true
define i1 @exact_lshr_ne_noexactlog(i8 %a) {
- %shr = lshr exact i8 90, %a
- %cmp = icmp ne i8 %shr, 30
- ret i1 %cmp
+; CHECK-LABEL: @exact_lshr_ne_noexactlog(
+; CHECK-NEXT: ret i1 true
+;
+ %shr = lshr exact i8 90, %a
+ %cmp = icmp ne i8 %shr, 30
+ ret i1 %cmp
}
-; CHECK-LABEL: @exact_lshr_lowbit
-; CHECK-NEXT: ret i32 7
define i32 @exact_lshr_lowbit(i32 %shiftval) {
+; CHECK-LABEL: @exact_lshr_lowbit(
+; CHECK-NEXT: ret i32 7
+;
%shr = lshr exact i32 7, %shiftval
ret i32 %shr
}
-; CHECK-LABEL: @exact_ashr_lowbit
-; CHECK-NEXT: ret i32 7
define i32 @exact_ashr_lowbit(i32 %shiftval) {
+; CHECK-LABEL: @exact_ashr_lowbit(
+; CHECK-NEXT: ret i32 7
+;
%shr = ashr exact i32 7, %shiftval
ret i32 %shr
}
+
+define i32 @ashr_zero(i32 %shiftval) {
+; CHECK-LABEL: @ashr_zero(
+; CHECK-NEXT: ret i32 0
+;
+ %shr = ashr i32 0, %shiftval
+ ret i32 %shr
+}
+
+define i257 @ashr_minus1(i257 %shiftval) {
+; CHECK-LABEL: @ashr_minus1(
+; CHECK-NEXT: ret i257 -1
+;
+ %shr = ashr i257 -1, %shiftval
+ ret i257 %shr
+}
+
+define <2 x i4097> @ashr_zero_vec(<2 x i4097> %shiftval) {
+; CHECK-LABEL: @ashr_zero_vec(
+; CHECK-NEXT: ret <2 x i4097> zeroinitializer
+;
+ %shr = ashr <2 x i4097> zeroinitializer, %shiftval
+ ret <2 x i4097> %shr
+}
+
+define <2 x i64> @ashr_minus1_vec(<2 x i64> %shiftval) {
+; CHECK-LABEL: @ashr_minus1_vec(
+; CHECK-NEXT: ret <2 x i64> <i64 -1, i64 -1>
+;
+ %shr = ashr <2 x i64> <i64 -1, i64 -1>, %shiftval
+ ret <2 x i64> %shr
+}
+
+define <2 x i4> @ashr_zero_minus1_vec(<2 x i4> %shiftval) {
+; CHECK-LABEL: @ashr_zero_minus1_vec(
+; CHECK-NEXT: ret <2 x i4> <i4 0, i4 -1>
+;
+ %shr = ashr <2 x i4> <i4 0, i4 -1>, %shiftval
+ ret <2 x i4> %shr
+}
+
diff --git a/test/Transforms/InstSimplify/undef.ll b/test/Transforms/InstSimplify/undef.ll
index d75dc364243c..b92184bb6882 100644
--- a/test/Transforms/InstSimplify/undef.ll
+++ b/test/Transforms/InstSimplify/undef.ll
@@ -1,281 +1,347 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
; RUN: opt -instsimplify -S < %s | FileCheck %s
-; @test0
-; CHECK: ret i64 undef
define i64 @test0() {
+; CHECK-LABEL: @test0(
+; CHECK: ret i64 undef
+;
%r = mul i64 undef, undef
ret i64 %r
}
-; @test1
-; CHECK: ret i64 undef
define i64 @test1() {
+; CHECK-LABEL: @test1(
+; CHECK: ret i64 undef
+;
%r = mul i64 3, undef
ret i64 %r
}
-; @test2
-; CHECK: ret i64 undef
define i64 @test2() {
+; CHECK-LABEL: @test2(
+; CHECK: ret i64 undef
+;
%r = mul i64 undef, 3
ret i64 %r
}
-; @test3
-; CHECK: ret i64 0
define i64 @test3() {
+; CHECK-LABEL: @test3(
+; CHECK: ret i64 0
+;
%r = mul i64 undef, 6
ret i64 %r
}
-; @test4
-; CHECK: ret i64 0
define i64 @test4() {
+; CHECK-LABEL: @test4(
+; CHECK: ret i64 0
+;
%r = mul i64 6, undef
ret i64 %r
}
-; @test5
-; CHECK: ret i64 undef
define i64 @test5() {
+; CHECK-LABEL: @test5(
+; CHECK: ret i64 undef
+;
%r = and i64 undef, undef
ret i64 %r
}
-; @test6
-; CHECK: ret i64 undef
define i64 @test6() {
+; CHECK-LABEL: @test6(
+; CHECK: ret i64 undef
+;
%r = or i64 undef, undef
ret i64 %r
}
-; @test7
-; CHECK: ret i64 undef
define i64 @test7() {
+; CHECK-LABEL: @test7(
+; CHECK: ret i64 undef
+;
%r = udiv i64 undef, 1
ret i64 %r
}
-; @test8
-; CHECK: ret i64 undef
define i64 @test8() {
+; CHECK-LABEL: @test8(
+; CHECK: ret i64 undef
+;
%r = sdiv i64 undef, 1
ret i64 %r
}
-; @test9
-; CHECK: ret i64 0
define i64 @test9() {
+; CHECK-LABEL: @test9(
+; CHECK: ret i64 0
+;
%r = urem i64 undef, 1
ret i64 %r
}
-; @test10
-; CHECK: ret i64 0
define i64 @test10() {
+; CHECK-LABEL: @test10(
+; CHECK: ret i64 0
+;
%r = srem i64 undef, 1
ret i64 %r
}
-; @test11
-; CHECK: ret i64 undef
define i64 @test11() {
+; CHECK-LABEL: @test11(
+; CHECK: ret i64 undef
+;
%r = shl i64 undef, undef
ret i64 %r
}
-; @test11b
-; CHECK: ret i64 undef
define i64 @test11b(i64 %a) {
+; CHECK-LABEL: @test11b(
+; CHECK: ret i64 undef
+;
%r = shl i64 %a, undef
ret i64 %r
}
-; @test12
-; CHECK: ret i64 undef
define i64 @test12() {
+; CHECK-LABEL: @test12(
+; CHECK: ret i64 undef
+;
%r = ashr i64 undef, undef
ret i64 %r
}
-; @test12b
-; CHECK: ret i64 undef
define i64 @test12b(i64 %a) {
+; CHECK-LABEL: @test12b(
+; CHECK: ret i64 undef
+;
%r = ashr i64 %a, undef
ret i64 %r
}
-; @test13
-; CHECK: ret i64 undef
define i64 @test13() {
+; CHECK-LABEL: @test13(
+; CHECK: ret i64 undef
+;
%r = lshr i64 undef, undef
ret i64 %r
}
-; @test13b
-; CHECK: ret i64 undef
define i64 @test13b(i64 %a) {
+; CHECK-LABEL: @test13b(
+; CHECK: ret i64 undef
+;
%r = lshr i64 %a, undef
ret i64 %r
}
-; @test14
-; CHECK: ret i1 undef
define i1 @test14() {
+; CHECK-LABEL: @test14(
+; CHECK: ret i1 undef
+;
%r = icmp slt i64 undef, undef
ret i1 %r
}
-; @test15
-; CHECK: ret i1 undef
define i1 @test15() {
+; CHECK-LABEL: @test15(
+; CHECK: ret i1 undef
+;
%r = icmp ult i64 undef, undef
ret i1 %r
}
-; @test16
-; CHECK: ret i64 undef
define i64 @test16(i64 %a) {
+; CHECK-LABEL: @test16(
+; CHECK: ret i64 undef
+;
%r = select i1 undef, i64 %a, i64 undef
ret i64 %r
}
-; @test17
-; CHECK: ret i64 undef
define i64 @test17(i64 %a) {
+; CHECK-LABEL: @test17(
+; CHECK: ret i64 undef
+;
%r = select i1 undef, i64 undef, i64 %a
ret i64 %r
}
-; @test18
-; CHECK: ret i64 undef
define i64 @test18(i64 %a) {
+; CHECK-LABEL: @test18(
+; CHECK: [[R:%.*]] = call i64 undef(i64 %a)
+; CHECK-NEXT: ret i64 undef
+;
%r = call i64 (i64) undef(i64 %a)
ret i64 %r
}
-; CHECK-LABEL: @test19
-; CHECK: ret <4 x i8> undef
define <4 x i8> @test19(<4 x i8> %a) {
+; CHECK-LABEL: @test19(
+; CHECK: ret <4 x i8> undef
+;
%b = shl <4 x i8> %a, <i8 8, i8 9, i8 undef, i8 -1>
ret <4 x i8> %b
}
-; CHECK-LABEL: @test20
-; CHECK: ret i32 undef
define i32 @test20(i32 %a) {
+; CHECK-LABEL: @test20(
+; CHECK: ret i32 undef
+;
%b = udiv i32 %a, 0
ret i32 %b
}
-; CHECK-LABEL: @test21
-; CHECK: ret i32 undef
define i32 @test21(i32 %a) {
+; CHECK-LABEL: @test21(
+; CHECK: ret i32 undef
+;
%b = sdiv i32 %a, 0
ret i32 %b
}
-; CHECK-LABEL: @test22
-; CHECK: ret i32 undef
define i32 @test22(i32 %a) {
+; CHECK-LABEL: @test22(
+; CHECK: ret i32 undef
+;
%b = ashr exact i32 undef, %a
ret i32 %b
}
-; CHECK-LABEL: @test23
-; CHECK: ret i32 undef
define i32 @test23(i32 %a) {
+; CHECK-LABEL: @test23(
+; CHECK: ret i32 undef
+;
%b = lshr exact i32 undef, %a
ret i32 %b
}
-; CHECK-LABEL: @test24
-; CHECK: ret i32 undef
define i32 @test24() {
+; CHECK-LABEL: @test24(
+; CHECK: ret i32 undef
+;
%b = udiv i32 undef, 0
ret i32 %b
}
-; CHECK-LABEL: @test25
-; CHECK: ret i32 undef
define i32 @test25() {
+; CHECK-LABEL: @test25(
+; CHECK: ret i32 undef
+;
%b = lshr i32 0, undef
ret i32 %b
}
-; CHECK-LABEL: @test26
-; CHECK: ret i32 undef
define i32 @test26() {
+; CHECK-LABEL: @test26(
+; CHECK: ret i32 undef
+;
%b = ashr i32 0, undef
ret i32 %b
}
-; CHECK-LABEL: @test27
-; CHECK: ret i32 undef
define i32 @test27() {
+; CHECK-LABEL: @test27(
+; CHECK: ret i32 undef
+;
%b = shl i32 0, undef
ret i32 %b
}
-; CHECK-LABEL: @test28
-; CHECK: ret i32 undef
define i32 @test28(i32 %a) {
+; CHECK-LABEL: @test28(
+; CHECK: ret i32 undef
+;
%b = shl nsw i32 undef, %a
ret i32 %b
}
-; CHECK-LABEL: @test29
-; CHECK: ret i32 undef
define i32 @test29(i32 %a) {
+; CHECK-LABEL: @test29(
+; CHECK: ret i32 undef
+;
%b = shl nuw i32 undef, %a
ret i32 %b
}
-; CHECK-LABEL: @test30
-; CHECK: ret i32 undef
define i32 @test30(i32 %a) {
+; CHECK-LABEL: @test30(
+; CHECK: ret i32 undef
+;
%b = shl nsw nuw i32 undef, %a
ret i32 %b
}
-; CHECK-LABEL: @test31
-; CHECK: ret i32 0
define i32 @test31(i32 %a) {
+; CHECK-LABEL: @test31(
+; CHECK: ret i32 0
+;
%b = shl i32 undef, %a
ret i32 %b
}
-; CHECK-LABEL: @test32
-; CHECK: ret i32 undef
define i32 @test32(i32 %a) {
+; CHECK-LABEL: @test32(
+; CHECK: ret i32 undef
+;
%b = shl i32 undef, 0
ret i32 %b
}
-; CHECK-LABEL: @test33
-; CHECK: ret i32 undef
define i32 @test33(i32 %a) {
+; CHECK-LABEL: @test33(
+; CHECK: ret i32 undef
+;
%b = ashr i32 undef, 0
ret i32 %b
}
-; CHECK-LABEL: @test34
-; CHECK: ret i32 undef
define i32 @test34(i32 %a) {
+; CHECK-LABEL: @test34(
+; CHECK: ret i32 undef
+;
%b = lshr i32 undef, 0
ret i32 %b
}
-; CHECK-LABEL: @test35
-; CHECK: ret i32 undef
define i32 @test35(<4 x i32> %V) {
+; CHECK-LABEL: @test35(
+; CHECK: ret i32 undef
+;
%b = extractelement <4 x i32> %V, i32 4
ret i32 %b
}
-; CHECK-LABEL: @test36
-; CHECK: ret i32 undef
define i32 @test36(i32 %V) {
+; CHECK-LABEL: @test36(
+; CHECK: ret i32 undef
+;
%b = extractelement <4 x i32> undef, i32 %V
ret i32 %b
}
+
+define i32 @test37() {
+; CHECK-LABEL: @test37(
+; CHECK: ret i32 undef
+;
+ %b = udiv i32 undef, undef
+ ret i32 %b
+}
+
+define i32 @test38(i32 %a) {
+; CHECK-LABEL: @test38(
+; CHECK: ret i32 undef
+;
+ %b = udiv i32 %a, undef
+ ret i32 %b
+}
+
+define i32 @test39() {
+; CHECK-LABEL: @test39(
+; CHECK: ret i32 undef
+;
+ %b = udiv i32 0, undef
+ ret i32 %b
+}
diff --git a/test/Transforms/InstSimplify/vec-cmp.ll b/test/Transforms/InstSimplify/vec-cmp.ll
new file mode 100644
index 000000000000..ca6361a18ac4
--- /dev/null
+++ b/test/Transforms/InstSimplify/vec-cmp.ll
@@ -0,0 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -instsimplify -S | FileCheck %s
+
+define <2 x i1> @nonzero_vec_splat(<2 x i32> %x) {
+; CHECK-LABEL: @nonzero_vec_splat(
+; CHECK-NEXT: ret <2 x i1> zeroinitializer
+;
+ %y = or <2 x i32> %x, <i32 1, i32 1>
+ %c = icmp eq <2 x i32> %y, zeroinitializer
+ ret <2 x i1> %c
+}
+
+define <2 x i1> @nonzero_vec_nonsplat(<2 x i32> %x) {
+; CHECK-LABEL: @nonzero_vec_nonsplat(
+; CHECK-NEXT: ret <2 x i1> <i1 true, i1 true>
+;
+ %y = or <2 x i32> %x, <i32 2, i32 1>
+ %c = icmp ne <2 x i32> %y, zeroinitializer
+ ret <2 x i1> %c
+}
+
+define <2 x i1> @nonzero_vec_undef_elt(<2 x i32> %x) {
+; CHECK-LABEL: @nonzero_vec_undef_elt(
+; CHECK-NEXT: ret <2 x i1> zeroinitializer
+;
+ %y = or <2 x i32> %x, <i32 undef, i32 1>
+ %c = icmp eq <2 x i32> %y, zeroinitializer
+ ret <2 x i1> %c
+}
+
+define <2 x i1> @may_be_zero_vec(<2 x i32> %x) {
+; CHECK-LABEL: @may_be_zero_vec(
+; CHECK-NEXT: [[Y:%.*]] = or <2 x i32> %x, <i32 0, i32 1>
+; CHECK-NEXT: [[C:%.*]] = icmp ne <2 x i32> [[Y]], zeroinitializer
+; CHECK-NEXT: ret <2 x i1> [[C]]
+;
+ %y = or <2 x i32> %x, <i32 0, i32 1>
+ %c = icmp ne <2 x i32> %y, zeroinitializer
+ ret <2 x i1> %c
+}
+
+; Multiplies of non-zero numbers are non-zero if there is no unsigned overflow.
+define <2 x i1> @nonzero_vec_mul_nuw(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @nonzero_vec_mul_nuw(
+; CHECK-NEXT: ret <2 x i1> zeroinitializer
+;
+ %xnz = or <2 x i32> %x, <i32 1, i32 2>
+ %ynz = or <2 x i32> %y, <i32 3, i32 undef>
+ %m = mul nuw <2 x i32> %xnz, %ynz
+ %c = icmp eq <2 x i32> %m, zeroinitializer
+ ret <2 x i1> %c
+}
+
+; Multiplies of non-zero numbers are non-zero if there is no signed overflow.
+define <2 x i1> @nonzero_vec_mul_nsw(<2 x i32> %x, <2 x i32> %y) {
+; CHECK-LABEL: @nonzero_vec_mul_nsw(
+; CHECK-NEXT: ret <2 x i1> <i1 true, i1 true>
+;
+ %xnz = or <2 x i32> %x, <i32 undef, i32 2>
+ %ynz = or <2 x i32> %y, <i32 3, i32 4>
+ %m = mul nsw <2 x i32> %xnz, %ynz
+ %c = icmp ne <2 x i32> %m, zeroinitializer
+ ret <2 x i1> %c
+}
+
diff --git a/test/Transforms/InstSimplify/vector_gep.ll b/test/Transforms/InstSimplify/vector_gep.ll
index 5c3062047c30..54887e99ee38 100644
--- a/test/Transforms/InstSimplify/vector_gep.ll
+++ b/test/Transforms/InstSimplify/vector_gep.ll
@@ -53,3 +53,12 @@ define <4 x i8*> @test5() {
; CHECK-LABEL: @test5
; CHECK-NEXT: ret <4 x i8*> getelementptr (i8, <4 x i8*> <i8* inttoptr (i64 1 to i8*), i8* inttoptr (i64 2 to i8*), i8* inttoptr (i64 3 to i8*), i8* inttoptr (i64 4 to i8*)>, <4 x i32> <i32 1, i32 1, i32 1, i32 1>)
}
+
+@v = global [24 x [42 x [3 x i32]]] zeroinitializer, align 16
+
+define <16 x i32*> @test6() {
+; CHECK-LABEL: @test6
+; CHECK-NEXT: ret <16 x i32*> getelementptr ([24 x [42 x [3 x i32]]], [24 x [42 x [3 x i32]]]* @v, <16 x i64> zeroinitializer, <16 x i64> zeroinitializer, <16 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, <16 x i64> zeroinitializer)
+ %VectorGep = getelementptr [24 x [42 x [3 x i32]]], [24 x [42 x [3 x i32]]]* @v, i64 0, i64 0, <16 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7, i64 8, i64 9, i64 10, i64 11, i64 12, i64 13, i64 14, i64 15>, i64 0
+ ret <16 x i32*> %VectorGep
+} \ No newline at end of file
diff --git a/test/Transforms/Internalize/stackguard.ll b/test/Transforms/Internalize/stackguard.ll
new file mode 100644
index 000000000000..e9dc6cc2365f
--- /dev/null
+++ b/test/Transforms/Internalize/stackguard.ll
@@ -0,0 +1,9 @@
+; __stack_chk_guard and __stack_chk_fail should not be internalized.
+; RUN: opt < %s -internalize -S | FileCheck %s
+; RUN: opt < %s -passes=internalize -S | FileCheck %s
+
+; CHECK: @__stack_chk_guard = hidden global [8 x i64] zeroinitializer, align 16
+@__stack_chk_guard = hidden global [8 x i64] zeroinitializer, align 16
+
+; CHECK: @__stack_chk_fail = hidden global [8 x i64] zeroinitializer, align 16
+@__stack_chk_fail = hidden global [8 x i64] zeroinitializer, align 16
diff --git a/test/Transforms/Internalize/used.ll b/test/Transforms/Internalize/used.ll
index 85b85acd5083..7c1c7413d462 100644
--- a/test/Transforms/Internalize/used.ll
+++ b/test/Transforms/Internalize/used.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -internalize -S | FileCheck %s
+; RUN: opt < %s -passes=internalize -S | FileCheck %s
@llvm.used = appending global [1 x void ()*] [void ()* @f], section "llvm.metadata"
diff --git a/test/Transforms/JumpThreading/basic.ll b/test/Transforms/JumpThreading/basic.ll
index 46c92bc1f577..14cd1fbe1c87 100644
--- a/test/Transforms/JumpThreading/basic.ll
+++ b/test/Transforms/JumpThreading/basic.ll
@@ -476,6 +476,40 @@ exit1:
; CHECK: }
}
+;;; Verify that we can handle constraint propagation through cast.
+define i32 @test16(i1 %cond) {
+Entry:
+; CHECK-LABEL: @test16(
+ br i1 %cond, label %Merge, label %F1
+
+; CHECK: Entry:
+; CHECK-NEXT: br i1 %cond, label %F2, label %Merge
+
+F1:
+ %v1 = call i32 @f1()
+ br label %Merge
+
+Merge:
+ %B = phi i32 [0, %Entry], [%v1, %F1]
+ %M = icmp eq i32 %B, 0
+ %M1 = zext i1 %M to i32
+ %N = icmp eq i32 %M1, 0
+ br i1 %N, label %T2, label %F2
+
+; CHECK: Merge:
+; CHECK-NOT: phi
+; CHECK-NEXT: %v1 = call i32 @f1()
+
+T2:
+ %Q = call i32 @f2()
+ ret i32 %Q
+
+F2:
+ ret i32 %B
+; CHECK: F2:
+; CHECK-NEXT: phi i32
+}
+
; In this test we check that block duplication is inhibited by the presence
; of a function with the 'noduplicate' attribute.
diff --git a/test/Transforms/JumpThreading/crash-assertingvh.ll b/test/Transforms/JumpThreading/crash-assertingvh.ll
new file mode 100644
index 000000000000..e78431992239
--- /dev/null
+++ b/test/Transforms/JumpThreading/crash-assertingvh.ll
@@ -0,0 +1,19 @@
+; RUN: opt -disable-output < %s -passes='module(function(jump-threading),globaldce)'
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare i32 @bar()
+
+define internal i32 @foo() {
+entry:
+ %call4 = call i32 @bar()
+ %cmp5 = icmp eq i32 %call4, 0
+ br i1 %cmp5, label %if.then6, label %if.end8
+
+if.then6:
+ ret i32 0
+
+if.end8:
+ ret i32 1
+}
diff --git a/test/Transforms/JumpThreading/implied-cond.ll b/test/Transforms/JumpThreading/implied-cond.ll
index 3d1717e91261..6da05791456d 100644
--- a/test/Transforms/JumpThreading/implied-cond.ll
+++ b/test/Transforms/JumpThreading/implied-cond.ll
@@ -96,3 +96,82 @@ define void @test2(i32 %i, i32 %len, i1* %c.ptr) {
call void @side_effect(i32 %t)
ret void
}
+
+; A s<= B implies A s> B is false.
+; CHECK-LABEL: @test3(
+; CHECK: entry:
+; CHECK: br i1 %cmp, label %if.end, label %if.end3
+; CHECK-NOT: br i1 %cmp1, label %if.then2, label %if.end
+; CHECK-NOT: call void @side_effect(i32 0)
+; CHECK: br label %if.end3
+; CHECK: ret void
+
+define void @test3(i32 %a, i32 %b) {
+entry:
+ %cmp = icmp sle i32 %a, %b
+ br i1 %cmp, label %if.then, label %if.end3
+
+if.then:
+ %cmp1 = icmp sgt i32 %a, %b
+ br i1 %cmp1, label %if.then2, label %if.end
+
+if.then2:
+ call void @side_effect(i32 0)
+ br label %if.end
+
+if.end:
+ br label %if.end3
+
+if.end3:
+ ret void
+}
+
+declare void @is(i1)
+
+; If A >=s B is false then A <=s B is implied true.
+; CHECK-LABEL: @test_sge_sle
+; CHECK: call void @is(i1 true)
+; CHECK-NOT: call void @is(i1 false)
+define void @test_sge_sle(i32 %a, i32 %b) {
+ %cmp1 = icmp sge i32 %a, %b
+ br i1 %cmp1, label %untaken, label %taken
+
+taken:
+ %cmp2 = icmp sle i32 %a, %b
+ br i1 %cmp2, label %istrue, label %isfalse
+
+istrue:
+ call void @is(i1 true)
+ ret void
+
+isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; If A <=s B is false then A <=s B is implied false.
+; CHECK-LABEL: @test_sle_sle
+; CHECK-NOT: call void @is(i1 true)
+; CHECK: call void @is(i1 false)
+define void @test_sle_sle(i32 %a, i32 %b) {
+ %cmp1 = icmp sle i32 %a, %b
+ br i1 %cmp1, label %untaken, label %taken
+
+taken:
+ %cmp2 = icmp sle i32 %a, %b
+ br i1 %cmp2, label %istrue, label %isfalse
+
+istrue:
+ call void @is(i1 true)
+ ret void
+
+isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
diff --git a/test/Transforms/JumpThreading/induction.ll b/test/Transforms/JumpThreading/induction.ll
new file mode 100644
index 000000000000..714c28d1443a
--- /dev/null
+++ b/test/Transforms/JumpThreading/induction.ll
@@ -0,0 +1,25 @@
+; RUN: opt -S -jump-threading < %s | FileCheck %s
+
+define i8 @test(i32 %a, i32 %length) {
+; CHECK-LABEL: @test
+entry:
+; CHECK: br label %backedge
+ br label %loop
+
+loop:
+; CHECK-LABEL: backedge:
+; CHECK: phi i32
+; CHECK: br i1 %cont, label %backedge, label %exit
+ %iv = phi i32 [0, %entry], [%iv.next, %backedge]
+ ;; We can use an inductive argument to prove %iv is always positive
+ %cnd = icmp sge i32 %iv, 0
+ br i1 %cnd, label %backedge, label %exit
+
+backedge:
+ %iv.next = add nsw i32 %iv, 1
+ %cont = icmp slt i32 %iv.next, 400
+ br i1 %cont, label %loop, label %exit
+exit:
+ ret i8 0
+}
+
diff --git a/test/Transforms/JumpThreading/pr26096.ll b/test/Transforms/JumpThreading/pr26096.ll
index 2671e82b6177..096d43e24d2c 100644
--- a/test/Transforms/JumpThreading/pr26096.ll
+++ b/test/Transforms/JumpThreading/pr26096.ll
@@ -10,19 +10,24 @@ entry:
br i1 %B, label %if.end, label %if.then
if.then: ; preds = %entry
- call void @fn2()
+ call void @fn2(i1 %B)
ret void
if.end: ; preds = %entry
- call void @fn2()
+ call void @fn2(i1 %B)
ret void
}
-define internal void @fn2() unnamed_addr {
+define internal void @fn2(i1 %B) unnamed_addr {
entry:
call void @fn1()
call void @fn1()
call void @fn1()
+ br i1 %B, label %if.end, label %if.then
+if.then:
+ unreachable
+
+if.end:
unreachable
}
diff --git a/test/Transforms/JumpThreading/thread-loads.ll b/test/Transforms/JumpThreading/thread-loads.ll
index f76c1ec51857..4b482cb15f9a 100644
--- a/test/Transforms/JumpThreading/thread-loads.ll
+++ b/test/Transforms/JumpThreading/thread-loads.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -jump-threading -S | FileCheck %s
+; RUN: opt < %s -passes=jump-threading -S | FileCheck %s
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
target triple = "i386-apple-darwin7"
@@ -106,6 +107,145 @@ return:
ret i32 13
}
+define i32 @test4(i32* %P) {
+; CHECK-LABEL: @test4(
+entry:
+ %v0 = tail call i32 (...) @f1()
+ %v1 = icmp eq i32 %v0, 0
+ br i1 %v1, label %bb1, label %bb
+
+bb:
+; CHECK: bb1.thread:
+; CHECK: store atomic
+; CHECK: br label %bb3
+ store atomic i32 42, i32* %P unordered, align 4
+ br label %bb1
+
+bb1:
+; CHECK: bb1:
+; CHECK-NOT: phi
+; CHECK: load atomic
+ %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]
+ %v2 = load atomic i32, i32* %P unordered, align 4
+ %v3 = icmp sgt i32 %v2, 36
+ br i1 %v3, label %bb3, label %bb2
+
+bb2:
+ %v4 = tail call i32 (...) @f2()
+ ret i32 %res.0
+
+bb3:
+ ret i32 %res.0
+}
+
+define i32 @test5(i32* %P) {
+; Negative test
+
+; CHECK-LABEL: @test5(
+entry:
+ %v0 = tail call i32 (...) @f1()
+ %v1 = icmp eq i32 %v0, 0
+ br i1 %v1, label %bb1, label %bb
+
+bb:
+; CHECK: bb:
+; CHECK-NEXT: store atomic i32 42, i32* %P release, align 4
+; CHECK-NEXT: br label %bb1
+ store atomic i32 42, i32* %P release, align 4
+ br label %bb1
+
+bb1:
+; CHECK: bb1:
+; CHECK-NEXT: %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]
+; CHECK-NEXT: %v2 = load atomic i32, i32* %P acquire, align 4
+; CHECK-NEXT: %v3 = icmp sgt i32 %v2, 36
+; CHECK-NEXT: br i1 %v3, label %bb3, label %bb2
+
+ %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]
+ %v2 = load atomic i32, i32* %P acquire, align 4
+ %v3 = icmp sgt i32 %v2, 36
+ br i1 %v3, label %bb3, label %bb2
+
+bb2:
+ %v4 = tail call i32 (...) @f2()
+ ret i32 %res.0
+
+bb3:
+ ret i32 %res.0
+}
+
+define i32 @test6(i32* %P) {
+; Negative test
+
+; CHECK-LABEL: @test6(
+entry:
+ %v0 = tail call i32 (...) @f1()
+ %v1 = icmp eq i32 %v0, 0
+ br i1 %v1, label %bb1, label %bb
+
+bb:
+; CHECK: bb:
+; CHECK-NEXT: store i32 42, i32* %P
+; CHECK-NEXT: br label %bb1
+ store i32 42, i32* %P
+ br label %bb1
+
+bb1:
+; CHECK: bb1:
+; CHECK-NEXT: %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]
+; CHECK-NEXT: %v2 = load atomic i32, i32* %P acquire, align 4
+; CHECK-NEXT: %v3 = icmp sgt i32 %v2, 36
+; CHECK-NEXT: br i1 %v3, label %bb3, label %bb2
+
+ %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]
+ %v2 = load atomic i32, i32* %P acquire, align 4
+ %v3 = icmp sgt i32 %v2, 36
+ br i1 %v3, label %bb3, label %bb2
+
+bb2:
+ %v4 = tail call i32 (...) @f2()
+ ret i32 %res.0
+
+bb3:
+ ret i32 %res.0
+}
+
+define i32 @test7(i32* %P) {
+; Negative test
+
+; CHECK-LABEL: @test7(
+entry:
+ %v0 = tail call i32 (...) @f1()
+ %v1 = icmp eq i32 %v0, 0
+ br i1 %v1, label %bb1, label %bb
+
+bb:
+; CHECK: bb:
+; CHECK-NEXT: %val = load i32, i32* %P
+; CHECK-NEXT: br label %bb1
+ %val = load i32, i32* %P
+ br label %bb1
+
+bb1:
+; CHECK: bb1:
+; CHECK-NEXT: %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]
+; CHECK-NEXT: %v2 = load atomic i32, i32* %P acquire, align 4
+; CHECK-NEXT: %v3 = icmp sgt i32 %v2, 36
+; CHECK-NEXT: br i1 %v3, label %bb3, label %bb2
+
+ %res.0 = phi i32 [ 1, %bb ], [ 0, %entry ]
+ %v2 = load atomic i32, i32* %P acquire, align 4
+ %v3 = icmp sgt i32 %v2, 36
+ br i1 %v3, label %bb3, label %bb2
+
+bb2:
+ %v4 = tail call i32 (...) @f2()
+ ret i32 %res.0
+
+bb3:
+ ret i32 %res.0
+}
+
!0 = !{!3, !3, i64 0}
!1 = !{!"omnipotent char", !2}
!2 = !{!"Simple C/C++ TBAA", null}
diff --git a/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll b/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll
index a6abfa5f3c4d..773cd890f89b 100644
--- a/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll
+++ b/test/Transforms/LCSSA/2006-06-03-IncorrectIDFPhis.ll
@@ -1,15 +1,16 @@
-; RUN: opt < %s -loop-simplify -lcssa -S | \
-; RUN: grep "%%SJE.0.0.lcssa = phi .struct.SetJmpMapEntry"
+; RUN: opt < %s -loop-simplify -lcssa -S | FileCheck %s
%struct.SetJmpMapEntry = type { i8*, i32, %struct.SetJmpMapEntry* }
define void @__llvm_sjljeh_try_catching_longjmp_exception() {
+; CHECK-LABEL: @__llvm_sjljeh_try_catching_longjmp_exception
entry:
br i1 false, label %UnifiedReturnBlock, label %no_exit
no_exit: ; preds = %endif, %entry
%SJE.0.0 = phi %struct.SetJmpMapEntry* [ %tmp.24, %endif ], [ null, %entry ] ; <%struct.SetJmpMapEntry*> [#uses=1]
br i1 false, label %then, label %endif
then: ; preds = %no_exit
+; CHECK: %SJE.0.0.lcssa = phi %struct.SetJmpMapEntry
%tmp.20 = getelementptr %struct.SetJmpMapEntry, %struct.SetJmpMapEntry* %SJE.0.0, i32 0, i32 1 ; <i32*> [#uses=0]
ret void
endif: ; preds = %no_exit
diff --git a/test/Transforms/LCSSA/2006-06-12-MultipleExitsSameBlock.ll b/test/Transforms/LCSSA/2006-06-12-MultipleExitsSameBlock.ll
index 575f8163c94b..5f9fd2633c8d 100644
--- a/test/Transforms/LCSSA/2006-06-12-MultipleExitsSameBlock.ll
+++ b/test/Transforms/LCSSA/2006-06-12-MultipleExitsSameBlock.ll
@@ -1,13 +1,12 @@
-; RUN: opt < %s -lcssa -S | \
-; RUN: grep "%X.1.lcssa"
-; RUN: opt < %s -lcssa -S | \
-; RUN: not grep "%X.1.lcssa1"
+; RUN: opt < %s -lcssa -S | FileCheck %s
+; RUN: opt < %s -passes=lcssa -S | FileCheck %s
declare i1 @c1()
declare i1 @c2()
define i32 @foo() {
+; CHECK-LABEL: @foo
entry:
br label %loop_begin
loop_begin: ; preds = %loop_body.2, %entry
@@ -20,8 +19,10 @@ loop_body.2: ; preds = %loop_body.1
%rel.2 = call i1 @c2( ) ; <i1> [#uses=1]
br i1 %rel.2, label %loop_exit, label %loop_begin
loop_exit: ; preds = %loop_body.2, %loop_body.1
+; CHECK: %X.1.lcssa = phi
ret i32 %X.1
loop_exit2: ; preds = %loop_begin
ret i32 1
+; CHECK-NOT: %X.1.lcssa1
}
diff --git a/test/Transforms/LCSSA/2006-07-09-NoDominator.ll b/test/Transforms/LCSSA/2006-07-09-NoDominator.ll
index bc3d150fbdfe..786744401d09 100644
--- a/test/Transforms/LCSSA/2006-07-09-NoDominator.ll
+++ b/test/Transforms/LCSSA/2006-07-09-NoDominator.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -lcssa
+; RUN: opt < %s -passes=lcssa
%struct.SetJmpMapEntry = type { i8*, i32, %struct.SetJmpMapEntry* }
diff --git a/test/Transforms/LCSSA/2006-10-31-UnreachableBlock.ll b/test/Transforms/LCSSA/2006-10-31-UnreachableBlock.ll
index ecb1be5c674e..66760c565b83 100644
--- a/test/Transforms/LCSSA/2006-10-31-UnreachableBlock.ll
+++ b/test/Transforms/LCSSA/2006-10-31-UnreachableBlock.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -lcssa -disable-output
+; RUN: opt < %s -passes=lcssa -disable-output
; PR977
; END.
diff --git a/test/Transforms/LCSSA/basictest.ll b/test/Transforms/LCSSA/basictest.ll
index 4b05ad995305..e13c244d9e0b 100644
--- a/test/Transforms/LCSSA/basictest.ll
+++ b/test/Transforms/LCSSA/basictest.ll
@@ -1,9 +1,8 @@
-; RUN: opt < %s -lcssa -S | \
-; RUN: grep "X3.lcssa = phi i32"
-; RUN: opt < %s -lcssa -S | \
-; RUN: grep "X4 = add i32 3, %X3.lcssa"
+; RUN: opt < %s -lcssa -S | FileCheck %s
+; RUN: opt < %s -passes=lcssa -S | FileCheck %s
define void @lcssa(i1 %S2) {
+; CHECK-LABEL: @lcssa
entry:
br label %loop.interior
loop.interior: ; preds = %post.if, %entry
@@ -18,6 +17,8 @@ post.if: ; preds = %if.false, %if.true
%X3 = phi i32 [ %X1, %if.true ], [ %X2, %if.false ] ; <i32> [#uses=1]
br i1 %S2, label %loop.exit, label %loop.interior
loop.exit: ; preds = %post.if
+; CHECK: %X3.lcssa = phi i32
+; CHECK: %X4 = add i32 3, %X3.lcssa
%X4 = add i32 3, %X3 ; <i32> [#uses=0]
ret void
}
diff --git a/test/Transforms/LCSSA/invoke-dest.ll b/test/Transforms/LCSSA/invoke-dest.ll
index 1523d4ff1f64..05a0e2a5e959 100644
--- a/test/Transforms/LCSSA/invoke-dest.ll
+++ b/test/Transforms/LCSSA/invoke-dest.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -lcssa
+; RUN: opt < %s -passes=lcssa
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
diff --git a/test/Transforms/LCSSA/mixed-catch.ll b/test/Transforms/LCSSA/mixed-catch.ll
index 95d5b17bf081..1ae4cf8cdd0f 100644
--- a/test/Transforms/LCSSA/mixed-catch.ll
+++ b/test/Transforms/LCSSA/mixed-catch.ll
@@ -1,4 +1,5 @@
; RUN: opt -lcssa -S < %s | FileCheck %s
+; RUN: opt -passes=lcssa -S < %s | FileCheck %s
; This test is based on the following C++ code:
;
diff --git a/test/Transforms/LCSSA/unused-phis.ll b/test/Transforms/LCSSA/unused-phis.ll
index 01b214b8e36c..2c503f32713f 100644
--- a/test/Transforms/LCSSA/unused-phis.ll
+++ b/test/Transforms/LCSSA/unused-phis.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -lcssa -S | FileCheck %s
+; RUN: opt < %s -passes=lcssa -S | FileCheck %s
; CHECK: exit1:
; CHECK: .lcssa =
; CHECK: exit2:
diff --git a/test/Transforms/LICM/AliasSetMemSet.ll b/test/Transforms/LICM/AliasSetMemSet.ll
new file mode 100644
index 000000000000..d60b321e278e
--- /dev/null
+++ b/test/Transforms/LICM/AliasSetMemSet.ll
@@ -0,0 +1,51 @@
+; RUN: opt < %s -loop-deletion -licm -loop-idiom -disable-output
+; Check no assertion when loop-idiom deletes the MemSet already analyzed by licm
+define void @set_array() {
+ br i1 false, label %bb3.preheader.lr.ph, label %bb9
+
+bb3.preheader.lr.ph: ; preds = %0
+ br label %bb3.preheader
+
+bb4: ; preds = %bb4.lr.ph, %bb7
+ %j.3.06 = phi i8 [ %j.3.17, %bb4.lr.ph ], [ %_tmp13, %bb7 ]
+ br label %bb6
+
+bb6: ; preds = %bb4, %bb6
+ %k.4.04 = phi i8 [ 0, %bb4 ], [ %_tmp9, %bb6 ]
+ %_tmp31 = sext i8 %j.3.06 to i64
+ %_tmp4 = mul i64 %_tmp31, 10
+ %_tmp5 = getelementptr i8, i8* undef, i64 %_tmp4
+ %_tmp7 = getelementptr i8, i8* %_tmp5, i8 %k.4.04
+ store i8 42, i8* %_tmp7
+ %_tmp9 = add i8 %k.4.04, 1
+ %_tmp11 = icmp slt i8 %_tmp9, 10
+ br i1 %_tmp11, label %bb6, label %bb7
+
+bb7: ; preds = %bb6
+ %_tmp13 = add i8 %j.3.06, 1
+ %_tmp15 = icmp slt i8 %_tmp13, 2
+ br i1 %_tmp15, label %bb4, label %bb3.bb1.loopexit_crit_edge
+
+bb3.bb1.loopexit_crit_edge: ; preds = %bb7
+ %split = phi i8 [ %_tmp13, %bb7 ]
+ br label %bb1.loopexit
+
+bb1.loopexit: ; preds = %bb3.bb1.loopexit_crit_edge, %bb3.preheader
+ %j.3.0.lcssa = phi i8 [ %split, %bb3.bb1.loopexit_crit_edge ], [ %j.3.17, %bb3.preheader ]
+ br i1 false, label %bb3.preheader, label %bb1.bb9_crit_edge
+
+bb3.preheader: ; preds = %bb3.preheader.lr.ph, %bb1.loopexit
+ %j.3.17 = phi i8 [ undef, %bb3.preheader.lr.ph ], [ %j.3.0.lcssa, %bb1.loopexit ]
+ %_tmp155 = icmp slt i8 %j.3.17, 2
+ br i1 %_tmp155, label %bb4.lr.ph, label %bb1.loopexit
+
+bb4.lr.ph: ; preds = %bb3.preheader
+ br label %bb4
+
+bb1.bb9_crit_edge: ; preds = %bb1.loopexit
+ br label %bb9
+
+bb9: ; preds = %bb1.bb9_crit_edge, %0
+ ret void
+}
+
diff --git a/test/Transforms/LICM/alias-set-tracker-loss.ll b/test/Transforms/LICM/alias-set-tracker-loss.ll
new file mode 100644
index 000000000000..378d908f6987
--- /dev/null
+++ b/test/Transforms/LICM/alias-set-tracker-loss.ll
@@ -0,0 +1,39 @@
+; RUN: opt -S -licm -loop-unroll < %s
+;
+; This test contains a carefully rotated set of three nested loops. The middle
+; loop can be unrolled leaving one copy of the inner loop inside the outer
+; loop. Because of how LICM works, when this middle loop is unrolled and
+; removed, its alias set tracker is destroyed and no longer available when LICM
+; runs on the outer loop.
+
+define void @f() {
+entry:
+ br label %l1
+
+l2.l1.loopexit_crit_edge:
+ br label %l1.loopexit
+
+l1.loopexit:
+ br label %l1.backedge
+
+l1:
+ br i1 undef, label %l1.backedge, label %l2.preheader
+
+l1.backedge:
+ br label %l1
+
+l2.preheader:
+ br i1 true, label %l1.loopexit, label %l3.preheader.lr.ph
+
+l3.preheader.lr.ph:
+ br label %l3.preheader
+
+l2.loopexit:
+ br i1 true, label %l2.l1.loopexit_crit_edge, label %l3.preheader
+
+l3.preheader:
+ br label %l3
+
+l3:
+ br i1 true, label %l3, label %l2.loopexit
+}
diff --git a/test/Transforms/LICM/argmemonly-call.ll b/test/Transforms/LICM/argmemonly-call.ll
index e2640a1c8deb..18d7f8351dca 100644
--- a/test/Transforms/LICM/argmemonly-call.ll
+++ b/test/Transforms/LICM/argmemonly-call.ll
@@ -1,4 +1,5 @@
; RUN: opt -S -basicaa -licm %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' < %s -S | FileCheck %s
declare i32 @foo() readonly argmemonly nounwind
declare i32 @foo2() readonly nounwind
declare i32 @bar(i32* %loc2) readonly argmemonly nounwind
diff --git a/test/Transforms/LICM/assume.ll b/test/Transforms/LICM/assume.ll
new file mode 100644
index 000000000000..f6369ac659f0
--- /dev/null
+++ b/test/Transforms/LICM/assume.ll
@@ -0,0 +1,52 @@
+; RUN: opt -licm -basicaa < %s -S | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' < %s -S | FileCheck %s
+
+define void @f_0(i1 %p) nounwind ssp {
+; CHECK-LABEL: @f_0(
+entry:
+ br label %for.body
+
+for.body:
+ br i1 undef, label %if.then, label %for.cond.backedge
+
+for.cond.backedge:
+ br i1 undef, label %for.end104, label %for.body
+
+if.then:
+ br i1 undef, label %if.then27, label %if.end.if.end.split_crit_edge.critedge
+
+if.then27:
+; CHECK: tail call void @llvm.assume
+ tail call void @llvm.assume(i1 %p)
+ br label %for.body61.us
+
+if.end.if.end.split_crit_edge.critedge:
+ br label %for.body61
+
+for.body61.us:
+ br i1 undef, label %for.cond.backedge, label %for.body61.us
+
+for.body61:
+ br i1 undef, label %for.cond.backedge, label %for.body61
+
+for.end104:
+ ret void
+}
+
+define void @f_1(i1 %cond, i32* %ptr) {
+; CHECK-LABEL: @f_1(
+; CHECK: %val = load i32, i32* %ptr
+; CHECK-NEXT: br label %loop
+
+entry:
+ br label %loop
+
+loop:
+ %x = phi i32 [ 0, %entry ], [ %x.inc, %loop ]
+ call void @llvm.assume(i1 %cond)
+ %val = load i32, i32* %ptr
+ %x.inc = add i32 %x, %val
+ br label %loop
+}
+
+declare void @llvm.assume(i1)
diff --git a/test/Transforms/LICM/atomics.ll b/test/Transforms/LICM/atomics.ll
index 4fe197abf5d3..5dcd4bb8c05a 100644
--- a/test/Transforms/LICM/atomics.ll
+++ b/test/Transforms/LICM/atomics.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -S -basicaa -licm | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='lcssa,require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' < %s -S | FileCheck %s
; Check that we can hoist unordered loads
define i32 @test1(i32* nocapture %y) nounwind uwtable ssp {
diff --git a/test/Transforms/LICM/basictest.ll b/test/Transforms/LICM/basictest.ll
index 1dbb4dc6b499..570e226d2372 100644
--- a/test/Transforms/LICM/basictest.ll
+++ b/test/Transforms/LICM/basictest.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -licm | llvm-dis
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' < %s | llvm-dis
define void @testfunc(i32 %i) {
; <label>:0
diff --git a/test/Transforms/LICM/constexpr.ll b/test/Transforms/LICM/constexpr.ll
index 506721f25f8f..726246776dc6 100644
--- a/test/Transforms/LICM/constexpr.ll
+++ b/test/Transforms/LICM/constexpr.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -S -basicaa -licm | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='lcssa,require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' < %s -S | FileCheck %s
; This fixes PR22460
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
diff --git a/test/Transforms/LICM/crash.ll b/test/Transforms/LICM/crash.ll
index 7fa41157338d..75c27b8def0c 100644
--- a/test/Transforms/LICM/crash.ll
+++ b/test/Transforms/LICM/crash.ll
@@ -1,4 +1,5 @@
; RUN: opt -licm -disable-output < %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -disable-output < %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.0.0"
diff --git a/test/Transforms/LICM/debug-value.ll b/test/Transforms/LICM/debug-value.ll
index d8ae5e576641..ab77caa2bae0 100644
--- a/test/Transforms/LICM/debug-value.ll
+++ b/test/Transforms/LICM/debug-value.ll
@@ -1,4 +1,5 @@
; RUN: opt -licm -basicaa < %s -S | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' < %s -S | FileCheck %s
define void @dgefa() nounwind ssp {
entry:
@@ -34,19 +35,18 @@ for.end104: ; preds = %for.cond.backedge
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
!llvm.module.flags = !{!26}
-!llvm.dbg.sp = !{!0, !6, !9, !10}
+!llvm.dbg.cu = !{!2}
-!0 = distinct !DISubprogram(name: "idamax", line: 112, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !25, scope: !1, type: !3)
+!0 = distinct !DISubprogram(name: "idamax", line: 112, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !2, file: !25, scope: !1, type: !3)
!1 = !DIFile(filename: "/Volumes/Lalgate/work/llvm/projects/llvm-test/SingleSource/Benchmarks/CoyoteBench/lpbench.c", directory: "/private/tmp")
-!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 127169)", isOptimized: true, emissionKind: 0, file: !25, enums: !8, retainedTypes: !8, subprograms: !8)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 127169)", isOptimized: true, emissionKind: FullDebug, file: !25)
!3 = !DISubroutineType(types: !4)
!4 = !{!5}
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!6 = distinct !DISubprogram(name: "dscal", line: 206, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !25, scope: !1, type: !7)
-!7 = !DISubroutineType(types: !8)
-!8 = !{null}
-!9 = distinct !DISubprogram(name: "daxpy", line: 230, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !25, scope: !1, type: !7)
-!10 = distinct !DISubprogram(name: "dgefa", line: 267, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !25, scope: !1, type: !7)
+!6 = distinct !DISubprogram(name: "dscal", line: 206, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !2, file: !25, scope: !1, type: !7)
+!7 = !DISubroutineType(types: !{null})
+!9 = distinct !DISubprogram(name: "daxpy", line: 230, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !2, file: !25, scope: !1, type: !7)
+!10 = distinct !DISubprogram(name: "dgefa", line: 267, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !2, file: !25, scope: !1, type: !7)
!11 = !DILocation(line: 281, column: 9, scope: !12)
!12 = distinct !DILexicalBlock(line: 272, column: 5, file: !25, scope: !13)
!13 = distinct !DILexicalBlock(line: 271, column: 5, file: !25, scope: !14)
diff --git a/test/Transforms/LICM/extra-copies.ll b/test/Transforms/LICM/extra-copies.ll
index ef52f9f404c1..84a3bc9ec6a6 100644
--- a/test/Transforms/LICM/extra-copies.ll
+++ b/test/Transforms/LICM/extra-copies.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -licm -S | FileCheck %s
+; RUN: opt -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' < %s -S | FileCheck %s
; PR19835
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/LICM/funclet.ll b/test/Transforms/LICM/funclet.ll
index ef4be2969151..9bdc6dbcde88 100644
--- a/test/Transforms/LICM/funclet.ll
+++ b/test/Transforms/LICM/funclet.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -licm -S | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' < %s -S | FileCheck %s
target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
target triple = "i386-pc-windows-msvc18.0.0"
diff --git a/test/Transforms/LICM/hoist-bitcast-load.ll b/test/Transforms/LICM/hoist-bitcast-load.ll
index 47c474c17dde..5752aecde387 100644
--- a/test/Transforms/LICM/hoist-bitcast-load.ll
+++ b/test/Transforms/LICM/hoist-bitcast-load.ll
@@ -1,4 +1,5 @@
; RUN: opt -S -basicaa -licm < %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='loop-simplify,require<aa>,require<targetir>,require<scalar-evolution>,loop(simplify-cfg,licm)' -S < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/LICM/hoist-deref-load.ll b/test/Transforms/LICM/hoist-deref-load.ll
index fd10c5d7503d..ed6ec7694d3c 100644
--- a/test/Transforms/LICM/hoist-deref-load.ll
+++ b/test/Transforms/LICM/hoist-deref-load.ll
@@ -1,4 +1,5 @@
; RUN: opt -S -basicaa -licm < %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='loop-simplify,require<aa>,require<targetir>,require<scalar-evolution>,loop(simplify-cfg,licm)' -S < %s | FileCheck %s
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@@ -432,5 +433,127 @@ for.end: ; preds = %for.inc, %entry
ret void
}
+define void @test11(i32* noalias %a, i32* %b, i32** dereferenceable(8) %cptr, i32 %n) #0 {
+; CHECK-LABEL: @test11(
+entry:
+ %cmp11 = icmp sgt i32 %n, 0
+ br i1 %cmp11, label %for.body, label %for.end
+
+; CHECK: for.body.preheader:
+; CHECK: %c = load i32*, i32** %cptr, !dereferenceable !0
+; CHECK: %d = load i32, i32* %c, align 4
+
+
+for.body: ; preds = %entry, %for.inc
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %cmp1 = icmp sgt i32 %0, 0
+ %c = load i32*, i32** %cptr, !dereferenceable !0
+ br i1 %cmp1, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ %d = load i32, i32* %c, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+ %e = load i32, i32* %arrayidx3, align 4
+ %mul = mul nsw i32 %e, %d
+ store i32 %mul, i32* %arrayidx, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %if.then
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc, %entry
+ ret void
+}
+
+declare void @llvm.experimental.guard(i1, ...)
+
+define void @test12(i32* noalias %a, i32* %b, i32* dereferenceable_or_null(4) %c, i32 %n) #0 {
+; Prove non-null ness of %c via a guard, not a branch.
+
+; CHECK-LABEL: @test12(
+entry:
+ %not_null = icmp ne i32* %c, null
+ call void(i1, ...) @llvm.experimental.guard(i1 %not_null) [ "deopt"() ]
+ %cmp11 = icmp sgt i32 %n, 0
+ br i1 %cmp11, label %for.body, label %for.end
+
+; CHECK: for.body.preheader:
+; CHECK-NEXT: [[VAL:%[^ ]]] = load i32, i32* %c, align 4
+; CHECK-NEXT: br label %for.body
+
+
+for.body: ; preds = %entry, %for.inc
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %cmp1 = icmp sgt i32 %0, 0
+ br i1 %cmp1, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ %1 = load i32, i32* %c, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+ %2 = load i32, i32* %arrayidx3, align 4
+ %mul = mul nsw i32 %2, %1
+ store i32 %mul, i32* %arrayidx, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %if.then
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc, %entry, %entry
+ ret void
+}
+
+define void @test13(i32* noalias %a, i32* %b, i32* dereferenceable_or_null(4) %c, i32 %n) #0 {
+; Like @test12, but has a post-dominating guard, which cannot be used
+; to prove %c is nonnull at the point of the load.
+
+; CHECK-LABEL: @test13(
+entry:
+ %not_null = icmp ne i32* %c, null
+ %cmp11 = icmp sgt i32 %n, 0
+ br i1 %cmp11, label %for.body, label %for.end
+
+; CHECK: for.body.preheader:
+; CHECK-NOT: load i32, i32* %c
+; CHECK: br label %for.body
+
+for.body: ; preds = %entry, %for.inc
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %cmp1 = icmp sgt i32 %0, 0
+ br i1 %cmp1, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+; CHECK: if.then:
+; CHECK: load i32, i32* %c
+; CHECK: br label %for.inc
+ %1 = load i32, i32* %c, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+ %2 = load i32, i32* %arrayidx3, align 4
+ %mul = mul nsw i32 %2, %1
+ store i32 %mul, i32* %arrayidx, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %if.then
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc, %entry, %entry
+ call void(i1, ...) @llvm.experimental.guard(i1 %not_null) [ "deopt"() ]
+ ret void
+}
+
attributes #0 = { nounwind uwtable }
!0 = !{i64 4}
diff --git a/test/Transforms/LICM/hoist-nounwind.ll b/test/Transforms/LICM/hoist-nounwind.ll
new file mode 100644
index 000000000000..081729f808bf
--- /dev/null
+++ b/test/Transforms/LICM/hoist-nounwind.ll
@@ -0,0 +1,72 @@
+; RUN: opt -S -basicaa -licm < %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='lcssa,require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @f() nounwind
+
+; Don't hoist load past nounwind call.
+define i32 @test1(i32* noalias nocapture readonly %a) nounwind uwtable {
+; CHECK-LABEL: @test1(
+entry:
+ br label %for.body
+
+; CHECK: tail call void @f()
+; CHECK-NEXT: load i32
+for.body:
+ %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %x.05 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ tail call void @f() nounwind
+ %i1 = load i32, i32* %a, align 4
+ %add = add nsw i32 %i1, %x.05
+ %inc = add nuw nsw i32 %i.06, 1
+ %exitcond = icmp eq i32 %inc, 1000
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret i32 %add
+}
+
+; Don't hoist division past nounwind call.
+define i32 @test2(i32 %N, i32 %c) nounwind uwtable {
+; CHECK-LABEL: @test2(
+entry:
+ %cmp4 = icmp sgt i32 %N, 0
+ br i1 %cmp4, label %for.body, label %for.cond.cleanup
+
+; CHECK: tail call void @f()
+; CHECK-NEXT: sdiv i32
+for.body:
+ %i.05 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ tail call void @f() nounwind
+ %div = sdiv i32 5, %c
+ %add = add i32 %i.05, 1
+ %inc = add i32 %add, %div
+ %cmp = icmp slt i32 %inc, %N
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret i32 0
+}
+
+; Don't hoist load past volatile load.
+define i32 @test3(i32* noalias nocapture readonly %a, i32* %v) nounwind uwtable {
+; CHECK-LABEL: @test3(
+entry:
+ br label %for.body
+
+; CHECK: load volatile i32
+; CHECK-NEXT: load i32
+for.body:
+ %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %x.05 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %xxx = load volatile i32, i32* %v, align 4
+ %i1 = load i32, i32* %a, align 4
+ %add = add nsw i32 %i1, %x.05
+ %inc = add nuw nsw i32 %i.06, 1
+ %exitcond = icmp eq i32 %inc, 1000
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret i32 %add
+}
diff --git a/test/Transforms/LICM/hoist-round.ll b/test/Transforms/LICM/hoist-round.ll
new file mode 100644
index 000000000000..a87709b810d2
--- /dev/null
+++ b/test/Transforms/LICM/hoist-round.ll
@@ -0,0 +1,62 @@
+; RUN: opt -S -licm < %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S %s | FileCheck %s
+
+target datalayout = "E-m:e-p:32:32-i8:8:8-i16:16:16-i64:32:32-f64:32:32-v64:32:32-v128:32:32-a0:0:32-n32"
+
+; This test verifies that ceil, floor, nearbyint, trunc, rint, round,
+; copysign, minnum, maxnum and fabs intrinsics are considered safe
+; to speculate.
+
+; CHECK-LABEL: @test
+; CHECK: call float @llvm.ceil.f32
+; CHECK: call float @llvm.floor.f32
+; CHECK: call float @llvm.nearbyint.f32
+; CHECK: call float @llvm.rint.f32
+; CHECK: call float @llvm.round.f32
+; CHECK: call float @llvm.trunc.f32
+; CHECK: call float @llvm.fabs.f32
+; CHECK: call float @llvm.copysign.f32
+; CHECK: call float @llvm.minnum.f32
+; CHECK: call float @llvm.maxnum.f32
+; CHECK: for.body:
+
+define void @test(float %arg1, float %arg2) {
+entry:
+ br label %for.head
+
+for.head:
+ %IND = phi i32 [ 0, %entry ], [ %IND.new, %for.body ]
+ %CMP = icmp slt i32 %IND, 10
+ br i1 %CMP, label %for.body, label %exit
+
+for.body:
+ %tmp.1 = call float @llvm.ceil.f32(float %arg1)
+ %tmp.2 = call float @llvm.floor.f32(float %tmp.1)
+ %tmp.3 = call float @llvm.nearbyint.f32(float %tmp.2)
+ %tmp.4 = call float @llvm.rint.f32(float %tmp.3)
+ %tmp.5 = call float @llvm.round.f32(float %tmp.4)
+ %tmp.6 = call float @llvm.trunc.f32(float %tmp.5)
+ %tmp.7 = call float @llvm.fabs.f32(float %tmp.6)
+ %tmp.8 = call float @llvm.copysign.f32(float %tmp.7, float %arg2)
+ %tmp.9 = call float @llvm.minnum.f32(float %tmp.8, float %arg2)
+ %tmp.10 = call float @llvm.maxnum.f32(float %tmp.9, float %arg2)
+ call void @consume(float %tmp.10)
+ %IND.new = add i32 %IND, 1
+ br label %for.head
+
+exit:
+ ret void
+}
+
+declare void @consume(float)
+
+declare float @llvm.ceil.f32(float)
+declare float @llvm.floor.f32(float)
+declare float @llvm.nearbyint.f32(float)
+declare float @llvm.rint.f32(float)
+declare float @llvm.round.f32(float)
+declare float @llvm.trunc.f32(float)
+declare float @llvm.fabs.f32(float)
+declare float @llvm.copysign.f32(float, float)
+declare float @llvm.minnum.f32(float, float)
+declare float @llvm.maxnum.f32(float, float)
diff --git a/test/Transforms/LICM/hoisting.ll b/test/Transforms/LICM/hoisting.ll
index 8609407cc599..cb6981ede1e7 100644
--- a/test/Transforms/LICM/hoisting.ll
+++ b/test/Transforms/LICM/hoisting.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -licm -S | FileCheck %s
+; RUN: opt -lcssa %s | opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S | FileCheck %s
@X = global i32 0 ; <i32*> [#uses=1]
@@ -37,16 +38,21 @@ define i32 @test2(i1 %c) {
; CHECK-LABEL: @test2(
; CHECK-NEXT: load i32, i32* @X
; CHECK-NEXT: %B = sdiv i32 4, %A
- %A = load i32, i32* @X ; <i32> [#uses=2]
- br label %Loop
+ %A = load i32, i32* @X
+ br label %Loop
+
Loop:
- ;; Should have hoisted this div!
- %B = sdiv i32 4, %A ; <i32> [#uses=2]
- call void @foo2( i32 %B )
- br i1 %c, label %Loop, label %Out
-Out: ; preds = %Loop
- %C = sub i32 %A, %B ; <i32> [#uses=1]
- ret i32 %C
+ ;; Should have hoisted this div!
+ %B = sdiv i32 4, %A
+ br label %loop2
+
+loop2:
+ call void @foo2( i32 %B )
+ br i1 %c, label %Loop, label %Out
+
+Out:
+ %C = sub i32 %A, %B
+ ret i32 %C
}
diff --git a/test/Transforms/LICM/lcssa-ssa-promoter.ll b/test/Transforms/LICM/lcssa-ssa-promoter.ll
index b0cae8772f3e..d466b3baffc8 100644
--- a/test/Transforms/LICM/lcssa-ssa-promoter.ll
+++ b/test/Transforms/LICM/lcssa-ssa-promoter.ll
@@ -1,4 +1,5 @@
; RUN: opt -S -basicaa -licm < %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S %s| FileCheck %s
;
; Manually validate LCSSA form is preserved even after SSAUpdater is used to
; promote things in the loop bodies.
diff --git a/test/Transforms/LICM/no-preheader-test.ll b/test/Transforms/LICM/no-preheader-test.ll
index bd3eea38ef3e..4b6847cdad51 100644
--- a/test/Transforms/LICM/no-preheader-test.ll
+++ b/test/Transforms/LICM/no-preheader-test.ll
@@ -1,5 +1,6 @@
; Test that LICM works when there is not a loop-preheader
; RUN: opt < %s -licm | llvm-dis
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' < %s | llvm-dis
define void @testfunc(i32 %i.s, i1 %ifcond) {
br i1 %ifcond, label %Then, label %Else
diff --git a/test/Transforms/LICM/pr26843.ll b/test/Transforms/LICM/pr26843.ll
new file mode 100644
index 000000000000..a14acbef964f
--- /dev/null
+++ b/test/Transforms/LICM/pr26843.ll
@@ -0,0 +1,32 @@
+; RUN: opt -S -basicaa -licm < %s | FileCheck %s
+
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686-pc-windows-msvc18.0.0"
+
+@v = common global i32 zeroinitializer, align 4
+
+; Make sure the store to v is not sunk past the memset
+; CHECK-LABEL: @main
+; CHECK: for.body:
+; CHECK-NEXT: store i32 1, i32* @v
+; CHECK-NEXT: tail call void @llvm.memset
+; CHECK: end:
+; CHECK-NEXT: ret i32 0
+
+define i32 @main(i1 %k) {
+entry:
+ br label %for.body
+
+for.body:
+ store i32 1, i32* @v, align 4
+ tail call void @llvm.memset.p0i8.i32(i8* bitcast (i32* @v to i8*), i8 0, i32 4, i32 4, i1 false)
+ br label %for.latch
+
+for.latch:
+ br i1 %k, label %for.body, label %end
+
+end:
+ ret i32 0
+}
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1)
diff --git a/test/Transforms/LICM/pr27262.ll b/test/Transforms/LICM/pr27262.ll
new file mode 100644
index 000000000000..5fc6d9389e0d
--- /dev/null
+++ b/test/Transforms/LICM/pr27262.ll
@@ -0,0 +1,33 @@
+; RUN: opt -S -basicaa -licm < %s | FileCheck %s
+
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686-pc-windows-msvc18.0.0"
+
+; Make sure the store to v is not sunk past the memset
+; CHECK-LABEL: @main
+; CHECK: for.body:
+; CHECK-NEXT: store i8 1, i8* %p
+; CHECK-NEXT: store i8 2, i8* %p1
+; CHECK-NEXT: call void @llvm.memset
+; CHECK: end:
+; CHECK-NEXT: ret i32 0
+
+define i32 @main(i1 %k, i8* %p) {
+entry:
+ %p1 = getelementptr i8, i8* %p, i32 1
+ br label %for.body
+
+for.body:
+ store i8 1, i8* %p, align 1
+ store i8 2, i8* %p1, align 1
+ call void @llvm.memset.p0i8.i32(i8* %p, i8 255, i32 4, i32 1, i1 false)
+ br label %for.latch
+
+for.latch:
+ br i1 %k, label %for.body, label %end
+
+end:
+ ret i32 0
+}
+
+declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1)
diff --git a/test/Transforms/LICM/preheader-safe.ll b/test/Transforms/LICM/preheader-safe.ll
index 260a5f653b77..adc4f4237a29 100644
--- a/test/Transforms/LICM/preheader-safe.ll
+++ b/test/Transforms/LICM/preheader-safe.ll
@@ -1,4 +1,5 @@
; RUN: opt -S -licm < %s | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S %s | FileCheck %s
declare void @use_nothrow(i64 %a) nounwind
declare void @use(i64 %a)
@@ -14,6 +15,9 @@ entry:
loop: ; preds = %entry, %for.inc
%div = udiv i64 %x, %y
+ br label %loop2
+
+loop2:
call void @use_nothrow(i64 %div)
br label %loop
}
diff --git a/test/Transforms/LICM/promote-order.ll b/test/Transforms/LICM/promote-order.ll
index a189cf22f66b..7d87bb221b76 100644
--- a/test/Transforms/LICM/promote-order.ll
+++ b/test/Transforms/LICM/promote-order.ll
@@ -1,4 +1,5 @@
; RUN: opt -tbaa -basicaa -licm -S < %s | FileCheck %s
+; RUN: opt -aa-pipeline=type-based-aa,basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S %s | FileCheck %s
; LICM should keep the stores in their original order when it sinks/promotes them.
; rdar://12045203
diff --git a/test/Transforms/LICM/promote-tls.ll b/test/Transforms/LICM/promote-tls.ll
new file mode 100644
index 000000000000..e3654902a124
--- /dev/null
+++ b/test/Transforms/LICM/promote-tls.ll
@@ -0,0 +1,134 @@
+; RUN: opt -tbaa -basicaa -licm -S < %s | FileCheck %s
+; RUN: opt -aa-pipeline=type-based-aa,basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S %s | FileCheck %s
+
+; If we can prove a local is thread local, we can insert stores during
+; promotion which wouldn't be legal otherwise.
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-linux-generic"
+
+@p = external global i8*
+
+declare i8* @malloc(i64)
+
+; Exercise the TLS case
+define i32* @test(i32 %n) {
+entry:
+ ;; ignore the required null check for simplicity
+ %mem = call dereferenceable(16) noalias i8* @malloc(i64 16)
+ %addr = bitcast i8* %mem to i32*
+ br label %for.body.lr.ph
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.header
+
+for.header:
+ %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+ %old = load i32, i32* %addr, align 4
+ ; deliberate impossible to analyze branch
+ %guard = load atomic i8*, i8** @p monotonic, align 8
+ %exitcmp = icmp eq i8* %guard, null
+ br i1 %exitcmp, label %for.body, label %early-exit
+
+early-exit:
+; CHECK-LABEL: early-exit:
+; CHECK: store i32 %new1.lcssa, i32* %addr, align 1
+ ret i32* null
+
+for.body:
+ %new = add i32 %old, 1
+ store i32 %new, i32* %addr, align 4
+ %inc = add nsw i32 %i.02, 1
+ %cmp = icmp slt i32 %inc, %n
+ br i1 %cmp, label %for.header, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge: ; preds = %for.body
+; CHECK-LABEL: for.cond.for.end_crit_edge:
+; CHECK: store i32 %new.lcssa, i32* %addr, align 1
+ %split = phi i32* [ %addr, %for.body ]
+ ret i32* null
+}
+
+declare i8* @not_malloc(i64)
+
+; Negative test - not TLS
+define i32* @test_neg(i32 %n) {
+entry:
+ ;; ignore the required null check for simplicity
+ %mem = call dereferenceable(16) noalias i8* @not_malloc(i64 16)
+ %addr = bitcast i8* %mem to i32*
+ br label %for.body.lr.ph
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.header
+
+for.header:
+ %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+ %old = load i32, i32* %addr, align 4
+ ; deliberate impossible to analyze branch
+ %guard = load volatile i8*, i8** @p
+ %exitcmp = icmp eq i8* %guard, null
+ br i1 %exitcmp, label %for.body, label %early-exit
+
+early-exit:
+; CHECK-LABEL: early-exit:
+; CHECK-NOT: store
+ ret i32* null
+
+for.body:
+; CHECK-LABEL: for.body:
+; CHECK: store i32 %new, i32* %addr, align 4
+ %new = add i32 %old, 1
+ store i32 %new, i32* %addr, align 4
+ %inc = add nsw i32 %i.02, 1
+ %cmp = icmp slt i32 %inc, %n
+ br i1 %cmp, label %for.header, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge: ; preds = %for.body
+; CHECK-LABEL: for.cond.for.end_crit_edge:
+; CHECK-NOT: store
+ %split = phi i32* [ %addr, %for.body ]
+ ret i32* null
+}
+
+; Negative test - can't speculate load since branch
+; may control alignment
+define i32* @test_neg2(i32 %n) {
+entry:
+ ;; ignore the required null check for simplicity
+ %mem = call dereferenceable(16) noalias i8* @malloc(i64 16)
+ %addr = bitcast i8* %mem to i32*
+ br label %for.body.lr.ph
+
+for.body.lr.ph: ; preds = %entry
+ br label %for.header
+
+for.header:
+ %i.02 = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+ ; deliberate impossible to analyze branch
+ %guard = load volatile i8*, i8** @p
+ %exitcmp = icmp eq i8* %guard, null
+ br i1 %exitcmp, label %for.body, label %early-exit
+
+early-exit:
+; CHECK-LABEL: early-exit:
+; CHECK-NOT: store
+ ret i32* null
+
+for.body:
+; CHECK-LABEL: for.body:
+; CHECK: store i32 %new, i32* %addr, align 4
+ %old = load i32, i32* %addr, align 4
+ %new = add i32 %old, 1
+ store i32 %new, i32* %addr, align 4
+ %inc = add nsw i32 %i.02, 1
+ %cmp = icmp slt i32 %inc, %n
+ br i1 %cmp, label %for.header, label %for.cond.for.end_crit_edge
+
+for.cond.for.end_crit_edge: ; preds = %for.body
+; CHECK-LABEL: for.cond.for.end_crit_edge:
+; CHECK-NOT: store
+ %split = phi i32* [ %addr, %for.body ]
+ ret i32* null
+}
+
diff --git a/test/Transforms/LICM/scalar-promote-memmodel.ll b/test/Transforms/LICM/scalar-promote-memmodel.ll
index 3603c25ba23c..ceee7292ac5c 100644
--- a/test/Transforms/LICM/scalar-promote-memmodel.ll
+++ b/test/Transforms/LICM/scalar-promote-memmodel.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -basicaa -licm -S | FileCheck %s
+; RUN: opt -aa-pipeline=type-based-aa,basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S %s | FileCheck %s
; Make sure we don't hoist a conditionally-executed store out of the loop;
; it would violate the concurrency memory model
diff --git a/test/Transforms/LICM/scalar_promote-unwind.ll b/test/Transforms/LICM/scalar_promote-unwind.ll
new file mode 100644
index 000000000000..22e7e50c22e5
--- /dev/null
+++ b/test/Transforms/LICM/scalar_promote-unwind.ll
@@ -0,0 +1,72 @@
+; RUN: opt < %s -basicaa -licm -S | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Make sure we don't hoist the store out of the loop; %a would
+; have the wrong value if f() unwinds
+
+define void @test1(i32* nocapture noalias %a, i1 zeroext %y) uwtable {
+entry:
+ br label %for.body
+
+for.body:
+ %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %0 = load i32, i32* %a, align 4
+ %add = add nsw i32 %0, 1
+ store i32 %add, i32* %a, align 4
+ br i1 %y, label %if.then, label %for.inc
+
+; CHECK: define void @test1
+; CHECK: load i32, i32*
+; CHECK-NEXT: add
+; CHECK-NEXT: store i32
+
+if.then:
+ tail call void @f()
+ br label %for.inc
+
+for.inc:
+ %inc = add nuw nsw i32 %i.03, 1
+ %exitcond = icmp eq i32 %inc, 10000
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret void
+}
+
+; We can hoist the store out of the loop here; if f() unwinds,
+; the lifetime of %a ends.
+
+define void @test2(i1 zeroext %y) uwtable {
+entry:
+ %a = alloca i32
+ br label %for.body
+
+for.body:
+ %i.03 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %0 = load i32, i32* %a, align 4
+ %add = add nsw i32 %0, 1
+ store i32 %add, i32* %a, align 4
+ br i1 %y, label %if.then, label %for.inc
+
+if.then:
+ tail call void @f()
+ br label %for.inc
+
+for.inc:
+ %inc = add nuw nsw i32 %i.03, 1
+ %exitcond = icmp eq i32 %inc, 10000
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup:
+ ret void
+
+; CHECK: define void @test2
+; CHECK: store i32
+; CHECK-NEXT: ret void
+ ret void
+}
+
+declare void @f() uwtable
diff --git a/test/Transforms/LICM/scalar_promote.ll b/test/Transforms/LICM/scalar_promote.ll
index 6ef4bac39bbc..91cdbdbc2269 100644
--- a/test/Transforms/LICM/scalar_promote.ll
+++ b/test/Transforms/LICM/scalar_promote.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -basicaa -tbaa -licm -S | FileCheck %s
+; RUN: opt -aa-pipeline=type-based-aa,basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S %s | FileCheck %s
target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
@X = global i32 7 ; <i32*> [#uses=4]
@@ -135,7 +136,7 @@ Loop: ; preds = %Loop, %0
%x2 = add i32 %x, 1 ; <i32> [#uses=1]
store i32 %x2, i32* @X
- store volatile i32* @X, i32** %P2
+ store atomic i32* @X, i32** %P2 monotonic, align 8
%Next = add i32 %j, 1 ; <i32> [#uses=2]
%cond = icmp eq i32 %Next, 0 ; <i1> [#uses=1]
diff --git a/test/Transforms/LICM/speculate.ll b/test/Transforms/LICM/speculate.ll
index 91b5a25ac0f5..fed1cbaa8555 100644
--- a/test/Transforms/LICM/speculate.ll
+++ b/test/Transforms/LICM/speculate.ll
@@ -1,4 +1,5 @@
; RUN: opt -S -licm < %s | FileCheck %s
+; RUN: opt -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S %s | FileCheck %s
; UDiv is safe to speculate if the denominator is known non-zero.
diff --git a/test/Transforms/LICM/update-scev.ll b/test/Transforms/LICM/update-scev.ll
new file mode 100644
index 000000000000..221c124c8bf1
--- /dev/null
+++ b/test/Transforms/LICM/update-scev.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -licm < %s | FileCheck %s --check-prefix=IR-AFTER-TRANSFORM
+; RUN: opt -analyze -scalar-evolution -licm -scalar-evolution < %s | FileCheck %s --check-prefix=SCEV-EXPRS
+
+declare void @clobber()
+
+define void @f_0(i1* %loc) {
+; IR-AFTER-TRANSFORM-LABEL: @f_0(
+; IR-AFTER-TRANSFORM: loop.outer:
+; IR-AFTER-TRANSFORM-NEXT: call void @clobber()
+; IR-AFTER-TRANSFORM-NEXT: %cond = load i1, i1* %loc
+; IR-AFTER-TRANSFORM-NEXT: br label %loop.inner
+
+; SCEV-EXPRS: Classifying expressions for: @f_0
+; SCEV-EXPRS: Classifying expressions for: @f_0
+; SCEV-EXPRS: %cond = load i1, i1* %loc
+; SCEV-EXPRS-NEXT: --> {{.*}} LoopDispositions: { %loop.outer: Variant, %loop.inner: Invariant }
+
+entry:
+ br label %loop.outer
+
+loop.outer:
+ call void @clobber()
+ br label %loop.inner
+
+loop.inner:
+ %cond = load i1, i1* %loc
+ br i1 %cond, label %loop.inner, label %leave.inner
+
+leave.inner:
+ br label %loop.outer
+}
diff --git a/test/Transforms/LICM/volatile-alias.ll b/test/Transforms/LICM/volatile-alias.ll
index fda930df933b..7836df004c0f 100644
--- a/test/Transforms/LICM/volatile-alias.ll
+++ b/test/Transforms/LICM/volatile-alias.ll
@@ -1,4 +1,5 @@
; RUN: opt -basicaa -sroa -loop-rotate -licm -S < %s | FileCheck %s
+; RUN: opt -basicaa -sroa -loop-rotate %s | opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(licm)' -S | FileCheck %s
; The objects *p and *q are aliased to each other, but even though *q is
; volatile, *p can be considered invariant in the loop. Check if it is moved
; out of the loop.
@@ -9,7 +10,7 @@
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; Function Attrs: nounwind uwtable
-define i32 @foo(i32* %p, i32* %q, i32 %n) #0 {
+define i32 @foo(i32* dereferenceable(4) nonnull %p, i32* %q, i32 %n) #0 {
entry:
%p.addr = alloca i32*, align 8
%q.addr = alloca i32*, align 8
diff --git a/test/Transforms/LoadCombine/load-combine-negativegep.ll b/test/Transforms/LoadCombine/load-combine-negativegep.ll
new file mode 100644
index 000000000000..7c5700b42954
--- /dev/null
+++ b/test/Transforms/LoadCombine/load-combine-negativegep.ll
@@ -0,0 +1,19 @@
+; RUN: opt -basicaa -load-combine -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @Load_NegGep(i32* %i){
+ %1 = getelementptr inbounds i32, i32* %i, i64 -1
+ %2 = load i32, i32* %1, align 4
+ %3 = load i32, i32* %i, align 4
+ %4 = add nsw i32 %3, %2
+ ret i32 %4
+; CHECK-LABEL: @Load_NegGep(
+; CHECK: %[[load:.*]] = load i64
+; CHECK: %[[combine_extract_lo:.*]] = trunc i64 %[[load]] to i32
+; CHECK: %[[combine_extract_shift:.*]] = lshr i64 %[[load]], 32
+; CHECK: %[[combine_extract_hi:.*]] = trunc i64 %[[combine_extract_shift]] to i32
+; CHECK: %[[add:.*]] = add nsw i32 %[[combine_extract_hi]], %[[combine_extract_lo]]
+}
+
+
diff --git a/test/Transforms/LoadStoreVectorizer/AMDGPU/aa-metadata.ll b/test/Transforms/LoadStoreVectorizer/AMDGPU/aa-metadata.ll
new file mode 100644
index 000000000000..e6904ee50bca
--- /dev/null
+++ b/test/Transforms/LoadStoreVectorizer/AMDGPU/aa-metadata.ll
@@ -0,0 +1,32 @@
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -basicaa -scoped-noalias -load-store-vectorizer -S -o - %s | FileCheck -check-prefix=SCOPE -check-prefix=ALL %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -basicaa -load-store-vectorizer -S -o - %s | FileCheck -check-prefix=NOSCOPE -check-prefix=ALL %s
+
+target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+
+; This fails to vectorize if the !alias.scope is not used
+
+; ALL-LABEL: @vectorize_alias_scope(
+; SCOPE: load float, float addrspace(1)* %c
+; SCOPE: bitcast float addrspace(1)* %a to <2 x float> addrspace(1)*
+; SCOPE: store <2 x float> zeroinitializer
+; SCOPE: store float %ld.c, float addrspace(1)* %b,
+
+; NOSCOPE: store float
+; NOSCOPE: load float
+; NOSCOPE: store float
+; NOSCOPE: store float
+define void @vectorize_alias_scope(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c) #0 {
+entry:
+ %a.idx.1 = getelementptr inbounds float, float addrspace(1)* %a, i64 1
+ store float 0.0, float addrspace(1)* %a, align 4, !noalias !0
+ %ld.c = load float, float addrspace(1)* %c, align 4, !alias.scope !0
+ store float 0.0, float addrspace(1)* %a.idx.1, align 4, !noalias !0
+ store float %ld.c, float addrspace(1)* %b, align 4, !noalias !0
+ ret void
+}
+
+attributes #0 = { nounwind }
+
+!0 = !{!1}
+!1 = distinct !{!1, !2, !"some scope"}
+!2 = distinct !{!2, !"some domain"}
diff --git a/test/Transforms/LoadStoreVectorizer/AMDGPU/extended-index.ll b/test/Transforms/LoadStoreVectorizer/AMDGPU/extended-index.ll
new file mode 100644
index 000000000000..25abb98c6ebd
--- /dev/null
+++ b/test/Transforms/LoadStoreVectorizer/AMDGPU/extended-index.ll
@@ -0,0 +1,150 @@
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -basicaa -load-store-vectorizer -S -o - %s | FileCheck %s
+
+target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+
+; CHECK-LABEL: @basic_merge_sext_index(
+; CHECK: sext i32 %id.x to i64
+; CHECK: load <2 x float>
+; CHECK: store <2 x float> zeroinitializer
+define void @basic_merge_sext_index(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c) #0 {
+entry:
+ %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+ %sext.id.x = sext i32 %id.x to i64
+ %a.idx.x = getelementptr inbounds float, float addrspace(1)* %a, i64 %sext.id.x
+ %c.idx.x = getelementptr inbounds float, float addrspace(1)* %c, i64 %sext.id.x
+ %a.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %a.idx.x, i64 1
+ %c.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %c.idx.x, i64 1
+
+ %ld.c = load float, float addrspace(1)* %c.idx.x, align 4
+ %ld.c.idx.1 = load float, float addrspace(1)* %c.idx.x.1, align 4
+
+ store float 0.0, float addrspace(1)* %a.idx.x, align 4
+ store float 0.0, float addrspace(1)* %a.idx.x.1, align 4
+
+ %add = fadd float %ld.c, %ld.c.idx.1
+ store float %add, float addrspace(1)* %b, align 4
+ ret void
+}
+
+; CHECK-LABEL: @basic_merge_zext_index(
+; CHECK: zext i32 %id.x to i64
+; CHECK: load <2 x float>
+; CHECK: store <2 x float>
+define void @basic_merge_zext_index(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c) #0 {
+entry:
+ %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+ %zext.id.x = zext i32 %id.x to i64
+ %a.idx.x = getelementptr inbounds float, float addrspace(1)* %a, i64 %zext.id.x
+ %c.idx.x = getelementptr inbounds float, float addrspace(1)* %c, i64 %zext.id.x
+ %a.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %a.idx.x, i64 1
+ %c.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %c.idx.x, i64 1
+
+ %ld.c = load float, float addrspace(1)* %c.idx.x, align 4
+ %ld.c.idx.1 = load float, float addrspace(1)* %c.idx.x.1, align 4
+ store float 0.0, float addrspace(1)* %a.idx.x, align 4
+ store float 0.0, float addrspace(1)* %a.idx.x.1, align 4
+
+ %add = fadd float %ld.c, %ld.c.idx.1
+ store float %add, float addrspace(1)* %b, align 4
+ ret void
+}
+
+; CHECK-LABEL: @merge_op_zext_index(
+; CHECK: load <2 x float>
+; CHECK: store <2 x float>
+define void @merge_op_zext_index(float addrspace(1)* nocapture noalias %a, float addrspace(1)* nocapture noalias %b, float addrspace(1)* nocapture readonly noalias %c) #0 {
+entry:
+ %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+ %shl = shl i32 %id.x, 2
+ %zext.id.x = zext i32 %shl to i64
+ %a.0 = getelementptr inbounds float, float addrspace(1)* %a, i64 %zext.id.x
+ %c.0 = getelementptr inbounds float, float addrspace(1)* %c, i64 %zext.id.x
+
+ %id.x.1 = or i32 %shl, 1
+ %id.x.1.ext = zext i32 %id.x.1 to i64
+
+ %a.1 = getelementptr inbounds float, float addrspace(1)* %a, i64 %id.x.1.ext
+ %c.1 = getelementptr inbounds float, float addrspace(1)* %c, i64 %id.x.1.ext
+
+ %ld.c.0 = load float, float addrspace(1)* %c.0, align 4
+ store float 0.0, float addrspace(1)* %a.0, align 4
+ %ld.c.1 = load float, float addrspace(1)* %c.1, align 4
+ store float 0.0, float addrspace(1)* %a.1, align 4
+
+ %add = fadd float %ld.c.0, %ld.c.1
+ store float %add, float addrspace(1)* %b, align 4
+ ret void
+}
+
+; CHECK-LABEL: @merge_op_sext_index(
+; CHECK: load <2 x float>
+; CHECK: store <2 x float>
+define void @merge_op_sext_index(float addrspace(1)* nocapture noalias %a, float addrspace(1)* nocapture noalias %b, float addrspace(1)* nocapture readonly noalias %c) #0 {
+entry:
+ %id.x = call i32 @llvm.amdgcn.workitem.id.x()
+ %shl = shl i32 %id.x, 2
+ %zext.id.x = sext i32 %shl to i64
+ %a.0 = getelementptr inbounds float, float addrspace(1)* %a, i64 %zext.id.x
+ %c.0 = getelementptr inbounds float, float addrspace(1)* %c, i64 %zext.id.x
+
+ %id.x.1 = or i32 %shl, 1
+ %id.x.1.ext = sext i32 %id.x.1 to i64
+
+ %a.1 = getelementptr inbounds float, float addrspace(1)* %a, i64 %id.x.1.ext
+ %c.1 = getelementptr inbounds float, float addrspace(1)* %c, i64 %id.x.1.ext
+
+ %ld.c.0 = load float, float addrspace(1)* %c.0, align 4
+ store float 0.0, float addrspace(1)* %a.0, align 4
+ %ld.c.1 = load float, float addrspace(1)* %c.1, align 4
+ store float 0.0, float addrspace(1)* %a.1, align 4
+
+ %add = fadd float %ld.c.0, %ld.c.1
+ store float %add, float addrspace(1)* %b, align 4
+ ret void
+}
+
+; This case fails to vectorize if not using the extra extension
+; handling in isConsecutiveAccess.
+
+; CHECK-LABEL: @zext_trunc_phi_1(
+; CHECK: loop:
+; CHECK: load <2 x i32>
+; CHECK: store <2 x i32>
+define void @zext_trunc_phi_1(i32 addrspace(1)* nocapture noalias %a, i32 addrspace(1)* nocapture noalias %b, i32 addrspace(1)* nocapture readonly noalias %c, i32 %n, i64 %arst, i64 %aoeu) #0 {
+entry:
+ %cmp0 = icmp eq i32 %n, 0
+ br i1 %cmp0, label %exit, label %loop
+
+loop:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %loop ], [ 0, %entry ]
+ %trunc.iv = trunc i64 %indvars.iv to i32
+ %idx = shl i32 %trunc.iv, 4
+
+ %idx.ext = zext i32 %idx to i64
+ %c.0 = getelementptr inbounds i32, i32 addrspace(1)* %c, i64 %idx.ext
+ %a.0 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idx.ext
+
+ %idx.1 = or i32 %idx, 1
+ %idx.1.ext = zext i32 %idx.1 to i64
+ %c.1 = getelementptr inbounds i32, i32 addrspace(1)* %c, i64 %idx.1.ext
+ %a.1 = getelementptr inbounds i32, i32 addrspace(1)* %a, i64 %idx.1.ext
+
+ %ld.c.0 = load i32, i32 addrspace(1)* %c.0, align 4
+ store i32 %ld.c.0, i32 addrspace(1)* %a.0, align 4
+ %ld.c.1 = load i32, i32 addrspace(1)* %c.1, align 4
+ store i32 %ld.c.1, i32 addrspace(1)* %a.1, align 4
+
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/Transforms/LoadStoreVectorizer/AMDGPU/insertion-point.ll b/test/Transforms/LoadStoreVectorizer/AMDGPU/insertion-point.ll
new file mode 100644
index 000000000000..64a0480d8d3c
--- /dev/null
+++ b/test/Transforms/LoadStoreVectorizer/AMDGPU/insertion-point.ll
@@ -0,0 +1,62 @@
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -basicaa -load-store-vectorizer -S -o - %s | FileCheck %s
+
+target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+
+; Check relative position of the inserted vector load relative to the existing
+; adds. Vectorized loads should be inserted at the position of the first load.
+
+; CHECK-LABEL: @insert_load_point(
+; CHECK: %z = add i32 %x, 4
+; CHECK: load <2 x float>
+; CHECK: %w = add i32 %y, 9
+; CHECK: %foo = add i32 %z, %w
+define void @insert_load_point(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c, i64 %idx, i32 %x, i32 %y) #0 {
+entry:
+ %a.idx.x = getelementptr inbounds float, float addrspace(1)* %a, i64 %idx
+ %c.idx.x = getelementptr inbounds float, float addrspace(1)* %c, i64 %idx
+ %a.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %a.idx.x, i64 1
+ %c.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %c.idx.x, i64 1
+
+ %z = add i32 %x, 4
+ %ld.c = load float, float addrspace(1)* %c.idx.x, align 4
+ %w = add i32 %y, 9
+ %ld.c.idx.1 = load float, float addrspace(1)* %c.idx.x.1, align 4
+ %foo = add i32 %z, %w
+
+ store float 0.0, float addrspace(1)* %a.idx.x, align 4
+ store float 0.0, float addrspace(1)* %a.idx.x.1, align 4
+
+ %add = fadd float %ld.c, %ld.c.idx.1
+ store float %add, float addrspace(1)* %b, align 4
+ store i32 %foo, i32 addrspace(3)* null, align 4
+ ret void
+}
+
+; CHECK-LABEL: @insert_store_point(
+; CHECK: %z = add i32 %x, 4
+; CHECK: %w = add i32 %y, 9
+; CHECK: store <2 x float>
+; CHECK: %foo = add i32 %z, %w
+define void @insert_store_point(float addrspace(1)* nocapture %a, float addrspace(1)* nocapture %b, float addrspace(1)* nocapture readonly %c, i64 %idx, i32 %x, i32 %y) #0 {
+entry:
+ %a.idx.x = getelementptr inbounds float, float addrspace(1)* %a, i64 %idx
+ %c.idx.x = getelementptr inbounds float, float addrspace(1)* %c, i64 %idx
+ %a.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %a.idx.x, i64 1
+ %c.idx.x.1 = getelementptr inbounds float, float addrspace(1)* %c.idx.x, i64 1
+
+ %ld.c = load float, float addrspace(1)* %c.idx.x, align 4
+ %ld.c.idx.1 = load float, float addrspace(1)* %c.idx.x.1, align 4
+
+ %z = add i32 %x, 4
+ store float 0.0, float addrspace(1)* %a.idx.x, align 4
+ %w = add i32 %y, 9
+ store float 0.0, float addrspace(1)* %a.idx.x.1, align 4
+ %foo = add i32 %z, %w
+
+ %add = fadd float %ld.c, %ld.c.idx.1
+ store float %add, float addrspace(1)* %b, align 4
+ store i32 %foo, i32 addrspace(3)* null, align 4
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/Transforms/LoadStoreVectorizer/AMDGPU/interleaved-mayalias-store.ll b/test/Transforms/LoadStoreVectorizer/AMDGPU/interleaved-mayalias-store.ll
new file mode 100644
index 000000000000..4d6240a9aa9d
--- /dev/null
+++ b/test/Transforms/LoadStoreVectorizer/AMDGPU/interleaved-mayalias-store.ll
@@ -0,0 +1,28 @@
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -basicaa -load-store-vectorizer -S -o - %s | FileCheck %s
+
+target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+
+; This is NOT OK to vectorize, as either load may alias either store.
+
+; CHECK: load double
+; CHECK: store double 0.000000e+00, double addrspace(1)* %a,
+; CHECK: load double
+; CHECK: store double 0.000000e+00, double addrspace(1)* %a.idx.1
+define void @interleave(double addrspace(1)* nocapture %a, double addrspace(1)* nocapture %b, double addrspace(1)* nocapture readonly %c) #0 {
+entry:
+ %a.idx.1 = getelementptr inbounds double, double addrspace(1)* %a, i64 1
+ %c.idx.1 = getelementptr inbounds double, double addrspace(1)* %c, i64 1
+
+ %ld.c = load double, double addrspace(1)* %c, align 8 ; may alias store to %a
+ store double 0.0, double addrspace(1)* %a, align 8
+
+ %ld.c.idx.1 = load double, double addrspace(1)* %c.idx.1, align 8 ; may alias store to %a
+ store double 0.0, double addrspace(1)* %a.idx.1, align 8
+
+ %add = fadd double %ld.c, %ld.c.idx.1
+ store double %add, double addrspace(1)* %b
+
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/Transforms/LoadStoreVectorizer/AMDGPU/lit.local.cfg b/test/Transforms/LoadStoreVectorizer/AMDGPU/lit.local.cfg
new file mode 100644
index 000000000000..6baccf05fff0
--- /dev/null
+++ b/test/Transforms/LoadStoreVectorizer/AMDGPU/lit.local.cfg
@@ -0,0 +1,3 @@
+if not 'AMDGPU' in config.root.targets:
+ config.unsupported = True
+
diff --git a/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll b/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll
new file mode 100644
index 000000000000..4a4237294294
--- /dev/null
+++ b/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores-private.ll
@@ -0,0 +1,53 @@
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mattr=+max-private-element-size-4 -load-store-vectorizer -S -o - %s | FileCheck -check-prefix=ELT4 -check-prefix=ALL %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mattr=+max-private-element-size-8 -load-store-vectorizer -S -o - %s | FileCheck -check-prefix=ELT8 -check-prefix=ALL %s
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -mattr=+max-private-element-size-16 -load-store-vectorizer -S -o - %s | FileCheck -check-prefix=ELT16 -check-prefix=ALL %s
+
+target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+
+; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v4i32
+; ELT4: store i32
+; ELT4: store i32
+; ELT4: store i32
+; ELT4: store i32
+
+; ELT8: store <2 x i32>
+; ELT8: store <2 x i32>
+
+; ELT16: store <4 x i32>
+define void @merge_private_store_4_vector_elts_loads_v4i32(i32* %out) #0 {
+ %out.gep.1 = getelementptr i32, i32* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32* %out, i32 2
+ %out.gep.3 = getelementptr i32, i32* %out, i32 3
+
+ store i32 9, i32* %out
+ store i32 1, i32* %out.gep.1
+ store i32 23, i32* %out.gep.2
+ store i32 19, i32* %out.gep.3
+ ret void
+}
+
+; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v4i8(
+; ALL: store <4 x i8>
+define void @merge_private_store_4_vector_elts_loads_v4i8(i8* %out) #0 {
+ %out.gep.1 = getelementptr i8, i8* %out, i32 1
+ %out.gep.2 = getelementptr i8, i8* %out, i32 2
+ %out.gep.3 = getelementptr i8, i8* %out, i32 3
+
+ store i8 9, i8* %out, align 4
+ store i8 1, i8* %out.gep.1
+ store i8 23, i8* %out.gep.2
+ store i8 19, i8* %out.gep.3
+ ret void
+}
+
+; ALL-LABEL: @merge_private_store_4_vector_elts_loads_v2i16(
+; ALL: store <2 x i16>
+define void @merge_private_store_4_vector_elts_loads_v2i16(i16* %out) #0 {
+ %out.gep.1 = getelementptr i16, i16* %out, i32 1
+
+ store i16 9, i16* %out, align 4
+ store i16 12, i16* %out.gep.1
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll b/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll
new file mode 100644
index 000000000000..03265efe2843
--- /dev/null
+++ b/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-stores.ll
@@ -0,0 +1,638 @@
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -load-store-vectorizer -S -o - %s | FileCheck %s
+; Copy of test/CodeGen/AMDGPU/merge-stores.ll with some additions
+
+target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+
+; TODO: Vector element tests
+; TODO: Non-zero base offset for load and store combinations
+; TODO: Same base addrspacecasted
+
+
+; CHECK-LABEL: @merge_global_store_2_constants_i8(
+; CHECK: store <2 x i8> <i8 -56, i8 123>, <2 x i8> addrspace(1)* %{{[0-9]+}}, align 2
+define void @merge_global_store_2_constants_i8(i8 addrspace(1)* %out) #0 { ; literal 456 wraps to i8 -56 (0xC8) in the merged vector
+ %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
+
+ store i8 123, i8 addrspace(1)* %out.gep.1
+ store i8 456, i8 addrspace(1)* %out, align 2
+ ret void
+}
+
+; CHECK-LABEL: @merge_global_store_2_constants_i8_natural_align
+; CHECK: store <2 x i8>
+define void @merge_global_store_2_constants_i8_natural_align(i8 addrspace(1)* %out) #0 { ; no explicit align: natural i8 alignment still merges
+ %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
+
+ store i8 123, i8 addrspace(1)* %out.gep.1
+ store i8 456, i8 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: @merge_global_store_2_constants_i16
+; CHECK: store <2 x i16> <i16 456, i16 123>, <2 x i16> addrspace(1)* %{{[0-9]+}}, align 4
+define void @merge_global_store_2_constants_i16(i16 addrspace(1)* %out) #0 { ; two constant i16 stores, align-4 base -> <2 x i16>
+ %out.gep.1 = getelementptr i16, i16 addrspace(1)* %out, i32 1
+
+ store i16 123, i16 addrspace(1)* %out.gep.1
+ store i16 456, i16 addrspace(1)* %out, align 4
+ ret void
+}
+
+; CHECK-LABEL: @merge_global_store_2_constants_0_i16
+; CHECK: store <2 x i16> zeroinitializer, <2 x i16> addrspace(1)* %{{[0-9]+}}, align 4
+define void @merge_global_store_2_constants_0_i16(i16 addrspace(1)* %out) #0 { ; both elements zero -> zeroinitializer vector store
+ %out.gep.1 = getelementptr i16, i16 addrspace(1)* %out, i32 1
+
+ store i16 0, i16 addrspace(1)* %out.gep.1
+ store i16 0, i16 addrspace(1)* %out, align 4
+ ret void
+}
+
+; CHECK-LABEL: @merge_global_store_2_constants_i16_natural_align
+; CHECK: store <2 x i16>
+define void @merge_global_store_2_constants_i16_natural_align(i16 addrspace(1)* %out) #0 { ; natural (align 2) i16 stores still merge
+ %out.gep.1 = getelementptr i16, i16 addrspace(1)* %out, i32 1
+
+ store i16 123, i16 addrspace(1)* %out.gep.1
+ store i16 456, i16 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: @merge_global_store_2_constants_half_natural_align
+; CHECK: store <2 x half>
+define void @merge_global_store_2_constants_half_natural_align(half addrspace(1)* %out) #0 { ; FP16 pair merges to <2 x half>
+ %out.gep.1 = getelementptr half, half addrspace(1)* %out, i32 1
+
+ store half 2.0, half addrspace(1)* %out.gep.1
+ store half 1.0, half addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: @merge_global_store_2_constants_i32
+; CHECK: store <2 x i32> <i32 456, i32 123>, <2 x i32> addrspace(1)* %{{[0-9]+}}, align 4
+define void @merge_global_store_2_constants_i32(i32 addrspace(1)* %out) #0 { ; adjacent constant i32 pair -> <2 x i32> <456, 123>
+ %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
+
+ store i32 123, i32 addrspace(1)* %out.gep.1
+ store i32 456, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: @merge_global_store_2_constants_i32_f32
+; CHECK: store <2 x i32> <i32 456, i32 1065353216>, <2 x i32> addrspace(1)* %{{[0-9]+}}, align 4
+define void @merge_global_store_2_constants_i32_f32(i32 addrspace(1)* %out) #0 { ; mixed-type pair: float 1.0 becomes i32 1065353216 (0x3F800000) in the merged vector
+ %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
+ %out.gep.1.bc = bitcast i32 addrspace(1)* %out.gep.1 to float addrspace(1)*
+ store float 1.0, float addrspace(1)* %out.gep.1.bc
+ store i32 456, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: @merge_global_store_2_constants_f32_i32
+; CHECK: store <2 x float> <float 4.000000e+00, float 0x370EC00000000000>
+define void @merge_global_store_2_constants_f32_i32(float addrspace(1)* %out) #0 { ; mixed-type pair; i32 123 reinterprets as denormal float 0x370EC00000000000
+ %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
+ %out.gep.1.bc = bitcast float addrspace(1)* %out.gep.1 to i32 addrspace(1)*
+ store i32 123, i32 addrspace(1)* %out.gep.1.bc
+ store float 4.0, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: @merge_global_store_4_constants_i32
+; CHECK: store <4 x i32> <i32 1234, i32 123, i32 456, i32 333>, <4 x i32> addrspace(1)* %{{[0-9]+}}, align 4
+define void @merge_global_store_4_constants_i32(i32 addrspace(1)* %out) #0 { ; four constant stores, base store last -> single <4 x i32>
+ %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
+ %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3
+
+ store i32 123, i32 addrspace(1)* %out.gep.1
+ store i32 456, i32 addrspace(1)* %out.gep.2
+ store i32 333, i32 addrspace(1)* %out.gep.3
+ store i32 1234, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: @merge_global_store_4_constants_f32_order
+; CHECK: store <4 x float> <float 8.000000e+00, float 1.000000e+00, float 2.000000e+00, float 4.000000e+00>, <4 x float> addrspace(1)* %{{[0-9]+}}
+define void @merge_global_store_4_constants_f32_order(float addrspace(1)* %out) #0 { ; stores already in ascending address order
+ %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
+ %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2
+ %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3
+
+ store float 8.0, float addrspace(1)* %out
+ store float 1.0, float addrspace(1)* %out.gep.1
+ store float 2.0, float addrspace(1)* %out.gep.2
+ store float 4.0, float addrspace(1)* %out.gep.3
+ ret void
+}
+
+; First store is out of order.
+; CHECK-LABEL: @merge_global_store_4_constants_f32
+; CHECK: store <4 x float> <float 8.000000e+00, float 1.000000e+00, float 2.000000e+00, float 4.000000e+00>, <4 x float> addrspace(1)* %{{[0-9]+}}, align 4
+define void @merge_global_store_4_constants_f32(float addrspace(1)* %out) #0 { ; base store is out of order (last); merge still expected
+ %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
+ %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2
+ %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3
+
+ store float 1.0, float addrspace(1)* %out.gep.1
+ store float 2.0, float addrspace(1)* %out.gep.2
+ store float 4.0, float addrspace(1)* %out.gep.3
+ store float 8.0, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: @merge_global_store_4_constants_mixed_i32_f32
+; CHECK: store <4 x i32> <i32 1090519040, i32 11, i32 1073741824, i32 17>, <4 x i32> addrspace(1)* %{{[0-9]+}}, align 4
+define void @merge_global_store_4_constants_mixed_i32_f32(float addrspace(1)* %out) #0 { ; alternating i32/float element types merge into one <4 x i32>
+ %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
+ %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2
+ %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3
+
+ %out.gep.1.bc = bitcast float addrspace(1)* %out.gep.1 to i32 addrspace(1)*
+ %out.gep.3.bc = bitcast float addrspace(1)* %out.gep.3 to i32 addrspace(1)*
+
+ store i32 11, i32 addrspace(1)* %out.gep.1.bc
+ store float 2.0, float addrspace(1)* %out.gep.2
+ store i32 17, i32 addrspace(1)* %out.gep.3.bc
+ store float 8.0, float addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: @merge_global_store_3_constants_i32
+; CHECK: store <3 x i32> <i32 1234, i32 123, i32 456>, <3 x i32> addrspace(1)* %{{[0-9]+}}, align 4
+define void @merge_global_store_3_constants_i32(i32 addrspace(1)* %out) #0 { ; odd-sized group: expect a single <3 x i32> store
+ %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
+
+ store i32 123, i32 addrspace(1)* %out.gep.1
+ store i32 456, i32 addrspace(1)* %out.gep.2
+ store i32 1234, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: @merge_global_store_2_constants_i64
+; CHECK: store <2 x i64> <i64 456, i64 123>, <2 x i64> addrspace(1)* %{{[0-9]+}}, align 8
+define void @merge_global_store_2_constants_i64(i64 addrspace(1)* %out) #0 { ; 64-bit element pair -> <2 x i64>
+ %out.gep.1 = getelementptr i64, i64 addrspace(1)* %out, i64 1
+
+ store i64 123, i64 addrspace(1)* %out.gep.1
+ store i64 456, i64 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: @merge_global_store_4_constants_i64
+; CHECK: store <2 x i64> <i64 456, i64 333>, <2 x i64> addrspace(1)* %{{[0-9]+}}, align 8
+; CHECK: store <2 x i64> <i64 1234, i64 123>, <2 x i64> addrspace(1)* %{{[0-9]+}}, align 8
+define void @merge_global_store_4_constants_i64(i64 addrspace(1)* %out) #0 { ; four i64 stores split into two <2 x i64> stores (see CHECKs)
+ %out.gep.1 = getelementptr i64, i64 addrspace(1)* %out, i64 1
+ %out.gep.2 = getelementptr i64, i64 addrspace(1)* %out, i64 2
+ %out.gep.3 = getelementptr i64, i64 addrspace(1)* %out, i64 3
+
+ store i64 123, i64 addrspace(1)* %out.gep.1
+ store i64 456, i64 addrspace(1)* %out.gep.2
+ store i64 333, i64 addrspace(1)* %out.gep.3
+ store i64 1234, i64 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: @merge_global_store_2_adjacent_loads_i32
+; CHECK: [[LOAD:%[0-9]+]] = load <2 x i32>
+; CHECK: [[ELT0:%[0-9]+]] = extractelement <2 x i32> [[LOAD]], i32 0
+; CHECK: [[ELT1:%[0-9]+]] = extractelement <2 x i32> [[LOAD]], i32 1
+; CHECK: [[INSERT0:%[0-9]+]] = insertelement <2 x i32> undef, i32 [[ELT0]], i32 0
+; CHECK: [[INSERT1:%[0-9]+]] = insertelement <2 x i32> [[INSERT0]], i32 [[ELT1]], i32 1
+; CHECK: store <2 x i32> [[INSERT1]]
+define void @merge_global_store_2_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; load pair and store pair both vectorize (extract/insert chain per CHECKs)
+ %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
+ %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1
+
+ %lo = load i32, i32 addrspace(1)* %in
+ %hi = load i32, i32 addrspace(1)* %in.gep.1
+
+ store i32 %lo, i32 addrspace(1)* %out
+ store i32 %hi, i32 addrspace(1)* %out.gep.1
+ ret void
+}
+
+; CHECK-LABEL: @merge_global_store_2_adjacent_loads_i32_nonzero_base
+; CHECK: extractelement
+; CHECK: extractelement
+; CHECK: insertelement
+; CHECK: insertelement
+; CHECK: store <2 x i32>
+define void @merge_global_store_2_adjacent_loads_i32_nonzero_base(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; same as above but with non-zero constant base offsets
+ %in.gep.0 = getelementptr i32, i32 addrspace(1)* %in, i32 2
+ %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 3
+
+ %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i32 2
+ %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 3
+ %lo = load i32, i32 addrspace(1)* %in.gep.0
+ %hi = load i32, i32 addrspace(1)* %in.gep.1
+
+ store i32 %lo, i32 addrspace(1)* %out.gep.0
+ store i32 %hi, i32 addrspace(1)* %out.gep.1
+ ret void
+}
+
+; CHECK-LABEL: @merge_global_store_2_adjacent_loads_shuffle_i32
+; CHECK: [[LOAD:%[0-9]+]] = load <2 x i32>
+; CHECK: [[ELT0:%[0-9]+]] = extractelement <2 x i32> [[LOAD]], i32 0
+; CHECK: [[ELT1:%[0-9]+]] = extractelement <2 x i32> [[LOAD]], i32 1
+; CHECK: [[INSERT0:%[0-9]+]] = insertelement <2 x i32> undef, i32 [[ELT1]], i32 0
+; CHECK: [[INSERT1:%[0-9]+]] = insertelement <2 x i32> [[INSERT0]], i32 [[ELT0]], i32 1
+; CHECK: store <2 x i32> [[INSERT1]]
+define void @merge_global_store_2_adjacent_loads_shuffle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; elements stored in swapped order (hi to base, lo to +1)
+ %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
+ %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1
+
+ %lo = load i32, i32 addrspace(1)* %in
+ %hi = load i32, i32 addrspace(1)* %in.gep.1
+
+ store i32 %hi, i32 addrspace(1)* %out
+ store i32 %lo, i32 addrspace(1)* %out.gep.1
+ ret void
+}
+
+; CHECK-LABEL: @merge_global_store_4_adjacent_loads_i32
+; CHECK: load <4 x i32>
+; CHECK: store <4 x i32>
+define void @merge_global_store_4_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; 4-wide copy: one <4 x i32> load and one <4 x i32> store
+ %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
+ %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3
+ %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %in.gep.2 = getelementptr i32, i32 addrspace(1)* %in, i32 2
+ %in.gep.3 = getelementptr i32, i32 addrspace(1)* %in, i32 3
+
+ %x = load i32, i32 addrspace(1)* %in
+ %y = load i32, i32 addrspace(1)* %in.gep.1
+ %z = load i32, i32 addrspace(1)* %in.gep.2
+ %w = load i32, i32 addrspace(1)* %in.gep.3
+
+ store i32 %x, i32 addrspace(1)* %out
+ store i32 %y, i32 addrspace(1)* %out.gep.1
+ store i32 %z, i32 addrspace(1)* %out.gep.2
+ store i32 %w, i32 addrspace(1)* %out.gep.3
+ ret void
+}
+
+; CHECK-LABEL: @merge_global_store_3_adjacent_loads_i32
+; CHECK: load <3 x i32>
+; CHECK: store <3 x i32>
+define void @merge_global_store_3_adjacent_loads_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; odd-width copy: <3 x i32> load and store
+ %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
+ %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %in.gep.2 = getelementptr i32, i32 addrspace(1)* %in, i32 2
+
+ %x = load i32, i32 addrspace(1)* %in
+ %y = load i32, i32 addrspace(1)* %in.gep.1
+ %z = load i32, i32 addrspace(1)* %in.gep.2
+
+ store i32 %x, i32 addrspace(1)* %out
+ store i32 %y, i32 addrspace(1)* %out.gep.1
+ store i32 %z, i32 addrspace(1)* %out.gep.2
+ ret void
+}
+
+; CHECK-LABEL: @merge_global_store_4_adjacent_loads_f32
+; CHECK: load <4 x float>
+; CHECK: store <4 x float>
+define void @merge_global_store_4_adjacent_loads_f32(float addrspace(1)* %out, float addrspace(1)* %in) #0 { ; float variant of the 4-wide copy
+ %out.gep.1 = getelementptr float, float addrspace(1)* %out, i32 1
+ %out.gep.2 = getelementptr float, float addrspace(1)* %out, i32 2
+ %out.gep.3 = getelementptr float, float addrspace(1)* %out, i32 3
+ %in.gep.1 = getelementptr float, float addrspace(1)* %in, i32 1
+ %in.gep.2 = getelementptr float, float addrspace(1)* %in, i32 2
+ %in.gep.3 = getelementptr float, float addrspace(1)* %in, i32 3
+
+ %x = load float, float addrspace(1)* %in
+ %y = load float, float addrspace(1)* %in.gep.1
+ %z = load float, float addrspace(1)* %in.gep.2
+ %w = load float, float addrspace(1)* %in.gep.3
+
+ store float %x, float addrspace(1)* %out
+ store float %y, float addrspace(1)* %out.gep.1
+ store float %z, float addrspace(1)* %out.gep.2
+ store float %w, float addrspace(1)* %out.gep.3
+ ret void
+}
+
+; CHECK-LABEL: @merge_global_store_4_adjacent_loads_i32_nonzero_base
+; CHECK: load <4 x i32>
+; CHECK: store <4 x i32>
+define void @merge_global_store_4_adjacent_loads_i32_nonzero_base(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; 4-wide copy with distinct non-zero base offsets on each side
+ %in.gep.0 = getelementptr i32, i32 addrspace(1)* %in, i32 11
+ %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 12
+ %in.gep.2 = getelementptr i32, i32 addrspace(1)* %in, i32 13
+ %in.gep.3 = getelementptr i32, i32 addrspace(1)* %in, i32 14
+ %out.gep.0 = getelementptr i32, i32 addrspace(1)* %out, i32 7
+ %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 8
+ %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 9
+ %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 10
+
+ %x = load i32, i32 addrspace(1)* %in.gep.0
+ %y = load i32, i32 addrspace(1)* %in.gep.1
+ %z = load i32, i32 addrspace(1)* %in.gep.2
+ %w = load i32, i32 addrspace(1)* %in.gep.3
+
+ store i32 %x, i32 addrspace(1)* %out.gep.0
+ store i32 %y, i32 addrspace(1)* %out.gep.1
+ store i32 %z, i32 addrspace(1)* %out.gep.2
+ store i32 %w, i32 addrspace(1)* %out.gep.3
+ ret void
+}
+
+; CHECK-LABEL: @merge_global_store_4_adjacent_loads_inverse_i32
+; CHECK: load <4 x i32>
+; CHECK: store <4 x i32>
+define void @merge_global_store_4_adjacent_loads_inverse_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; stores issued in descending address order, across a barrier
+ %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
+ %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3
+ %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %in.gep.2 = getelementptr i32, i32 addrspace(1)* %in, i32 2
+ %in.gep.3 = getelementptr i32, i32 addrspace(1)* %in, i32 3
+
+ %x = load i32, i32 addrspace(1)* %in
+ %y = load i32, i32 addrspace(1)* %in.gep.1
+ %z = load i32, i32 addrspace(1)* %in.gep.2
+ %w = load i32, i32 addrspace(1)* %in.gep.3
+
+ ; Make sure the barrier doesn't stop this
+ tail call void @llvm.amdgcn.s.barrier() #1
+
+ store i32 %w, i32 addrspace(1)* %out.gep.3
+ store i32 %z, i32 addrspace(1)* %out.gep.2
+ store i32 %y, i32 addrspace(1)* %out.gep.1
+ store i32 %x, i32 addrspace(1)* %out
+
+ ret void
+}
+
+; CHECK-LABEL: @merge_global_store_4_adjacent_loads_shuffle_i32
+; CHECK: load <4 x i32>
+; CHECK: store <4 x i32>
+define void @merge_global_store_4_adjacent_loads_shuffle_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; elements stored fully reversed (w..x), across a barrier
+ %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
+ %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3
+ %in.gep.1 = getelementptr i32, i32 addrspace(1)* %in, i32 1
+ %in.gep.2 = getelementptr i32, i32 addrspace(1)* %in, i32 2
+ %in.gep.3 = getelementptr i32, i32 addrspace(1)* %in, i32 3
+
+ %x = load i32, i32 addrspace(1)* %in
+ %y = load i32, i32 addrspace(1)* %in.gep.1
+ %z = load i32, i32 addrspace(1)* %in.gep.2
+ %w = load i32, i32 addrspace(1)* %in.gep.3
+
+ ; Make sure the barrier doesn't stop this
+ tail call void @llvm.amdgcn.s.barrier() #1
+
+ store i32 %w, i32 addrspace(1)* %out
+ store i32 %z, i32 addrspace(1)* %out.gep.1
+ store i32 %y, i32 addrspace(1)* %out.gep.2
+ store i32 %x, i32 addrspace(1)* %out.gep.3
+
+ ret void
+}
+
+; CHECK-LABEL: @merge_global_store_4_adjacent_loads_i8
+; CHECK: load <4 x i8>
+; CHECK: extractelement <4 x i8>
+; CHECK: extractelement <4 x i8>
+; CHECK: extractelement <4 x i8>
+; CHECK: extractelement <4 x i8>
+; CHECK: insertelement <4 x i8>
+; CHECK: insertelement <4 x i8>
+; CHECK: insertelement <4 x i8>
+; CHECK: insertelement <4 x i8>
+; CHECK: store <4 x i8>
+define void @merge_global_store_4_adjacent_loads_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { ; align-4 endpoints; expect <4 x i8> load/store with extract/insert chain
+ %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i8 1
+ %out.gep.2 = getelementptr i8, i8 addrspace(1)* %out, i8 2
+ %out.gep.3 = getelementptr i8, i8 addrspace(1)* %out, i8 3
+ %in.gep.1 = getelementptr i8, i8 addrspace(1)* %in, i8 1
+ %in.gep.2 = getelementptr i8, i8 addrspace(1)* %in, i8 2
+ %in.gep.3 = getelementptr i8, i8 addrspace(1)* %in, i8 3
+
+ %x = load i8, i8 addrspace(1)* %in, align 4
+ %y = load i8, i8 addrspace(1)* %in.gep.1
+ %z = load i8, i8 addrspace(1)* %in.gep.2
+ %w = load i8, i8 addrspace(1)* %in.gep.3
+
+ store i8 %x, i8 addrspace(1)* %out, align 4
+ store i8 %y, i8 addrspace(1)* %out.gep.1
+ store i8 %z, i8 addrspace(1)* %out.gep.2
+ store i8 %w, i8 addrspace(1)* %out.gep.3
+ ret void
+}
+
+; CHECK-LABEL: @merge_global_store_4_adjacent_loads_i8_natural_align
+; CHECK: load <4 x i8>
+; CHECK: store <4 x i8>
+define void @merge_global_store_4_adjacent_loads_i8_natural_align(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 { ; same copy but with only natural i8 alignment
+ %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i8 1
+ %out.gep.2 = getelementptr i8, i8 addrspace(1)* %out, i8 2
+ %out.gep.3 = getelementptr i8, i8 addrspace(1)* %out, i8 3
+ %in.gep.1 = getelementptr i8, i8 addrspace(1)* %in, i8 1
+ %in.gep.2 = getelementptr i8, i8 addrspace(1)* %in, i8 2
+ %in.gep.3 = getelementptr i8, i8 addrspace(1)* %in, i8 3
+
+ %x = load i8, i8 addrspace(1)* %in
+ %y = load i8, i8 addrspace(1)* %in.gep.1
+ %z = load i8, i8 addrspace(1)* %in.gep.2
+ %w = load i8, i8 addrspace(1)* %in.gep.3
+
+ store i8 %x, i8 addrspace(1)* %out
+ store i8 %y, i8 addrspace(1)* %out.gep.1
+ store i8 %z, i8 addrspace(1)* %out.gep.2
+ store i8 %w, i8 addrspace(1)* %out.gep.3
+ ret void
+}
+
+; CHECK-LABEL: @merge_global_store_4_vector_elts_loads_v4i32
+; CHECK: load <4 x i32>
+; CHECK: store <4 x i32>
+define void @merge_global_store_4_vector_elts_loads_v4i32(i32 addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 { ; scalar stores of extracted vector elements re-merge to <4 x i32>
+ %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
+ %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3
+ %vec = load <4 x i32>, <4 x i32> addrspace(1)* %in
+
+ %x = extractelement <4 x i32> %vec, i32 0
+ %y = extractelement <4 x i32> %vec, i32 1
+ %z = extractelement <4 x i32> %vec, i32 2
+ %w = extractelement <4 x i32> %vec, i32 3
+
+ store i32 %x, i32 addrspace(1)* %out
+ store i32 %y, i32 addrspace(1)* %out.gep.1
+ store i32 %z, i32 addrspace(1)* %out.gep.2
+ store i32 %w, i32 addrspace(1)* %out.gep.3
+ ret void
+}
+
+; CHECK-LABEL: @merge_local_store_2_constants_i8
+; CHECK: store <2 x i8> <i8 -56, i8 123>, <2 x i8> addrspace(3)* %{{[0-9]+}}, align 2
+define void @merge_local_store_2_constants_i8(i8 addrspace(3)* %out) #0 { ; LDS (addrspace 3) variant; 456 wraps to i8 -56
+ %out.gep.1 = getelementptr i8, i8 addrspace(3)* %out, i32 1
+
+ store i8 123, i8 addrspace(3)* %out.gep.1
+ store i8 456, i8 addrspace(3)* %out, align 2
+ ret void
+}
+
+; CHECK-LABEL: @merge_local_store_2_constants_i32
+; CHECK: store <2 x i32> <i32 456, i32 123>, <2 x i32> addrspace(3)* %{{[0-9]+}}, align 4
+define void @merge_local_store_2_constants_i32(i32 addrspace(3)* %out) #0 { ; LDS i32 pair -> <2 x i32>
+ %out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1
+
+ store i32 123, i32 addrspace(3)* %out.gep.1
+ store i32 456, i32 addrspace(3)* %out
+ ret void
+}
+
+; CHECK-LABEL: @merge_local_store_2_constants_i32_align_2
+; CHECK: store i32
+; CHECK: store i32
+define void @merge_local_store_2_constants_i32_align_2(i32 addrspace(3)* %out) #0 { ; under-aligned (align 2) i32 stores must stay scalar (CHECKs expect no vector store)
+ %out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1
+
+ store i32 123, i32 addrspace(3)* %out.gep.1, align 2
+ store i32 456, i32 addrspace(3)* %out, align 2
+ ret void
+}
+
+; CHECK-LABEL: @merge_local_store_4_constants_i32
+; CHECK: store <2 x i32> <i32 456, i32 333>, <2 x i32> addrspace(3)*
+; CHECK: store <2 x i32> <i32 1234, i32 123>, <2 x i32> addrspace(3)*
+define void @merge_local_store_4_constants_i32(i32 addrspace(3)* %out) #0 { ; LDS 4-store group splits into two <2 x i32> stores (see CHECKs)
+ %out.gep.1 = getelementptr i32, i32 addrspace(3)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(3)* %out, i32 2
+ %out.gep.3 = getelementptr i32, i32 addrspace(3)* %out, i32 3
+
+ store i32 123, i32 addrspace(3)* %out.gep.1
+ store i32 456, i32 addrspace(3)* %out.gep.2
+ store i32 333, i32 addrspace(3)* %out.gep.3
+ store i32 1234, i32 addrspace(3)* %out
+ ret void
+}
+
+; CHECK-LABEL: @merge_global_store_5_constants_i32
+; CHECK: store <4 x i32> <i32 9, i32 12, i32 16, i32 -12>, <4 x i32> addrspace(1)* %{{[0-9]+}}, align 4
+; CHECK: store i32
+define void @merge_global_store_5_constants_i32(i32 addrspace(1)* %out) { ; 5 stores -> <4 x i32> plus one scalar remainder
+ store i32 9, i32 addrspace(1)* %out, align 4
+ %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
+ store i32 12, i32 addrspace(1)* %idx1, align 4
+ %idx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2
+ store i32 16, i32 addrspace(1)* %idx2, align 4
+ %idx3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3
+ store i32 -12, i32 addrspace(1)* %idx3, align 4
+ %idx4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 4
+ store i32 11, i32 addrspace(1)* %idx4, align 4
+ ret void
+}
+
+; CHECK-LABEL: @merge_global_store_6_constants_i32
+; CHECK: store <4 x i32> <i32 13, i32 15, i32 62, i32 63>, <4 x i32> addrspace(1)* %{{[0-9]+}}, align 4
+; CHECK: store <2 x i32> <i32 11, i32 123>, <2 x i32> addrspace(1)* %{{[0-9]+}}, align 4
+define void @merge_global_store_6_constants_i32(i32 addrspace(1)* %out) { ; 6 stores -> <4 x i32> + <2 x i32>
+ store i32 13, i32 addrspace(1)* %out, align 4
+ %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
+ store i32 15, i32 addrspace(1)* %idx1, align 4
+ %idx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2
+ store i32 62, i32 addrspace(1)* %idx2, align 4
+ %idx3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3
+ store i32 63, i32 addrspace(1)* %idx3, align 4
+ %idx4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 4
+ store i32 11, i32 addrspace(1)* %idx4, align 4
+ %idx5 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 5
+ store i32 123, i32 addrspace(1)* %idx5, align 4
+ ret void
+}
+
+; CHECK-LABEL: @merge_global_store_7_constants_i32
+; CHECK: store <4 x i32> <i32 34, i32 999, i32 65, i32 33>, <4 x i32> addrspace(1)* %{{[0-9]+}}, align 4
+; CHECK: store <3 x i32> <i32 98, i32 91, i32 212>, <3 x i32> addrspace(1)* %{{[0-9]+}}, align 4
+define void @merge_global_store_7_constants_i32(i32 addrspace(1)* %out) { ; 7 stores -> <4 x i32> + <3 x i32>
+ store i32 34, i32 addrspace(1)* %out, align 4
+ %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
+ store i32 999, i32 addrspace(1)* %idx1, align 4
+ %idx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2
+ store i32 65, i32 addrspace(1)* %idx2, align 4
+ %idx3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3
+ store i32 33, i32 addrspace(1)* %idx3, align 4
+ %idx4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 4
+ store i32 98, i32 addrspace(1)* %idx4, align 4
+ %idx5 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 5
+ store i32 91, i32 addrspace(1)* %idx5, align 4
+ %idx6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 6
+ store i32 212, i32 addrspace(1)* %idx6, align 4
+ ret void
+}
+
+; CHECK-LABEL: @merge_global_store_8_constants_i32
+; CHECK: store <4 x i32> <i32 34, i32 999, i32 65, i32 33>, <4 x i32> addrspace(1)* %{{[0-9]+}}, align 4
+; CHECK: store <4 x i32> <i32 98, i32 91, i32 212, i32 999>, <4 x i32> addrspace(1)* %{{[0-9]+}}, align 4
+define void @merge_global_store_8_constants_i32(i32 addrspace(1)* %out) { ; 8 stores -> two <4 x i32> stores
+ store i32 34, i32 addrspace(1)* %out, align 4
+ %idx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
+ store i32 999, i32 addrspace(1)* %idx1, align 4
+ %idx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2
+ store i32 65, i32 addrspace(1)* %idx2, align 4
+ %idx3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3
+ store i32 33, i32 addrspace(1)* %idx3, align 4
+ %idx4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 4
+ store i32 98, i32 addrspace(1)* %idx4, align 4
+ %idx5 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 5
+ store i32 91, i32 addrspace(1)* %idx5, align 4
+ %idx6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 6
+ store i32 212, i32 addrspace(1)* %idx6, align 4
+ %idx7 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 7
+ store i32 999, i32 addrspace(1)* %idx7, align 4
+ ret void
+}
+
+; CHECK-LABEL: @copy_v3i32_align4
+; CHECK: %vec = load <3 x i32>, <3 x i32> addrspace(1)* %in, align 4
+; CHECK: store <3 x i32> %vec, <3 x i32> addrspace(1)* %out
+define void @copy_v3i32_align4(<3 x i32> addrspace(1)* noalias %out, <3 x i32> addrspace(1)* noalias %in) #0 { ; pre-vectorized <3 x i32> copy must pass through unchanged
+ %vec = load <3 x i32>, <3 x i32> addrspace(1)* %in, align 4
+ store <3 x i32> %vec, <3 x i32> addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: @copy_v3i64_align4
+; CHECK: %vec = load <3 x i64>, <3 x i64> addrspace(1)* %in, align 4
+; CHECK: store <3 x i64> %vec, <3 x i64> addrspace(1)* %out
+define void @copy_v3i64_align4(<3 x i64> addrspace(1)* noalias %out, <3 x i64> addrspace(1)* noalias %in) #0 { ; <3 x i64> copy passes through unchanged
+ %vec = load <3 x i64>, <3 x i64> addrspace(1)* %in, align 4
+ store <3 x i64> %vec, <3 x i64> addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: @copy_v3f32_align4
+; CHECK: %vec = load <3 x float>, <3 x float> addrspace(1)* %in, align 4
+; CHECK: store <3 x float>
+define void @copy_v3f32_align4(<3 x float> addrspace(1)* noalias %out, <3 x float> addrspace(1)* noalias %in) #0 { ; <3 x float> load/fadd/store preserved as-is
+ %vec = load <3 x float>, <3 x float> addrspace(1)* %in, align 4
+ %fadd = fadd <3 x float> %vec, <float 1.0, float 2.0, float 4.0>
+ store <3 x float> %fadd, <3 x float> addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: @copy_v3f64_align4
+; CHECK: %vec = load <3 x double>, <3 x double> addrspace(1)* %in, align 4
+; CHECK: store <3 x double> %fadd, <3 x double> addrspace(1)* %out
+define void @copy_v3f64_align4(<3 x double> addrspace(1)* noalias %out, <3 x double> addrspace(1)* noalias %in) #0 { ; <3 x double> load/fadd/store preserved as-is
+ %vec = load <3 x double>, <3 x double> addrspace(1)* %in, align 4
+ %fadd = fadd <3 x double> %vec, <double 1.0, double 2.0, double 4.0>
+ store <3 x double> %fadd, <3 x double> addrspace(1)* %out
+ ret void
+}
+
+declare void @llvm.amdgcn.s.barrier() #1
+
+attributes #0 = { nounwind }
+attributes #1 = { convergent nounwind }
diff --git a/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-vectors.ll b/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-vectors.ll
new file mode 100644
index 000000000000..8885d61014fc
--- /dev/null
+++ b/test/Transforms/LoadStoreVectorizer/AMDGPU/merge-vectors.ll
@@ -0,0 +1,91 @@
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -basicaa -load-store-vectorizer -S -o - %s | FileCheck %s
+
+target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+
+; CHECK-LABEL: @merge_v2i32_v2i32(
+; CHECK: load <4 x i32>
+; CHECK: store <4 x i32> zeroinitializer
+define void @merge_v2i32_v2i32(<2 x i32> addrspace(1)* nocapture %a, <2 x i32> addrspace(1)* nocapture readonly %b) #0 { ; adjacent <2 x i32> accesses merge to <4 x i32>
+entry:
+ %a.1 = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* %a, i64 1
+ %b.1 = getelementptr inbounds <2 x i32>, <2 x i32> addrspace(1)* %b, i64 1
+
+ %ld.c = load <2 x i32>, <2 x i32> addrspace(1)* %b, align 4
+ %ld.c.idx.1 = load <2 x i32>, <2 x i32> addrspace(1)* %b.1, align 4
+
+ store <2 x i32> zeroinitializer, <2 x i32> addrspace(1)* %a, align 4
+ store <2 x i32> zeroinitializer, <2 x i32> addrspace(1)* %a.1, align 4
+
+ ret void
+}
+
+; CHECK-LABEL: @merge_v1i32_v1i32(
+; CHECK: load <2 x i32>
+; CHECK: store <2 x i32> zeroinitializer
+define void @merge_v1i32_v1i32(<1 x i32> addrspace(1)* nocapture %a, <1 x i32> addrspace(1)* nocapture readonly %b) #0 { ; adjacent <1 x i32> accesses merge to <2 x i32>
+entry:
+ %a.1 = getelementptr inbounds <1 x i32>, <1 x i32> addrspace(1)* %a, i64 1
+ %b.1 = getelementptr inbounds <1 x i32>, <1 x i32> addrspace(1)* %b, i64 1
+
+ %ld.c = load <1 x i32>, <1 x i32> addrspace(1)* %b, align 4
+ %ld.c.idx.1 = load <1 x i32>, <1 x i32> addrspace(1)* %b.1, align 4
+
+ store <1 x i32> zeroinitializer, <1 x i32> addrspace(1)* %a, align 4
+ store <1 x i32> zeroinitializer, <1 x i32> addrspace(1)* %a.1, align 4
+
+ ret void
+}
+
+; CHECK-LABEL: @no_merge_v3i32_v3i32(
+; CHECK: load <3 x i32>
+; CHECK: load <3 x i32>
+; CHECK: store <3 x i32> zeroinitializer
+; CHECK: store <3 x i32> zeroinitializer
+define void @no_merge_v3i32_v3i32(<3 x i32> addrspace(1)* nocapture %a, <3 x i32> addrspace(1)* nocapture readonly %b) #0 { ; adjacent <3 x i32> pairs are NOT merged (CHECKs expect two of each)
+entry:
+ %a.1 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %a, i64 1
+ %b.1 = getelementptr inbounds <3 x i32>, <3 x i32> addrspace(1)* %b, i64 1
+
+ %ld.c = load <3 x i32>, <3 x i32> addrspace(1)* %b, align 4
+ %ld.c.idx.1 = load <3 x i32>, <3 x i32> addrspace(1)* %b.1, align 4
+
+ store <3 x i32> zeroinitializer, <3 x i32> addrspace(1)* %a, align 4
+ store <3 x i32> zeroinitializer, <3 x i32> addrspace(1)* %a.1, align 4
+
+ ret void
+}
+
+; CHECK-LABEL: @merge_v2i16_v2i16(
+; CHECK: load <4 x i16>
+; CHECK: store <4 x i16> zeroinitializer
+define void @merge_v2i16_v2i16(<2 x i16> addrspace(1)* nocapture %a, <2 x i16> addrspace(1)* nocapture readonly %b) #0 { ; adjacent <2 x i16> accesses merge to <4 x i16>
+entry:
+ %a.1 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %a, i64 1
+ %b.1 = getelementptr inbounds <2 x i16>, <2 x i16> addrspace(1)* %b, i64 1
+
+ %ld.c = load <2 x i16>, <2 x i16> addrspace(1)* %b, align 4
+ %ld.c.idx.1 = load <2 x i16>, <2 x i16> addrspace(1)* %b.1, align 4
+
+ store <2 x i16> zeroinitializer, <2 x i16> addrspace(1)* %a, align 4
+ store <2 x i16> zeroinitializer, <2 x i16> addrspace(1)* %a.1, align 4
+
+ ret void
+}
+
+; Ideally this would be merged
+; CHECK-LABEL: @merge_load_i32_v2i16(
+; CHECK: load i32,
+; CHECK: load <2 x i16>
+define void @merge_load_i32_v2i16(i32 addrspace(1)* nocapture %a) #0 { ; loads are intentionally unused; test only checks whether scalar+vector loads merge
+entry:
+ %a.1 = getelementptr inbounds i32, i32 addrspace(1)* %a, i32 1
+ %a.1.cast = bitcast i32 addrspace(1)* %a.1 to <2 x i16> addrspace(1)*
+
+ %ld.0 = load i32, i32 addrspace(1)* %a
+ %ld.1 = load <2 x i16>, <2 x i16> addrspace(1)* %a.1.cast
+
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/Transforms/LoadStoreVectorizer/AMDGPU/missing-alignment.ll b/test/Transforms/LoadStoreVectorizer/AMDGPU/missing-alignment.ll
new file mode 100644
index 000000000000..ba792f783533
--- /dev/null
+++ b/test/Transforms/LoadStoreVectorizer/AMDGPU/missing-alignment.ll
@@ -0,0 +1,30 @@
+; RUN: opt -mtriple=amdgcn-- -load-store-vectorizer -S -o - %s | FileCheck %s
+
+@lds = internal addrspace(3) global [512 x float] undef, align 4
+
+; The original load has an implicit alignment of 4, and should not
+; increase to an align 8 load.
+
+; CHECK-LABEL: @load_keep_base_alignment_missing_align(
+; CHECK: load <2 x float>, <2 x float> addrspace(3)* %{{[0-9]+}}, align 4
+define void @load_keep_base_alignment_missing_align(float addrspace(1)* %out) {
+ %ptr0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 11
+ %val0 = load float, float addrspace(3)* %ptr0
+
+ %ptr1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 12
+ %val1 = load float, float addrspace(3)* %ptr1
+ %add = fadd float %val0, %val1
+ store float %add, float addrspace(1)* %out
+ ret void
+}
+
+
+; CHECK-LABEL: @store_keep_base_alignment_missing_align(
+; CHECK: store <2 x float> zeroinitializer, <2 x float> addrspace(3)* %{{[0-9]+}}, align 4
+define void @store_keep_base_alignment_missing_align() {
+ %arrayidx0 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 1
+ %arrayidx1 = getelementptr inbounds [512 x float], [512 x float] addrspace(3)* @lds, i32 0, i32 2
+ store float 0.0, float addrspace(3)* %arrayidx0
+ store float 0.0, float addrspace(3)* %arrayidx1
+ ret void
+}
diff --git a/test/Transforms/LoadStoreVectorizer/AMDGPU/no-implicit-float.ll b/test/Transforms/LoadStoreVectorizer/AMDGPU/no-implicit-float.ll
new file mode 100644
index 000000000000..4a429533df02
--- /dev/null
+++ b/test/Transforms/LoadStoreVectorizer/AMDGPU/no-implicit-float.ll
@@ -0,0 +1,20 @@
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -load-store-vectorizer -S -o - %s | FileCheck %s
+
+; CHECK-LABEL: @no_implicit_float(
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: store i32
+; CHECK: store i32
+define void @no_implicit_float(i32 addrspace(1)* %out) #0 {
+ %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
+ %out.gep.2 = getelementptr i32, i32 addrspace(1)* %out, i32 2
+ %out.gep.3 = getelementptr i32, i32 addrspace(1)* %out, i32 3
+
+ store i32 123, i32 addrspace(1)* %out.gep.1
+ store i32 456, i32 addrspace(1)* %out.gep.2
+ store i32 333, i32 addrspace(1)* %out.gep.3
+ store i32 1234, i32 addrspace(1)* %out
+ ret void
+}
+
+attributes #0 = { nounwind noimplicitfloat }
diff --git a/test/Transforms/LoadStoreVectorizer/AMDGPU/optnone.ll b/test/Transforms/LoadStoreVectorizer/AMDGPU/optnone.ll
new file mode 100644
index 000000000000..141e20a1f83c
--- /dev/null
+++ b/test/Transforms/LoadStoreVectorizer/AMDGPU/optnone.ll
@@ -0,0 +1,22 @@
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -load-store-vectorizer -S -o - %s | FileCheck %s
+
+; CHECK-LABEL: @optnone(
+; CHECK: store i32
+; CHECK: store i32
+define void @optnone(i32 addrspace(1)* %out) noinline optnone {
+ %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
+
+ store i32 123, i32 addrspace(1)* %out.gep.1
+ store i32 456, i32 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: @do_opt(
+; CHECK: store <2 x i32>
+define void @do_opt(i32 addrspace(1)* %out) {
+ %out.gep.1 = getelementptr i32, i32 addrspace(1)* %out, i32 1
+
+ store i32 123, i32 addrspace(1)* %out.gep.1
+ store i32 456, i32 addrspace(1)* %out
+ ret void
+}
diff --git a/test/Transforms/LoadStoreVectorizer/AMDGPU/pointer-elements.ll b/test/Transforms/LoadStoreVectorizer/AMDGPU/pointer-elements.ll
new file mode 100644
index 000000000000..9b73f34ec6cf
--- /dev/null
+++ b/test/Transforms/LoadStoreVectorizer/AMDGPU/pointer-elements.ll
@@ -0,0 +1,311 @@
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -basicaa -load-store-vectorizer -S -o - %s | FileCheck %s
+
+target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+
+; CHECK-LABEL: @merge_v2p1i8(
+; CHECK: load <2 x i64>
+; CHECK: inttoptr i64 %{{[0-9]+}} to i8 addrspace(1)*
+; CHECK: inttoptr i64 %{{[0-9]+}} to i8 addrspace(1)*
+; CHECK: store <2 x i64> zeroinitializer
+define void @merge_v2p1i8(i8 addrspace(1)* addrspace(1)* nocapture %a, i8 addrspace(1)* addrspace(1)* nocapture readonly %b) #0 {
+entry:
+ %a.1 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a, i64 1
+ %b.1 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %b, i64 1
+
+ %ld.c = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %b, align 4
+ %ld.c.idx.1 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %b.1, align 4
+
+ store i8 addrspace(1)* null, i8 addrspace(1)* addrspace(1)* %a, align 4
+ store i8 addrspace(1)* null, i8 addrspace(1)* addrspace(1)* %a.1, align 4
+
+ ret void
+}
+
+; CHECK-LABEL: @merge_v2p3i8(
+; CHECK: load <2 x i32>
+; CHECK: inttoptr i32 %{{[0-9]+}} to i8 addrspace(3)*
+; CHECK: inttoptr i32 %{{[0-9]+}} to i8 addrspace(3)*
+; CHECK: store <2 x i32> zeroinitializer
+define void @merge_v2p3i8(i8 addrspace(3)* addrspace(3)* nocapture %a, i8 addrspace(3)* addrspace(3)* nocapture readonly %b) #0 {
+entry:
+ %a.1 = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %a, i64 1
+ %b.1 = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %b, i64 1
+
+ %ld.c = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %b, align 4
+ %ld.c.idx.1 = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %b.1, align 4
+
+ store i8 addrspace(3)* null, i8 addrspace(3)* addrspace(3)* %a, align 4
+ store i8 addrspace(3)* null, i8 addrspace(3)* addrspace(3)* %a.1, align 4
+
+ ret void
+}
+
+; CHECK-LABEL: @merge_load_i64_ptr64(
+; CHECK: load <2 x i64>
+; CHECK: [[ELT1:%[0-9]+]] = extractelement <2 x i64> %{{[0-9]+}}, i32 1
+; CHECK: inttoptr i64 [[ELT1]] to i8 addrspace(1)*
+define void @merge_load_i64_ptr64(i64 addrspace(1)* nocapture %a) #0 {
+entry:
+ %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1
+ %a.1.cast = bitcast i64 addrspace(1)* %a.1 to i8 addrspace(1)* addrspace(1)*
+
+ %ld.0 = load i64, i64 addrspace(1)* %a
+ %ld.1 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a.1.cast
+
+ ret void
+}
+
+; CHECK-LABEL: @merge_load_ptr64_i64(
+; CHECK: load <2 x i64>
+; CHECK: [[ELT0:%[0-9]+]] = extractelement <2 x i64> %{{[0-9]+}}, i32 0
+; CHECK: inttoptr i64 [[ELT0]] to i8 addrspace(1)*
+define void @merge_load_ptr64_i64(i64 addrspace(1)* nocapture %a) #0 {
+entry:
+ %a.cast = bitcast i64 addrspace(1)* %a to i8 addrspace(1)* addrspace(1)*
+ %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1
+
+ %ld.0 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a.cast
+ %ld.1 = load i64, i64 addrspace(1)* %a.1
+
+ ret void
+}
+
+; CHECK-LABEL: @merge_store_ptr64_i64(
+; CHECK: [[ELT0:%[0-9]+]] = ptrtoint i8 addrspace(1)* %ptr0 to i64
+; CHECK: insertelement <2 x i64> undef, i64 [[ELT0]], i32 0
+; CHECK: store <2 x i64>
+define void @merge_store_ptr64_i64(i64 addrspace(1)* nocapture %a, i8 addrspace(1)* %ptr0, i64 %val1) #0 {
+entry:
+ %a.cast = bitcast i64 addrspace(1)* %a to i8 addrspace(1)* addrspace(1)*
+ %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1
+
+
+ store i8 addrspace(1)* %ptr0, i8 addrspace(1)* addrspace(1)* %a.cast
+ store i64 %val1, i64 addrspace(1)* %a.1
+
+ ret void
+}
+
+; CHECK-LABEL: @merge_store_i64_ptr64(
+; CHECK: [[ELT1:%[0-9]+]] = ptrtoint i8 addrspace(1)* %ptr1 to i64
+; CHECK: insertelement <2 x i64> %{{[0-9]+}}, i64 [[ELT1]], i32 1
+; CHECK: store <2 x i64>
+define void @merge_store_i64_ptr64(i8 addrspace(1)* addrspace(1)* nocapture %a, i64 %val0, i8 addrspace(1)* %ptr1) #0 {
+entry:
+ %a.1 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a, i64 1
+ %a.cast = bitcast i8 addrspace(1)* addrspace(1)* %a to i64 addrspace(1)*
+
+ store i64 %val0, i64 addrspace(1)* %a.cast
+ store i8 addrspace(1)* %ptr1, i8 addrspace(1)* addrspace(1)* %a.1
+
+ ret void
+}
+
+; CHECK-LABEL: @merge_load_i32_ptr32(
+; CHECK: load <2 x i32>
+; CHECK: [[ELT1:%[0-9]+]] = extractelement <2 x i32> %{{[0-9]+}}, i32 1
+; CHECK: inttoptr i32 [[ELT1]] to i8 addrspace(3)*
+define void @merge_load_i32_ptr32(i32 addrspace(3)* nocapture %a) #0 {
+entry:
+ %a.1 = getelementptr inbounds i32, i32 addrspace(3)* %a, i32 1
+ %a.1.cast = bitcast i32 addrspace(3)* %a.1 to i8 addrspace(3)* addrspace(3)*
+
+ %ld.0 = load i32, i32 addrspace(3)* %a
+ %ld.1 = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %a.1.cast
+
+ ret void
+}
+
+; CHECK-LABEL: @merge_load_ptr32_i32(
+; CHECK: load <2 x i32>
+; CHECK: [[ELT0:%[0-9]+]] = extractelement <2 x i32> %{{[0-9]+}}, i32 0
+; CHECK: inttoptr i32 [[ELT0]] to i8 addrspace(3)*
+define void @merge_load_ptr32_i32(i32 addrspace(3)* nocapture %a) #0 {
+entry:
+ %a.cast = bitcast i32 addrspace(3)* %a to i8 addrspace(3)* addrspace(3)*
+ %a.1 = getelementptr inbounds i32, i32 addrspace(3)* %a, i32 1
+
+ %ld.0 = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %a.cast
+ %ld.1 = load i32, i32 addrspace(3)* %a.1
+
+ ret void
+}
+
+; CHECK-LABEL: @merge_store_ptr32_i32(
+; CHECK: [[ELT0:%[0-9]+]] = ptrtoint i8 addrspace(3)* %ptr0 to i32
+; CHECK: insertelement <2 x i32> undef, i32 [[ELT0]], i32 0
+; CHECK: store <2 x i32>
+define void @merge_store_ptr32_i32(i32 addrspace(3)* nocapture %a, i8 addrspace(3)* %ptr0, i32 %val1) #0 {
+entry:
+ %a.cast = bitcast i32 addrspace(3)* %a to i8 addrspace(3)* addrspace(3)*
+ %a.1 = getelementptr inbounds i32, i32 addrspace(3)* %a, i32 1
+
+ store i8 addrspace(3)* %ptr0, i8 addrspace(3)* addrspace(3)* %a.cast
+ store i32 %val1, i32 addrspace(3)* %a.1
+
+ ret void
+}
+
+; CHECK-LABEL: @merge_store_i32_ptr32(
+; CHECK: [[ELT1:%[0-9]+]] = ptrtoint i8 addrspace(3)* %ptr1 to i32
+; CHECK: insertelement <2 x i32> %{{[0-9]+}}, i32 [[ELT1]], i32 1
+; CHECK: store <2 x i32>
+define void @merge_store_i32_ptr32(i8 addrspace(3)* addrspace(3)* nocapture %a, i32 %val0, i8 addrspace(3)* %ptr1) #0 {
+entry:
+ %a.1 = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* %a, i32 1
+ %a.cast = bitcast i8 addrspace(3)* addrspace(3)* %a to i32 addrspace(3)*
+
+ store i32 %val0, i32 addrspace(3)* %a.cast
+ store i8 addrspace(3)* %ptr1, i8 addrspace(3)* addrspace(3)* %a.1
+
+ ret void
+}
+
+; CHECK-LABEL: @no_merge_store_ptr32_i64(
+; CHECK: store i8 addrspace(3)*
+; CHECK: store i64
+define void @no_merge_store_ptr32_i64(i64 addrspace(1)* nocapture %a, i8 addrspace(3)* %ptr0, i64 %val1) #0 {
+entry:
+ %a.cast = bitcast i64 addrspace(1)* %a to i8 addrspace(3)* addrspace(1)*
+ %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1
+
+
+ store i8 addrspace(3)* %ptr0, i8 addrspace(3)* addrspace(1)* %a.cast
+ store i64 %val1, i64 addrspace(1)* %a.1
+
+ ret void
+}
+
+; CHECK-LABEL: @no_merge_store_i64_ptr32(
+; CHECK: store i64
+; CHECK: store i8 addrspace(3)*
+define void @no_merge_store_i64_ptr32(i8 addrspace(3)* addrspace(1)* nocapture %a, i64 %val0, i8 addrspace(3)* %ptr1) #0 {
+entry:
+ %a.1 = getelementptr inbounds i8 addrspace(3)*, i8 addrspace(3)* addrspace(1)* %a, i64 1
+ %a.cast = bitcast i8 addrspace(3)* addrspace(1)* %a to i64 addrspace(1)*
+
+ store i64 %val0, i64 addrspace(1)* %a.cast
+ store i8 addrspace(3)* %ptr1, i8 addrspace(3)* addrspace(1)* %a.1
+
+ ret void
+}
+
+; CHECK-LABEL: @no_merge_load_i64_ptr32(
+; CHECK: load i64,
+; CHECK: load i8 addrspace(3)*,
+define void @no_merge_load_i64_ptr32(i64 addrspace(1)* nocapture %a) #0 {
+entry:
+ %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1
+ %a.1.cast = bitcast i64 addrspace(1)* %a.1 to i8 addrspace(3)* addrspace(1)*
+
+ %ld.0 = load i64, i64 addrspace(1)* %a
+ %ld.1 = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(1)* %a.1.cast
+
+ ret void
+}
+
+; CHECK-LABEL: @no_merge_load_ptr32_i64(
+; CHECK: load i8 addrspace(3)*,
+; CHECK: load i64,
+define void @no_merge_load_ptr32_i64(i64 addrspace(1)* nocapture %a) #0 {
+entry:
+ %a.cast = bitcast i64 addrspace(1)* %a to i8 addrspace(3)* addrspace(1)*
+ %a.1 = getelementptr inbounds i64, i64 addrspace(1)* %a, i64 1
+
+ %ld.0 = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(1)* %a.cast
+ %ld.1 = load i64, i64 addrspace(1)* %a.1
+
+ ret void
+}
+
+; XXX - This isn't merged for some reason
+; CHECK-LABEL: @merge_v2p1i8_v2p1i8(
+; CHECK: load <2 x i8 addrspace(1)*>
+; CHECK: load <2 x i8 addrspace(1)*>
+; CHECK: store <2 x i8 addrspace(1)*>
+; CHECK: store <2 x i8 addrspace(1)*>
+define void @merge_v2p1i8_v2p1i8(<2 x i8 addrspace(1)*> addrspace(1)* nocapture noalias %a, <2 x i8 addrspace(1)*> addrspace(1)* nocapture readonly noalias %b) #0 {
+entry:
+ %a.1 = getelementptr inbounds <2 x i8 addrspace(1)*>, <2 x i8 addrspace(1)*> addrspace(1)* %a, i64 1
+ %b.1 = getelementptr inbounds <2 x i8 addrspace(1)*>, <2 x i8 addrspace(1)*> addrspace(1)* %b, i64 1
+
+ %ld.c = load <2 x i8 addrspace(1)*>, <2 x i8 addrspace(1)*> addrspace(1)* %b, align 4
+ %ld.c.idx.1 = load <2 x i8 addrspace(1)*>, <2 x i8 addrspace(1)*> addrspace(1)* %b.1, align 4
+
+ store <2 x i8 addrspace(1)*> zeroinitializer, <2 x i8 addrspace(1)*> addrspace(1)* %a, align 4
+ store <2 x i8 addrspace(1)*> zeroinitializer, <2 x i8 addrspace(1)*> addrspace(1)* %a.1, align 4
+ ret void
+}
+
+; CHECK-LABEL: @merge_load_ptr64_f64(
+; CHECK: load <2 x i64>
+; CHECK: [[ELT0:%[0-9]+]] = extractelement <2 x i64> %{{[0-9]+}}, i32 0
+; CHECK: [[ELT0_INT:%[0-9]+]] = inttoptr i64 [[ELT0]] to i8 addrspace(1)*
+; CHECK: [[ELT1_INT:%[0-9]+]] = extractelement <2 x i64> %{{[0-9]+}}, i32 1
+; CHECK: bitcast i64 [[ELT1_INT]] to double
+define void @merge_load_ptr64_f64(double addrspace(1)* nocapture %a) #0 {
+entry:
+ %a.cast = bitcast double addrspace(1)* %a to i8 addrspace(1)* addrspace(1)*
+ %a.1 = getelementptr inbounds double, double addrspace(1)* %a, i64 1
+
+ %ld.0 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a.cast
+ %ld.1 = load double, double addrspace(1)* %a.1
+
+ ret void
+}
+
+; CHECK-LABEL: @merge_load_f64_ptr64(
+; CHECK: load <2 x i64>
+; CHECK: [[ELT0:%[0-9]+]] = extractelement <2 x i64> %{{[0-9]+}}, i32 0
+; CHECK: bitcast i64 [[ELT0]] to double
+; CHECK: [[ELT1:%[0-9]+]] = extractelement <2 x i64> %{{[0-9]+}}, i32 1
+; CHECK: inttoptr i64 [[ELT1]] to i8 addrspace(1)*
+define void @merge_load_f64_ptr64(double addrspace(1)* nocapture %a) #0 {
+entry:
+ %a.1 = getelementptr inbounds double, double addrspace(1)* %a, i64 1
+ %a.1.cast = bitcast double addrspace(1)* %a.1 to i8 addrspace(1)* addrspace(1)*
+
+ %ld.0 = load double, double addrspace(1)* %a
+ %ld.1 = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a.1.cast
+
+ ret void
+}
+
+; CHECK-LABEL: @merge_store_ptr64_f64(
+; CHECK: [[ELT0_INT:%[0-9]+]] = ptrtoint i8 addrspace(1)* %ptr0 to i64
+; CHECK: insertelement <2 x i64> undef, i64 [[ELT0_INT]], i32 0
+; CHECK: [[ELT1_INT:%[0-9]+]] = bitcast double %val1 to i64
+; CHECK: insertelement <2 x i64> %{{[0-9]+}}, i64 [[ELT1_INT]], i32 1
+; CHECK: store <2 x i64>
+define void @merge_store_ptr64_f64(double addrspace(1)* nocapture %a, i8 addrspace(1)* %ptr0, double %val1) #0 {
+entry:
+ %a.cast = bitcast double addrspace(1)* %a to i8 addrspace(1)* addrspace(1)*
+ %a.1 = getelementptr inbounds double, double addrspace(1)* %a, i64 1
+
+ store i8 addrspace(1)* %ptr0, i8 addrspace(1)* addrspace(1)* %a.cast
+ store double %val1, double addrspace(1)* %a.1
+
+ ret void
+}
+
+; CHECK-LABEL: @merge_store_f64_ptr64(
+; CHECK: [[ELT0_INT:%[0-9]+]] = bitcast double %val0 to i64
+; CHECK: insertelement <2 x i64> undef, i64 [[ELT0_INT]], i32 0
+; CHECK: [[ELT1_INT:%[0-9]+]] = ptrtoint i8 addrspace(1)* %ptr1 to i64
+; CHECK: insertelement <2 x i64> %{{[0-9]+}}, i64 [[ELT1_INT]], i32 1
+; CHECK: store <2 x i64>
+define void @merge_store_f64_ptr64(i8 addrspace(1)* addrspace(1)* nocapture %a, double %val0, i8 addrspace(1)* %ptr1) #0 {
+entry:
+ %a.1 = getelementptr inbounds i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %a, i64 1
+ %a.cast = bitcast i8 addrspace(1)* addrspace(1)* %a to double addrspace(1)*
+
+ store double %val0, double addrspace(1)* %a.cast
+ store i8 addrspace(1)* %ptr1, i8 addrspace(1)* addrspace(1)* %a.1
+
+ ret void
+}
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/Transforms/LoadStoreVectorizer/AMDGPU/weird-type-accesses.ll b/test/Transforms/LoadStoreVectorizer/AMDGPU/weird-type-accesses.ll
new file mode 100644
index 000000000000..18f62be27c82
--- /dev/null
+++ b/test/Transforms/LoadStoreVectorizer/AMDGPU/weird-type-accesses.ll
@@ -0,0 +1,199 @@
+; RUN: opt -mtriple=amdgcn-amd-amdhsa -load-store-vectorizer -S -o - %s | FileCheck %s
+
+; Checks that we don't merge loads/stores of types smaller than one
+; byte, or vectors with elements smaller than one byte.
+
+%struct.foo = type { i32, i8 }
+
+declare void @use_i1(i1)
+declare void @use_i2(i2)
+declare void @use_i8(i8)
+declare void @use_foo(%struct.foo)
+declare void @use_v2i2(<2 x i2>)
+declare void @use_v4i2(<4 x i2>)
+declare void @use_v2i9(<2 x i9>)
+
+; CHECK-LABEL: @merge_store_2_constants_i1(
+; CHECK: store i1
+; CHECK: store i1
+define void @merge_store_2_constants_i1(i1 addrspace(1)* %out) #0 {
+ %out.gep.1 = getelementptr i1, i1 addrspace(1)* %out, i32 1
+ store i1 true, i1 addrspace(1)* %out.gep.1
+ store i1 false, i1 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: @merge_store_2_constants_i2(
+; CHECK: store i2 1
+; CHECK: store i2 -1
+define void @merge_store_2_constants_i2(i2 addrspace(1)* %out) #0 {
+ %out.gep.1 = getelementptr i2, i2 addrspace(1)* %out, i32 1
+ store i2 1, i2 addrspace(1)* %out.gep.1
+ store i2 -1, i2 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: @merge_different_store_sizes_i1_i8(
+; CHECK: store i1 true
+; CHECK: store i8 123
+define void @merge_different_store_sizes_i1_i8(i8 addrspace(1)* %out) #0 {
+ %out.i1 = bitcast i8 addrspace(1)* %out to i1 addrspace(1)*
+ %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
+ store i1 true, i1 addrspace(1)* %out.i1
+ store i8 123, i8 addrspace(1)* %out.gep.1
+ ret void
+}
+
+; CHECK-LABEL: @merge_different_store_sizes_i8_i1(
+; CHECK: store i8 123
+; CHECK: store i1 true
+define void @merge_different_store_sizes_i8_i1(i1 addrspace(1)* %out) #0 {
+ %out.i8 = bitcast i1 addrspace(1)* %out to i8 addrspace(1)*
+ %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out.i8, i32 1
+ store i8 123, i8 addrspace(1)* %out.gep.1
+ store i1 true, i1 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: @merge_store_2_constant_structs(
+; CHECK: store %struct.foo
+; CHECK: store %struct.foo
+define void @merge_store_2_constant_structs(%struct.foo addrspace(1)* %out) #0 {
+ %out.gep.1 = getelementptr %struct.foo, %struct.foo addrspace(1)* %out, i32 1
+ store %struct.foo { i32 12, i8 3 }, %struct.foo addrspace(1)* %out.gep.1
+ store %struct.foo { i32 92, i8 9 }, %struct.foo addrspace(1)* %out
+ ret void
+}
+
+; sub-byte element size
+; CHECK-LABEL: @merge_store_2_constants_v2i2(
+; CHECK: store <2 x i2>
+; CHECK: store <2 x i2>
+define void @merge_store_2_constants_v2i2(<2 x i2> addrspace(1)* %out) #0 {
+ %out.gep.1 = getelementptr <2 x i2>, <2 x i2> addrspace(1)* %out, i32 1
+ store <2 x i2> <i2 1, i2 -1>, <2 x i2> addrspace(1)* %out.gep.1
+ store <2 x i2> <i2 -1, i2 1>, <2 x i2> addrspace(1)* %out
+ ret void
+}
+
+; sub-byte element size but byte size
+
+; CHECK-LABEL: @merge_store_2_constants_v4i2(
+; CHECK: store <4 x i2>
+; CHECK: store <4 x i2>
+define void @merge_store_2_constants_v4i2(<4 x i2> addrspace(1)* %out) #0 {
+ %out.gep.1 = getelementptr <4 x i2>, <4 x i2> addrspace(1)* %out, i32 1
+ store <4 x i2> <i2 1, i2 -1, i2 1, i2 -1>, <4 x i2> addrspace(1)* %out.gep.1
+ store <4 x i2> <i2 -1, i2 1, i2 -1, i2 1>, <4 x i2> addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: @merge_load_2_constants_i1(
+; CHECK: load i1
+; CHECK: load i1
+define void @merge_load_2_constants_i1(i1 addrspace(1)* %out) #0 {
+ %out.gep.1 = getelementptr i1, i1 addrspace(1)* %out, i32 1
+ %x = load i1, i1 addrspace(1)* %out.gep.1
+ %y = load i1, i1 addrspace(1)* %out
+ call void @use_i1(i1 %x)
+ call void @use_i1(i1 %y)
+ ret void
+}
+
+; CHECK-LABEL: @merge_load_2_constants_i2(
+; CHECK: load i2
+; CHECK: load i2
+define void @merge_load_2_constants_i2(i2 addrspace(1)* %out) #0 {
+ %out.gep.1 = getelementptr i2, i2 addrspace(1)* %out, i32 1
+ %x = load i2, i2 addrspace(1)* %out.gep.1
+ %y = load i2, i2 addrspace(1)* %out
+ call void @use_i2(i2 %x)
+ call void @use_i2(i2 %y)
+ ret void
+}
+
+; CHECK-LABEL: @merge_different_load_sizes_i1_i8(
+; CHECK: load i1
+; CHECK: load i8
+define void @merge_different_load_sizes_i1_i8(i8 addrspace(1)* %out) #0 {
+ %out.i1 = bitcast i8 addrspace(1)* %out to i1 addrspace(1)*
+ %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out, i32 1
+ %x = load i1, i1 addrspace(1)* %out.i1
+ %y = load i8, i8 addrspace(1)* %out.gep.1
+ call void @use_i1(i1 %x)
+ call void @use_i8(i8 %y)
+ ret void
+}
+
+; CHECK-LABEL: @merge_different_load_sizes_i8_i1(
+; CHECK: load i8
+; CHECK: load i1
+define void @merge_different_load_sizes_i8_i1(i1 addrspace(1)* %out) #0 {
+ %out.i8 = bitcast i1 addrspace(1)* %out to i8 addrspace(1)*
+ %out.gep.1 = getelementptr i8, i8 addrspace(1)* %out.i8, i32 1
+ %x = load i8, i8 addrspace(1)* %out.gep.1
+ %y = load i1, i1 addrspace(1)* %out
+ call void @use_i8(i8 %x)
+ call void @use_i1(i1 %y)
+ ret void
+}
+
+; CHECK-LABEL: @merge_load_2_constant_structs(
+; CHECK: load %struct.foo
+; CHECK: load %struct.foo
+define void @merge_load_2_constant_structs(%struct.foo addrspace(1)* %out) #0 {
+ %out.gep.1 = getelementptr %struct.foo, %struct.foo addrspace(1)* %out, i32 1
+ %x = load %struct.foo, %struct.foo addrspace(1)* %out.gep.1
+ %y = load %struct.foo, %struct.foo addrspace(1)* %out
+ call void @use_foo(%struct.foo %x)
+ call void @use_foo(%struct.foo %y)
+ ret void
+}
+
+; CHECK-LABEL: @merge_load_2_constants_v2i2(
+; CHECK: load <2 x i2>
+; CHECK: load <2 x i2>
+define void @merge_load_2_constants_v2i2(<2 x i2> addrspace(1)* %out) #0 {
+ %out.gep.1 = getelementptr <2 x i2>, <2 x i2> addrspace(1)* %out, i32 1
+ %x = load <2 x i2>, <2 x i2> addrspace(1)* %out.gep.1
+ %y = load <2 x i2>, <2 x i2> addrspace(1)* %out
+ call void @use_v2i2(<2 x i2> %x)
+ call void @use_v2i2(<2 x i2> %y)
+ ret void
+}
+
+; CHECK-LABEL: @merge_load_2_constants_v4i2(
+; CHECK: load <4 x i2>
+; CHECK: load <4 x i2>
+define void @merge_load_2_constants_v4i2(<4 x i2> addrspace(1)* %out) #0 {
+ %out.gep.1 = getelementptr <4 x i2>, <4 x i2> addrspace(1)* %out, i32 1
+ %x = load <4 x i2>, <4 x i2> addrspace(1)* %out.gep.1
+ %y = load <4 x i2>, <4 x i2> addrspace(1)* %out
+ call void @use_v4i2(<4 x i2> %x)
+ call void @use_v4i2(<4 x i2> %y)
+ ret void
+}
+
+; CHECK-LABEL: @merge_store_2_constants_i9(
+; CHECK: store i9 3
+; CHECK: store i9 -5
+define void @merge_store_2_constants_i9(i9 addrspace(1)* %out) #0 {
+ %out.gep.1 = getelementptr i9, i9 addrspace(1)* %out, i32 1
+ store i9 3, i9 addrspace(1)* %out.gep.1
+ store i9 -5, i9 addrspace(1)* %out
+ ret void
+}
+
+; CHECK-LABEL: @merge_load_2_constants_v2i9(
+; CHECK: load <2 x i9>
+; CHECK: load <2 x i9>
+define void @merge_load_2_constants_v2i9(<2 x i9> addrspace(1)* %out) #0 {
+ %out.gep.1 = getelementptr <2 x i9>, <2 x i9> addrspace(1)* %out, i32 1
+ %x = load <2 x i9>, <2 x i9> addrspace(1)* %out.gep.1
+ %y = load <2 x i9>, <2 x i9> addrspace(1)* %out
+ call void @use_v2i9(<2 x i9> %x)
+ call void @use_v2i9(<2 x i9> %y)
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/Transforms/LoadStoreVectorizer/X86/correct-order.ll b/test/Transforms/LoadStoreVectorizer/X86/correct-order.ll
new file mode 100644
index 000000000000..b98014e76cb6
--- /dev/null
+++ b/test/Transforms/LoadStoreVectorizer/X86/correct-order.ll
@@ -0,0 +1,26 @@
+; RUN: opt -mtriple=x86-linux -load-store-vectorizer -S -o - %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+
+; CHECK-LABEL: @correct_order(
+; CHECK: bitcast i32*
+; CHECK: load <2 x i32>
+; CHECK: load i32
+; CHECK: bitcast i32*
+; CHECK: store <2 x i32>
+; CHECK: load i32
+define void @correct_order(i32* noalias %ptr) {
+ %next.gep = getelementptr i32, i32* %ptr, i64 0
+ %next.gep1 = getelementptr i32, i32* %ptr, i64 1
+ %next.gep2 = getelementptr i32, i32* %ptr, i64 2
+
+ %l1 = load i32, i32* %next.gep1, align 4
+ %l2 = load i32, i32* %next.gep, align 4
+ store i32 0, i32* %next.gep1, align 4
+ store i32 0, i32* %next.gep, align 4
+ %l3 = load i32, i32* %next.gep1, align 4
+ %l4 = load i32, i32* %next.gep2, align 4
+
+ ret void
+}
+
diff --git a/test/Transforms/TailDup/X86/lit.local.cfg b/test/Transforms/LoadStoreVectorizer/X86/lit.local.cfg
index e71f3cc4c41e..e71f3cc4c41e 100644
--- a/test/Transforms/TailDup/X86/lit.local.cfg
+++ b/test/Transforms/LoadStoreVectorizer/X86/lit.local.cfg
diff --git a/test/Transforms/LoadStoreVectorizer/X86/preserve-order32.ll b/test/Transforms/LoadStoreVectorizer/X86/preserve-order32.ll
new file mode 100644
index 000000000000..9a7b294e4ced
--- /dev/null
+++ b/test/Transforms/LoadStoreVectorizer/X86/preserve-order32.ll
@@ -0,0 +1,28 @@
+; RUN: opt -mtriple=x86-linux -load-store-vectorizer -S -o - %s | FileCheck %s
+
+target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:64:64-i64:64-v16:16-v24:32-v32:32-v48:64-v96:128-v192:256-v256:256-v512:512-v1024:1024-v2048:2048-n32:64"
+
+%struct.buffer_t = type { i32, i8* }
+
+; Check an i32 and i8* get vectorized, and that the two accesses
+; (load into buff.val and store to buff.p) preserve their order.
+; Vectorized loads should be inserted at the position of the first load,
+; and instructions which were between the first and last load should be
+; reordered preserving their relative order inasmuch as possible.
+
+; CHECK-LABEL: @preserve_order_32(
+; CHECK: load <2 x i32>
+; CHECK: %buff.val = load i8
+; CHECK: store i8 0
+define void @preserve_order_32(%struct.buffer_t* noalias %buff) #0 {
+entry:
+ %tmp1 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i32 0, i32 1
+ %buff.p = load i8*, i8** %tmp1, align 8
+ %buff.val = load i8, i8* %buff.p, align 8
+ store i8 0, i8* %buff.p, align 8
+ %tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i32 0, i32 0
+ %buff.int = load i32, i32* %tmp0, align 8
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/Transforms/LoadStoreVectorizer/X86/preserve-order64.ll b/test/Transforms/LoadStoreVectorizer/X86/preserve-order64.ll
new file mode 100644
index 000000000000..23c43863015b
--- /dev/null
+++ b/test/Transforms/LoadStoreVectorizer/X86/preserve-order64.ll
@@ -0,0 +1,77 @@
+; RUN: opt -mtriple=x86-linux -load-store-vectorizer -S -o - %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+
+%struct.buffer_t = type { i64, i8* }
+%struct.nested.buffer = type { %struct.buffer_t, %struct.buffer_t }
+
+; Check an i64 and i8* get vectorized, and that the two accesses
+; (load into buff.val and store to buff.p) preserve their order.
+; Vectorized loads should be inserted at the position of the first load,
+; and instructions which were between the first and last load should be
+; reordered preserving their relative order inasmuch as possible.
+
+; CHECK-LABEL: @preserve_order_64(
+; CHECK: load <2 x i64>
+; CHECK: %buff.val = load i8
+; CHECK: store i8 0
+define void @preserve_order_64(%struct.buffer_t* noalias %buff) #0 {
+entry:
+ %tmp1 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i64 0, i32 1
+ %buff.p = load i8*, i8** %tmp1, align 8
+ %buff.val = load i8, i8* %buff.p, align 8
+ store i8 0, i8* %buff.p, align 8
+ %tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i64 0, i32 0
+ %buff.int = load i64, i64* %tmp0, align 8
+ ret void
+}
+
+; Check reordering recurses correctly.
+
+; CHECK-LABEL: @transitive_reorder(
+; CHECK: load <2 x i64>
+; CHECK: %buff.val = load i8
+; CHECK: store i8 0
+define void @transitive_reorder(%struct.buffer_t* noalias %buff, %struct.nested.buffer* noalias %nest) #0 {
+entry:
+ %nest0_0 = getelementptr inbounds %struct.nested.buffer, %struct.nested.buffer* %nest, i64 0, i32 0
+ %tmp1 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %nest0_0, i64 0, i32 1
+ %buff.p = load i8*, i8** %tmp1, align 8
+ %buff.val = load i8, i8* %buff.p, align 8
+ store i8 0, i8* %buff.p, align 8
+ %nest1_0 = getelementptr inbounds %struct.nested.buffer, %struct.nested.buffer* %nest, i64 0, i32 0
+ %tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %nest1_0, i64 0, i32 0
+ %buff.int = load i64, i64* %tmp0, align 8
+ ret void
+}
+
+; Check for no vectorization over phi node
+
+; CHECK-LABEL: @no_vect_phi(
+; CHECK: load i8*
+; CHECK: load i8
+; CHECK: store i8 0
+; CHECK: load i64
+define void @no_vect_phi(i32* noalias %ptr, %struct.buffer_t* noalias %buff) {
+entry:
+ %tmp1 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i64 0, i32 1
+ %buff.p = load i8*, i8** %tmp1, align 8
+ %buff.val = load i8, i8* %buff.p, align 8
+ store i8 0, i8* %buff.p, align 8
+ br label %"for something"
+
+"for something":
+ %index = phi i64 [ 0, %entry ], [ %index.next, %"for something" ]
+
+ %tmp0 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %buff, i64 0, i32 0
+ %buff.int = load i64, i64* %tmp0, align 8
+
+ %index.next = add i64 %index, 8
+ %cmp_res = icmp eq i64 %index.next, 8
+ br i1 %cmp_res, label %ending, label %"for something"
+
+ending:
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll b/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll
new file mode 100644
index 000000000000..cee7d9f8f9b5
--- /dev/null
+++ b/test/Transforms/LoadStoreVectorizer/X86/subchain-interleaved.ll
@@ -0,0 +1,87 @@
+; RUN: opt -mtriple=x86-linux -load-store-vectorizer -S -o - %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+
+; Vectorized subsets of the load/store chains in the presence of
+; interleaved loads/stores
+
+; CHECK-LABEL: @interleave_2L_2S(
+; CHECK: load <2 x i32>
+; CHECK: load i32
+; CHECK: store <2 x i32>
+; CHECK: load i32
+define void @interleave_2L_2S(i32* noalias %ptr) {
+ %next.gep = getelementptr i32, i32* %ptr, i64 0
+ %next.gep1 = getelementptr i32, i32* %ptr, i64 1
+ %next.gep2 = getelementptr i32, i32* %ptr, i64 2
+
+ %l1 = load i32, i32* %next.gep1, align 4
+ %l2 = load i32, i32* %next.gep, align 4
+ store i32 0, i32* %next.gep1, align 4
+ store i32 0, i32* %next.gep, align 4
+ %l3 = load i32, i32* %next.gep1, align 4
+ %l4 = load i32, i32* %next.gep2, align 4
+
+ ret void
+}
+
+; CHECK-LABEL: @interleave_3L_2S_1L(
+; CHECK: load <3 x i32>
+; CHECK: store <2 x i32>
+; CHECK: load i32
+
+define void @interleave_3L_2S_1L(i32* noalias %ptr) {
+ %next.gep = getelementptr i32, i32* %ptr, i64 0
+ %next.gep1 = getelementptr i32, i32* %ptr, i64 1
+ %next.gep2 = getelementptr i32, i32* %ptr, i64 2
+
+ %l2 = load i32, i32* %next.gep, align 4
+ %l1 = load i32, i32* %next.gep1, align 4
+ store i32 0, i32* %next.gep1, align 4
+ store i32 0, i32* %next.gep, align 4
+ %l3 = load i32, i32* %next.gep1, align 4
+ %l4 = load i32, i32* %next.gep2, align 4
+
+ ret void
+}
+
+; CHECK-LABEL: @chain_suffix(
+; CHECK: load i32
+; CHECK: store <2 x i32>
+; CHECK: load <2 x i32>
+define void @chain_suffix(i32* noalias %ptr) {
+ %next.gep = getelementptr i32, i32* %ptr, i64 0
+ %next.gep1 = getelementptr i32, i32* %ptr, i64 1
+ %next.gep2 = getelementptr i32, i32* %ptr, i64 2
+
+ %l2 = load i32, i32* %next.gep, align 4
+ store i32 0, i32* %next.gep1, align 4
+ store i32 0, i32* %next.gep, align 4
+ %l3 = load i32, i32* %next.gep1, align 4
+ %l4 = load i32, i32* %next.gep2, align 4
+
+ ret void
+}
+
+
+; CHECK-LABEL: @chain_prefix_suffix(
+; CHECK: load <2 x i32>
+; CHECK: store <2 x i32>
+; CHECK: load <3 x i32>
+define void @chain_prefix_suffix(i32* noalias %ptr) {
+ %next.gep = getelementptr i32, i32* %ptr, i64 0
+ %next.gep1 = getelementptr i32, i32* %ptr, i64 1
+ %next.gep2 = getelementptr i32, i32* %ptr, i64 2
+ %next.gep3 = getelementptr i32, i32* %ptr, i64 3
+
+ %l1 = load i32, i32* %next.gep, align 4
+ %l2 = load i32, i32* %next.gep1, align 4
+ store i32 0, i32* %next.gep1, align 4
+ store i32 0, i32* %next.gep2, align 4
+ %l3 = load i32, i32* %next.gep1, align 4
+ %l4 = load i32, i32* %next.gep2, align 4
+ %l5 = load i32, i32* %next.gep3, align 4
+
+ ret void
+}
+
diff --git a/test/Transforms/LoopDataPrefetch/AArch64/kryo-large-stride.ll b/test/Transforms/LoopDataPrefetch/AArch64/kryo-large-stride.ll
new file mode 100644
index 000000000000..6433a82fb180
--- /dev/null
+++ b/test/Transforms/LoopDataPrefetch/AArch64/kryo-large-stride.ll
@@ -0,0 +1,51 @@
+; RUN: opt -mcpu=kryo -mtriple=aarch64-gnu-linux -loop-data-prefetch -max-prefetch-iters-ahead=1000 -S < %s | FileCheck %s --check-prefix=LARGE_PREFETCH --check-prefix=ALL
+; RUN: opt -mcpu=kryo -mtriple=aarch64-gnu-linux -loop-data-prefetch -S < %s | FileCheck %s --check-prefix=NO_LARGE_PREFETCH --check-prefix=ALL
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
+
+; ALL-LABEL: @small_stride(
+define void @small_stride(double* nocapture %a, double* nocapture readonly %b) {
+entry:
+ br label %for.body
+
+; ALL: for.body:
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds double, double* %b, i64 %indvars.iv
+; ALL-NOT: call void @llvm.prefetch
+ %0 = load double, double* %arrayidx, align 8
+ %add = fadd double %0, 1.000000e+00
+ %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv
+ store double %add, double* %arrayidx2, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1600
+ br i1 %exitcond, label %for.end, label %for.body
+
+; ALL: for.end:
+for.end: ; preds = %for.body
+ ret void
+}
+
+; ALL-LABEL: @large_stride(
+define void @large_stride(double* nocapture %a, double* nocapture readonly %b) {
+entry:
+ br label %for.body
+
+; ALL: for.body:
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds double, double* %b, i64 %indvars.iv
+; LARGE_PREFETCH: call void @llvm.prefetch
+; NO_LARGE_PREFETCH-NOT: call void @llvm.prefetch
+ %0 = load double, double* %arrayidx, align 8
+ %add = fadd double %0, 1.000000e+00
+ %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv
+ store double %add, double* %arrayidx2, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 150
+ %exitcond = icmp eq i64 %indvars.iv.next, 160000
+ br i1 %exitcond, label %for.end, label %for.body
+
+; ALL: for.end:
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll b/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll
new file mode 100644
index 000000000000..d585367c563e
--- /dev/null
+++ b/test/Transforms/LoopDataPrefetch/AArch64/large-stride.ll
@@ -0,0 +1,52 @@
+; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch -max-prefetch-iters-ahead=100 -S < %s | FileCheck %s --check-prefix=LARGE_PREFETCH --check-prefix=ALL
+; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch -S < %s | FileCheck %s --check-prefix=NO_LARGE_PREFETCH --check-prefix=ALL
+; RUN: opt -mcpu=generic -mtriple=arm64-apple-ios -loop-data-prefetch -S < %s | FileCheck %s --check-prefix=NO_LARGE_PREFETCH --check-prefix=ALL
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32:64-S128"
+
+; ALL-LABEL: @small_stride(
+define void @small_stride(double* nocapture %a, double* nocapture readonly %b) {
+entry:
+ br label %for.body
+
+; ALL: for.body:
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds double, double* %b, i64 %indvars.iv
+; ALL-NOT: call void @llvm.prefetch
+ %0 = load double, double* %arrayidx, align 8
+ %add = fadd double %0, 1.000000e+00
+ %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv
+ store double %add, double* %arrayidx2, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1600
+ br i1 %exitcond, label %for.end, label %for.body
+
+; ALL: for.end:
+for.end: ; preds = %for.body
+ ret void
+}
+
+; ALL-LABEL: @large_stride(
+define void @large_stride(double* nocapture %a, double* nocapture readonly %b) {
+entry:
+ br label %for.body
+
+; ALL: for.body:
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds double, double* %b, i64 %indvars.iv
+; LARGE_PREFETCH: call void @llvm.prefetch
+; NO_LARGE_PREFETCH-NOT: call void @llvm.prefetch
+ %0 = load double, double* %arrayidx, align 8
+ %add = fadd double %0, 1.000000e+00
+ %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv
+ store double %add, double* %arrayidx2, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 300
+ %exitcond = icmp eq i64 %indvars.iv.next, 160000
+ br i1 %exitcond, label %for.end, label %for.body
+
+; ALL: for.end:
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/LoopDataPrefetch/AArch64/lit.local.cfg b/test/Transforms/LoopDataPrefetch/AArch64/lit.local.cfg
new file mode 100644
index 000000000000..675f48e199a0
--- /dev/null
+++ b/test/Transforms/LoopDataPrefetch/AArch64/lit.local.cfg
@@ -0,0 +1,4 @@
+config.suffixes = ['.ll']
+
+if not 'AArch64' in config.root.targets:
+ config.unsupported = True
diff --git a/test/Transforms/LoopDataPrefetch/AArch64/opt-remark.ll b/test/Transforms/LoopDataPrefetch/AArch64/opt-remark.ll
new file mode 100644
index 000000000000..66748570e66c
--- /dev/null
+++ b/test/Transforms/LoopDataPrefetch/AArch64/opt-remark.ll
@@ -0,0 +1,78 @@
+; RUN: opt -mcpu=cyclone -mtriple=arm64-apple-ios -loop-data-prefetch \
+; RUN: -pass-remarks=loop-data-prefetch -S -max-prefetch-iters-ahead=100 \
+; RUN: < %s 2>&1 | FileCheck %s
+
+; ModuleID = '/tmp/s.c'
+source_filename = "/tmp/s.c"
+target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
+target triple = "arm64-apple-ios5.0.0"
+
+; 1 struct MyStruct {
+; 2 int field;
+; 3 char kk[2044];
+; 4 } *my_struct;
+; 5
+; 6 int f(struct MyStruct *p, int N) {
+; 7 int total = 0;
+; 8 for (int i = 0; i < N; i++) {
+; 9 total += my_struct[i].field;
+; 10 }
+; 11 return total;
+; 12 }
+
+; CHECK: remark: /tmp/s.c:9:27: prefetched memory access
+
+%struct.MyStruct = type { i32, [2044 x i8] }
+
+@my_struct = common global %struct.MyStruct* null, align 8
+
+define i32 @f(%struct.MyStruct* nocapture readnone %p, i32 %N) !dbg !6 {
+entry:
+ %cmp6 = icmp sgt i32 %N, 0, !dbg !8
+ br i1 %cmp6, label %for.body.lr.ph, label %for.cond.cleanup, !dbg !9
+
+for.body.lr.ph: ; preds = %entry
+ %0 = load %struct.MyStruct*, %struct.MyStruct** @my_struct, align 8, !dbg !10, !tbaa !11
+ br label %for.body, !dbg !9
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %total.0.lcssa = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ ret i32 %total.0.lcssa, !dbg !15
+
+for.body: ; preds = %for.body, %for.body.lr.ph
+ %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+ %total.07 = phi i32 [ 0, %for.body.lr.ph ], [ %add, %for.body ]
+ %field = getelementptr inbounds %struct.MyStruct, %struct.MyStruct* %0, i64 %indvars.iv, i32 0, !dbg !16
+ %1 = load i32, i32* %field, align 4, !dbg !16, !tbaa !17
+ %add = add nsw i32 %1, %total.07, !dbg !20
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !9
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !9
+ %exitcond = icmp eq i32 %lftr.wideiv, %N, !dbg !9
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body, !dbg !9
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2)
+!1 = !DIFile(filename: "/tmp/s.c", directory: "/tmp")
+!2 = !{}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"PIC Level", i32 2}
+!5 = !{!"clang version 3.9.0"}
+!6 = distinct !DISubprogram(name: "f", scope: !1, file: !1, line: 6, type: !7, isLocal: false, isDefinition: true, scopeLine: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !2)
+!7 = !DISubroutineType(types: !2)
+!8 = !DILocation(line: 8, column: 21, scope: !6)
+!9 = !DILocation(line: 8, column: 3, scope: !6)
+!10 = !DILocation(line: 9, column: 14, scope: !6)
+!11 = !{!12, !12, i64 0}
+!12 = !{!"any pointer", !13, i64 0}
+!13 = !{!"omnipotent char", !14, i64 0}
+!14 = !{!"Simple C/C++ TBAA"}
+!15 = !DILocation(line: 11, column: 3, scope: !6)
+!16 = !DILocation(line: 9, column: 27, scope: !6)
+!17 = !{!18, !19, i64 0}
+!18 = !{!"MyStruct", !19, i64 0, !13, i64 4}
+!19 = !{!"int", !13, i64 0}
+!20 = !DILocation(line: 9, column: 11, scope: !6)
diff --git a/test/Transforms/LoopDataPrefetch/PowerPC/basic.ll b/test/Transforms/LoopDataPrefetch/PowerPC/basic.ll
new file mode 100644
index 000000000000..772ecae74b44
--- /dev/null
+++ b/test/Transforms/LoopDataPrefetch/PowerPC/basic.ll
@@ -0,0 +1,25 @@
+; RUN: opt -mcpu=a2 -loop-data-prefetch -S < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+define void @foo(double* nocapture %a, double* nocapture readonly %b) {
+entry:
+ br label %for.body
+
+; CHECK: for.body:
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds double, double* %b, i64 %indvars.iv
+; CHECK: call void @llvm.prefetch
+ %0 = load double, double* %arrayidx, align 8
+ %add = fadd double %0, 1.000000e+00
+ %arrayidx2 = getelementptr inbounds double, double* %a, i64 %indvars.iv
+ store double %add, double* %arrayidx2, align 8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1600
+ br i1 %exitcond, label %for.end, label %for.body
+
+; CHECK: for.end:
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/LoopDataPrefetch/PowerPC/lit.local.cfg b/test/Transforms/LoopDataPrefetch/PowerPC/lit.local.cfg
new file mode 100644
index 000000000000..091332439b18
--- /dev/null
+++ b/test/Transforms/LoopDataPrefetch/PowerPC/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'PowerPC' in config.root.targets:
+ config.unsupported = True
diff --git a/test/Transforms/LoopDeletion/multiple-exit-conditions.ll b/test/Transforms/LoopDeletion/multiple-exit-conditions.ll
index 87f8f461050d..d7d6badb1650 100644
--- a/test/Transforms/LoopDeletion/multiple-exit-conditions.ll
+++ b/test/Transforms/LoopDeletion/multiple-exit-conditions.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -loop-deletion -S | FileCheck %s
+; RUN: opt < %s -passes='require<scalar-evolution>,loop(loop-deletion)' -S | FileCheck %s
; ScalarEvolution can prove the loop iteration is finite, even though
; it can't represent the exact trip count as an expression. That's
diff --git a/test/Transforms/LoopDeletion/multiple-exits.ll b/test/Transforms/LoopDeletion/multiple-exits.ll
index 6af413b49cd9..dcf79057db54 100644
--- a/test/Transforms/LoopDeletion/multiple-exits.ll
+++ b/test/Transforms/LoopDeletion/multiple-exits.ll
@@ -2,13 +2,13 @@
; Checks whether dead loops with multiple exits can be eliminated
+define void @foo(i64 %n, i64 %m) nounwind {
+; CHECK-LABEL: @foo(
; CHECK: entry:
; CHECK-NEXT: br label %return
; CHECK: return:
; CHECK-NEXT: ret void
-
-define void @foo(i64 %n, i64 %m) nounwind {
entry:
br label %bb
@@ -24,3 +24,57 @@ bb2:
return:
ret void
}
+
+define i64 @bar(i64 %n, i64 %m) nounwind {
+; CHECK-LABEL: @bar(
+; CHECK: entry:
+; CHECK-NEXT: br label %return
+
+; CHECK: return:
+; CHECK-NEXT: ret i64 10
+
+entry:
+ br label %bb
+
+bb:
+ %x.0 = phi i64 [ 0, %entry ], [ %t0, %bb3 ]
+ %t0 = add i64 %x.0, 1
+ %t1 = icmp slt i64 %x.0, %n
+ br i1 %t1, label %bb2, label %return
+bb2:
+ %t2 = icmp slt i64 %x.0, %m
+ br i1 %t2, label %bb3, label %return
+bb3:
+ %t3 = icmp slt i64 %x.0, %m
+ br i1 %t3, label %bb, label %return
+
+return:
+ %x.lcssa = phi i64 [ 10, %bb ], [ 10, %bb2 ], [ 10, %bb3 ]
+ ret i64 %x.lcssa
+}
+
+define i64 @baz(i64 %n, i64 %m) nounwind {
+; CHECK-LABEL: @baz(
+; CHECK: return:
+; CHECK-NEXT: %x.lcssa = phi i64 [ 12, %bb ], [ 10, %bb2 ]
+; CHECK-NEXT: ret i64 %x.lcssa
+
+entry:
+ br label %bb
+
+bb:
+ %x.0 = phi i64 [ 0, %entry ], [ %t0, %bb3 ]
+ %t0 = add i64 %x.0, 1
+ %t1 = icmp slt i64 %x.0, %n
+ br i1 %t1, label %bb2, label %return
+bb2:
+ %t2 = icmp slt i64 %x.0, %m
+ br i1 %t2, label %bb3, label %return
+bb3:
+ %t3 = icmp slt i64 %x.0, %m
+ br i1 %t3, label %bb, label %return
+
+return:
+ %x.lcssa = phi i64 [ 12, %bb ], [ 10, %bb2 ], [ 10, %bb3 ]
+ ret i64 %x.lcssa
+}
diff --git a/test/Transforms/LoopDeletion/update-scev.ll b/test/Transforms/LoopDeletion/update-scev.ll
new file mode 100644
index 000000000000..641ba55ed8f6
--- /dev/null
+++ b/test/Transforms/LoopDeletion/update-scev.ll
@@ -0,0 +1,56 @@
+; RUN: opt -S -analyze -scalar-evolution -loop-deletion -scalar-evolution < %s | FileCheck %s --check-prefix=SCEV-EXPRS
+; RUN: opt -S -loop-deletion < %s | FileCheck %s --check-prefix=IR-AFTER-TRANSFORM
+; RUN: opt -S -indvars -loop-deletion -indvars < %s | FileCheck %s --check-prefix=ORIGINAL-CRASH
+
+; Checking for a crash. Loop-deletion would change the loop
+; disposition of an instruction, but not update SCEV.
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+define void @pr27570() {
+; IR-AFTER-TRANSFORM-LABEL: @pr27570(
+; ORIGINAL-CRASH: @pr27570(
+entry:
+ br label %for.cond
+
+for.cond: ; preds = %for.cond14, %entry
+ %f.0 = phi i32 [ 20, %entry ], [ 0, %for.cond14 ]
+ br label %for.body
+
+for.body: ; preds = %for.inc11, %for.cond
+; IR-AFTER-TRANSFORM: for.body:
+; IR-AFTER-TRANSFORM: %cmp = icmp eq i32 %val, -1
+; IR-AFTER-TRANSFORM: %conv7 = zext i1 %cmp to i32
+; IR-AFTER-TRANSFORM: for.body6:
+
+; SCEV-EXPRS: %conv7 = zext i1 %cmp to i32
+; SCEV-EXPRS: %conv7 = zext i1 %cmp to i32
+; SCEV-EXPRS-NEXT: --> {{.*}} LoopDispositions: { %for.body: Variant, %for.cond: Variant, %for.body6: Invariant }
+ %val = phi i32 [ -20, %for.cond ], [ %inc12, %for.inc11 ]
+ %g.040 = phi i32 [ -20, %for.cond ], [ %and.lcssa, %for.inc11 ]
+ br label %for.body6
+
+for.body6: ; preds = %for.body6, %for.body
+ %h.039 = phi i32 [ 1, %for.body ], [ %inc, %for.body6 ]
+ %g.138 = phi i32 [ %g.040, %for.body ], [ %and, %for.body6 ]
+ %cmp = icmp eq i32 %val, -1
+ %conv7 = zext i1 %cmp to i32
+ %add.i = add nsw i32 %conv7, %h.039
+ %sext = shl i32 %add.i, 24
+ %conv8 = ashr exact i32 %sext, 24
+ %cmp9 = icmp eq i32 %conv8, %f.0
+ %conv10 = zext i1 %cmp9 to i32
+ %and = and i32 %conv10, %g.138
+ %inc = add i32 %h.039, 1
+ br i1 undef, label %for.inc11, label %for.body6
+
+for.inc11: ; preds = %for.body6
+ %and.lcssa = phi i32 [ %and, %for.body6 ]
+ %inc12 = add nsw i32 %val, 1
+ %tobool = icmp eq i32 %inc12, 0
+ br i1 %tobool, label %for.cond14, label %for.body
+
+for.cond14: ; preds = %for.cond14, %for.inc11
+ br i1 undef, label %for.cond, label %for.cond14
+}
diff --git a/test/Transforms/LoopDistribute/diagnostics-with-hotness-lazy-BFI.ll b/test/Transforms/LoopDistribute/diagnostics-with-hotness-lazy-BFI.ll
new file mode 100644
index 000000000000..819c002e282e
--- /dev/null
+++ b/test/Transforms/LoopDistribute/diagnostics-with-hotness-lazy-BFI.ll
@@ -0,0 +1,82 @@
+; Check that BFI is not computed when -pass-remarks-with-hotness is off
+
+; RUN: opt -loop-distribute -S -pass-remarks-missed=loop-distribute \
+; RUN: -debug-only=block-freq -pass-remarks-with-hotness < %s 2>&1 | FileCheck %s --check-prefix=HOTNESS
+; RUN: opt -loop-distribute -S -pass-remarks-missed=loop-distribute \
+; RUN: -debug-only=block-freq < %s 2>&1 | FileCheck %s --check-prefix=NO_HOTNESS
+
+; RUN: opt -passes='require<aa>,loop-distribute' -S -pass-remarks-missed=loop-distribute \
+; RUN: -debug-only=block-freq -pass-remarks-with-hotness < %s 2>&1 | FileCheck %s --check-prefix=HOTNESS
+; RUN: opt -passes='require<aa>,loop-distribute' -S -pass-remarks-missed=loop-distribute \
+; RUN: -debug-only=block-freq < %s 2>&1 | FileCheck %s --check-prefix=NO_HOTNESS
+
+; REQUIRES: asserts
+
+; HOTNESS: block-frequency: forced
+; NO_HOTNESS-NOT: block-frequency: forced
+
+; This is the input program:
+;
+; 1 void forced (char *A, char *B, char *C, int N) {
+; 2 #pragma clang loop distribute(enable)
+; 3 for(int i = 0; i < N; i++) {
+; 4 A[i] = B[i] * C[i];
+; 5 }
+; 6 }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+
+define void @forced(i8* %A, i8* %B, i8* %C, i32 %N) !dbg !7 !prof !22 {
+entry:
+ %cmp12 = icmp sgt i32 %N, 0, !dbg !9
+ br i1 %cmp12, label %ph, label %for.cond.cleanup, !dbg !10, !prof !23
+
+ph:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %ph ]
+ %arrayidx = getelementptr inbounds i8, i8* %B, i64 %indvars.iv, !dbg !12
+ %0 = load i8, i8* %arrayidx, align 1, !dbg !12, !tbaa !13
+ %arrayidx2 = getelementptr inbounds i8, i8* %C, i64 %indvars.iv, !dbg !16
+ %1 = load i8, i8* %arrayidx2, align 1, !dbg !16, !tbaa !13
+ %mul = mul i8 %1, %0, !dbg !17
+ %arrayidx6 = getelementptr inbounds i8, i8* %A, i64 %indvars.iv, !dbg !18
+ store i8 %mul, i8* %arrayidx6, align 1, !dbg !19, !tbaa !13
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !10
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !10
+ %exitcond = icmp eq i32 %lftr.wideiv, %N, !dbg !10
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body, !dbg !10, !llvm.loop !20, !prof !24
+
+for.cond.cleanup:
+ ret void, !dbg !11
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 (trunk 267633) (llvm/trunk 267675)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2)
+!1 = !DIFile(filename: "/tmp/t.c", directory: "/tmp")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 2}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!7 = distinct !DISubprogram(name: "forced", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !2)
+!8 = !DISubroutineType(types: !2)
+!9 = !DILocation(line: 3, column: 20, scope: !7)
+!10 = !DILocation(line: 3, column: 3, scope: !7)
+!11 = !DILocation(line: 6, column: 1, scope: !7)
+!12 = !DILocation(line: 4, column: 12, scope: !7)
+!13 = !{!14, !14, i64 0}
+!14 = !{!"omnipotent char", !15, i64 0}
+!15 = !{!"Simple C/C++ TBAA"}
+!16 = !DILocation(line: 4, column: 19, scope: !7)
+!17 = !DILocation(line: 4, column: 17, scope: !7)
+!18 = !DILocation(line: 4, column: 5, scope: !7)
+!19 = !DILocation(line: 4, column: 10, scope: !7)
+!20 = distinct !{!20, !21}
+!21 = !{!"llvm.loop.distribute.enable", i1 true}
+!22 = !{!"function_entry_count", i64 3}
+!23 = !{!"branch_weights", i32 99, i32 1}
+!24 = !{!"branch_weights", i32 1, i32 99}
diff --git a/test/Transforms/LoopDistribute/diagnostics-with-hotness.ll b/test/Transforms/LoopDistribute/diagnostics-with-hotness.ll
new file mode 100644
index 000000000000..289786fa4a15
--- /dev/null
+++ b/test/Transforms/LoopDistribute/diagnostics-with-hotness.ll
@@ -0,0 +1,77 @@
+; RUN: opt -loop-distribute -S -pass-remarks-missed=loop-distribute \
+; RUN: -pass-remarks-with-hotness < %s 2>&1 | FileCheck %s --check-prefix=HOTNESS
+; RUN: opt -loop-distribute -S -pass-remarks-missed=loop-distribute \
+; RUN: < %s 2>&1 | FileCheck %s --check-prefix=NO_HOTNESS
+
+; RUN: opt -passes='require<aa>,loop-distribute' -S -pass-remarks-missed=loop-distribute \
+; RUN: -pass-remarks-with-hotness < %s 2>&1 | FileCheck %s --check-prefix=HOTNESS
+; RUN: opt -passes='require<aa>,loop-distribute' -S -pass-remarks-missed=loop-distribute \
+; RUN: < %s 2>&1 | FileCheck %s --check-prefix=NO_HOTNESS
+
+; This is the input program:
+;
+; 1 void forced (char *A, char *B, char *C, int N) {
+; 2 #pragma clang loop distribute(enable)
+; 3 for(int i = 0; i < N; i++) {
+; 4 A[i] = B[i] * C[i];
+; 5 }
+; 6 }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+; HOTNESS: remark: /tmp/t.c:3:3: loop not distributed: use -Rpass-analysis=loop-distribute for more info (hotness: 300)
+; NO_HOTNESS: remark: /tmp/t.c:3:3: loop not distributed: use -Rpass-analysis=loop-distribute for more info{{$}}
+
+define void @forced(i8* %A, i8* %B, i8* %C, i32 %N) !dbg !7 !prof !22 {
+entry:
+ %cmp12 = icmp sgt i32 %N, 0, !dbg !9
+ br i1 %cmp12, label %ph, label %for.cond.cleanup, !dbg !10, !prof !23
+
+ph:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %ph ]
+ %arrayidx = getelementptr inbounds i8, i8* %B, i64 %indvars.iv, !dbg !12
+ %0 = load i8, i8* %arrayidx, align 1, !dbg !12, !tbaa !13
+ %arrayidx2 = getelementptr inbounds i8, i8* %C, i64 %indvars.iv, !dbg !16
+ %1 = load i8, i8* %arrayidx2, align 1, !dbg !16, !tbaa !13
+ %mul = mul i8 %1, %0, !dbg !17
+ %arrayidx6 = getelementptr inbounds i8, i8* %A, i64 %indvars.iv, !dbg !18
+ store i8 %mul, i8* %arrayidx6, align 1, !dbg !19, !tbaa !13
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !10
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !10
+ %exitcond = icmp eq i32 %lftr.wideiv, %N, !dbg !10
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body, !dbg !10, !llvm.loop !20, !prof !24
+
+for.cond.cleanup:
+ ret void, !dbg !11
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 (trunk 267633) (llvm/trunk 267675)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2)
+!1 = !DIFile(filename: "/tmp/t.c", directory: "/tmp")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 2}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!7 = distinct !DISubprogram(name: "forced", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !2)
+!8 = !DISubroutineType(types: !2)
+!9 = !DILocation(line: 3, column: 20, scope: !7)
+!10 = !DILocation(line: 3, column: 3, scope: !7)
+!11 = !DILocation(line: 6, column: 1, scope: !7)
+!12 = !DILocation(line: 4, column: 12, scope: !7)
+!13 = !{!14, !14, i64 0}
+!14 = !{!"omnipotent char", !15, i64 0}
+!15 = !{!"Simple C/C++ TBAA"}
+!16 = !DILocation(line: 4, column: 19, scope: !7)
+!17 = !DILocation(line: 4, column: 17, scope: !7)
+!18 = !DILocation(line: 4, column: 5, scope: !7)
+!19 = !DILocation(line: 4, column: 10, scope: !7)
+!20 = distinct !{!20, !21}
+!21 = !{!"llvm.loop.distribute.enable", i1 true}
+!22 = !{!"function_entry_count", i64 3}
+!23 = !{!"branch_weights", i32 99, i32 1}
+!24 = !{!"branch_weights", i32 1, i32 99}
diff --git a/test/Transforms/LoopDistribute/diagnostics.ll b/test/Transforms/LoopDistribute/diagnostics.ll
new file mode 100644
index 000000000000..9c0d915dfaf0
--- /dev/null
+++ b/test/Transforms/LoopDistribute/diagnostics.ll
@@ -0,0 +1,176 @@
+; RUN: opt -loop-distribute -S < %s 2>&1 \
+; RUN: | FileCheck %s --check-prefix=ALWAYS --check-prefix=NO_REMARKS
+; RUN: opt -loop-distribute -S -pass-remarks-missed=loop-distribute < %s 2>&1 \
+; RUN: | FileCheck %s --check-prefix=ALWAYS --check-prefix=MISSED_REMARKS
+; RUN: opt -loop-distribute -S -pass-remarks-analysis=loop-distribute < %s 2>&1 \
+; RUN: | FileCheck %s --check-prefix=ALWAYS --check-prefix=ANALYSIS_REMARKS
+; RUN: opt -loop-distribute -S -pass-remarks=loop-distribute < %s 2>&1 \
+; RUN: | FileCheck %s --check-prefix=ALWAYS --check-prefix=REMARKS
+
+; This is the input program:
+;
+; 1 void forced (char *A, char *B, char *C, int N) {
+; 2 #pragma clang loop distribute(enable)
+; 3 for(int i = 0; i < N; i++) {
+; 4 A[i] = B[i] * C[i];
+; 5 }
+; 6 }
+; 7
+; 8 void not_forced (char *A, char *B, char *C, int N) {
+; 9 for(int i = 0; i < N; i++) {
+; 10 A[i] = B[i] * C[i];
+; 11 }
+; 12 }
+; 13
+; 14 void success (char *A, char *B, char *C, char *D, char *E, int N) {
+; 15 for(int i = 0; i < N; i++) {
+; 16 A[i + 1] = A[i] + B[i];
+; 17 C[i] = D[i] * E[i];
+; 18 }
+; 19 }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+; MISSED_REMARKS: remark: /tmp/t.c:3:3: loop not distributed: use -Rpass-analysis=loop-distribute for more info
+; ALWAYS: remark: /tmp/t.c:3:3: loop not distributed: memory operations are safe for vectorization
+; ALWAYS: warning: /tmp/t.c:3:3: loop not distributed: failed explicitly specified loop distribution
+
+define void @forced(i8* %A, i8* %B, i8* %C, i32 %N) !dbg !7 {
+entry:
+ %cmp12 = icmp sgt i32 %N, 0, !dbg !9
+ br i1 %cmp12, label %ph, label %for.cond.cleanup, !dbg !10
+
+ph:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %ph ]
+ %arrayidx = getelementptr inbounds i8, i8* %B, i64 %indvars.iv, !dbg !12
+ %0 = load i8, i8* %arrayidx, align 1, !dbg !12, !tbaa !13
+ %arrayidx2 = getelementptr inbounds i8, i8* %C, i64 %indvars.iv, !dbg !16
+ %1 = load i8, i8* %arrayidx2, align 1, !dbg !16, !tbaa !13
+ %mul = mul i8 %1, %0, !dbg !17
+ %arrayidx6 = getelementptr inbounds i8, i8* %A, i64 %indvars.iv, !dbg !18
+ store i8 %mul, i8* %arrayidx6, align 1, !dbg !19, !tbaa !13
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !10
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !10
+ %exitcond = icmp eq i32 %lftr.wideiv, %N, !dbg !10
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body, !dbg !10, !llvm.loop !20
+
+for.cond.cleanup:
+ ret void, !dbg !11
+}
+
+; NO_REMARKS-NOT: remark: /tmp/t.c:9:3: loop not distributed: memory operations are safe for vectorization
+; MISSED_REMARKS: remark: /tmp/t.c:9:3: loop not distributed: use -Rpass-analysis=loop-distribute for more info
+; ANALYSIS_REMARKS: remark: /tmp/t.c:9:3: loop not distributed: memory operations are safe for vectorization
+; ALWAYS-NOT: warning: /tmp/t.c:9:3: loop not distributed: failed explicitly specified loop distribution
+
+define void @not_forced(i8* %A, i8* %B, i8* %C, i32 %N) !dbg !22 {
+entry:
+ %cmp12 = icmp sgt i32 %N, 0, !dbg !23
+ br i1 %cmp12, label %ph, label %for.cond.cleanup, !dbg !24
+
+ph:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %ph ]
+ %arrayidx = getelementptr inbounds i8, i8* %B, i64 %indvars.iv, !dbg !26
+ %0 = load i8, i8* %arrayidx, align 1, !dbg !26, !tbaa !13
+ %arrayidx2 = getelementptr inbounds i8, i8* %C, i64 %indvars.iv, !dbg !27
+ %1 = load i8, i8* %arrayidx2, align 1, !dbg !27, !tbaa !13
+ %mul = mul i8 %1, %0, !dbg !28
+ %arrayidx6 = getelementptr inbounds i8, i8* %A, i64 %indvars.iv, !dbg !29
+ store i8 %mul, i8* %arrayidx6, align 1, !dbg !30, !tbaa !13
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !24
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !24
+ %exitcond = icmp eq i32 %lftr.wideiv, %N, !dbg !24
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body, !dbg !24
+
+for.cond.cleanup:
+ ret void, !dbg !25
+}
+
+; REMARKS: remark: /tmp/t.c:15:3: distributed loop
+
+define void @success(i8* %A, i8* %B, i8* %C, i8* %D, i8* %E, i32 %N) !dbg !31 {
+entry:
+ %cmp28 = icmp sgt i32 %N, 0, !dbg !32
+ br i1 %cmp28, label %ph, label %for.cond.cleanup, !dbg !33
+
+ph:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %ph ]
+ %arrayidx = getelementptr inbounds i8, i8* %A, i64 %indvars.iv, !dbg !35
+ %0 = load i8, i8* %arrayidx, align 1, !dbg !35, !tbaa !13
+ %arrayidx2 = getelementptr inbounds i8, i8* %B, i64 %indvars.iv, !dbg !36
+ %1 = load i8, i8* %arrayidx2, align 1, !dbg !36, !tbaa !13
+ %add = add i8 %1, %0, !dbg !37
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !33
+ %arrayidx7 = getelementptr inbounds i8, i8* %A, i64 %indvars.iv.next, !dbg !38
+ store i8 %add, i8* %arrayidx7, align 1, !dbg !39, !tbaa !13
+ %arrayidx9 = getelementptr inbounds i8, i8* %D, i64 %indvars.iv, !dbg !40
+ %2 = load i8, i8* %arrayidx9, align 1, !dbg !40, !tbaa !13
+ %arrayidx12 = getelementptr inbounds i8, i8* %E, i64 %indvars.iv, !dbg !41
+ %3 = load i8, i8* %arrayidx12, align 1, !dbg !41, !tbaa !13
+ %mul = mul i8 %3, %2, !dbg !42
+ %arrayidx16 = getelementptr inbounds i8, i8* %C, i64 %indvars.iv, !dbg !43
+ store i8 %mul, i8* %arrayidx16, align 1, !dbg !44, !tbaa !13
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !33
+ %exitcond = icmp eq i32 %lftr.wideiv, %N, !dbg !33
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body, !dbg !33
+
+for.cond.cleanup:
+ ret void, !dbg !34
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 (trunk 267633) (llvm/trunk 267675)", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2)
+!1 = !DIFile(filename: "/tmp/t.c", directory: "/tmp")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 2}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!7 = distinct !DISubprogram(name: "forced", scope: !1, file: !1, line: 1, type: !8, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !2)
+!8 = !DISubroutineType(types: !2)
+!9 = !DILocation(line: 3, column: 20, scope: !7)
+!10 = !DILocation(line: 3, column: 3, scope: !7)
+!11 = !DILocation(line: 6, column: 1, scope: !7)
+!12 = !DILocation(line: 4, column: 12, scope: !7)
+!13 = !{!14, !14, i64 0}
+!14 = !{!"omnipotent char", !15, i64 0}
+!15 = !{!"Simple C/C++ TBAA"}
+!16 = !DILocation(line: 4, column: 19, scope: !7)
+!17 = !DILocation(line: 4, column: 17, scope: !7)
+!18 = !DILocation(line: 4, column: 5, scope: !7)
+!19 = !DILocation(line: 4, column: 10, scope: !7)
+!20 = distinct !{!20, !21}
+!21 = !{!"llvm.loop.distribute.enable", i1 true}
+!22 = distinct !DISubprogram(name: "not_forced", scope: !1, file: !1, line: 8, type: !8, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !2)
+!23 = !DILocation(line: 9, column: 20, scope: !22)
+!24 = !DILocation(line: 9, column: 3, scope: !22)
+!25 = !DILocation(line: 12, column: 1, scope: !22)
+!26 = !DILocation(line: 10, column: 12, scope: !22)
+!27 = !DILocation(line: 10, column: 19, scope: !22)
+!28 = !DILocation(line: 10, column: 17, scope: !22)
+!29 = !DILocation(line: 10, column: 5, scope: !22)
+!30 = !DILocation(line: 10, column: 10, scope: !22)
+!31 = distinct !DISubprogram(name: "success", scope: !1, file: !1, line: 14, type: !8, isLocal: false, isDefinition: true, scopeLine: 14, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !2)
+!32 = !DILocation(line: 15, column: 20, scope: !31)
+!33 = !DILocation(line: 15, column: 3, scope: !31)
+!34 = !DILocation(line: 19, column: 1, scope: !31)
+!35 = !DILocation(line: 16, column: 16, scope: !31)
+!36 = !DILocation(line: 16, column: 23, scope: !31)
+!37 = !DILocation(line: 16, column: 21, scope: !31)
+!38 = !DILocation(line: 16, column: 5, scope: !31)
+!39 = !DILocation(line: 16, column: 14, scope: !31)
+!40 = !DILocation(line: 17, column: 12, scope: !31)
+!41 = !DILocation(line: 17, column: 19, scope: !31)
+!42 = !DILocation(line: 17, column: 17, scope: !31)
+!43 = !DILocation(line: 17, column: 5, scope: !31)
+!44 = !DILocation(line: 17, column: 10, scope: !31)
diff --git a/test/Transforms/LoopDistribute/metadata.ll b/test/Transforms/LoopDistribute/metadata.ll
new file mode 100644
index 000000000000..6c99340f7d0b
--- /dev/null
+++ b/test/Transforms/LoopDistribute/metadata.ll
@@ -0,0 +1,149 @@
+; RUN: opt -basicaa -loop-distribute -enable-loop-distribute=0 -S < %s | FileCheck %s --check-prefix=CHECK --check-prefix=EXPLICIT --check-prefix=DEFAULT_OFF
+; RUN: opt -basicaa -loop-distribute -enable-loop-distribute=1 -S < %s | FileCheck %s --check-prefix=CHECK --check-prefix=EXPLICIT --check-prefix=DEFAULT_ON
+
+; Same loop as in basic.ll. Check that distribution is enabled/disabled
+; properly according to -enable-loop-distribute=0/1 and the
+; llvm.loop.distribute.enable metadata.
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+; CHECK-LABEL: @explicit_on(
+define void @explicit_on(i32* noalias %a,
+ i32* noalias %b,
+ i32* noalias %c,
+ i32* noalias %d,
+ i32* noalias %e) {
+entry:
+ br label %for.body
+
+; EXPLICIT: for.body.ldist1:
+
+for.body: ; preds = %for.body, %entry
+ %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+
+ %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
+ %loadA = load i32, i32* %arrayidxA, align 4
+
+ %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
+ %loadB = load i32, i32* %arrayidxB, align 4
+
+ %mulA = mul i32 %loadB, %loadA
+
+ %add = add nuw nsw i64 %ind, 1
+ %arrayidxA_plus_4 = getelementptr inbounds i32, i32* %a, i64 %add
+ store i32 %mulA, i32* %arrayidxA_plus_4, align 4
+
+ %arrayidxD = getelementptr inbounds i32, i32* %d, i64 %ind
+ %loadD = load i32, i32* %arrayidxD, align 4
+
+ %arrayidxE = getelementptr inbounds i32, i32* %e, i64 %ind
+ %loadE = load i32, i32* %arrayidxE, align 4
+
+ %mulC = mul i32 %loadD, %loadE
+
+ %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
+ store i32 %mulC, i32* %arrayidxC, align 4
+
+ %exitcond = icmp eq i64 %add, 20
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+; CHECK-LABEL: @explicit_off(
+define void @explicit_off(i32* noalias %a,
+ i32* noalias %b,
+ i32* noalias %c,
+ i32* noalias %d,
+ i32* noalias %e) {
+entry:
+ br label %for.body
+
+; EXPLICIT-NOT: for.body.ldist1:
+
+for.body: ; preds = %for.body, %entry
+ %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+
+ %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
+ %loadA = load i32, i32* %arrayidxA, align 4
+
+ %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
+ %loadB = load i32, i32* %arrayidxB, align 4
+
+ %mulA = mul i32 %loadB, %loadA
+
+ %add = add nuw nsw i64 %ind, 1
+ %arrayidxA_plus_4 = getelementptr inbounds i32, i32* %a, i64 %add
+ store i32 %mulA, i32* %arrayidxA_plus_4, align 4
+
+ %arrayidxD = getelementptr inbounds i32, i32* %d, i64 %ind
+ %loadD = load i32, i32* %arrayidxD, align 4
+
+ %arrayidxE = getelementptr inbounds i32, i32* %e, i64 %ind
+ %loadE = load i32, i32* %arrayidxE, align 4
+
+ %mulC = mul i32 %loadD, %loadE
+
+ %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
+ store i32 %mulC, i32* %arrayidxC, align 4
+
+ %exitcond = icmp eq i64 %add, 20
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !2
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+; CHECK-LABEL: @default_distribute(
+define void @default_distribute(i32* noalias %a,
+ i32* noalias %b,
+ i32* noalias %c,
+ i32* noalias %d,
+ i32* noalias %e) {
+entry:
+ br label %for.body
+
+; Verify the two distributed loops.
+
+; DEFAULT_ON: for.body.ldist1:
+; DEFAULT_OFF-NOT: for.body.ldist1:
+
+for.body: ; preds = %for.body, %entry
+ %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+
+ %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
+ %loadA = load i32, i32* %arrayidxA, align 4
+
+ %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
+ %loadB = load i32, i32* %arrayidxB, align 4
+
+ %mulA = mul i32 %loadB, %loadA
+
+ %add = add nuw nsw i64 %ind, 1
+ %arrayidxA_plus_4 = getelementptr inbounds i32, i32* %a, i64 %add
+ store i32 %mulA, i32* %arrayidxA_plus_4, align 4
+
+ %arrayidxD = getelementptr inbounds i32, i32* %d, i64 %ind
+ %loadD = load i32, i32* %arrayidxD, align 4
+
+ %arrayidxE = getelementptr inbounds i32, i32* %e, i64 %ind
+ %loadE = load i32, i32* %arrayidxE, align 4
+
+ %mulC = mul i32 %loadD, %loadE
+
+ %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
+ store i32 %mulC, i32* %arrayidxC, align 4
+
+ %exitcond = icmp eq i64 %add, 20
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.distribute.enable", i1 true}
+!2 = distinct !{!2, !3}
+!3 = !{!"llvm.loop.distribute.enable", i1 false}
diff --git a/test/Transforms/LoopDistribute/pr28443.ll b/test/Transforms/LoopDistribute/pr28443.ll
new file mode 100644
index 000000000000..0b8839c84770
--- /dev/null
+++ b/test/Transforms/LoopDistribute/pr28443.ll
@@ -0,0 +1,36 @@
+; RUN: opt -basicaa -loop-distribute -verify-loop-info -verify-dom-info -S \
+; RUN: < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @fn1(i64 %a, i64* %b) {
+entry:
+ br label %for.body
+
+for.body:
+ %add75.epil = phi i64 [ %add7.epil, %for.body ], [ %a, %entry ]
+ %add1.epil = add nsw i64 %add75.epil, 268435457
+ %arrayidx.epil = getelementptr inbounds i64, i64* %b, i64 %add1.epil
+ %load = load i64, i64* %arrayidx.epil, align 8
+ %add5.epil = add nsw i64 %add75.epil, 805306369
+ %arrayidx6.epil = getelementptr inbounds i64, i64* %b, i64 %add5.epil
+ store i64 %load, i64* %arrayidx6.epil, align 8
+ %add7.epil = add nsw i64 %add75.epil, 2
+ %epil.iter.cmp = icmp eq i64 %add7.epil, 0
+ br i1 %epil.iter.cmp, label %for.end, label %for.body
+
+ ; CHECK: %[[phi:.*]] = phi i64
+ ; CHECK: %[[add1:.*]] = add nsw i64 %[[phi]], 268435457
+ ; CHECK: %[[gep1:.*]] = getelementptr inbounds i64, i64* %b, i64 %[[add1]]
+ ; CHECK: %[[load:.*]] = load i64, i64* %[[gep1]], align 8
+ ; CHECK: %[[add2:.*]] = add nsw i64 %[[phi]], 805306369
+ ; CHECK: %[[gep2:.*]] = getelementptr inbounds i64, i64* %b, i64 %[[add2]]
+ ; CHECK: store i64 %[[load]], i64* %[[gep2]], align 8
+ ; CHECK: %[[incr:.*]] = add nsw i64 %[[phi]], 2
+ ; CHECK: %[[cmp:.*]] = icmp eq i64 %[[incr]], 0
+ ; CHECK: br i1 %[[cmp]]
+
+for.end:
+ ret void
+}
diff --git a/test/Transforms/LoopDistribute/symbolic-stride.ll b/test/Transforms/LoopDistribute/symbolic-stride.ll
new file mode 100644
index 000000000000..73d3d19c5dd4
--- /dev/null
+++ b/test/Transforms/LoopDistribute/symbolic-stride.ll
@@ -0,0 +1,65 @@
+; RUN: opt -basicaa -loop-distribute -S < %s | \
+; RUN: FileCheck %s --check-prefix=ALL --check-prefix=STRIDE_SPEC
+
+; RUN: opt -basicaa -loop-distribute -S -enable-mem-access-versioning=0 < %s | \
+; RUN: FileCheck %s --check-prefix=ALL --check-prefix=NO_STRIDE_SPEC
+
+; If we don't speculate stride for 1 we can't distribute along the line
+; because we could have a backward dependence:
+;
+; for (i = 0; i < n; i++) {
+; A[i + 1] = A[i] * B[i];
+; =======================
+; C[i] = D[i] * A[stride * i];
+; }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.10.0"
+
+; ALL-LABEL: @f(
+define void @f(i32* noalias %a,
+ i32* noalias %b,
+ i32* noalias %c,
+ i32* noalias %d,
+ i64 %stride) {
+entry:
+ br label %for.body
+
+; STRIDE_SPEC: %ident.check = icmp ne i64 %stride, 1
+
+; STRIDE_SPEC: for.body.ldist1:
+; NO_STRIDE_SPEC-NOT: for.body.ldist1:
+
+for.body: ; preds = %for.body, %entry
+ %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+
+ %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
+ %loadA = load i32, i32* %arrayidxA, align 4
+
+ %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
+ %loadB = load i32, i32* %arrayidxB, align 4
+
+ %mulA = mul i32 %loadB, %loadA
+
+ %add = add nuw nsw i64 %ind, 1
+ %arrayidxA_plus_4 = getelementptr inbounds i32, i32* %a, i64 %add
+ store i32 %mulA, i32* %arrayidxA_plus_4, align 4
+
+ %arrayidxD = getelementptr inbounds i32, i32* %d, i64 %ind
+ %loadD = load i32, i32* %arrayidxD, align 4
+
+ %mul = mul i64 %ind, %stride
+ %arrayidxStridedA = getelementptr inbounds i32, i32* %a, i64 %mul
+ %loadStridedA = load i32, i32* %arrayidxStridedA, align 4
+
+ %mulC = mul i32 %loadD, %loadStridedA
+
+ %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
+ store i32 %mulC, i32* %arrayidxC, align 4
+
+ %exitcond = icmp eq i64 %add, 20
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll b/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll
index e4301bbb06d3..e594c79a3e17 100644
--- a/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll
+++ b/test/Transforms/LoopIdiom/AMDGPU/popcnt.ll
@@ -1,4 +1,4 @@
-; RUN: opt -loop-idiom -mtriple=r600-- -mcpu=SI -S < %s | FileCheck %s
+; RUN: opt -loop-idiom -mtriple=amdgcn-- -S < %s | FileCheck %s
; Mostly copied from x86 version.
@@ -59,6 +59,29 @@ while.end: ; preds = %while.body, %entry
ret i32 %c.0.lcssa
}
+; CHECK-LABEL: @popcount_i128
+; CHECK: entry
+; CHECK: llvm.ctpop.i128
+; CHECK: ret
+define i32 @popcount_i128(i128 %a) nounwind uwtable readnone ssp {
+entry:
+ %tobool3 = icmp eq i128 %a, 0
+ br i1 %tobool3, label %while.end, label %while.body
+
+while.body: ; preds = %entry, %while.body
+ %c.05 = phi i32 [ %inc, %while.body ], [ 0, %entry ]
+ %a.addr.04 = phi i128 [ %and, %while.body ], [ %a, %entry ]
+ %inc = add nsw i32 %c.05, 1
+ %sub = add i128 %a.addr.04, -1
+ %and = and i128 %sub, %a.addr.04
+ %tobool = icmp eq i128 %and, 0
+ br i1 %tobool, label %while.end, label %while.body
+
+while.end: ; preds = %while.body, %entry
+ %c.0.lcssa = phi i32 [ 0, %entry ], [ %inc, %while.body ]
+ ret i32 %c.0.lcssa
+}
+
; To recognize this pattern:
;int popcount(unsigned long long a, int mydata1, int mydata2) {
; int c = 0;
diff --git a/test/Transforms/LoopIdiom/basic.ll b/test/Transforms/LoopIdiom/basic.ll
index 27a955175b59..4d584de9c6f7 100644
--- a/test/Transforms/LoopIdiom/basic.ll
+++ b/test/Transforms/LoopIdiom/basic.ll
@@ -531,3 +531,40 @@ for.cond.cleanup: ; preds = %for.body
; CHECK: call void @llvm.memcpy
; CHECK: ret
}
+
+; Two dimensional nested loop with negative stride should be promoted to one big memset.
+define void @test19(i8* nocapture %X) {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %entry, %for.inc4
+ %i.06 = phi i32 [ 99, %entry ], [ %dec5, %for.inc4 ]
+ %mul = mul nsw i32 %i.06, 100
+ br label %for.body3
+
+for.body3: ; preds = %for.cond1.preheader, %for.body3
+ %j.05 = phi i32 [ 99, %for.cond1.preheader ], [ %dec, %for.body3 ]
+ %add = add nsw i32 %j.05, %mul
+ %idxprom = sext i32 %add to i64
+ %arrayidx = getelementptr inbounds i8, i8* %X, i64 %idxprom
+ store i8 0, i8* %arrayidx, align 1
+ %dec = add nsw i32 %j.05, -1
+ %cmp2 = icmp sgt i32 %j.05, 0
+ br i1 %cmp2, label %for.body3, label %for.inc4
+
+for.inc4: ; preds = %for.body3
+ %dec5 = add nsw i32 %i.06, -1
+ %cmp = icmp sgt i32 %i.06, 0
+ br i1 %cmp, label %for.cond1.preheader, label %for.end6
+
+for.end6: ; preds = %for.inc4
+ ret void
+; CHECK-LABEL: @test19(
+; CHECK: entry:
+; CHECK-NEXT: call void @llvm.memset.p0i8.i64(i8* %X, i8 0, i64 10000, i32 1, i1 false)
+; CHECK: ret void
+}
+
+; Validate that "memset_pattern" has the proper attributes.
+; CHECK: declare void @memset_pattern16(i8* nocapture, i8* nocapture readonly, i64) [[ATTRS:#[0-9]+]]
+; CHECK: [[ATTRS]] = { argmemonly }
diff --git a/test/Transforms/LoopIdiom/debug-line.ll b/test/Transforms/LoopIdiom/debug-line.ll
index a85e48997548..a6a4af4e8d4e 100644
--- a/test/Transforms/LoopIdiom/debug-line.ll
+++ b/test/Transforms/LoopIdiom/debug-line.ll
@@ -28,11 +28,11 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
!llvm.module.flags = !{!19}
-!llvm.dbg.sp = !{!0}
+!llvm.dbg.cu = !{!2}
-!0 = distinct !DISubprogram(name: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !18, scope: !1, type: !3)
+!0 = distinct !DISubprogram(name: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !2, file: !18, scope: !1, type: !3)
!1 = !DIFile(filename: "li.c", directory: "/private/tmp")
-!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 127165:127174)", isOptimized: true, emissionKind: 0, file: !18, enums: !9, retainedTypes: !9)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 127165:127174)", isOptimized: true, emissionKind: FullDebug, file: !18, enums: !9, retainedTypes: !9)
!3 = !DISubroutineType(types: !4)
!4 = !{null}
!5 = !DILocalVariable(name: "a", line: 2, arg: 1, scope: !0, file: !1, type: !6)
diff --git a/test/Transforms/LoopIdiom/nontemporal_store.ll b/test/Transforms/LoopIdiom/nontemporal_store.ll
new file mode 100644
index 000000000000..a5f8c7c451c7
--- /dev/null
+++ b/test/Transforms/LoopIdiom/nontemporal_store.ll
@@ -0,0 +1,32 @@
+; RUN: opt -loop-idiom < %s -S | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='require<aa>,require<targetir>,require<scalar-evolution>,loop(loop-idiom)' < %s -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.bigBlock_t = type { [256 x <4 x float>] }
+
+; CHECK-LABEL: @test(
+; CHECK-NOT: llvm.memset
+define void @test(%struct.bigBlock_t* %p) {
+entry:
+ %0 = getelementptr inbounds %struct.bigBlock_t, %struct.bigBlock_t* %p, i64 0, i32 0, i64 0, i64 0
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %index.02 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %dst.01 = phi float* [ %0, %entry ], [ %add.ptr2, %for.body ]
+ %cast.i5 = bitcast float* %dst.01 to <4 x float>*
+ store <4 x float> zeroinitializer, <4 x float>* %cast.i5, align 16, !nontemporal !0
+ %add.ptr1 = getelementptr inbounds float, float* %dst.01, i64 4
+ %cast.i = bitcast float* %add.ptr1 to <4 x float>*
+ store <4 x float> zeroinitializer, <4 x float>* %cast.i, align 16, !nontemporal !0
+ %add.ptr2 = getelementptr inbounds float, float* %dst.01, i64 8
+ %add = add nuw nsw i32 %index.02, 32
+ %cmp = icmp ult i32 %add, 4096
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+!0 = !{i32 1}
diff --git a/test/Transforms/LoopIdiom/pr28196.ll b/test/Transforms/LoopIdiom/pr28196.ll
new file mode 100644
index 000000000000..10f49fbcd09f
--- /dev/null
+++ b/test/Transforms/LoopIdiom/pr28196.ll
@@ -0,0 +1,26 @@
+; RUN: opt -loop-idiom -S < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @test1() {
+entry:
+ br label %for.body.preheader
+
+for.body.preheader: ; preds = %for.cond
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.body.preheader
+ %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %add.ptr3 = getelementptr inbounds i32, i32* null, i32 %indvars.iv
+ %add.ptr4 = getelementptr inbounds i32, i32* %add.ptr3, i32 1
+ %0 = load i32, i32* %add.ptr4, align 4
+ store i32 %0, i32* %add.ptr3, align 4
+ %indvars.iv.next = add nsw i32 %indvars.iv, 1
+ %exitcond = icmp ne i32 %indvars.iv.next, 6
+ br i1 %exitcond, label %for.body, label %for.body.preheader
+}
+
+; CHECK-LABEL: define void @test1(
+; CHECK: call void @llvm.memcpy.p0i8.p0i8.i64(i8* null, i8* inttoptr (i64 4 to i8*), i64 24, i32 4, i1 false)
+; CHECK-NOT: store
diff --git a/test/Transforms/LoopIdiom/struct.ll b/test/Transforms/LoopIdiom/struct.ll
new file mode 100644
index 000000000000..2828024952e2
--- /dev/null
+++ b/test/Transforms/LoopIdiom/struct.ll
@@ -0,0 +1,221 @@
+; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+target triple = "x86_64-apple-darwin10.0.0"
+
+%struct.foo = type { i32, i32 }
+%struct.foo1 = type { i32, i32, i32 }
+%struct.foo2 = type { i32, i16, i16 }
+
+;void bar1(foo_t *f, unsigned n) {
+; for (unsigned i = 0; i < n; ++i) {
+; f[i].a = 0;
+; f[i].b = 0;
+; }
+;}
+define void @bar1(%struct.foo* %f, i32 %n) nounwind ssp {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
+ store i32 0, i32* %a, align 4
+ %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
+ store i32 0, i32* %b, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @bar1(
+; CHECK: call void @llvm.memset
+; CHECK-NOT: store
+}
+
+;void bar2(foo_t *f, unsigned n) {
+; for (unsigned i = 0; i < n; ++i) {
+; f[i].b = 0;
+; f[i].a = 0;
+; }
+;}
+define void @bar2(%struct.foo* %f, i32 %n) nounwind ssp {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
+ store i32 0, i32* %b, align 4
+ %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
+ store i32 0, i32* %a, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @bar2(
+; CHECK: call void @llvm.memset
+; CHECK-NOT: store
+}
+
+;void bar3(foo_t *f, unsigned n) {
+; for (unsigned i = n; i > 0; --i) {
+; f[i].a = 0;
+; f[i].b = 0;
+; }
+;}
+define void @bar3(%struct.foo* nocapture %f, i32 %n) nounwind ssp {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ %0 = zext i32 %n to i64
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
+ store i32 0, i32* %a, align 4
+ %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
+ store i32 0, i32* %b, align 4
+ %1 = trunc i64 %indvars.iv to i32
+ %dec = add i32 %1, -1
+ %cmp = icmp eq i32 %dec, 0
+ %indvars.iv.next = add nsw i64 %indvars.iv, -1
+ br i1 %cmp, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @bar3(
+; CHECK: call void @llvm.memset
+; CHECK-NOT: store
+}
+
+;void bar4(foo_t *f, unsigned n) {
+; for (unsigned i = 0; i < n; ++i) {
+; f[i].a = 0;
+; f[i].b = 1;
+; }
+;}
+define void @bar4(%struct.foo* nocapture %f, i32 %n) nounwind ssp {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
+ store i32 0, i32* %a, align 4
+ %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
+ store i32 1, i32* %b, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @bar4(
+; CHECK-NOT: call void @llvm.memset
+}
+
+;void bar5(foo1_t *f, unsigned n) {
+; for (unsigned i = 0; i < n; ++i) {
+; f[i].a = 0;
+; f[i].b = 0;
+; }
+;}
+define void @bar5(%struct.foo1* nocapture %f, i32 %n) nounwind ssp {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %a = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 0
+ store i32 0, i32* %a, align 4
+ %b = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 1
+ store i32 0, i32* %b, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @bar5(
+; CHECK-NOT: call void @llvm.memset
+}
+
+;void bar6(foo2_t *f, unsigned n) {
+; for (unsigned i = 0; i < n; ++i) {
+; f[i].a = 0;
+; f[i].b = 0;
+; f[i].c = 0;
+; }
+;}
+define void @bar6(%struct.foo2* nocapture %f, i32 %n) nounwind ssp {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %a = getelementptr inbounds %struct.foo2, %struct.foo2* %f, i64 %indvars.iv, i32 0
+ store i32 0, i32* %a, align 4
+ %b = getelementptr inbounds %struct.foo2, %struct.foo2* %f, i64 %indvars.iv, i32 1
+ store i16 0, i16* %b, align 4
+ %c = getelementptr inbounds %struct.foo2, %struct.foo2* %f, i64 %indvars.iv, i32 2
+ store i16 0, i16* %c, align 2
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @bar6(
+; CHECK: call void @llvm.memset
+; CHECK-NOT: store
+}
diff --git a/test/Transforms/LoopIdiom/struct_pattern.ll b/test/Transforms/LoopIdiom/struct_pattern.ll
new file mode 100644
index 000000000000..d7809b746b15
--- /dev/null
+++ b/test/Transforms/LoopIdiom/struct_pattern.ll
@@ -0,0 +1,186 @@
+; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+; CHECK: @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
+; CHECK: @.memset_pattern.1 = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
+; CHECK: @.memset_pattern.2 = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
+
+target triple = "x86_64-apple-darwin10.0.0"
+
+%struct.foo = type { i32, i32 }
+%struct.foo1 = type { i32, i32, i32 }
+
+;void bar1(foo_t *f, unsigned n) {
+; for (unsigned i = 0; i < n; ++i) {
+; f[i].a = 2;
+; f[i].b = 2;
+; }
+;}
+define void @bar1(%struct.foo* %f, i32 %n) nounwind ssp {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
+ store i32 2, i32* %a, align 4
+ %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
+ store i32 2, i32* %b, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @bar1(
+; CHECK: call void @memset_pattern16
+; CHECK-NOT: store
+}
+
+;void bar2(foo_t *f, unsigned n) {
+; for (unsigned i = 0; i < n; ++i) {
+; f[i].b = 2;
+; f[i].a = 2;
+; }
+;}
+define void @bar2(%struct.foo* %f, i32 %n) nounwind ssp {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
+ store i32 2, i32* %b, align 4
+ %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
+ store i32 2, i32* %a, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @bar2(
+; CHECK: call void @memset_pattern16
+; CHECK-NOT: store
+}
+
+;void bar3(foo_t *f, unsigned n) {
+; for (unsigned i = n; i > 0; --i) {
+; f[i].a = 2;
+; f[i].b = 2;
+; }
+;}
+define void @bar3(%struct.foo* nocapture %f, i32 %n) nounwind ssp {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ %0 = zext i32 %n to i64
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ %0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
+ store i32 2, i32* %a, align 4
+ %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
+ store i32 2, i32* %b, align 4
+ %1 = trunc i64 %indvars.iv to i32
+ %dec = add i32 %1, -1
+ %cmp = icmp eq i32 %dec, 0
+ %indvars.iv.next = add nsw i64 %indvars.iv, -1
+ br i1 %cmp, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @bar3(
+; CHECK: call void @memset_pattern16
+; CHECK-NOT: store
+}
+
+;void bar4(foo_t *f, unsigned n) {
+; for (unsigned i = 0; i < n; ++i) {
+; f[i].a = 0;
+; f[i].b = 1;
+; }
+;}
+define void @bar4(%struct.foo* nocapture %f, i32 %n) nounwind ssp {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %a = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 0
+ store i32 0, i32* %a, align 4
+ %b = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 %indvars.iv, i32 1
+ store i32 1, i32* %b, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @bar4(
+; CHECK-NOT: call void @memset_pattern16
+}
+
+;void bar5(foo1_t *f, unsigned n) {
+; for (unsigned i = 0; i < n; ++i) {
+; f[i].a = 1;
+; f[i].b = 1;
+; }
+;}
+define void @bar5(%struct.foo1* nocapture %f, i32 %n) nounwind ssp {
+entry:
+ %cmp1 = icmp eq i32 %n, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %a = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 0
+ store i32 1, i32* %a, align 4
+ %b = getelementptr inbounds %struct.foo1, %struct.foo1* %f, i64 %indvars.iv, i32 1
+ store i32 1, i32* %b, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp ne i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @bar5(
+; CHECK-NOT: call void @memset_pattern16
+}
diff --git a/test/Transforms/LoopIdiom/unroll.ll b/test/Transforms/LoopIdiom/unroll.ll
new file mode 100644
index 000000000000..0cdfda254d78
--- /dev/null
+++ b/test/Transforms/LoopIdiom/unroll.ll
@@ -0,0 +1,80 @@
+; RUN: opt -basicaa -loop-idiom < %s -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+
+; CHECK @.memset_pattern = private unnamed_addr constant [4 x i32] [i32 2, i32 2, i32 2, i32 2], align 16
+
+target triple = "x86_64-apple-darwin10.0.0"
+
+;void test(int *f, unsigned n) {
+; for (unsigned i = 0; i < 2 * n; i += 2) {
+; f[i] = 0;
+; f[i+1] = 0;
+; }
+;}
+define void @test(i32* %f, i32 %n) nounwind ssp {
+entry:
+ %mul = shl i32 %n, 1
+ %cmp1 = icmp eq i32 %mul, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ %0 = zext i32 %mul to i64
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
+ store i32 0, i32* %arrayidx, align 4
+ %1 = or i64 %indvars.iv, 1
+ %arrayidx2 = getelementptr inbounds i32, i32* %f, i64 %1
+ store i32 0, i32* %arrayidx2, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
+ %cmp = icmp ult i64 %indvars.iv.next, %0
+ br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @test(
+; CHECK: call void @llvm.memset
+; CHECK-NOT: store
+}
+
+;void test_pattern(int *f, unsigned n) {
+; for (unsigned i = 0; i < 2 * n; i += 2) {
+; f[i] = 2;
+; f[i+1] = 2;
+; }
+;}
+define void @test_pattern(i32* %f, i32 %n) nounwind ssp {
+entry:
+ %mul = shl i32 %n, 1
+ %cmp1 = icmp eq i32 %mul, 0
+ br i1 %cmp1, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ %0 = zext i32 %mul to i64
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %f, i64 %indvars.iv
+ store i32 2, i32* %arrayidx, align 4
+ %1 = or i64 %indvars.iv, 1
+ %arrayidx2 = getelementptr inbounds i32, i32* %f, i64 %1
+ store i32 2, i32* %arrayidx2, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
+ %cmp = icmp ult i64 %indvars.iv.next, %0
+ br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+; CHECK-LABEL: @test_pattern(
+; CHECK: call void @memset_pattern16
+; CHECK-NOT: store
+}
diff --git a/test/Transforms/LoopIdiom/unwind.ll b/test/Transforms/LoopIdiom/unwind.ll
new file mode 100644
index 000000000000..a132cba164bd
--- /dev/null
+++ b/test/Transforms/LoopIdiom/unwind.ll
@@ -0,0 +1,33 @@
+; RUN: opt -loop-idiom < %s -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+declare void @ff()
+
+define void @test(i8* noalias nocapture %base, i64 %size) #1 {
+entry:
+ %cmp3 = icmp eq i64 %size, 0
+ br i1 %cmp3, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+; CHECK-LABEL: @test(
+; CHECK-NOT: llvm.memset
+ %indvars.iv = phi i64 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ tail call void @ff()
+ %arrayidx = getelementptr inbounds i8, i8* %base, i64 %indvars.iv
+ store i8 0, i8* %arrayidx, align 1
+ %indvars.iv.next = add i64 %indvars.iv, 1
+ %exitcond = icmp ne i64 %indvars.iv.next, %size
+ br i1 %exitcond, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+}
+
+attributes #1 = { uwtable }
diff --git a/test/Transforms/LoopLoadElim/cond-load.ll b/test/Transforms/LoopLoadElim/cond-load.ll
new file mode 100644
index 000000000000..e337397e9969
--- /dev/null
+++ b/test/Transforms/LoopLoadElim/cond-load.ll
@@ -0,0 +1,42 @@
+; RUN: opt -S -loop-load-elim < %s | FileCheck %s
+
+; We can't hoist conditional loads to the preheader for the initial value.
+; E.g. in the loop below we'd access array[-1] if we did:
+;
+; for(int i = 0 ; i < n ; i++ )
+; array[i] = ( i > 0 ? array[i - 1] : 0 ) + 4;
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+define void @f(i32* %array, i32 %n) {
+entry:
+ %cmp10 = icmp sgt i32 %n, 0
+ br i1 %cmp10, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %cond.end, %entry
+ ret void
+
+for.body: ; preds = %entry, %cond.end
+ %indvars.iv = phi i64 [ %indvars.iv.next, %cond.end ], [ 0, %entry ]
+; CHECK-NOT: %store_forwarded = phi
+ %cmp1 = icmp sgt i64 %indvars.iv, 0
+ br i1 %cmp1, label %cond.true, label %cond.end
+
+cond.true: ; preds = %for.body
+ %0 = add nsw i64 %indvars.iv, -1
+ %arrayidx = getelementptr inbounds i32, i32* %array, i64 %0
+ %1 = load i32, i32* %arrayidx, align 4
+ br label %cond.end
+
+cond.end: ; preds = %for.body, %cond.true
+ %cond = phi i32 [ %1, %cond.true ], [ 0, %for.body ]
+; CHECK: %cond = phi i32 [ %1, %cond.true ], [ 0, %for.body ]
+ %add = add nsw i32 %cond, 4
+ %arrayidx3 = getelementptr inbounds i32, i32* %array, i64 %indvars.iv
+ store i32 %add, i32* %arrayidx3, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
diff --git a/test/Transforms/LoopLoadElim/forward.ll b/test/Transforms/LoopLoadElim/forward.ll
index c2b1816530c1..ed0d162ab7e3 100644
--- a/test/Transforms/LoopLoadElim/forward.ll
+++ b/test/Transforms/LoopLoadElim/forward.ll
@@ -1,6 +1,6 @@
; RUN: opt -loop-load-elim -S < %s | FileCheck %s
-; Simple st->ld forwarding derived from a lexical forwrad dep.
+; Simple st->ld forwarding derived from a lexical forward dep.
;
; for (unsigned i = 0; i < 100; i++) {
; A[i+1] = B[i] + 2;
diff --git a/test/Transforms/LoopLoadElim/loop-simplify-dep.ll b/test/Transforms/LoopLoadElim/loop-simplify-dep.ll
new file mode 100644
index 000000000000..f6bfe96d9c98
--- /dev/null
+++ b/test/Transforms/LoopLoadElim/loop-simplify-dep.ll
@@ -0,0 +1,33 @@
+; RUN: opt -loop-load-elim -S < %s | FileCheck %s
+
+; Make sure we create a preheader if we don't have one.
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i64 %N, i1 %C) {
+entry:
+ br i1 %C, label %for.body, label %for.end
+
+; CHECK: for.body.preheader:
+; CHECK-NEXT: %load_initial = load i32, i32* %A
+; CHECK-NEXT: br label %for.body
+
+; CHECK: for.body:
+for.body:
+; CHECK-NEXT: %store_forwarded = phi i32 [ %load_initial, %for.body.preheader ], [ %add, %for.body ]
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %load = load i32, i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+ %load_1 = load i32, i32* %arrayidx2, align 4
+; CHECK: %add = add i32 %load_1, %store_forwarded
+ %add = add i32 %load_1, %load
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %arrayidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
+ store i32 %add, i32* %arrayidx_next, align 4
+ %exitcond = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
diff --git a/test/Transforms/LoopLoadElim/non-consecutive.ll b/test/Transforms/LoopLoadElim/non-consecutive.ll
new file mode 100644
index 000000000000..43751a8ff60d
--- /dev/null
+++ b/test/Transforms/LoopLoadElim/non-consecutive.ll
@@ -0,0 +1,43 @@
+; RUN: opt -loop-load-elim -S < %s | FileCheck %s
+
+; The accesses to A are independent here but LAA reports it as a loop-carried
+; forward dependence. Check that we don't perform st->ld forwarding between
+; them.
+;
+; for (unsigned i = 0; i < 100; i++) {
+; A[i][1] = B[i] + 2;
+; C[i] = A[i][0] * 2;
+; }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f([2 x i32]* noalias %A, i32* noalias %B, i32* noalias %C, i64 %N) {
+
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+
+ %A1idx = getelementptr inbounds [2 x i32], [2 x i32]* %A, i64 %indvars.iv, i32 1
+ %Bidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+ %Cidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
+ %A0idx = getelementptr inbounds [2 x i32], [2 x i32]* %A, i64 %indvars.iv, i32 0
+
+ %b = load i32, i32* %Bidx, align 4
+ %a_p1 = add i32 %b, 2
+ store i32 %a_p1, i32* %A1idx, align 4
+
+; CHECK: %a = load i32, i32* %A0idx, align 4
+ %a = load i32, i32* %A0idx, align 4
+; CHECK: %c = mul i32 %a, 2
+ %c = mul i32 %a, 2
+ store i32 %c, i32* %Cidx, align 4
+
+ %exitcond = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/LoopLoadElim/opt-size.ll b/test/Transforms/LoopLoadElim/opt-size.ll
new file mode 100644
index 000000000000..a6322d874f32
--- /dev/null
+++ b/test/Transforms/LoopLoadElim/opt-size.ll
@@ -0,0 +1,76 @@
+; RUN: opt -basicaa -loop-load-elim -S < %s | FileCheck %s
+
+; When optimizing for size don't eliminate in this loop because the loop would
+; have to be versioned first, since A and C may alias.
+;
+; for (unsigned i = 0; i < 100; i++) {
+; A[i+1] = B[i] + 2;
+; C[i] = A[i] * 2;
+; }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: @f(
+define void @f(i32* %A, i32* %B, i32* %C, i64 %N) optsize {
+
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+
+ %Aidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
+ %Bidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+ %Cidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
+ %Aidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+
+ %b = load i32, i32* %Bidx, align 4
+ %a_p1 = add i32 %b, 2
+ store i32 %a_p1, i32* %Aidx_next, align 4
+
+ %a = load i32, i32* %Aidx, align 4
+; CHECK: %c = mul i32 %a, 2
+ %c = mul i32 %a, 2
+ store i32 %c, i32* %Cidx, align 4
+
+ %exitcond = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+; Same loop but with noalias on %A and %C. In this case load-eliminate even
+; with -Os.
+
+; CHECK-LABEL: @g(
+define void @g(i32* noalias %A, i32* %B, i32* noalias %C, i64 %N) optsize {
+
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+
+ %Aidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
+ %Bidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+ %Cidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
+ %Aidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+
+ %b = load i32, i32* %Bidx, align 4
+ %a_p1 = add i32 %b, 2
+ store i32 %a_p1, i32* %Aidx_next, align 4
+
+ %a = load i32, i32* %Aidx, align 4
+; CHECK: %c = mul i32 %store_forwarded, 2
+ %c = mul i32 %a, 2
+ store i32 %c, i32* %Cidx, align 4
+
+ %exitcond = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/LoopLoadElim/symbolic-stride.ll b/test/Transforms/LoopLoadElim/symbolic-stride.ll
new file mode 100644
index 000000000000..7a2d1b6c7e3c
--- /dev/null
+++ b/test/Transforms/LoopLoadElim/symbolic-stride.ll
@@ -0,0 +1,92 @@
+; RUN: opt -loop-load-elim -S < %s | \
+; RUN: FileCheck %s -check-prefix=ALL -check-prefix=ONE_STRIDE_SPEC \
+; RUN: -check-prefix=TWO_STRIDE_SPEC
+
+; RUN: opt -loop-load-elim -S -enable-mem-access-versioning=0 < %s | \
+; RUN: FileCheck %s -check-prefix=ALL -check-prefix=NO_ONE_STRIDE_SPEC \
+; RUN: -check-prefix=NO_TWO_STRIDE_SPEC
+
+; RUN: opt -loop-load-elim -S -loop-load-elimination-scev-check-threshold=1 < %s | \
+; RUN: FileCheck %s -check-prefix=ALL -check-prefix=ONE_STRIDE_SPEC \
+; RUN: -check-prefix=NO_TWO_STRIDE_SPEC
+
+; Forwarding in the presence of symbolic strides:
+;
+; for (unsigned i = 0; i < 100; i++)
+; A[i + 1] = A[Stride * i] + B[i];
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; ALL-LABEL: @f(
+define void @f(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i64 %N,
+ i64 %stride) {
+
+; ONE_STRIDE_SPEC: %ident.check = icmp ne i64 %stride, 1
+
+entry:
+; NO_ONE_STRIDE_SPEC-NOT: %load_initial = load i32, i32* %A
+; ONE_STRIDE_SPEC: %load_initial = load i32, i32* %A
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+; NO_ONE_STRIDE_SPEC-NOT: %store_forwarded = phi i32 [ %load_initial, {{.*}} ], [ %add, %for.body ]
+; ONE_STRIDE_SPEC: %store_forwarded = phi i32 [ %load_initial, {{.*}} ], [ %add, %for.body ]
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %mul = mul i64 %indvars.iv, %stride
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %mul
+ %load = load i32, i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+ %load_1 = load i32, i32* %arrayidx2, align 4
+; NO_ONE_STRIDE_SPEC-NOT: %add = add i32 %load_1, %store_forwarded
+; ONE_STRIDE_SPEC: %add = add i32 %load_1, %store_forwarded
+ %add = add i32 %load_1, %load
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %arrayidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
+ store i32 %add, i32* %arrayidx_next, align 4
+ %exitcond = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+; With two symbolic strides:
+;
+; for (unsigned i = 0; i < 100; i++)
+; A[Stride2 * (i + 1)] = A[Stride1 * i] + B[i];
+
+; ALL-LABEL: @two_strides(
+define void @two_strides(i32* noalias nocapture %A, i32* noalias nocapture readonly %B, i64 %N,
+ i64 %stride.1, i64 %stride.2) {
+
+; TWO_STRIDE_SPEC: %ident.check = icmp ne i64 %stride.2, 1
+; TWO_STRIDE_SPEC: %ident.check1 = icmp ne i64 %stride.1, 1
+; NO_TWO_STRIDE_SPEC-NOT: %ident.check{{.*}} = icmp ne i64 %stride{{.*}}, 1
+
+entry:
+; NO_TWO_STRIDE_SPEC-NOT: %load_initial = load i32, i32* %A
+; TWO_STRIDE_SPEC: %load_initial = load i32, i32* %A
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+; NO_TWO_STRIDE_SPEC-NOT: %store_forwarded = phi i32 [ %load_initial, {{.*}} ], [ %add, %for.body ]
+; TWO_STRIDE_SPEC: %store_forwarded = phi i32 [ %load_initial, {{.*}} ], [ %add, %for.body ]
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %mul = mul i64 %indvars.iv, %stride.1
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %mul
+ %load = load i32, i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+ %load_1 = load i32, i32* %arrayidx2, align 4
+; NO_TWO_STRIDE_SPEC-NOT: %add = add i32 %load_1, %store_forwarded
+; TWO_STRIDE_SPEC: %add = add i32 %load_1, %store_forwarded
+ %add = add i32 %load_1, %load
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %mul.2 = mul i64 %indvars.iv.next, %stride.2
+ %arrayidx_next = getelementptr inbounds i32, i32* %A, i64 %mul.2
+ store i32 %add, i32* %arrayidx_next, align 4
+ %exitcond = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/LoopLoadElim/type-mismatch.ll b/test/Transforms/LoopLoadElim/type-mismatch.ll
new file mode 100644
index 000000000000..ab8029bd35fb
--- /dev/null
+++ b/test/Transforms/LoopLoadElim/type-mismatch.ll
@@ -0,0 +1,89 @@
+; RUN: opt -loop-load-elim -S < %s | FileCheck %s
+
+; Don't crash if the store and the load use different types.
+;
+; for (unsigned i = 0; i < 100; i++) {
+; A[i+1] = B[i] + 2;
+; C[i] = ((float*)A)[i] * 2;
+; }
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; CHECK-LABEL: @f(
+define void @f(i32* noalias %A, i32* noalias %B, i32* noalias %C, i64 %N) {
+
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+
+ %Aidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
+ %Bidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+ %Cidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
+ %Aidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %Aidx.float = bitcast i32* %Aidx to float*
+
+ %b = load i32, i32* %Bidx, align 4
+ %a_p1 = add i32 %b, 2
+ store i32 %a_p1, i32* %Aidx_next, align 4
+
+; CHECK: %a = load float, float* %Aidx.float, align 4
+ %a = load float, float* %Aidx.float, align 4
+; CHECK-NEXT: %c = fmul float %a, 2.0
+ %c = fmul float %a, 2.0
+ %c.int = fptosi float %c to i32
+ store i32 %c.int, i32* %Cidx, align 4
+
+ %exitcond = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+; Same as above but with two stores; don't crash if the store and the load use different types.
+;
+; for (unsigned i = 0; i < 100; i++) {
+; A[i+1] = B[i] + 2;
+; A[i+1] = B[i] + 3;
+; C[i] = ((float*)A)[i] * 2;
+; }
+
+; CHECK-LABEL: @f2(
+define void @f2(i32* noalias %A, i32* noalias %B, i32* noalias %C, i64 %N) {
+
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+
+ %Aidx_next = getelementptr inbounds i32, i32* %A, i64 %indvars.iv.next
+ %Bidx = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
+ %Cidx = getelementptr inbounds i32, i32* %C, i64 %indvars.iv
+ %Aidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %Aidx.float = bitcast i32* %Aidx to float*
+
+ %b = load i32, i32* %Bidx, align 4
+ %a_p2 = add i32 %b, 2
+ store i32 %a_p2, i32* %Aidx_next, align 4
+
+ %a_p3 = add i32 %b, 3
+ store i32 %a_p3, i32* %Aidx_next, align 4
+
+; CHECK: %a = load float, float* %Aidx.float, align 4
+ %a = load float, float* %Aidx.float, align 4
+; CHECK-NEXT: %c = fmul float %a, 2.0
+ %c = fmul float %a, 2.0
+ %c.int = fptosi float %c to i32
+ store i32 %c.int, i32* %Cidx, align 4
+
+ %exitcond = icmp eq i64 %indvars.iv.next, %N
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/LoopReroll/basic.ll b/test/Transforms/LoopReroll/basic.ll
index 16a6dc81af79..ce2ab2f11aa0 100644
--- a/test/Transforms/LoopReroll/basic.ll
+++ b/test/Transforms/LoopReroll/basic.ll
@@ -24,7 +24,7 @@ for.body: ; preds = %for.body, %entry
%add2 = add nsw i32 %i.08, 2
%call3 = tail call i32 @foo(i32 %add2) #1
%add3 = add nsw i32 %i.08, 3
- %exitcond = icmp eq i32 %add3, 500
+ %exitcond = icmp sge i32 %add3, 500
br i1 %exitcond, label %for.end, label %for.body
; CHECK-LABEL: @bar
@@ -33,7 +33,7 @@ for.body: ; preds = %for.body, %entry
; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %entry ]
; CHECK: %call = tail call i32 @foo(i32 %indvar) #1
; CHECK: %indvar.next = add i32 %indvar, 1
-; CHECK: %exitcond1 = icmp eq i32 %indvar, 497
+; CHECK: %exitcond1 = icmp eq i32 %indvar, 500
; CHECK: br i1 %exitcond1, label %for.end, label %for.body
; CHECK: ret
@@ -524,7 +524,7 @@ for.body: ; preds = %for.body, %entry
%add3 = add nsw i32 %i.08, 3
- %exitcond = icmp eq i32 %add3, 500
+ %exitcond = icmp sge i32 %add3, 500
br i1 %exitcond, label %for.end, label %for.body
; CHECK-LABEL: @bar2
@@ -536,7 +536,7 @@ for.body: ; preds = %for.body, %entry
; CHECK: %tmp3 = add i32 %tmp2, %tmp1
; CHECK: %call = tail call i32 @foo(i32 %tmp3) #1
; CHECK: %indvar.next = add i32 %indvar, 1
-; CHECK: %exitcond1 = icmp eq i32 %indvar, 497
+; CHECK: %exitcond1 = icmp eq i32 %indvar, 500
; CHECK: br i1 %exitcond1, label %for.end, label %for.body
; CHECK: ret
diff --git a/test/Transforms/LoopReroll/basic32iters.ll b/test/Transforms/LoopReroll/basic32iters.ll
new file mode 100644
index 000000000000..758fd8a0a790
--- /dev/null
+++ b/test/Transforms/LoopReroll/basic32iters.ll
@@ -0,0 +1,328 @@
+; RUN: opt < %s -loop-reroll -verify-scev -S | FileCheck %s
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; void goo32(float alpha, float *a, float *b) {
+; for (int i = 0; i < 3200; i += 32) {
+; a[i] += alpha * b[i];
+; a[i + 1] += alpha * b[i + 1];
+; a[i + 2] += alpha * b[i + 2];
+; a[i + 3] += alpha * b[i + 3];
+; a[i + 4] += alpha * b[i + 4];
+; a[i + 5] += alpha * b[i + 5];
+; a[i + 6] += alpha * b[i + 6];
+; a[i + 7] += alpha * b[i + 7];
+; a[i + 8] += alpha * b[i + 8];
+; a[i + 9] += alpha * b[i + 9];
+; a[i + 10] += alpha * b[i + 10];
+; a[i + 11] += alpha * b[i + 11];
+; a[i + 12] += alpha * b[i + 12];
+; a[i + 13] += alpha * b[i + 13];
+; a[i + 14] += alpha * b[i + 14];
+; a[i + 15] += alpha * b[i + 15];
+; a[i + 16] += alpha * b[i + 16];
+; a[i + 17] += alpha * b[i + 17];
+; a[i + 18] += alpha * b[i + 18];
+; a[i + 19] += alpha * b[i + 19];
+; a[i + 20] += alpha * b[i + 20];
+; a[i + 21] += alpha * b[i + 21];
+; a[i + 22] += alpha * b[i + 22];
+; a[i + 23] += alpha * b[i + 23];
+; a[i + 24] += alpha * b[i + 24];
+; a[i + 25] += alpha * b[i + 25];
+; a[i + 26] += alpha * b[i + 26];
+; a[i + 27] += alpha * b[i + 27];
+; a[i + 28] += alpha * b[i + 28];
+; a[i + 29] += alpha * b[i + 29];
+; a[i + 30] += alpha * b[i + 30];
+; a[i + 31] += alpha * b[i + 31];
+; }
+; }
+
+; Function Attrs: norecurse nounwind uwtable
+define void @goo32(float %alpha, float* %a, float* readonly %b) #0 {
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
+ %0 = load float, float* %arrayidx, align 4
+ %mul = fmul float %0, %alpha
+ %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars.iv
+ %1 = load float, float* %arrayidx2, align 4
+ %add = fadd float %1, %mul
+ store float %add, float* %arrayidx2, align 4
+ %2 = or i64 %indvars.iv, 1
+ %arrayidx5 = getelementptr inbounds float, float* %b, i64 %2
+ %3 = load float, float* %arrayidx5, align 4
+ %mul6 = fmul float %3, %alpha
+ %arrayidx9 = getelementptr inbounds float, float* %a, i64 %2
+ %4 = load float, float* %arrayidx9, align 4
+ %add10 = fadd float %4, %mul6
+ store float %add10, float* %arrayidx9, align 4
+ %5 = or i64 %indvars.iv, 2
+ %arrayidx13 = getelementptr inbounds float, float* %b, i64 %5
+ %6 = load float, float* %arrayidx13, align 4
+ %mul14 = fmul float %6, %alpha
+ %arrayidx17 = getelementptr inbounds float, float* %a, i64 %5
+ %7 = load float, float* %arrayidx17, align 4
+ %add18 = fadd float %7, %mul14
+ store float %add18, float* %arrayidx17, align 4
+ %8 = or i64 %indvars.iv, 3
+ %arrayidx21 = getelementptr inbounds float, float* %b, i64 %8
+ %9 = load float, float* %arrayidx21, align 4
+ %mul22 = fmul float %9, %alpha
+ %arrayidx25 = getelementptr inbounds float, float* %a, i64 %8
+ %10 = load float, float* %arrayidx25, align 4
+ %add26 = fadd float %10, %mul22
+ store float %add26, float* %arrayidx25, align 4
+ %11 = or i64 %indvars.iv, 4
+ %arrayidx29 = getelementptr inbounds float, float* %b, i64 %11
+ %12 = load float, float* %arrayidx29, align 4
+ %mul30 = fmul float %12, %alpha
+ %arrayidx33 = getelementptr inbounds float, float* %a, i64 %11
+ %13 = load float, float* %arrayidx33, align 4
+ %add34 = fadd float %13, %mul30
+ store float %add34, float* %arrayidx33, align 4
+ %14 = or i64 %indvars.iv, 5
+ %arrayidx37 = getelementptr inbounds float, float* %b, i64 %14
+ %15 = load float, float* %arrayidx37, align 4
+ %mul38 = fmul float %15, %alpha
+ %arrayidx41 = getelementptr inbounds float, float* %a, i64 %14
+ %16 = load float, float* %arrayidx41, align 4
+ %add42 = fadd float %16, %mul38
+ store float %add42, float* %arrayidx41, align 4
+ %17 = or i64 %indvars.iv, 6
+ %arrayidx45 = getelementptr inbounds float, float* %b, i64 %17
+ %18 = load float, float* %arrayidx45, align 4
+ %mul46 = fmul float %18, %alpha
+ %arrayidx49 = getelementptr inbounds float, float* %a, i64 %17
+ %19 = load float, float* %arrayidx49, align 4
+ %add50 = fadd float %19, %mul46
+ store float %add50, float* %arrayidx49, align 4
+ %20 = or i64 %indvars.iv, 7
+ %arrayidx53 = getelementptr inbounds float, float* %b, i64 %20
+ %21 = load float, float* %arrayidx53, align 4
+ %mul54 = fmul float %21, %alpha
+ %arrayidx57 = getelementptr inbounds float, float* %a, i64 %20
+ %22 = load float, float* %arrayidx57, align 4
+ %add58 = fadd float %22, %mul54
+ store float %add58, float* %arrayidx57, align 4
+ %23 = or i64 %indvars.iv, 8
+ %arrayidx61 = getelementptr inbounds float, float* %b, i64 %23
+ %24 = load float, float* %arrayidx61, align 4
+ %mul62 = fmul float %24, %alpha
+ %arrayidx65 = getelementptr inbounds float, float* %a, i64 %23
+ %25 = load float, float* %arrayidx65, align 4
+ %add66 = fadd float %25, %mul62
+ store float %add66, float* %arrayidx65, align 4
+ %26 = or i64 %indvars.iv, 9
+ %arrayidx69 = getelementptr inbounds float, float* %b, i64 %26
+ %27 = load float, float* %arrayidx69, align 4
+ %mul70 = fmul float %27, %alpha
+ %arrayidx73 = getelementptr inbounds float, float* %a, i64 %26
+ %28 = load float, float* %arrayidx73, align 4
+ %add74 = fadd float %28, %mul70
+ store float %add74, float* %arrayidx73, align 4
+ %29 = or i64 %indvars.iv, 10
+ %arrayidx77 = getelementptr inbounds float, float* %b, i64 %29
+ %30 = load float, float* %arrayidx77, align 4
+ %mul78 = fmul float %30, %alpha
+ %arrayidx81 = getelementptr inbounds float, float* %a, i64 %29
+ %31 = load float, float* %arrayidx81, align 4
+ %add82 = fadd float %31, %mul78
+ store float %add82, float* %arrayidx81, align 4
+ %32 = or i64 %indvars.iv, 11
+ %arrayidx85 = getelementptr inbounds float, float* %b, i64 %32
+ %33 = load float, float* %arrayidx85, align 4
+ %mul86 = fmul float %33, %alpha
+ %arrayidx89 = getelementptr inbounds float, float* %a, i64 %32
+ %34 = load float, float* %arrayidx89, align 4
+ %add90 = fadd float %34, %mul86
+ store float %add90, float* %arrayidx89, align 4
+ %35 = or i64 %indvars.iv, 12
+ %arrayidx93 = getelementptr inbounds float, float* %b, i64 %35
+ %36 = load float, float* %arrayidx93, align 4
+ %mul94 = fmul float %36, %alpha
+ %arrayidx97 = getelementptr inbounds float, float* %a, i64 %35
+ %37 = load float, float* %arrayidx97, align 4
+ %add98 = fadd float %37, %mul94
+ store float %add98, float* %arrayidx97, align 4
+ %38 = or i64 %indvars.iv, 13
+ %arrayidx101 = getelementptr inbounds float, float* %b, i64 %38
+ %39 = load float, float* %arrayidx101, align 4
+ %mul102 = fmul float %39, %alpha
+ %arrayidx105 = getelementptr inbounds float, float* %a, i64 %38
+ %40 = load float, float* %arrayidx105, align 4
+ %add106 = fadd float %40, %mul102
+ store float %add106, float* %arrayidx105, align 4
+ %41 = or i64 %indvars.iv, 14
+ %arrayidx109 = getelementptr inbounds float, float* %b, i64 %41
+ %42 = load float, float* %arrayidx109, align 4
+ %mul110 = fmul float %42, %alpha
+ %arrayidx113 = getelementptr inbounds float, float* %a, i64 %41
+ %43 = load float, float* %arrayidx113, align 4
+ %add114 = fadd float %43, %mul110
+ store float %add114, float* %arrayidx113, align 4
+ %44 = or i64 %indvars.iv, 15
+ %arrayidx117 = getelementptr inbounds float, float* %b, i64 %44
+ %45 = load float, float* %arrayidx117, align 4
+ %mul118 = fmul float %45, %alpha
+ %arrayidx121 = getelementptr inbounds float, float* %a, i64 %44
+ %46 = load float, float* %arrayidx121, align 4
+ %add122 = fadd float %46, %mul118
+ store float %add122, float* %arrayidx121, align 4
+ %47 = or i64 %indvars.iv, 16
+ %arrayidx125 = getelementptr inbounds float, float* %b, i64 %47
+ %48 = load float, float* %arrayidx125, align 4
+ %mul126 = fmul float %48, %alpha
+ %arrayidx129 = getelementptr inbounds float, float* %a, i64 %47
+ %49 = load float, float* %arrayidx129, align 4
+ %add130 = fadd float %49, %mul126
+ store float %add130, float* %arrayidx129, align 4
+ %50 = or i64 %indvars.iv, 17
+ %arrayidx133 = getelementptr inbounds float, float* %b, i64 %50
+ %51 = load float, float* %arrayidx133, align 4
+ %mul134 = fmul float %51, %alpha
+ %arrayidx137 = getelementptr inbounds float, float* %a, i64 %50
+ %52 = load float, float* %arrayidx137, align 4
+ %add138 = fadd float %52, %mul134
+ store float %add138, float* %arrayidx137, align 4
+ %53 = or i64 %indvars.iv, 18
+ %arrayidx141 = getelementptr inbounds float, float* %b, i64 %53
+ %54 = load float, float* %arrayidx141, align 4
+ %mul142 = fmul float %54, %alpha
+ %arrayidx145 = getelementptr inbounds float, float* %a, i64 %53
+ %55 = load float, float* %arrayidx145, align 4
+ %add146 = fadd float %55, %mul142
+ store float %add146, float* %arrayidx145, align 4
+ %56 = or i64 %indvars.iv, 19
+ %arrayidx149 = getelementptr inbounds float, float* %b, i64 %56
+ %57 = load float, float* %arrayidx149, align 4
+ %mul150 = fmul float %57, %alpha
+ %arrayidx153 = getelementptr inbounds float, float* %a, i64 %56
+ %58 = load float, float* %arrayidx153, align 4
+ %add154 = fadd float %58, %mul150
+ store float %add154, float* %arrayidx153, align 4
+ %59 = or i64 %indvars.iv, 20
+ %arrayidx157 = getelementptr inbounds float, float* %b, i64 %59
+ %60 = load float, float* %arrayidx157, align 4
+ %mul158 = fmul float %60, %alpha
+ %arrayidx161 = getelementptr inbounds float, float* %a, i64 %59
+ %61 = load float, float* %arrayidx161, align 4
+ %add162 = fadd float %61, %mul158
+ store float %add162, float* %arrayidx161, align 4
+ %62 = or i64 %indvars.iv, 21
+ %arrayidx165 = getelementptr inbounds float, float* %b, i64 %62
+ %63 = load float, float* %arrayidx165, align 4
+ %mul166 = fmul float %63, %alpha
+ %arrayidx169 = getelementptr inbounds float, float* %a, i64 %62
+ %64 = load float, float* %arrayidx169, align 4
+ %add170 = fadd float %64, %mul166
+ store float %add170, float* %arrayidx169, align 4
+ %65 = or i64 %indvars.iv, 22
+ %arrayidx173 = getelementptr inbounds float, float* %b, i64 %65
+ %66 = load float, float* %arrayidx173, align 4
+ %mul174 = fmul float %66, %alpha
+ %arrayidx177 = getelementptr inbounds float, float* %a, i64 %65
+ %67 = load float, float* %arrayidx177, align 4
+ %add178 = fadd float %67, %mul174
+ store float %add178, float* %arrayidx177, align 4
+ %68 = or i64 %indvars.iv, 23
+ %arrayidx181 = getelementptr inbounds float, float* %b, i64 %68
+ %69 = load float, float* %arrayidx181, align 4
+ %mul182 = fmul float %69, %alpha
+ %arrayidx185 = getelementptr inbounds float, float* %a, i64 %68
+ %70 = load float, float* %arrayidx185, align 4
+ %add186 = fadd float %70, %mul182
+ store float %add186, float* %arrayidx185, align 4
+ %71 = or i64 %indvars.iv, 24
+ %arrayidx189 = getelementptr inbounds float, float* %b, i64 %71
+ %72 = load float, float* %arrayidx189, align 4
+ %mul190 = fmul float %72, %alpha
+ %arrayidx193 = getelementptr inbounds float, float* %a, i64 %71
+ %73 = load float, float* %arrayidx193, align 4
+ %add194 = fadd float %73, %mul190
+ store float %add194, float* %arrayidx193, align 4
+ %74 = or i64 %indvars.iv, 25
+ %arrayidx197 = getelementptr inbounds float, float* %b, i64 %74
+ %75 = load float, float* %arrayidx197, align 4
+ %mul198 = fmul float %75, %alpha
+ %arrayidx201 = getelementptr inbounds float, float* %a, i64 %74
+ %76 = load float, float* %arrayidx201, align 4
+ %add202 = fadd float %76, %mul198
+ store float %add202, float* %arrayidx201, align 4
+ %77 = or i64 %indvars.iv, 26
+ %arrayidx205 = getelementptr inbounds float, float* %b, i64 %77
+ %78 = load float, float* %arrayidx205, align 4
+ %mul206 = fmul float %78, %alpha
+ %arrayidx209 = getelementptr inbounds float, float* %a, i64 %77
+ %79 = load float, float* %arrayidx209, align 4
+ %add210 = fadd float %79, %mul206
+ store float %add210, float* %arrayidx209, align 4
+ %80 = or i64 %indvars.iv, 27
+ %arrayidx213 = getelementptr inbounds float, float* %b, i64 %80
+ %81 = load float, float* %arrayidx213, align 4
+ %mul214 = fmul float %81, %alpha
+ %arrayidx217 = getelementptr inbounds float, float* %a, i64 %80
+ %82 = load float, float* %arrayidx217, align 4
+ %add218 = fadd float %82, %mul214
+ store float %add218, float* %arrayidx217, align 4
+ %83 = or i64 %indvars.iv, 28
+ %arrayidx221 = getelementptr inbounds float, float* %b, i64 %83
+ %84 = load float, float* %arrayidx221, align 4
+ %mul222 = fmul float %84, %alpha
+ %arrayidx225 = getelementptr inbounds float, float* %a, i64 %83
+ %85 = load float, float* %arrayidx225, align 4
+ %add226 = fadd float %85, %mul222
+ store float %add226, float* %arrayidx225, align 4
+ %86 = or i64 %indvars.iv, 29
+ %arrayidx229 = getelementptr inbounds float, float* %b, i64 %86
+ %87 = load float, float* %arrayidx229, align 4
+ %mul230 = fmul float %87, %alpha
+ %arrayidx233 = getelementptr inbounds float, float* %a, i64 %86
+ %88 = load float, float* %arrayidx233, align 4
+ %add234 = fadd float %88, %mul230
+ store float %add234, float* %arrayidx233, align 4
+ %89 = or i64 %indvars.iv, 30
+ %arrayidx237 = getelementptr inbounds float, float* %b, i64 %89
+ %90 = load float, float* %arrayidx237, align 4
+ %mul238 = fmul float %90, %alpha
+ %arrayidx241 = getelementptr inbounds float, float* %a, i64 %89
+ %91 = load float, float* %arrayidx241, align 4
+ %add242 = fadd float %91, %mul238
+ store float %add242, float* %arrayidx241, align 4
+ %92 = or i64 %indvars.iv, 31
+ %arrayidx245 = getelementptr inbounds float, float* %b, i64 %92
+ %93 = load float, float* %arrayidx245, align 4
+ %mul246 = fmul float %93, %alpha
+ %arrayidx249 = getelementptr inbounds float, float* %a, i64 %92
+ %94 = load float, float* %arrayidx249, align 4
+ %add250 = fadd float %94, %mul246
+ store float %add250, float* %arrayidx249, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 32
+ %cmp = icmp slt i64 %indvars.iv.next, 3200
+ br i1 %cmp, label %for.body, label %for.end
+
+; CHECK-LABEL: @goo32
+
+; CHECK: for.body:
+; CHECK: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %entry ]
+; CHECK: %arrayidx = getelementptr inbounds float, float* %b, i64 %indvar
+; CHECK: %0 = load float, float* %arrayidx, align 4
+; CHECK: %mul = fmul float %0, %alpha
+; CHECK: %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvar
+; CHECK: %1 = load float, float* %arrayidx2, align 4
+; CHECK: %add = fadd float %1, %mul
+; CHECK: store float %add, float* %arrayidx2, align 4
+; CHECK: %indvar.next = add i64 %indvar, 1
+; CHECK: %exitcond = icmp eq i64 %indvar, 3199
+; CHECK: br i1 %exitcond, label %for.end, label %for.body
+; CHECK: ret
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+attributes #0 = { nounwind uwtable }
diff --git a/test/Transforms/LoopReroll/complex_reroll.ll b/test/Transforms/LoopReroll/complex_reroll.ll
new file mode 100644
index 000000000000..3a2c72215781
--- /dev/null
+++ b/test/Transforms/LoopReroll/complex_reroll.ll
@@ -0,0 +1,134 @@
+; RUN: opt -S -loop-reroll %s | FileCheck %s
+declare i32 @goo(i32, i32)
+
+@buf = external global i8*
+@aaa = global [16 x i8] c"\01\02\03\04\05\06\07\08\09\0A\0B\0C\0D\0E\0F\10", align 1
+
+define i32 @test1(i32 %len) {
+entry:
+ br label %while.body
+
+while.body:
+;CHECK-LABEL: while.body:
+;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %while.body ], [ 0, %entry ]
+;CHECK-NEXT: %buf.021 = phi i8* [ getelementptr inbounds ([16 x i8], [16 x i8]* @aaa, i64 0, i64 0), %entry ], [ %add.ptr, %while.body ]
+;CHECK-NEXT: %sum44.020 = phi i64 [ 0, %entry ], [ %add, %while.body ]
+;CHECK-NEXT: [[T2:%[0-9]+]] = load i8, i8* %buf.021, align 1
+;CHECK-NEXT: %conv = zext i8 [[T2]] to i64
+;CHECK-NEXT: %add = add i64 %conv, %sum44.020
+;CHECK-NEXT: %add.ptr = getelementptr inbounds i8, i8* %buf.021, i64 1
+;CHECK-NEXT: %indvar.next = add i32 %indvar, 1
+;CHECK-NEXT: %exitcond = icmp eq i32 %indvar, 1
+;CHECK-NEXT: br i1 %exitcond, label %while.end, label %while.body
+
+ %dec22 = phi i32 [ 4, %entry ], [ %dec, %while.body ]
+ %buf.021 = phi i8* [ getelementptr inbounds ([16 x i8], [16 x i8]* @aaa, i64 0, i64 0), %entry ], [ %add.ptr, %while.body ]
+ %sum44.020 = phi i64 [ 0, %entry ], [ %add9, %while.body ]
+ %0 = load i8, i8* %buf.021, align 1
+ %conv = zext i8 %0 to i64
+ %add = add i64 %conv, %sum44.020
+ %arrayidx1 = getelementptr inbounds i8, i8* %buf.021, i64 1
+ %1 = load i8, i8* %arrayidx1, align 1
+ %conv2 = zext i8 %1 to i64
+ %add3 = add i64 %add, %conv2
+ %arrayidx4 = getelementptr inbounds i8, i8* %buf.021, i64 2
+ %2 = load i8, i8* %arrayidx4, align 1
+ %conv5 = zext i8 %2 to i64
+ %add6 = add i64 %add3, %conv5
+ %arrayidx7 = getelementptr inbounds i8, i8* %buf.021, i64 3
+ %3 = load i8, i8* %arrayidx7, align 1
+ %conv8 = zext i8 %3 to i64
+ %add9 = add i64 %add6, %conv8
+ %add.ptr = getelementptr inbounds i8, i8* %buf.021, i64 4
+ %dec = add nsw i32 %dec22, -1
+ %tobool = icmp eq i32 %dec, 0
+ br i1 %tobool, label %while.end, label %while.body
+
+while.end: ; preds = %while.body
+ %conv11 = trunc i64 %add9 to i32
+ %call = tail call i32 @goo(i32 0, i32 %conv11)
+ unreachable
+}
+
+define i32 @test2(i32 %N, i32* nocapture readonly %a, i32 %S) {
+entry:
+ %cmp.9 = icmp sgt i32 %N, 0
+ br i1 %cmp.9, label %for.body.lr.ph, label %for.cond.cleanup
+
+for.body.lr.ph:
+ br label %for.body
+
+for.cond.for.cond.cleanup_crit_edge:
+ br label %for.cond.cleanup
+
+for.cond.cleanup:
+ %S.addr.0.lcssa = phi i32 [ %add2, %for.cond.for.cond.cleanup_crit_edge ], [ %S, %entry ]
+ ret i32 %S.addr.0.lcssa
+
+for.body:
+;CHECK-LABEL: for.body:
+;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %for.body.lr.ph ]
+;CHECK-NEXT: %S.addr.011 = phi i32 [ %S, %for.body.lr.ph ], [ %add, %for.body ]
+;CHECK-NEXT: %a.addr.010 = phi i32* [ %a, %for.body.lr.ph ], [ %incdec.ptr1, %for.body ]
+;CHECK-NEXT: %4 = load i32, i32* %a.addr.010, align 4
+;CHECK-NEXT: %add = add nsw i32 %4, %S.addr.011
+;CHECK-NEXT: %incdec.ptr1 = getelementptr inbounds i32, i32* %a.addr.010, i64 1
+;CHECK-NEXT: %indvar.next = add i32 %indvar, 1
+;CHECK-NEXT: %exitcond = icmp eq i32 %indvar, %3
+;CHECK-NEXT: br i1 %exitcond, label %for.cond.for.cond.cleanup_crit_edge, label %for.body
+
+ %i.012 = phi i32 [ 0, %for.body.lr.ph ], [ %add3, %for.body ]
+ %S.addr.011 = phi i32 [ %S, %for.body.lr.ph ], [ %add2, %for.body ]
+ %a.addr.010 = phi i32* [ %a, %for.body.lr.ph ], [ %incdec.ptr1, %for.body ]
+ %incdec.ptr = getelementptr inbounds i32, i32* %a.addr.010, i64 1
+ %0 = load i32, i32* %a.addr.010, align 4
+ %add = add nsw i32 %0, %S.addr.011
+ %incdec.ptr1 = getelementptr inbounds i32, i32* %a.addr.010, i64 2
+ %1 = load i32, i32* %incdec.ptr, align 4
+ %add2 = add nsw i32 %add, %1
+ %add3 = add nsw i32 %i.012, 2
+ %cmp = icmp slt i32 %add3, %N
+ br i1 %cmp, label %for.body, label %for.cond.for.cond.cleanup_crit_edge
+}
+
+define i32 @test3(i32* nocapture readonly %buf, i32 %len) #0 {
+entry:
+ %cmp10 = icmp sgt i32 %len, 1
+ br i1 %cmp10, label %while.body.preheader, label %while.end
+
+while.body.preheader: ; preds = %entry
+ br label %while.body
+
+while.body: ; preds = %while.body.preheader, %while.body
+;CHECK-LABEL: while.body:
+;CHECK-NEXT: %indvar = phi i32 [ %indvar.next, %while.body ], [ 0, %while.body.preheader ]
+;CHECK-NEXT: %S.012 = phi i32 [ %add, %while.body ], [ undef, %while.body.preheader ]
+;CHECK-NEXT: %buf.addr.011 = phi i32* [ %add.ptr, %while.body ], [ %buf, %while.body.preheader ]
+;CHECK-NEXT: %4 = load i32, i32* %buf.addr.011, align 4
+;CHECK-NEXT: %add = add nsw i32 %4, %S.012
+;CHECK-NEXT: %add.ptr = getelementptr inbounds i32, i32* %buf.addr.011, i64 -1
+;CHECK-NEXT: %indvar.next = add i32 %indvar, 1
+;CHECK-NEXT: %exitcond = icmp eq i32 %indvar, %3
+;CHECK-NEXT: br i1 %exitcond, label %while.end.loopexit, label %while.body
+
+ %i.013 = phi i32 [ %sub, %while.body ], [ %len, %while.body.preheader ]
+ %S.012 = phi i32 [ %add2, %while.body ], [ undef, %while.body.preheader ]
+ %buf.addr.011 = phi i32* [ %add.ptr, %while.body ], [ %buf, %while.body.preheader ]
+ %0 = load i32, i32* %buf.addr.011, align 4
+ %add = add nsw i32 %0, %S.012
+ %arrayidx1 = getelementptr inbounds i32, i32* %buf.addr.011, i64 -1
+ %1 = load i32, i32* %arrayidx1, align 4
+ %add2 = add nsw i32 %add, %1
+ %add.ptr = getelementptr inbounds i32, i32* %buf.addr.011, i64 -2
+ %sub = add nsw i32 %i.013, -2
+ %cmp = icmp sgt i32 %sub, 1
+ br i1 %cmp, label %while.body, label %while.end.loopexit
+
+while.end.loopexit: ; preds = %while.body
+ br label %while.end
+
+while.end: ; preds = %while.end.loopexit, %entry
+ %S.0.lcssa = phi i32 [ undef, %entry ], [ %add2, %while.end.loopexit ]
+ ret i32 %S.0.lcssa
+}
+
diff --git a/test/Transforms/LoopReroll/indvar_with_ext.ll b/test/Transforms/LoopReroll/indvar_with_ext.ll
new file mode 100644
index 000000000000..7aae61433aed
--- /dev/null
+++ b/test/Transforms/LoopReroll/indvar_with_ext.ll
@@ -0,0 +1,186 @@
+; RUN: opt -S -loop-reroll %s | FileCheck %s
+target triple = "aarch64--linux-gnu"
+
+define void @test(i32 %n, float* %arrayidx200, float* %arrayidx164, float* %arrayidx172) {
+entry:
+ %rem.i = srem i32 %n, 4
+ %t22 = load float, float* %arrayidx172, align 4
+ %cmp.9 = icmp eq i32 %n, 0
+ %t7 = sext i32 %n to i64
+ br i1 %cmp.9, label %while.end, label %while.body.preheader
+
+while.body.preheader:
+ br label %while.body
+
+while.body:
+;CHECK-LABEL: while.body:
+;CHECK-NEXT: %indvars.iv.i423 = phi i64 [ %indvars.iv.next.i424, %while.body ], [ 0, %while.body.preheader ]
+;CHECK-NEXT: [[T1:%[0-9]+]] = trunc i64 %indvars.iv.i423 to i32
+;CHECK-NEXT: %arrayidx62.i = getelementptr inbounds float, float* %arrayidx200, i64 %indvars.iv.i423
+;CHECK-NEXT: %t1 = load float, float* %arrayidx62.i, align 4
+;CHECK-NEXT: %arrayidx64.i = getelementptr inbounds float, float* %arrayidx164, i64 %indvars.iv.i423
+;CHECK-NEXT: %t2 = load float, float* %arrayidx64.i, align 4
+;CHECK-NEXT: %mul65.i = fmul fast float %t2, %t22
+;CHECK-NEXT: %add66.i = fadd fast float %mul65.i, %t1
+;CHECK-NEXT: store float %add66.i, float* %arrayidx62.i, align 4
+;CHECK-NEXT: %indvars.iv.next.i424 = add i64 %indvars.iv.i423, 1
+;CHECK-NEXT: [[T2:%[0-9]+]] = sext i32 [[T1]] to i64
+;CHECK-NEXT: %exitcond = icmp eq i64 [[T2]], %{{[0-9]+}}
+;CHECK-NEXT: br i1 %exitcond, label %while.end.loopexit, label %while.body
+
+ %indvars.iv.i423 = phi i64 [ %indvars.iv.next.i424, %while.body ], [ 0, %while.body.preheader ]
+ %i.22.i = phi i32 [ %add103.i, %while.body ], [ %rem.i, %while.body.preheader ]
+ %arrayidx62.i = getelementptr inbounds float, float* %arrayidx200, i64 %indvars.iv.i423
+ %t1 = load float, float* %arrayidx62.i, align 4
+ %arrayidx64.i = getelementptr inbounds float, float* %arrayidx164, i64 %indvars.iv.i423
+ %t2 = load float, float* %arrayidx64.i, align 4
+ %mul65.i = fmul fast float %t2, %t22
+ %add66.i = fadd fast float %mul65.i, %t1
+ store float %add66.i, float* %arrayidx62.i, align 4
+ %t3 = add nsw i64 %indvars.iv.i423, 1
+ %arrayidx71.i = getelementptr inbounds float, float* %arrayidx200, i64 %t3
+ %t4 = load float, float* %arrayidx71.i, align 4
+ %arrayidx74.i = getelementptr inbounds float, float* %arrayidx164, i64 %t3
+ %t5 = load float, float* %arrayidx74.i, align 4
+ %mul75.i = fmul fast float %t5, %t22
+ %add76.i = fadd fast float %mul75.i, %t4
+ store float %add76.i, float* %arrayidx71.i, align 4
+ %add103.i = add nsw i32 %i.22.i, 2
+ %t6 = sext i32 %add103.i to i64
+ %cmp58.i = icmp slt i64 %t6, %t7
+ %indvars.iv.next.i424 = add i64 %indvars.iv.i423, 2
+ br i1 %cmp58.i, label %while.body, label %while.end.loopexit
+
+while.end.loopexit:
+ br label %while.end
+
+while.end:
+ ret void
+}
+
+; Function Attrs: noinline norecurse nounwind
+define i32 @test2(i64 %n, i32* nocapture %x, i32* nocapture readonly %y) {
+entry:
+ %cmp18 = icmp sgt i64 %n, 0
+ br i1 %cmp18, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+
+;CHECK: for.body:
+;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %for.body.preheader ]
+;CHECK-NEXT: %arrayidx = getelementptr inbounds i32, i32* %y, i64 %indvar
+;CHECK-NEXT: [[T1:%[0-9]+]] = load i32, i32* %arrayidx, align 4
+;CHECK-NEXT: %arrayidx3 = getelementptr inbounds i32, i32* %x, i64 %indvar
+;CHECK-NEXT: store i32 [[T1]], i32* %arrayidx3, align 4
+;CHECK-NEXT: %indvar.next = add i64 %indvar, 1
+;CHECK-NEXT: %exitcond = icmp eq i64 %indvar, %{{[0-9]+}}
+;CHECK-NEXT: br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i32, i32* %y, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32* %x, i64 %indvars.iv
+ store i32 %0, i32* %arrayidx3, align 4
+ %1 = or i64 %indvars.iv, 1
+ %arrayidx5 = getelementptr inbounds i32, i32* %y, i64 %1
+ %2 = load i32, i32* %arrayidx5, align 4
+ %arrayidx8 = getelementptr inbounds i32, i32* %x, i64 %1
+ store i32 %2, i32* %arrayidx8, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
+ %cmp = icmp slt i64 %indvars.iv.next, %n
+ br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret i32 0
+}
+
+; Function Attrs: noinline norecurse nounwind
+define i32 @test3(i32 %n, i32* nocapture %x, i32* nocapture readonly %y) {
+entry:
+ %cmp21 = icmp sgt i32 %n, 0
+ br i1 %cmp21, label %for.body.preheader, label %for.end
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+
+;CHECK: for.body:
+;CHECK: %add12 = add i8 %i.022, 2
+;CHECK-NEXT: %conv = sext i8 %add12 to i32
+;CHECK-NEXT: %cmp = icmp slt i32 %conv, %n
+;CHECK-NEXT: br i1 %cmp, label %for.body, label %for.end.loopexit
+
+ %conv23 = phi i32 [ %conv, %for.body ], [ 0, %for.body.preheader ]
+ %i.022 = phi i8 [ %add12, %for.body ], [ 0, %for.body.preheader ]
+ %idxprom = sext i8 %i.022 to i64
+ %arrayidx = getelementptr inbounds i32, i32* %y, i64 %idxprom
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32* %x, i64 %idxprom
+ store i32 %0, i32* %arrayidx3, align 4
+ %add = or i32 %conv23, 1
+ %idxprom5 = sext i32 %add to i64
+ %arrayidx6 = getelementptr inbounds i32, i32* %y, i64 %idxprom5
+ %1 = load i32, i32* %arrayidx6, align 4
+ %arrayidx10 = getelementptr inbounds i32, i32* %x, i64 %idxprom5
+ store i32 %1, i32* %arrayidx10, align 4
+ %add12 = add i8 %i.022, 2
+ %conv = sext i8 %add12 to i32
+ %cmp = icmp slt i32 %conv, %n
+ br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret i32 0
+}
+
+; Function Attrs: noinline norecurse nounwind
+define i32 @test4(i64 %n, i32* nocapture %x, i32* nocapture readonly %y) {
+entry:
+ %cmp18 = icmp eq i64 %n, 0
+ br i1 %cmp18, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+
+;CHECK: for.body:
+;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %for.body ], [ 0, %for.body.preheader ]
+;CHECK-NEXT: %arrayidx = getelementptr inbounds i32, i32* %y, i64 %indvar
+;CHECK-NEXT: [[T1:%[0-9]+]] = load i32, i32* %arrayidx, align 4
+;CHECK-NEXT: %arrayidx3 = getelementptr inbounds i32, i32* %x, i64 %indvar
+;CHECK-NEXT: store i32 [[T1]], i32* %arrayidx3, align 4
+;CHECK-NEXT: %indvar.next = add i64 %indvar, 1
+;CHECK-NEXT: %exitcond = icmp eq i64 %indvar, %{{[0-9]+}}
+;CHECK-NEXT: br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i32, i32* %y, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32* %x, i64 %indvars.iv
+ store i32 %0, i32* %arrayidx3, align 4
+ %1 = or i64 %indvars.iv, 1
+ %arrayidx5 = getelementptr inbounds i32, i32* %y, i64 %1
+ %2 = load i32, i32* %arrayidx5, align 4
+ %arrayidx8 = getelementptr inbounds i32, i32* %x, i64 %1
+ store i32 %2, i32* %arrayidx8, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
+ %cmp = icmp ult i64 %indvars.iv.next, %n
+ br i1 %cmp, label %for.body, label %for.end.loopexit
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret i32 0
+}
+
diff --git a/test/Transforms/LoopReroll/nonconst_lb.ll b/test/Transforms/LoopReroll/nonconst_lb.ll
index 96090e8dc0be..d3c9385a33b1 100644
--- a/test/Transforms/LoopReroll/nonconst_lb.ll
+++ b/test/Transforms/LoopReroll/nonconst_lb.ll
@@ -58,7 +58,7 @@ for.end: ; preds = %for.body, %entry
; CHECK: br label %for.body
; CHECK: for.body: ; preds = %for.body, %for.body.preheader
-; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %for.body.preheader ]
+; CHECK: %indvar = phi i32 [ 0, %for.body.preheader ], [ %indvar.next, %for.body ]
; CHECK: %6 = add i32 %m, %indvar
; CHECK: %arrayidx = getelementptr inbounds i32, i32* %B, i32 %6
; CHECK: %7 = load i32, i32* %arrayidx, align 4
@@ -67,7 +67,7 @@ for.end: ; preds = %for.body, %entry
; CHECK: store i32 %mul, i32* %arrayidx2, align 4
; CHECK: %indvar.next = add i32 %indvar, 1
; CHECK: %exitcond = icmp eq i32 %6, %5
-; CHECK: br i1 %exitcond, label %for.end, label %for.body
+; CHECK: br i1 %exitcond, label %for.end.loopexit, label %for.body
;void daxpy_ur(int n,float da,float *dx,float *dy)
; {
@@ -138,7 +138,7 @@ for.end: ; preds = %for.body, %entry
; CHECK: br label %for.body
; CHECK: for.body:
-; CHECK: %indvar = phi i32 [ %indvar.next, %for.body ], [ 0, %for.body.preheader ]
+; CHECK: %indvar = phi i32 [ 0, %for.body.preheader ], [ %indvar.next, %for.body ]
; CHECK: %6 = add i32 %rem, %indvar
; CHECK: %arrayidx = getelementptr inbounds float, float* %dy, i32 %6
; CHECK: %7 = load float, float* %arrayidx, align 4
@@ -149,4 +149,4 @@ for.end: ; preds = %for.body, %entry
; CHECK: store float %add, float* %arrayidx, align 4
; CHECK: %indvar.next = add i32 %indvar, 1
; CHECK: %exitcond = icmp eq i32 %6, %5
-; CHECK: br i1 %exitcond, label %for.end, label %for.body
+; CHECK: br i1 %exitcond, label %for.end.loopexit, label %for.body
diff --git a/test/Transforms/LoopReroll/ptrindvar.ll b/test/Transforms/LoopReroll/ptrindvar.ll
new file mode 100644
index 000000000000..05852bdca9ef
--- /dev/null
+++ b/test/Transforms/LoopReroll/ptrindvar.ll
@@ -0,0 +1,81 @@
+; RUN: opt -S -loop-reroll %s | FileCheck %s
+target triple = "aarch64--linux-gnu"
+
+define i32 @test(i32* readonly %buf, i32* readnone %end) #0 {
+entry:
+ %cmp.9 = icmp eq i32* %buf, %end
+ br i1 %cmp.9, label %while.end, label %while.body.preheader
+
+while.body.preheader:
+ br label %while.body
+
+while.body:
+;CHECK-LABEL: while.body:
+;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %while.body ], [ 0, %while.body.preheader ]
+;CHECK-NEXT: %S.011 = phi i32 [ %add, %while.body ], [ undef, %while.body.preheader ]
+;CHECK-NEXT: %scevgep = getelementptr i32, i32* %buf, i64 %indvar
+;CHECK-NEXT: %4 = load i32, i32* %scevgep, align 4
+;CHECK-NEXT: %add = add nsw i32 %4, %S.011
+;CHECK-NEXT: %indvar.next = add i64 %indvar, 1
+;CHECK-NEXT: %exitcond = icmp eq i32* %scevgep, %scevgep5
+;CHECK-NEXT: br i1 %exitcond, label %while.end.loopexit, label %while.body
+
+ %S.011 = phi i32 [ %add2, %while.body ], [ undef, %while.body.preheader ]
+ %buf.addr.010 = phi i32* [ %add.ptr, %while.body ], [ %buf, %while.body.preheader ]
+ %0 = load i32, i32* %buf.addr.010, align 4
+ %add = add nsw i32 %0, %S.011
+ %arrayidx1 = getelementptr inbounds i32, i32* %buf.addr.010, i64 1
+ %1 = load i32, i32* %arrayidx1, align 4
+ %add2 = add nsw i32 %add, %1
+ %add.ptr = getelementptr inbounds i32, i32* %buf.addr.010, i64 2
+ %cmp = icmp eq i32* %add.ptr, %end
+ br i1 %cmp, label %while.end.loopexit, label %while.body
+
+while.end.loopexit:
+ %add2.lcssa = phi i32 [ %add2, %while.body ]
+ br label %while.end
+
+while.end:
+ %S.0.lcssa = phi i32 [ undef, %entry ], [ %add2.lcssa, %while.end.loopexit ]
+ ret i32 %S.0.lcssa
+}
+
+define i32 @test2(i32* readonly %buf, i32* readnone %end) #0 {
+entry:
+ %cmp.9 = icmp eq i32* %buf, %end
+ br i1 %cmp.9, label %while.end, label %while.body.preheader
+
+while.body.preheader:
+ br label %while.body
+
+while.body:
+;CHECK-LABEL: while.body:
+;CHECK-NEXT: %indvar = phi i64 [ %indvar.next, %while.body ], [ 0, %while.body.preheader ]
+;CHECK-NEXT: %S.011 = phi i32 [ %add, %while.body ], [ undef, %while.body.preheader ]
+;CHECK-NEXT: %4 = mul i64 %indvar, -1
+;CHECK-NEXT: %scevgep = getelementptr i32, i32* %buf, i64 %4
+;CHECK-NEXT: %5 = load i32, i32* %scevgep, align 4
+;CHECK-NEXT: %add = add nsw i32 %5, %S.011
+;CHECK-NEXT: %indvar.next = add i64 %indvar, 1
+;CHECK-NEXT: %exitcond = icmp eq i32* %scevgep, %scevgep5
+;CHECK-NEXT: br i1 %exitcond, label %while.end.loopexit, label %while.body
+
+ %S.011 = phi i32 [ %add2, %while.body ], [ undef, %while.body.preheader ]
+ %buf.addr.010 = phi i32* [ %add.ptr, %while.body ], [ %buf, %while.body.preheader ]
+ %0 = load i32, i32* %buf.addr.010, align 4
+ %add = add nsw i32 %0, %S.011
+ %arrayidx1 = getelementptr inbounds i32, i32* %buf.addr.010, i64 -1
+ %1 = load i32, i32* %arrayidx1, align 4
+ %add2 = add nsw i32 %add, %1
+ %add.ptr = getelementptr inbounds i32, i32* %buf.addr.010, i64 -2
+ %cmp = icmp eq i32* %add.ptr, %end
+ br i1 %cmp, label %while.end.loopexit, label %while.body
+
+while.end.loopexit:
+ %add2.lcssa = phi i32 [ %add2, %while.body ]
+ br label %while.end
+
+while.end:
+ %S.0.lcssa = phi i32 [ undef, %entry ], [ %add2.lcssa, %while.end.loopexit ]
+ ret i32 %S.0.lcssa
+}
diff --git a/test/Transforms/LoopReroll/reroll_with_dbg.ll b/test/Transforms/LoopReroll/reroll_with_dbg.ll
index 78b457ed94ab..e14c22b53166 100644
--- a/test/Transforms/LoopReroll/reroll_with_dbg.ll
+++ b/test/Transforms/LoopReroll/reroll_with_dbg.ll
@@ -85,11 +85,10 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!17, !18, !19, !20}
!llvm.ident = !{!21}
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (http://llvm.org/git/clang.git b1fbc23058e7fa1cdd954ab97ba84f1c549c9879) (http://llvm.org/git/llvm.git 054da58c5398a721d4dab7af63d7de8d7a1e1a1c)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (http://llvm.org/git/clang.git b1fbc23058e7fa1cdd954ab97ba84f1c549c9879) (http://llvm.org/git/llvm.git 054da58c5398a721d4dab7af63d7de8d7a1e1a1c)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
!1 = !DIFile(filename: "test.c", directory: "/home/weimingz/llvm-build/release/community-tip")
!2 = !{}
-!3 = !{!4}
-!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, variables: !11)
+!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !11)
!5 = !DISubroutineType(types: !6)
!6 = !{null, !7, !7, !10}
!7 = !DIDerivedType(tag: DW_TAG_restrict_type, baseType: !8)
diff --git a/test/Transforms/LoopRotate/basic.ll b/test/Transforms/LoopRotate/basic.ll
index 9c04fa28753e..299c18c871e8 100644
--- a/test/Transforms/LoopRotate/basic.ll
+++ b/test/Transforms/LoopRotate/basic.ll
@@ -1,4 +1,6 @@
; RUN: opt -S -loop-rotate < %s | FileCheck %s
+; RUN: opt -S -passes='require<targetir>,require<assumptions>,loop(rotate)' < %s | FileCheck %s
+
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-darwin10.0.0"
diff --git a/test/Transforms/LoopRotate/convergent.ll b/test/Transforms/LoopRotate/convergent.ll
new file mode 100644
index 000000000000..c8b34fd75f07
--- /dev/null
+++ b/test/Transforms/LoopRotate/convergent.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -loop-rotate < %s | FileCheck %s
+
+@e = global i32 10
+
+declare void @f1(i32) convergent
+declare void @f2(i32)
+
+; The call to f1 in the loop header shouldn't be duplicated (meaning, loop
+; rotation shouldn't occur), because f1 is convergent.
+
+; CHECK: call void @f1
+; CHECK-NOT: call void @f1
+
+define void @test(i32 %x) {
+entry:
+ br label %loop
+
+loop:
+ %n.phi = phi i32 [ %n, %loop.fin ], [ 0, %entry ]
+ call void @f1(i32 %n.phi)
+ %cond = icmp eq i32 %n.phi, %x
+ br i1 %cond, label %exit, label %loop.fin
+
+loop.fin:
+ %n = add i32 %n.phi, 1
+ call void @f2(i32 %n)
+ br label %loop
+
+exit:
+ ret void
+}
diff --git a/test/Transforms/LoopRotate/dbgvalue.ll b/test/Transforms/LoopRotate/dbgvalue.ll
index d90841d16270..9ff8bda4bc08 100644
--- a/test/Transforms/LoopRotate/dbgvalue.ll
+++ b/test/Transforms/LoopRotate/dbgvalue.ll
@@ -7,6 +7,7 @@ define i32 @tak(i32 %x, i32 %y, i32 %z) nounwind ssp !dbg !0 {
; CHECK-LABEL: define i32 @tak(
; CHECK: entry
; CHECK-NEXT: call void @llvm.dbg.value(metadata i32 %x
+; CHECK: tail call void @llvm.dbg.value(metadata i32 %call
entry:
br label %tailrecurse
@@ -37,6 +38,44 @@ return: ; preds = %if.end
ret i32 %z.tr, !dbg !17
}
+define i32 @tak2(i32 %x, i32 %y, i32 %z) nounwind ssp !dbg !0 {
+; CHECK-LABEL: define i32 @tak2(
+; CHECK: entry
+; CHECK: tail call void @llvm.dbg.value(metadata i32 %x.tr
+; CHECK: tail call void @llvm.dbg.value(metadata i32 undef
+
+entry:
+ br label %tailrecurse
+
+tailrecurse: ; preds = %if.then, %entry
+ %x.tr = phi i32 [ %x, %entry ], [ %call, %if.then ]
+ %y.tr = phi i32 [ %y, %entry ], [ %call9, %if.then ]
+ %z.tr = phi i32 [ %z, %entry ], [ %call14, %if.then ]
+ %cmp = icmp slt i32 %y.tr, %x.tr, !dbg !12
+ br i1 %cmp, label %if.then, label %if.end, !dbg !12
+
+if.then: ; preds = %tailrecurse
+ tail call void @llvm.dbg.value(metadata i32 %x.tr, i64 0, metadata !6, metadata !DIExpression()), !dbg !7
+ tail call void @llvm.dbg.value(metadata i32 %y.tr, i64 0, metadata !8, metadata !DIExpression()), !dbg !9
+ tail call void @llvm.dbg.value(metadata i32 %z.tr, i64 0, metadata !10, metadata !DIExpression()), !dbg !11
+ %sub = sub nsw i32 %x.tr, 1, !dbg !14
+ %call = tail call i32 @tak(i32 %sub, i32 %y.tr, i32 %z.tr), !dbg !14
+ %sub6 = sub nsw i32 %y.tr, 1, !dbg !14
+ %call9 = tail call i32 @tak(i32 %sub6, i32 %z.tr, i32 %x.tr), !dbg !14
+ %sub11 = sub nsw i32 %z.tr, 1, !dbg !14
+ %call14 = tail call i32 @tak(i32 %sub11, i32 %x.tr, i32 %y.tr), !dbg !14
+ br label %tailrecurse
+
+if.end: ; preds = %tailrecurse
+ tail call void @llvm.dbg.value(metadata i32 %x.tr, i64 0, metadata !6, metadata !DIExpression()), !dbg !7
+ tail call void @llvm.dbg.value(metadata i32 %y.tr, i64 0, metadata !8, metadata !DIExpression()), !dbg !9
+ tail call void @llvm.dbg.value(metadata i32 %z.tr, i64 0, metadata !10, metadata !DIExpression()), !dbg !11
+ br label %return, !dbg !16
+
+return: ; preds = %if.end
+ ret i32 %z.tr, !dbg !17
+}
+
@channelColumns = external global i64
@horzPlane = external global i8*, align 8
@@ -82,11 +121,11 @@ for.end:
}
!llvm.module.flags = !{!20}
-!llvm.dbg.sp = !{!0}
+!llvm.dbg.cu = !{!2}
-!0 = distinct !DISubprogram(name: "tak", line: 32, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !18, scope: !1, type: !3)
+!0 = distinct !DISubprogram(name: "tak", line: 32, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !2, file: !18, scope: !1, type: !3)
!1 = !DIFile(filename: "/Volumes/Lalgate/cj/llvm/projects/llvm-test/SingleSource/Benchmarks/BenchmarkGame/recursive.c", directory: "/Volumes/Lalgate/cj/D/projects/llvm-test/SingleSource/Benchmarks/BenchmarkGame")
-!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 125492)", isOptimized: true, emissionKind: 0, file: !18, enums: !19, retainedTypes: !19)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.9 (trunk 125492)", isOptimized: true, emissionKind: FullDebug, file: !18)
!3 = !DISubroutineType(types: !4)
!4 = !{!5}
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
@@ -103,5 +142,4 @@ for.end:
!16 = !DILocation(line: 36, column: 3, scope: !13)
!17 = !DILocation(line: 37, column: 1, scope: !13)
!18 = !DIFile(filename: "/Volumes/Lalgate/cj/llvm/projects/llvm-test/SingleSource/Benchmarks/BenchmarkGame/recursive.c", directory: "/Volumes/Lalgate/cj/D/projects/llvm-test/SingleSource/Benchmarks/BenchmarkGame")
-!19 = !{i32 0}
!20 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/Transforms/LoopSimplify/2004-04-13-LoopSimplifyUpdateDomFrontier.ll b/test/Transforms/LoopSimplify/2004-04-13-LoopSimplifyUpdateDomFrontier.ll
index 5818808ae0cd..df7034baf661 100644
--- a/test/Transforms/LoopSimplify/2004-04-13-LoopSimplifyUpdateDomFrontier.ll
+++ b/test/Transforms/LoopSimplify/2004-04-13-LoopSimplifyUpdateDomFrontier.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -scalarrepl -loop-simplify -licm -disable-output -verify-dom-info -verify-loop-info
+; RUN: opt < %s -sroa -loop-simplify -licm -disable-output -verify-dom-info -verify-loop-info
define void @inflate() {
entry:
diff --git a/test/Transforms/LoopSimplify/basictest.ll b/test/Transforms/LoopSimplify/basictest.ll
index 6b31848a94bd..81b2c80fae00 100644
--- a/test/Transforms/LoopSimplify/basictest.ll
+++ b/test/Transforms/LoopSimplify/basictest.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -loop-simplify
+; RUN: opt < %s -passes=loop-simplify
; This function should get a preheader inserted before BB3, that is jumped
; to by BB1 & BB2
diff --git a/test/Transforms/LoopSimplify/dbg-loc.ll b/test/Transforms/LoopSimplify/dbg-loc.ll
index b0e14bbcfd7f..702a1ad16af6 100644
--- a/test/Transforms/LoopSimplify/dbg-loc.ll
+++ b/test/Transforms/LoopSimplify/dbg-loc.ll
@@ -73,6 +73,7 @@ eh.resume: ; preds = %catch
; CHECK-DAG: [[LPAD_PREHEADER_LOC]] = !DILocation(line: 85, column: 1, scope: !{{[0-9]+}})
!llvm.module.flags = !{!0, !1, !2}
+!llvm.dbg.cu = !{!14}
!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = !{i32 1, !"PIC Level", i32 2}
@@ -80,7 +81,7 @@ eh.resume: ; preds = %catch
!3 = !{}
!4 = !DISubroutineType(types: !3)
!5 = !DIFile(filename: "Vector.h", directory: "/tmp")
-!6 = distinct !DISubprogram(name: "destruct", scope: !5, file: !5, line: 71, type: !4, isLocal: false, isDefinition: true, scopeLine: 72, flags: DIFlagPrototyped, isOptimized: false, variables: !3)
+!6 = distinct !DISubprogram(name: "destruct", scope: !5, file: !5, line: 71, type: !4, isLocal: false, isDefinition: true, scopeLine: 72, flags: DIFlagPrototyped, isOptimized: false, unit: !14, variables: !3)
!7 = !DILocation(line: 73, column: 38, scope: !6)
!8 = !DILocation(line: 73, column: 13, scope: !6)
!9 = !DILocation(line: 73, column: 27, scope: !6)
@@ -88,3 +89,7 @@ eh.resume: ; preds = %catch
!11 = !DILocation(line: 73, column: 46, scope: !6)
!12 = !DILocation(line: 75, column: 9, scope: !6)
!13 = !DILocation(line: 85, column: 1, scope: !6)
+!14 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang",
+ file: !5,
+ isOptimized: true, flags: "-O2",
+ splitDebugFilename: "abc.debug", emissionKind: 2)
diff --git a/test/Transforms/LoopSimplify/pr26682.ll b/test/Transforms/LoopSimplify/pr26682.ll
new file mode 100644
index 000000000000..092c0c3f0b04
--- /dev/null
+++ b/test/Transforms/LoopSimplify/pr26682.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -lcssa -loop-simplify -indvars -S | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-unknown"
+
+@a = external global i32, align 4
+
+; Check that loop-simplify merges two loop exits, but preserves LCSSA form.
+; CHECK-LABEL: @foo
+; CHECK: for:
+; CHECK: %or.cond = and i1 %cmp1, %cmp2
+; CHECK-NOT: for.cond:
+; CHECK: for.end:
+; CHECK: %a.lcssa = phi i32 [ %a, %for ]
+define i32 @foo(i32 %x) {
+entry:
+ br label %for
+
+for:
+ %iv = phi i32 [ 0, %entry ], [ %iv.next, %for.cond ]
+ %cmp1 = icmp eq i32 %x, 0
+ %iv.next = add nuw nsw i32 %iv, 1
+ %a = load i32, i32* @a
+ br i1 %cmp1, label %for.cond, label %for.end
+
+for.cond:
+ %cmp2 = icmp slt i32 %iv.next, 4
+ br i1 %cmp2, label %for, label %for.end
+
+for.end:
+ %a.lcssa = phi i32 [ %a, %for ], [ %a, %for.cond ]
+ ret i32 %a.lcssa
+}
diff --git a/test/Transforms/LoopSimplify/single-backedge.ll b/test/Transforms/LoopSimplify/single-backedge.ll
index 6f3db8fb14fc..16d29e4915b5 100644
--- a/test/Transforms/LoopSimplify/single-backedge.ll
+++ b/test/Transforms/LoopSimplify/single-backedge.ll
@@ -24,16 +24,21 @@ BE2: ; preds = %n br label %Loop
}
!llvm.module.flags = !{!0, !1}
+!llvm.dbg.cu = !{!12}
!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = !{}
!3 = !DISubroutineType(types: !2)
!4 = !DIFile(filename: "atomic.cpp", directory: "/tmp")
-!5 = distinct !DISubprogram(name: "test", scope: !4, file: !4, line: 99, type: !3, isLocal: false, isDefinition: true, scopeLine: 100, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = distinct !DISubprogram(name: "test", scope: !4, file: !4, line: 99, type: !3, isLocal: false, isDefinition: true, scopeLine: 100, flags: DIFlagPrototyped, isOptimized: false, unit: !12, variables: !2)
!6 = !DILocation(line: 100, column: 1, scope: !5)
!7 = !DILocation(line: 101, column: 1, scope: !5)
!8 = !DILocation(line: 102, column: 1, scope: !5)
!9 = !DILocation(line: 103, column: 1, scope: !5)
!10 = !DILocation(line: 104, column: 1, scope: !5)
!11 = !DILocation(line: 105, column: 1, scope: !5)
+!12 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang",
+ file: !4,
+ isOptimized: true, flags: "-O2",
+ splitDebugFilename: "abc.debug", emissionKind: 2)
diff --git a/test/Transforms/LoopSimplifyCFG/merge-header.ll b/test/Transforms/LoopSimplifyCFG/merge-header.ll
new file mode 100644
index 000000000000..e72c8218d9b7
--- /dev/null
+++ b/test/Transforms/LoopSimplifyCFG/merge-header.ll
@@ -0,0 +1,35 @@
+; RUN: opt -S -loop-simplifycfg < %s | FileCheck %s
+; RUN: opt -S -passes='require<domtree>,loop(simplify-cfg)' < %s | FileCheck %s
+
+; CHECK-LABEL: foo
+; CHECK: entry:
+; CHECK-NEXT: br label %[[LOOP:[a-z]+]]
+; CHECK: [[LOOP]]:
+; CHECK-NEXT: phi
+; CHECK-NOT: br label
+; CHECK: br i1
+define i32 @foo(i32* %P, i64* %Q) {
+entry:
+ br label %outer
+
+outer: ; preds = %outer.latch2, %entry
+ %y.2 = phi i32 [ 0, %entry ], [ %y.inc2, %outer.latch2 ]
+ br label %inner
+
+inner: ; preds = %outer
+ store i32 0, i32* %P
+ store i32 1, i32* %P
+ store i32 2, i32* %P
+ %y.inc2 = add nsw i32 %y.2, 1
+ %exitcond.outer = icmp eq i32 %y.inc2, 3
+ store i32 %y.2, i32* %P
+ br i1 %exitcond.outer, label %exit, label %outer.latch2
+
+outer.latch2: ; preds = %inner
+ %t = sext i32 %y.inc2 to i64
+ store i64 %t, i64* %Q
+ br label %outer
+
+exit: ; preds = %inner
+ ret i32 0
+}
diff --git a/test/Transforms/LoopStrengthReduce/AArch64/lsr-reuse.ll b/test/Transforms/LoopStrengthReduce/AArch64/lsr-reuse.ll
new file mode 100644
index 000000000000..a2dfe81b1084
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/AArch64/lsr-reuse.ll
@@ -0,0 +1,34 @@
+; RUN: llc -mtriple=arm64-unknown-unknown -print-lsr-output < %s 2>&1 | FileCheck %s
+
+declare void @foo(i64)
+
+; Verify that redundant adds aren't inserted by LSR.
+; CHECK-LABEL: @bar(
+define void @bar(double* %A) {
+entry:
+ br label %while.cond
+
+while.cond:
+; CHECK-LABEL: while.cond:
+; CHECK: add i64 %lsr.iv, 1
+; CHECK-NOT: add i64 %lsr.iv, 1
+; CHECK-LABEL: land.rhs:
+ %indvars.iv28 = phi i64 [ %indvars.iv.next29, %land.rhs ], [ 50, %entry ]
+ %cmp = icmp sgt i64 %indvars.iv28, 0
+ br i1 %cmp, label %land.rhs, label %while.end
+
+land.rhs:
+ %indvars.iv.next29 = add nsw i64 %indvars.iv28, -1
+ %arrayidx = getelementptr inbounds double, double* %A, i64 %indvars.iv.next29
+ %Aload = load double, double* %arrayidx, align 8
+ %cmp1 = fcmp oeq double %Aload, 0.000000e+00
+ br i1 %cmp1, label %while.cond, label %if.end
+
+while.end:
+ %indvars.iv28.lcssa = phi i64 [ %indvars.iv28, %while.cond ]
+ tail call void @foo(i64 %indvars.iv28.lcssa)
+ br label %if.end
+
+if.end:
+ ret void
+}
diff --git a/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-postinc-pos-addrspace.ll b/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-postinc-pos-addrspace.ll
index bd80302a68b8..8c83df5843d2 100644
--- a/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-postinc-pos-addrspace.ll
+++ b/test/Transforms/LoopStrengthReduce/AMDGPU/lsr-postinc-pos-addrspace.ll
@@ -7,28 +7,29 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:
; CHECK-LABEL: @local_cmp_user(
; CHECK: bb11:
-; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %bb ], [ -2, %entry ]
-; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %bb ], [ undef, %entry ]
+; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %bb ], [ 2, %entry ]
+; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %bb ], [ %{{[0-9]+}}, %entry ]
+; CHECK: %lsr.iv.next = add i32 %lsr.iv, -1
+; CHECK: %lsr.iv.next2 = add i32 %lsr.iv1, -2
+; CHECK: br i1
; CHECK: bb:
-; CHECK: %lsr.iv.next = add i32 %lsr.iv, -1
-; CHECK: %lsr.iv.next2 = add i32 %lsr.iv1, 2
-; CHECK: %scevgep = getelementptr i8, i8 addrspace(3)* %t, i32 %lsr.iv.next2
-; CHECK: %c1 = icmp ult i8 addrspace(3)* %scevgep, undef
-define void @local_cmp_user() nounwind {
+; CHECK: inttoptr i32 %lsr.iv.next2 to i8 addrspace(3)*
+; CHECK: %c1 = icmp ne i8 addrspace(3)*
+define void @local_cmp_user(i32 %arg0) nounwind {
entry:
br label %bb11
bb11:
%i = phi i32 [ 0, %entry ], [ %i.next, %bb ]
%ii = shl i32 %i, 1
- %c0 = icmp eq i32 %i, undef
+ %c0 = icmp eq i32 %i, %arg0
br i1 %c0, label %bb13, label %bb
bb:
%t = load i8 addrspace(3)*, i8 addrspace(3)* addrspace(3)* undef
%p = getelementptr i8, i8 addrspace(3)* %t, i32 %ii
- %c1 = icmp ult i8 addrspace(3)* %p, undef
+ %c1 = icmp ne i8 addrspace(3)* %p, null
%i.next = add i32 %i, 1
br i1 %c1, label %bb11, label %bb13
@@ -37,23 +38,29 @@ bb13:
}
; CHECK-LABEL: @global_cmp_user(
+; CHECK: %lsr.iv1 = phi i64
+; CHECK: %lsr.iv = phi i64
; CHECK: %lsr.iv.next = add i64 %lsr.iv, -1
-; CHECK: %lsr.iv.next2 = add i64 %lsr.iv1, 2
-; CHECK: %scevgep = getelementptr i8, i8 addrspace(1)* %t, i64 %lsr.iv.next2
-define void @global_cmp_user() nounwind {
+; CHECK: %lsr.iv.next2 = add i64 %lsr.iv1, -2
+; CHECK: br i1
+
+; CHECK: bb:
+; CHECK: inttoptr i64 %lsr.iv.next2 to i8 addrspace(1)*
+; CHECK: icmp ne i8 addrspace(1)* %t
+define void @global_cmp_user(i64 %arg0) nounwind {
entry:
br label %bb11
bb11:
%i = phi i64 [ 0, %entry ], [ %i.next, %bb ]
%ii = shl i64 %i, 1
- %c0 = icmp eq i64 %i, undef
+ %c0 = icmp eq i64 %i, %arg0
br i1 %c0, label %bb13, label %bb
bb:
%t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef
%p = getelementptr i8, i8 addrspace(1)* %t, i64 %ii
- %c1 = icmp ult i8 addrspace(1)* %p, undef
+ %c1 = icmp ne i8 addrspace(1)* %p, null
%i.next = add i64 %i, 1
br i1 %c1, label %bb11, label %bb13
@@ -62,23 +69,29 @@ bb13:
}
; CHECK-LABEL: @global_gep_user(
-; CHECK: %p = getelementptr i8, i8 addrspace(1)* %t, i32 %lsr.iv1
+; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %bb ], [ 0, %entry ]
+; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %bb ], [ %{{[0-9]+}}, %entry ]
; CHECK: %lsr.iv.next = add i32 %lsr.iv, -1
; CHECK: %lsr.iv.next2 = add i32 %lsr.iv1, 2
-define void @global_gep_user() nounwind {
+; CHECK: br i1
+
+; CHECK: bb:
+; CHECK: %idxprom = sext i32 %lsr.iv1 to i64
+; CHECK: getelementptr i8, i8 addrspace(1)* %t, i64 %idxprom
+define void @global_gep_user(i32 %arg0) nounwind {
entry:
br label %bb11
bb11:
%i = phi i32 [ 0, %entry ], [ %i.next, %bb ]
%ii = shl i32 %i, 1
- %c0 = icmp eq i32 %i, undef
+ %c0 = icmp eq i32 %i, %arg0
br i1 %c0, label %bb13, label %bb
bb:
%t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef
%p = getelementptr i8, i8 addrspace(1)* %t, i32 %ii
- %c1 = icmp ult i8 addrspace(1)* %p, undef
+ %c1 = icmp ne i8 addrspace(1)* %p, null
%i.next = add i32 %i, 1
br i1 %c1, label %bb11, label %bb13
@@ -87,10 +100,15 @@ bb13:
}
; CHECK-LABEL: @global_sext_scale_user(
-; CHECK: %p = getelementptr i8, i8 addrspace(1)* %t, i64 %ii.ext
+; CHECK: %lsr.iv1 = phi i32 [ %lsr.iv.next2, %bb ], [ 0, %entry ]
+; CHECK: %lsr.iv = phi i32 [ %lsr.iv.next, %bb ], [ %{{[0-9]+}}, %entry ]
; CHECK: %lsr.iv.next = add i32 %lsr.iv, -1
; CHECK: %lsr.iv.next2 = add i32 %lsr.iv1, 2
-define void @global_sext_scale_user() nounwind {
+; CHECK: br i1
+
+; CHECK: bb
+; CHECK: %p = getelementptr i8, i8 addrspace(1)* %t, i64 %ii.ext
+define void @global_sext_scale_user(i32 %arg0) nounwind {
entry:
br label %bb11
@@ -98,13 +116,13 @@ bb11:
%i = phi i32 [ 0, %entry ], [ %i.next, %bb ]
%ii = shl i32 %i, 1
%ii.ext = sext i32 %ii to i64
- %c0 = icmp eq i32 %i, undef
+ %c0 = icmp eq i32 %i, %arg0
br i1 %c0, label %bb13, label %bb
bb:
%t = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* undef
%p = getelementptr i8, i8 addrspace(1)* %t, i64 %ii.ext
- %c1 = icmp ult i8 addrspace(1)* %p, undef
+ %c1 = icmp ne i8 addrspace(1)* %p, null
%i.next = add i32 %i, 1
br i1 %c1, label %bb11, label %bb13
diff --git a/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll b/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll
index 184e300c7eb8..bdc36bdaf2e8 100644
--- a/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/2012-01-13-phielim.ll
@@ -102,7 +102,7 @@ while.end: ; preds = %entry
; CHECK-NEXT: %for.body3.us.i
; CHECK-NEXT: Inner Loop
; CHECK: testb
-; CHECK: jne
+; CHECK: je
; CHECK: jmp
define fastcc void @test3(double* nocapture %u) nounwind uwtable ssp {
entry:
diff --git a/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll b/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
index c1099b23dcfc..ab7d4f1baa81 100644
--- a/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
+++ b/test/Transforms/LoopStrengthReduce/X86/ivchain-X86.ll
@@ -12,7 +12,7 @@
; X64: shlq $2
; no other address computation in the preheader
; X64-NEXT: xorl
-; X64-NEXT: .align
+; X64-NEXT: .p2align
; X64: %loop
; no complex address modes
; X64-NOT: (%{{[^)]+}},%{{[^)]+}},
diff --git a/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll b/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
index 466566ed8a0d..fbf55fd81d23 100644
--- a/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
+++ b/test/Transforms/LoopStrengthReduce/post-inc-icmpzero.ll
@@ -4,8 +4,9 @@
; LSR should properly handle the post-inc offset when folding the
; non-IV operand of an icmp into the IV.
-; CHECK: [[r1:%[a-z0-9]+]] = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
-; CHECK: [[r2:%[a-z0-9]+]] = lshr i64 [[r1]], 1
+; CHECK: [[r1:%[a-z0-9\.]+]] = sub i64 %sub.ptr.lhs.cast, %sub.ptr.rhs.cast
+; CHECK: [[r2:%[a-z0-9\.]+]] = lshr exact i64 [[r1]], 1
+; CHECK: for.body.lr.ph:
; CHECK: [[r3:%[a-z0-9]+]] = shl i64 [[r2]], 1
; CHECK: br label %for.body
; CHECK: for.body:
diff --git a/test/Transforms/LoopStrengthReduce/pr27056.ll b/test/Transforms/LoopStrengthReduce/pr27056.ll
new file mode 100644
index 000000000000..6a255f6f4e8c
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/pr27056.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s -loop-reduce -S | FileCheck %s
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc18.0.0"
+
+%struct.L = type { i8, i8* }
+
+declare i32 @__CxxFrameHandler3(...)
+
+@GV1 = external global %struct.L*
+@GV2 = external global %struct.L
+
+define void @b_copy_ctor() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ %0 = load %struct.L*, %struct.L** @GV1, align 8
+ br label %for.cond
+
+for.cond: ; preds = %call.i.noexc, %entry
+ %d.0 = phi %struct.L* [ %0, %entry ], [ %incdec.ptr, %call.i.noexc ]
+ invoke void @a_copy_ctor()
+ to label %call.i.noexc unwind label %catch.dispatch
+
+call.i.noexc: ; preds = %for.cond
+ %incdec.ptr = getelementptr inbounds %struct.L, %struct.L* %d.0, i64 1
+ br label %for.cond
+
+catch.dispatch: ; preds = %for.cond
+ %1 = catchswitch within none [label %catch] unwind to caller
+
+catch: ; preds = %catch.dispatch
+ %2 = catchpad within %1 [i8* null, i32 64, i8* null]
+ %cmp16 = icmp eq %struct.L* %0, %d.0
+ br i1 %cmp16, label %for.end, label %for.body
+
+for.body: ; preds = %for.body, %catch
+ %cmp = icmp eq %struct.L* @GV2, %d.0
+ br i1 %cmp, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %catch
+ catchret from %2 to label %try.cont
+
+try.cont: ; preds = %for.end
+ ret void
+}
+
+; CHECK-LABEL: define void @b_copy_ctor(
+; CHECK: catchpad
+; CHECK-NEXT: icmp eq %struct.L
+; CHECK-NEXT: getelementptr {{.*}} i64 sub (i64 0, i64 ptrtoint (%struct.L* @GV2 to i64))
+
+declare void @a_copy_ctor()
diff --git a/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll b/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
index c6d6690e4302..315ae8df9fa3 100644
--- a/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
+++ b/test/Transforms/LoopStrengthReduce/quadradic-exit-value.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -analyze -iv-users | FileCheck %s
+; RUN: opt -passes='function(require<scalar-evolution>),print<ivusers>' -S < %s 2>&1| FileCheck %s
; Provide legal integer types.
target datalayout = "n8:16:32:64"
diff --git a/test/Transforms/LoopStrengthReduce/scev-insertpt-bug.ll b/test/Transforms/LoopStrengthReduce/scev-insertpt-bug.ll
new file mode 100644
index 000000000000..81a6b07fe951
--- /dev/null
+++ b/test/Transforms/LoopStrengthReduce/scev-insertpt-bug.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -loop-reduce -S
+
+; Test that SCEV insertpoint's don't get corrupted and cause an
+; invalid instruction to be inserted in a block other than its parent.
+; See http://reviews.llvm.org/D20703 for context.
+define void @test() {
+entry:
+ %bf.load = load i32, i32* null, align 4
+ %bf.clear = lshr i32 %bf.load, 1
+ %div = and i32 %bf.clear, 134217727
+ %sub = add nsw i32 %div, -1
+ %0 = zext i32 %sub to i64
+ br label %while.cond
+
+while.cond: ; preds = %cond.end, %entry
+ %indvars.iv = phi i64 [ %indvars.iv.next, %cond.end ], [ 0, %entry ]
+ %cmp = icmp eq i64 %indvars.iv, %0
+ br i1 %cmp, label %cleanup16, label %while.body
+
+while.body: ; preds = %while.cond
+ %1 = trunc i64 %indvars.iv to i32
+ %mul = shl i32 %1, 1
+ %add = add nuw i32 %mul, 2
+ %cmp3 = icmp ult i32 %add, 0
+ br i1 %cmp3, label %if.end, label %if.then
+
+if.then: ; preds = %while.body
+ unreachable
+
+if.end: ; preds = %while.body
+ br i1 false, label %cond.end, label %cond.true
+
+cond.true: ; preds = %if.end
+ br label %cond.end
+
+cond.end: ; preds = %cond.true, %if.end
+ %add7 = add i32 %1, 1
+ %cmp12 = icmp ugt i32 %add7, %sub
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ br i1 %cmp12, label %if.then13, label %while.cond
+
+if.then13: ; preds = %cond.end
+ unreachable
+
+cleanup16: ; preds = %while.cond
+ ret void
+}
diff --git a/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll b/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll
index d3dc081fa6f8..ebb9444d07aa 100644
--- a/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll
+++ b/test/Transforms/LoopUnroll/AArch64/runtime-loop.ll
@@ -1,13 +1,19 @@
-; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 | FileCheck %s
+; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 | FileCheck %s -check-prefix=EPILOG
+; RUN: opt < %s -S -loop-unroll -mtriple aarch64 -mcpu=cortex-a57 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
; Tests for unrolling loops with run-time trip counts
-; CHECK: %xtraiter = and i32 %n
-; CHECK: %lcmp.mod = icmp ne i32 %xtraiter, 0
-; CHECK: br i1 %lcmp.mod, label %for.body.prol, label %for.body.preheader.split
-
-; CHECK: for.body.prol:
-; CHECK: for.body:
+; EPILOG: %xtraiter = and i32 %n
+; EPILOG: for.body:
+; EPILOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
+; EPILOG: br i1 %lcmp.mod, label %for.body.epil.preheader, label %for.end.loopexit
+; EPILOG: for.body.epil:
+
+; PROLOG: %xtraiter = and i32 %n
+; PROLOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
+; PROLOG: br i1 %lcmp.mod, label %for.body.prol.preheader, label %for.body.prol.loopexit
+; PROLOG: for.body.prol:
+; PROLOG: for.body:
define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
entry:
diff --git a/test/Transforms/LoopUnroll/AMDGPU/unroll-barrier.ll b/test/Transforms/LoopUnroll/AMDGPU/unroll-barrier.ll
index 3cbb70274da5..e732ddc2bc84 100644
--- a/test/Transforms/LoopUnroll/AMDGPU/unroll-barrier.ll
+++ b/test/Transforms/LoopUnroll/AMDGPU/unroll-barrier.ll
@@ -1,10 +1,10 @@
; RUN: opt -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii -loop-unroll -S < %s | FileCheck %s
; CHECK-LABEL: @test_unroll_convergent_barrier(
-; CHECK: call void @llvm.AMDGPU.barrier.global()
-; CHECK: call void @llvm.AMDGPU.barrier.global()
-; CHECK: call void @llvm.AMDGPU.barrier.global()
-; CHECK: call void @llvm.AMDGPU.barrier.global()
+; CHECK: call void @llvm.amdgcn.s.barrier()
+; CHECK: call void @llvm.amdgcn.s.barrier()
+; CHECK: call void @llvm.amdgcn.s.barrier()
+; CHECK: call void @llvm.amdgcn.s.barrier()
; CHECK-NOT: br
define void @test_unroll_convergent_barrier(i32 addrspace(1)* noalias nocapture %out, i32 addrspace(1)* noalias nocapture %in) #0 {
entry:
@@ -16,7 +16,7 @@ for.body: ; preds = %for.body, %entry
%arrayidx.in = getelementptr inbounds i32, i32 addrspace(1)* %in, i32 %indvars.iv
%arrayidx.out = getelementptr inbounds i32, i32 addrspace(1)* %out, i32 %indvars.iv
%load = load i32, i32 addrspace(1)* %arrayidx.in
- call void @llvm.AMDGPU.barrier.global() #1
+ call void @llvm.amdgcn.s.barrier() #1
%add = add i32 %load, %sum.02
store i32 %add, i32 addrspace(1)* %arrayidx.out
%indvars.iv.next = add i32 %indvars.iv, 1
@@ -27,7 +27,7 @@ for.end: ; preds = %for.body, %entry
ret void
}
-declare void @llvm.AMDGPU.barrier.global() #1
+declare void @llvm.amdgcn.s.barrier() #1
attributes #0 = { nounwind }
attributes #1 = { nounwind convergent }
diff --git a/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll b/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll
index e9aa1acd5fec..77e92bd7506c 100644
--- a/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll
+++ b/test/Transforms/LoopUnroll/PowerPC/a2-unrolling.ll
@@ -1,23 +1,5 @@
-; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll | FileCheck %s
-define void @unroll_opt_for_size() nounwind optsize {
-entry:
- br label %loop
-
-loop:
- %iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
- %inc = add i32 %iv, 1
- %exitcnd = icmp uge i32 %inc, 1024
- br i1 %exitcnd, label %exit, label %loop
-
-exit:
- ret void
-}
-
-; CHECK-LABEL: @unroll_opt_for_size
-; CHECK: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK: icmp
+; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll | FileCheck %s -check-prefix=EPILOG
+; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=a2 -loop-unroll -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
entry:
@@ -40,8 +22,13 @@ for.end: ; preds = %for.body, %entry
ret i32 %sum.0.lcssa
}
-; CHECK-LABEL: @test
-; CHECK: for.body.prol{{.*}}:
-; CHECK: for.body:
-; CHECK: br i1 %exitcond.7, label %for.end.loopexit{{.*}}, label %for.body
+; EPILOG-LABEL: @test
+; EPILOG: for.body:
+; EPILOG: br i1 %niter.ncmp.7, label %for.end.loopexit{{.*}}, label %for.body
+; EPILOG: for.body.epil{{.*}}:
+
+; PROLOG-LABEL: @test
+; PROLOG: for.body.prol{{.*}}:
+; PROLOG: for.body:
+; PROLOG: br i1 %exitcond.7, label %for.end.loopexit{{.*}}, label %for.body
diff --git a/test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll b/test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll
index 7a50fc0a4f49..c9677d83e377 100644
--- a/test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll
+++ b/test/Transforms/LoopUnroll/PowerPC/p7-unrolling.ll
@@ -1,53 +1,4 @@
; RUN: opt < %s -S -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 -loop-unroll | FileCheck %s
-define void @unroll_opt_for_size() nounwind optsize {
-entry:
- br label %loop
-
-loop:
- %iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
- %inc = add i32 %iv, 1
- %exitcnd = icmp uge i32 %inc, 1024
- br i1 %exitcnd, label %exit, label %loop
-
-exit:
- ret void
-}
-
-; CHECK-LABEL: @unroll_opt_for_size
-; CHECK: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: icmp
-
define void @unroll_default() nounwind {
entry:
br label %loop
diff --git a/test/Transforms/LoopUnroll/X86/mmx.ll b/test/Transforms/LoopUnroll/X86/mmx.ll
index 2c4aa086e83c..7f00545b71f0 100644
--- a/test/Transforms/LoopUnroll/X86/mmx.ll
+++ b/test/Transforms/LoopUnroll/X86/mmx.ll
@@ -14,9 +14,9 @@ for.body: ; preds = %for.body, %entry
exit: ; preds = %for.body
%ret = phi x86_mmx [ undef, %for.body ]
- ; CHECK: %[[ret_unr:.*]] = phi x86_mmx [ undef,
- ; CHECK: %[[ret_ph:.*]] = phi x86_mmx [ undef,
- ; CHECK: %[[ret:.*]] = phi x86_mmx [ %[[ret_unr]], {{.*}} ], [ %[[ret_ph]]
+ ; CHECK: %[[ret_ph:.*]] = phi x86_mmx [ undef, %entry
+ ; CHECK: %[[ret_ph1:.*]] = phi x86_mmx [ undef,
+ ; CHECK: %[[ret:.*]] = phi x86_mmx [ %[[ret_ph]], {{.*}} ], [ %[[ret_ph1]],
; CHECK: ret x86_mmx %[[ret]]
ret x86_mmx %ret
}
diff --git a/test/Transforms/LoopUnroll/convergent.ll b/test/Transforms/LoopUnroll/convergent.ll
new file mode 100644
index 000000000000..4109e961bf0f
--- /dev/null
+++ b/test/Transforms/LoopUnroll/convergent.ll
@@ -0,0 +1,83 @@
+; RUN: opt < %s -loop-unroll -unroll-runtime -unroll-allow-partial -S | FileCheck %s
+
+declare void @f() convergent
+
+; Although this loop contains a convergent instruction, it should be
+; fully unrolled.
+;
+; CHECK-LABEL: @full_unroll(
+define i32 @full_unroll() {
+entry:
+ br label %l3
+
+l3:
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK-NOT: call void @f()
+ call void @f() ;convergent
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, 3
+ br i1 %exitcond, label %exit, label %l3
+
+exit:
+ ret i32 0
+}
+
+; This loop contains a convergent instruction, but it should be partially
+; unrolled. The unroll count is the largest power of 2 that divides the
+; multiple -- 4, in this case.
+;
+; CHECK-LABEL: @runtime_unroll(
+define i32 @runtime_unroll(i32 %n) {
+entry:
+ %loop_ctl = mul nsw i32 %n, 12
+ br label %l3
+
+l3:
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK-NOT: call void @f()
+ call void @f() convergent
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, %loop_ctl
+ br i1 %exitcond, label %exit, label %l3
+
+exit:
+ ret i32 0
+}
+
+; This loop contains a convergent instruction, so its partial unroll
+; count must divide its trip multiple. This overrides its unroll
+; pragma -- we unroll exactly 8 times, even though 16 is requested.
+; CHECK-LABEL: @pragma_unroll
+define i32 @pragma_unroll(i32 %n) {
+entry:
+ %loop_ctl = mul nsw i32 %n, 24
+ br label %l3, !llvm.loop !0
+
+l3:
+ %x.0 = phi i32 [ 0, %entry ], [ %inc, %l3 ]
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK: call void @f()
+; CHECK-NOT: call void @f()
+ call void @f() convergent
+ %inc = add nsw i32 %x.0, 1
+ %exitcond = icmp eq i32 %inc, %loop_ctl
+ br i1 %exitcond, label %exit, label %l3, !llvm.loop !0
+
+exit:
+ ret i32 0
+}
+
+!0 = !{!0, !{!"llvm.loop.unroll.count", i32 16}}
diff --git a/test/Transforms/LoopUnroll/full-unroll-crashers.ll b/test/Transforms/LoopUnroll/full-unroll-crashers.ll
index e932851042ad..f5c7734de968 100644
--- a/test/Transforms/LoopUnroll/full-unroll-crashers.ll
+++ b/test/Transforms/LoopUnroll/full-unroll-crashers.ll
@@ -1,5 +1,5 @@
; Check that we don't crash on corner cases.
-; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=20 -o /dev/null
+; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=1 -unroll-percent-dynamic-cost-saved-threshold=20 -o /dev/null
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
@known_constant = internal unnamed_addr constant [10 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 16
@@ -100,3 +100,125 @@ for.body:
for.exit:
ret <4 x i32> %r
}
+
+define void @ptrtoint_cast() optsize {
+entry:
+ br label %for.body
+
+for.body:
+ br i1 true, label %for.inc, label %if.then
+
+if.then:
+ %arraydecay = getelementptr inbounds [1 x i32], [1 x i32]* null, i64 0, i64 0
+ %x = ptrtoint i32* %arraydecay to i64
+ br label %for.inc
+
+for.inc:
+ br i1 false, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup:
+ ret void
+}
+
+define void @ptrtoint_cast2() {
+entry:
+ br i1 false, label %for.body.lr.ph, label %exit
+
+for.body.lr.ph:
+ br label %for.body
+
+for.body:
+ %iv = phi i32 [ 0, %for.body.lr.ph ], [ %inc, %for.body ]
+ %offset = getelementptr inbounds float, float* null, i32 3
+ %bc = bitcast float* %offset to i64*
+ %inc = add nuw nsw i32 %iv, 1
+ br i1 false, label %for.body, label %exit
+
+exit:
+ ret void
+}
+
+@i = external global i32, align 4
+
+define void @folded_not_to_constantint() {
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %m = phi i32* [ @i, %entry ], [ %m, %for.inc ]
+ br i1 undef, label %if.else, label %if.then
+
+if.then:
+ unreachable
+
+if.else:
+ %cmp = icmp ult i32* %m, null
+ br i1 %cmp, label %cond.false, label %for.inc
+
+cond.false:
+ unreachable
+
+for.inc:
+ %inc = add nuw nsw i32 %iv, 1
+ %cmp2 = icmp ult i32 %inc, 10
+ br i1 %cmp2, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+define void @index_too_large() {
+entry:
+ br label %for.body
+
+for.body:
+ %iv = phi i64 [ -73631599, %entry ], [ %iv.next, %for.inc ]
+ br i1 undef, label %for.body2, label %for.inc
+
+for.body2:
+ %idx = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv
+ %x = load i32, i32* %idx, align 1
+ br label %for.inc
+
+for.inc:
+ %iv.next = add nsw i64 %iv, -1
+ br i1 undef, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+define void @cmp_type_mismatch() {
+entry:
+ br label %for.header
+
+for.header:
+ br label %for.body
+
+for.body:
+ %d = phi i32* [ null, %for.header ]
+ %cmp = icmp eq i32* %d, null
+ br i1 undef, label %for.end, label %for.header
+
+for.end:
+ ret void
+}
+
+define void @load_type_mismatch() {
+entry:
+ br label %for.body
+
+for.body:
+ %iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.body ]
+ %arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv.0
+ %bc = bitcast i32* %arrayidx1 to i64*
+ %x1 = load i64, i64* %bc, align 4
+ %x2 = add i64 10, %x1
+ %iv.1 = add nuw nsw i64 %iv.0, 1
+ %exitcond = icmp eq i64 %iv.1, 10
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
diff --git a/test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll b/test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll
index 5df48e8c380b..11c9f9635ca5 100644
--- a/test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll
+++ b/test/Transforms/LoopUnroll/full-unroll-heuristics-2.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=50 -unroll-dynamic-cost-savings-discount=90 | FileCheck %s
+; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=70 -unroll-dynamic-cost-savings-discount=90 | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
@unknown_global = internal unnamed_addr global [9 x i32] [i32 0, i32 -1, i32 0, i32 -1, i32 5, i32 -1, i32 0, i32 -1, i32 0], align 16
@@ -55,3 +55,35 @@ loop.end: ; preds = %loop
%r.lcssa = phi i32 [ %r, %loop ]
ret i32 %r.lcssa
}
+
+; In this case the loaded value is used only to control branch.
+; If we missed that, we could've thought that it's unused and unrolling would
+; clean up almost entire loop. Make sure that we do not unroll such loop.
+; CHECK-LABEL: @foo3
+; CHECK: br i1 %exitcond, label %loop.end, label %loop.header
+define i32 @foo3(i32* noalias nocapture readonly %src) {
+entry:
+ br label %loop.header
+
+loop.header:
+ %iv = phi i64 [ 0, %entry ], [ %inc, %loop.latch ]
+ %r1 = phi i32 [ 0, %entry ], [ %r3, %loop.latch ]
+ %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
+ %src_element = load i32, i32* %arrayidx, align 4
+ %cmp = icmp eq i32 0, %src_element
+ br i1 %cmp, label %loop.if, label %loop.latch
+
+loop.if:
+ %r2 = add i32 %r1, 1
+ br label %loop.latch
+
+loop.latch:
+ %r3 = phi i32 [%r1, %loop.header], [%r2, %loop.if]
+ %inc = add nuw nsw i64 %iv, 1
+ %exitcond = icmp eq i64 %inc, 9
+ br i1 %exitcond, label %loop.end, label %loop.header
+
+loop.end:
+ %r.lcssa = phi i32 [ %r3, %loop.latch ]
+ ret i32 %r.lcssa
+}
diff --git a/test/Transforms/LoopUnroll/full-unroll-heuristics-cast.ll b/test/Transforms/LoopUnroll/full-unroll-heuristics-cast.ll
deleted file mode 100644
index cd8cfd75424f..000000000000
--- a/test/Transforms/LoopUnroll/full-unroll-heuristics-cast.ll
+++ /dev/null
@@ -1,97 +0,0 @@
-; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-dynamic-cost-savings-discount=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=50 | FileCheck %s
-target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
-
-@known_constant = internal unnamed_addr constant [10 x i32] [i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1], align 16
-
-; We should be able to propagate constant data through different types of
-; casts. For example, in this test we have a load, which becomes constant after
-; unrolling, which then is truncated to i8. Obviously, truncated value is also a
-; constant, which can be used in the further simplifications.
-;
-; We expect this loop to be unrolled, because in this case load would become
-; constant, which is 0 in many cases, and which, in its turn, helps to simplify
-; following multiplication and addition. In total, unrolling should help to
-; optimize ~60% of all instructions in this case.
-;
-; CHECK-LABEL: @const_load_trunc
-; CHECK-NOT: br i1
-; CHECK: ret i8 %
-define i8 @const_load_trunc(i32* noalias nocapture readonly %src) {
-entry:
- br label %loop
-
-loop: ; preds = %loop, %entry
- %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
- %r = phi i8 [ 0, %entry ], [ %add, %loop ]
- %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
- %src_element = load i32, i32* %arrayidx, align 4
- %array_const_idx = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv
- %const_array_element = load i32, i32* %array_const_idx, align 4
- %x = trunc i32 %src_element to i8
- %y = trunc i32 %const_array_element to i8
- %mul = mul nsw i8 %x, %y
- %add = add nsw i8 %mul, %r
- %inc = add nuw nsw i64 %iv, 1
- %exitcond86.i = icmp eq i64 %inc, 10
- br i1 %exitcond86.i, label %loop.end, label %loop
-
-loop.end: ; preds = %loop
- %r.lcssa = phi i8 [ %r, %loop ]
- ret i8 %r.lcssa
-}
-
-; The same test as before, but with ZEXT instead of TRUNC.
-; CHECK-LABEL: @const_load_zext
-; CHECK-NOT: br i1
-; CHECK: ret i64 %
-define i64 @const_load_zext(i32* noalias nocapture readonly %src) {
-entry:
- br label %loop
-
-loop: ; preds = %loop, %entry
- %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
- %r = phi i64 [ 0, %entry ], [ %add, %loop ]
- %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
- %src_element = load i32, i32* %arrayidx, align 4
- %array_const_idx = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv
- %const_array_element = load i32, i32* %array_const_idx, align 4
- %x = zext i32 %src_element to i64
- %y = zext i32 %const_array_element to i64
- %mul = mul nsw i64 %x, %y
- %add = add nsw i64 %mul, %r
- %inc = add nuw nsw i64 %iv, 1
- %exitcond86.i = icmp eq i64 %inc, 10
- br i1 %exitcond86.i, label %loop.end, label %loop
-
-loop.end: ; preds = %loop
- %r.lcssa = phi i64 [ %r, %loop ]
- ret i64 %r.lcssa
-}
-
-; The same test as the first one, but with SEXT instead of TRUNC.
-; CHECK-LABEL: @const_load_sext
-; CHECK-NOT: br i1
-; CHECK: ret i64 %
-define i64 @const_load_sext(i32* noalias nocapture readonly %src) {
-entry:
- br label %loop
-
-loop: ; preds = %loop, %entry
- %iv = phi i64 [ 0, %entry ], [ %inc, %loop ]
- %r = phi i64 [ 0, %entry ], [ %add, %loop ]
- %arrayidx = getelementptr inbounds i32, i32* %src, i64 %iv
- %src_element = load i32, i32* %arrayidx, align 4
- %array_const_idx = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv
- %const_array_element = load i32, i32* %array_const_idx, align 4
- %x = sext i32 %src_element to i64
- %y = sext i32 %const_array_element to i64
- %mul = mul nsw i64 %x, %y
- %add = add nsw i64 %mul, %r
- %inc = add nuw nsw i64 %iv, 1
- %exitcond86.i = icmp eq i64 %inc, 10
- br i1 %exitcond86.i, label %loop.end, label %loop
-
-loop.end: ; preds = %loop
- %r.lcssa = phi i64 [ %r, %loop ]
- ret i64 %r.lcssa
-}
diff --git a/test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll b/test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll
index f7758fa22008..12ad4f26bf89 100644
--- a/test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll
+++ b/test/Transforms/LoopUnroll/full-unroll-heuristics-cmp.ll
@@ -1,41 +1,8 @@
-; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-dynamic-cost-savings-discount=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=50 | FileCheck %s
+; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-dynamic-cost-savings-discount=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=40 | FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
@known_constant = internal unnamed_addr constant [10 x i32] [i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1], align 16
-; We should be able to propagate constant data through comparisons.
-; For example, in this test we have a load, which becomes constant after
-; unrolling, making comparison with 0 also known to be 0 (false) - and that
-; will trigger further simplifications.
-;
-; We expect this loop to be unrolled, because in this case load would become
-; constant, which is always 1, and which, in its turn, helps to simplify
-; following comparison, zero-extension, and addition. In total, unrolling should help to
-; optimize more than 50% of all instructions in this case.
-;
-; CHECK-LABEL: @const_compare
-; CHECK-NOT: br i1 %
-; CHECK: ret i32
-define i32 @const_compare(i32* noalias nocapture readonly %b) {
-entry:
- br label %for.body
-
-for.body: ; preds = %for.inc, %entry
- %iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.body ]
- %r.0 = phi i32 [ 0, %entry ], [ %r.1, %for.body ]
- %arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv.0
- %x1 = load i32, i32* %arrayidx1, align 4
- %cmp = icmp eq i32 %x1, 0
- %cast = zext i1 %cmp to i32
- %iv.1 = add nuw nsw i64 %iv.0, 1
- %r.1 = add i32 %r.0, %cast
- %exitcond = icmp eq i64 %iv.1, 10
- br i1 %exitcond, label %for.end, label %for.body
-
-for.end: ; preds = %for.inc
- ret i32 %r.1
-}
-
; If we can figure out result of comparison on each iteration, we can resolve
; the depending branch. That means, that the unrolled version of the loop would
; have less code, because we don't need not-taken basic blocks there.
@@ -73,70 +40,6 @@ for.end: ; preds = %for.inc
ret i32 %r.1
}
-; This test is similar to the previous one, but in this we use IV in comparison
-; (not a loaded value as we did there).
-; CHECK-LABEL: @branch_iv
-; CHECK-NOT: br i1 %
-; CHECK: ret i64
-define i64 @branch_iv(i64* noalias nocapture readonly %b) {
-entry:
- br label %for.body
-
-for.body: ; preds = %for.inc, %entry
- %indvars.iv = phi i64 [ 0, %entry ], [ %tmp3, %for.inc ]
- %r.030 = phi i64 [ 0, %entry ], [ %r.1, %for.inc ]
- %cmp3 = icmp eq i64 %indvars.iv, 5
- %tmp3 = add nuw nsw i64 %indvars.iv, 1
- br i1 %cmp3, label %if.then, label %for.inc
-
-if.then: ; preds = %for.body
- %arrayidx2 = getelementptr inbounds i64, i64* %b, i64 %tmp3
- %tmp1 = load i64, i64* %arrayidx2, align 4
- %add = add nsw i64 %tmp1, %r.030
- br label %for.inc
-
-for.inc: ; preds = %if.then, %for.body
- %r.1 = phi i64 [ %add, %if.then ], [ %r.030, %for.body ]
- %exitcond = icmp eq i64 %tmp3, 20
- br i1 %exitcond, label %for.end, label %for.body
-
-for.end: ; preds = %for.inc
- ret i64 %r.1
-}
-
-; Induction variables are often casted to another type, and that shouldn't
-; prevent us from folding branches. Tthis test specifically checks if we can
-; handle this. Other than thatm it's similar to the previous test.
-; CHECK-LABEL: @branch_iv_trunc
-; CHECK-NOT: br i1 %
-; CHECK: ret i32
-define i32 @branch_iv_trunc(i32* noalias nocapture readonly %b) {
-entry:
- br label %for.body
-
-for.body: ; preds = %for.inc, %entry
- %indvars.iv = phi i64 [ 0, %entry ], [ %tmp3, %for.inc ]
- %r.030 = phi i32 [ 0, %entry ], [ %r.1, %for.inc ]
- %tmp2 = trunc i64 %indvars.iv to i32
- %cmp3 = icmp eq i32 %tmp2, 5
- %tmp3 = add nuw nsw i64 %indvars.iv, 1
- br i1 %cmp3, label %if.then, label %for.inc
-
-if.then: ; preds = %for.body
- %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %tmp3
- %tmp1 = load i32, i32* %arrayidx2, align 4
- %add = add nsw i32 %tmp1, %r.030
- br label %for.inc
-
-for.inc: ; preds = %if.then, %for.body
- %r.1 = phi i32 [ %add, %if.then ], [ %r.030, %for.body ]
- %exitcond = icmp eq i64 %tmp3, 10
- br i1 %exitcond, label %for.end, label %for.body
-
-for.end: ; preds = %for.inc
- ret i32 %r.1
-}
-
; Check that we don't crash when we analyze icmp with pointer-typed IV and a
; pointer.
; CHECK-LABEL: @ptr_cmp_crash
@@ -173,35 +76,3 @@ loop.body:
loop.exit:
ret void
}
-
-; Loop unroller should be able to predict that a comparison would become
-; constant if the operands are pointers with the same base and constant
-; offsets.
-; We expect this loop to be unrolled, since most of its instructions would
-; become constant after it.
-; CHECK-LABEL: @ptr_cmp
-; CHECK-NOT: br i1 %
-; CHECK: ret i64
-define i64 @ptr_cmp(i8 * %a) {
-entry:
- %limit = getelementptr i8, i8* %a, i64 40
- %start.iv2 = getelementptr i8, i8* %a, i64 7
- br label %loop.body
-
-loop.body:
- %iv.0 = phi i8* [ %a, %entry ], [ %iv.1, %loop.body ]
- %iv2.0 = phi i8* [ %start.iv2, %entry ], [ %iv2.1, %loop.body ]
- %r.0 = phi i64 [ 0, %entry ], [ %r.1, %loop.body ]
- %cast = ptrtoint i8* %iv.0 to i64
- %cmp = icmp eq i8* %iv2.0, %iv.0
- %sub = sext i1 %cmp to i64
- %mul = mul i64 %sub, %cast
- %r.1 = add i64 %r.0, %mul
- %iv.1 = getelementptr inbounds i8, i8* %iv.0, i64 1
- %iv2.1 = getelementptr inbounds i8, i8* %iv2.0, i64 1
- %exitcond = icmp ne i8* %iv.1, %limit
- br i1 %exitcond, label %loop.body, label %loop.exit
-
-loop.exit:
- ret i64 %r.1
-}
diff --git a/test/Transforms/LoopUnroll/full-unroll-heuristics-dce.ll b/test/Transforms/LoopUnroll/full-unroll-heuristics-dce.ll
new file mode 100644
index 000000000000..6ee73b6fe4f3
--- /dev/null
+++ b/test/Transforms/LoopUnroll/full-unroll-heuristics-dce.ll
@@ -0,0 +1,38 @@
+; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-dynamic-cost-savings-discount=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=60 | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+@known_constant = internal unnamed_addr constant [10 x i32] [i32 0, i32 0, i32 0, i32 0, i32 1, i32 0, i32 0, i32 0, i32 0, i32 0], align 16
+
+; If a load becomes a constant after loop unrolling, we can sometimes simplify
+; CFG. This test verifies that we handle such cases.
+; After one operand in an instruction is constant-folded and the
+; instruction is simplified, the other operand might become dead.
+; In this test we have:
+; for i in 1..10:
+;   r += A[i] * B[i]
+; A[i] is 0 at almost every iteration, so there is no need to load B[i] at
+; all.
+
+
+; CHECK-LABEL: @unroll_dce
+; CHECK-NOT: br i1 %exitcond, label %for.end, label %for.body
+define i32 @unroll_dce(i32* noalias nocapture readonly %b) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.body ]
+ %r.0 = phi i32 [ 0, %entry ], [ %r.1, %for.body ]
+ %arrayidx1 = getelementptr inbounds [10 x i32], [10 x i32]* @known_constant, i64 0, i64 %iv.0
+ %x1 = load i32, i32* %arrayidx1, align 4
+ %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv.0
+ %x2 = load i32, i32* %arrayidx2, align 4
+ %mul = mul i32 %x1, %x2
+ %r.1 = add i32 %mul, %r.0
+ %iv.1 = add nuw nsw i64 %iv.0, 1
+ %exitcond = icmp eq i64 %iv.1, 10
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret i32 %r.1
+}
diff --git a/test/Transforms/LoopUnroll/full-unroll-heuristics-geps.ll b/test/Transforms/LoopUnroll/full-unroll-heuristics-geps.ll
new file mode 100644
index 000000000000..723a384ea2d1
--- /dev/null
+++ b/test/Transforms/LoopUnroll/full-unroll-heuristics-geps.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s -S -loop-unroll -unroll-max-iteration-count-to-analyze=100 -unroll-dynamic-cost-savings-discount=1000 -unroll-threshold=10 -unroll-percent-dynamic-cost-saved-threshold=60 | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; When examining gep-instructions we shouldn't consider them simplified if the
+; corresponding memory access isn't simplified. Doing the opposite might bias
+; our estimate, so that we might decide to unroll even a simple memcpy loop.
+;
+; Thus, the following loop shouldn't be unrolled:
+; CHECK-LABEL: @not_simplified_geps
+; CHECK: br i1 %
+; CHECK: ret void
+define void @not_simplified_geps(i32* noalias %b, i32* noalias %c) {
+entry:
+ br label %for.body
+
+for.body:
+ %iv.0 = phi i64 [ 0, %entry ], [ %iv.1, %for.body ]
+ %arrayidx1 = getelementptr inbounds i32, i32* %b, i64 %iv.0
+ %x1 = load i32, i32* %arrayidx1, align 4
+ %arrayidx2 = getelementptr inbounds i32, i32* %c, i64 %iv.0
+ store i32 %x1, i32* %arrayidx2, align 4
+ %iv.1 = add nuw nsw i64 %iv.0, 1
+ %exitcond = icmp eq i64 %iv.1, 10
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
diff --git a/test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll b/test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll
index 6b1943f13bac..bdb8566e0dfc 100644
--- a/test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll
+++ b/test/Transforms/LoopUnroll/high-cost-trip-count-computation.ll
@@ -24,4 +24,38 @@ loopexit: ; preds = %loop
ret i32 0
}
+;; Though SCEV for loop tripcount contains division,
+;; it shouldn't be considered expensive, since the division already
+;; exists in the code and we don't need to expand it once more.
+;; Thus, it shouldn't prevent us from unrolling the loop.
+
+define i32 @test2(i64* %loc, i64 %conv7) {
+; CHECK-LABEL: @test2(
+; CHECK: udiv
+; CHECK: udiv
+; CHECK-NOT: udiv
+; CHECK-LABEL: for.body
+entry:
+ %rem0 = load i64, i64* %loc, align 8
+ %ExpensiveComputation = udiv i64 %rem0, 42 ; <<< Extra computations are added to the trip-count expression
+ br label %bb1
+bb1:
+ %div11 = udiv i64 %ExpensiveComputation, %conv7
+ %cmp.i38 = icmp ugt i64 %div11, 1
+ %div12 = select i1 %cmp.i38, i64 %div11, i64 1
+ br label %for.body
+for.body:
+ %rem1 = phi i64 [ %rem0, %bb1 ], [ %rem2, %for.body ]
+ %k1 = phi i64 [ %div12, %bb1 ], [ %dec, %for.body ]
+ %mul1 = mul i64 %rem1, 48271
+ %rem2 = urem i64 %mul1, 2147483647
+ %dec = add i64 %k1, -1
+ %cmp = icmp eq i64 %dec, 0
+ br i1 %cmp, label %exit, label %for.body
+exit:
+ %rem3 = phi i64 [ %rem2, %for.body ]
+ store i64 %rem3, i64* %loc, align 8
+ ret i32 0
+}
+
!0 = !{i64 1, i64 100}
diff --git a/test/Transforms/LoopUnroll/nsw-tripcount.ll b/test/Transforms/LoopUnroll/nsw-tripcount.ll
deleted file mode 100644
index 98cab32a42a6..000000000000
--- a/test/Transforms/LoopUnroll/nsw-tripcount.ll
+++ /dev/null
@@ -1,32 +0,0 @@
-; RUN: opt -loop-unroll -S %s | FileCheck %s
-
-; extern void f(int);
-; void test1(int v) {
-; for (int i=v; i<=v+1; ++i)
-; f(i);
-; }
-;
-; We can use the nsw information to see that the tripcount will be 2, so the
-; loop should be unrolled as this is always beneficial
-
-declare void @f(i32)
-
-; CHECK-LABEL: @test1
-define void @test1(i32 %v) {
-entry:
- %add = add nsw i32 %v, 1
- br label %for.body
-
-for.body:
- %i.04 = phi i32 [ %v, %entry ], [ %inc, %for.body ]
- tail call void @f(i32 %i.04)
- %inc = add nsw i32 %i.04, 1
- %cmp = icmp slt i32 %i.04, %add
- br i1 %cmp, label %for.body, label %for.end
-
-; CHECK: call void @f
-; CHECK-NOT: br i1
-; CHECK: call void @f
-for.end:
- ret void
-}
diff --git a/test/Transforms/LoopUnroll/partial-unroll-const-bounds.ll b/test/Transforms/LoopUnroll/partial-unroll-const-bounds.ll
new file mode 100644
index 000000000000..a68a9ef2730b
--- /dev/null
+++ b/test/Transforms/LoopUnroll/partial-unroll-const-bounds.ll
@@ -0,0 +1,29 @@
+; RUN: opt < %s -S -unroll-threshold=20 -loop-unroll -unroll-allow-partial -unroll-runtime -unroll-allow-remainder -unroll-dynamic-cost-savings-discount=0 | FileCheck %s
+
+; The Loop TripCount is 9. However unroll factors 3 or 9 exceed given threshold.
+; The test checks that we choose a smaller, power-of-two, unroll count and do not give up on unrolling.
+
+; CHECK: for.body:
+; CHECK: store
+; CHECK: for.body.1:
+; CHECK: store
+
+define void @foo(i32* nocapture %a, i32* nocapture readonly %b) nounwind uwtable {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 1, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+ %ld = load i32, i32* %arrayidx, align 4
+ %idxprom1 = sext i32 %ld to i64
+ %arrayidx2 = getelementptr inbounds i32, i32* %a, i64 %idxprom1
+ %st = trunc i64 %indvars.iv to i32
+ store i32 %st, i32* %arrayidx2, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 10
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/LoopUnroll/partial-unroll-optsize.ll b/test/Transforms/LoopUnroll/partial-unroll-optsize.ll
deleted file mode 100644
index e5e0151761bf..000000000000
--- a/test/Transforms/LoopUnroll/partial-unroll-optsize.ll
+++ /dev/null
@@ -1,53 +0,0 @@
-; RUN: opt < %s -S -loop-unroll -unroll-allow-partial | FileCheck %s
-; RUN: sed -e 's/optsize/minsize/' %s | opt -S -loop-unroll -unroll-allow-partial | FileCheck %s
-
-; Loop size = 3, when the function has the optsize attribute, the
-; OptSizeUnrollThreshold, i.e. 50, is used, hence the loop should be unrolled
-; by 32 times because (1 * 32) + 2 < 50 (whereas (1 * 64 + 2) is not).
-define void @unroll_opt_for_size() nounwind optsize {
-entry:
- br label %loop
-
-loop:
- %iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
- %inc = add i32 %iv, 1
- %exitcnd = icmp uge i32 %inc, 1024
- br i1 %exitcnd, label %exit, label %loop
-
-exit:
- ret void
-}
-
-; CHECK: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: add
-; CHECK-NEXT: icmp
diff --git a/test/Transforms/LoopUnroll/pr27157.ll b/test/Transforms/LoopUnroll/pr27157.ll
new file mode 100644
index 000000000000..917bcf1f9448
--- /dev/null
+++ b/test/Transforms/LoopUnroll/pr27157.ll
@@ -0,0 +1,53 @@
+; RUN: opt -loop-unroll -debug-only=loop-unroll -disable-output < %s
+; REQUIRES: asserts
+; Compile this test with debug flag on to verify domtree right after loop unrolling.
+target datalayout = "E-m:e-i1:8:16-i8:8:16-i64:64-f128:64-v128:64-a:8:16-n32:64"
+
+; PR27157
+define void @foo() {
+entry:
+ br label %loop_header
+loop_header:
+ %iv = phi i64 [ 0, %entry ], [ %iv_next, %loop_latch ]
+ br i1 undef, label %loop_latch, label %loop_exiting_bb1
+loop_exiting_bb1:
+ br i1 false, label %loop_exiting_bb2, label %exit1.loopexit
+loop_exiting_bb2:
+ br i1 false, label %loop_latch, label %bb
+bb:
+ br label %exit1
+loop_latch:
+ %iv_next = add nuw nsw i64 %iv, 1
+ %cmp = icmp ne i64 %iv_next, 2
+ br i1 %cmp, label %loop_header, label %exit2
+exit1.loopexit:
+ br label %exit1
+exit1:
+ ret void
+exit2:
+ ret void
+}
+
+define void @foo2() {
+entry:
+ br label %loop.header
+loop.header:
+ %iv = phi i32 [ 0, %entry ], [ %iv.inc, %latch ]
+ %iv.inc = add i32 %iv, 1
+ br i1 undef, label %diamond, label %latch
+diamond:
+ br i1 undef, label %left, label %right
+left:
+ br i1 undef, label %exit, label %merge
+right:
+ br i1 undef, label %exit, label %merge
+merge:
+ br label %latch
+latch:
+ %end.cond = icmp eq i32 %iv, 1
+ br i1 %end.cond, label %exit1, label %loop.header
+exit:
+ ret void
+exit1:
+ ret void
+}
diff --git a/test/Transforms/LoopUnroll/pr28132.ll b/test/Transforms/LoopUnroll/pr28132.ll
new file mode 100644
index 000000000000..dc2c0b880239
--- /dev/null
+++ b/test/Transforms/LoopUnroll/pr28132.ll
@@ -0,0 +1,77 @@
+; RUN: opt -loop-unroll -S < %s | FileCheck %s
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686-pc-windows-msvc"
+
+declare void @fn1(i8*)
+
+declare i1 @fn2(i8*, i8*)
+
+define void @fn4() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.inc, %entry
+ %i.05 = phi i8 [ 0, %entry ], [ %inc, %for.inc ]
+ store i8 undef, i8* undef, align 4
+ invoke void @fn1(i8* undef)
+ to label %call.i.noexc unwind label %ehcleanup
+
+call.i.noexc: ; preds = %for.body
+ %call1.i2 = invoke i1 @fn2(i8* undef, i8* undef)
+ to label %call1.i.noexc unwind label %ehcleanup
+
+call1.i.noexc: ; preds = %call.i.noexc
+ br i1 undef, label %if.then.i, label %if.end4.i
+
+if.then.i: ; preds = %call1.i.noexc
+ %tmp1 = load i8, i8* undef, align 4
+ %tobool.i = icmp eq i8 undef, undef
+ br i1 undef, label %if.end4.i, label %if.then2.i
+
+if.then2.i: ; preds = %if.then.i
+ %call3.i3 = invoke i1 @fn2(i8* undef, i8* null)
+ to label %call3.i.noexc unwind label %ehcleanup
+
+call3.i.noexc: ; preds = %if.then2.i
+ br label %if.end4.i
+
+if.end4.i: ; preds = %call3.i.noexc, %if.then.i, %call1.i.noexc
+ %tmp2 = load i8, i8* undef, align 4
+ br label %if.then6.i
+
+if.then6.i: ; preds = %if.end4.i
+ %call7.i4 = invoke i1 @fn2(i8* undef, i8* null)
+ to label %call7.i.noexc unwind label %ehcleanup
+
+call7.i.noexc: ; preds = %if.then6.i
+ br label %fn3
+
+fn3: ; preds = %call7.i.noexc
+ %tmp3 = load i8, i8* undef, align 4
+ %inc.i = add nsw i8 undef, undef
+ store i8 undef, i8* undef, align 4
+ br label %for.inc
+
+for.inc: ; preds = %fn3
+ %inc = add nsw i8 %i.05, 1
+ %cmp = icmp slt i8 %inc, 6
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end: ; preds = %for.inc
+ invoke void @throw()
+ to label %unreachable unwind label %ehcleanup
+
+ehcleanup: ; preds = %for.end, %if.then6.i, %if.then2.i, %call.i.noexc, %for.body
+ %cp = cleanuppad within none []
+ cleanupret from %cp unwind to caller
+
+; CHECK: cleanuppad
+; CHECK-NOT: cleanuppad
+
+unreachable: ; preds = %for.end
+ unreachable
+}
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare void @throw()
diff --git a/test/Transforms/LoopUnroll/rebuild_lcssa.ll b/test/Transforms/LoopUnroll/rebuild_lcssa.ll
index 49498492344a..9de638ce4f80 100644
--- a/test/Transforms/LoopUnroll/rebuild_lcssa.ll
+++ b/test/Transforms/LoopUnroll/rebuild_lcssa.ll
@@ -117,3 +117,37 @@ Exit:
%a_lcssa2 = phi i8* [ %a_lcssa1, %L2_exit ]
ret void
}
+
+; PR26688
+; CHECK-LABEL: @foo4
+define i8 @foo4() {
+entry:
+ br label %L1_header
+
+L1_header:
+ %x = icmp eq i32 1, 0
+ br label %L2_header
+
+L2_header:
+ br label %L3_header
+
+L3_header:
+ br i1 true, label %L2_header, label %L3_exiting
+
+L3_exiting:
+ br i1 true, label %L3_body, label %L1_latch
+
+; CHECK: L3_body:
+; CHECK-NEXT: %x.lcssa = phi i1
+L3_body:
+ br i1 %x, label %L3_latch, label %L3_latch
+
+L3_latch:
+ br i1 false, label %L3_header, label %exit
+
+L1_latch:
+ br label %L1_header
+
+exit:
+ ret i8 0
+}
diff --git a/test/Transforms/LoopUnroll/runtime-loop.ll b/test/Transforms/LoopUnroll/runtime-loop.ll
index fea15b65d5a5..2fc4dbd26c77 100644
--- a/test/Transforms/LoopUnroll/runtime-loop.ll
+++ b/test/Transforms/LoopUnroll/runtime-loop.ll
@@ -1,18 +1,30 @@
-; RUN: opt < %s -S -loop-unroll -unroll-runtime=true | FileCheck %s
+; RUN: opt < %s -S -loop-unroll -unroll-runtime=true | FileCheck %s -check-prefix=EPILOG
+; RUN: opt < %s -S -loop-unroll -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
; Tests for unrolling loops with run-time trip counts
-; CHECK: %xtraiter = and i32 %n
-; CHECK: %lcmp.mod = icmp ne i32 %xtraiter, 0
-; CHECK: br i1 %lcmp.mod, label %for.body.prol, label %for.body.preheader.split
+; EPILOG: %xtraiter = and i32 %n
+; EPILOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
+; EPILOG: br i1 %lcmp.mod, label %for.body.epil.preheader, label %for.end.loopexit
+
+; PROLOG: %xtraiter = and i32 %n
+; PROLOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
+; PROLOG: br i1 %lcmp.mod, label %for.body.prol.preheader, label %for.body.prol.loopexit
+
+; EPILOG: for.body.epil:
+; EPILOG: %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %for.body.epil ], [ %indvars.iv.unr, %for.body.epil.preheader ]
+; EPILOG: %epil.iter.sub = sub i32 %epil.iter, 1
+; EPILOG: %epil.iter.cmp = icmp ne i32 %epil.iter.sub, 0
+; EPILOG: br i1 %epil.iter.cmp, label %for.body.epil, label %for.end.loopexit.epilog-lcssa, !llvm.loop !0
+
+; PROLOG: for.body.prol:
+; PROLOG: %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %for.body.prol ], [ 0, %for.body.prol.preheader ]
+; PROLOG: %prol.iter.sub = sub i32 %prol.iter, 1
+; PROLOG: %prol.iter.cmp = icmp ne i32 %prol.iter.sub, 0
+; PROLOG: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.prol.loopexit, !llvm.loop !0
-; CHECK: for.body.prol:
-; CHECK: %indvars.iv.prol = phi i64 [ %indvars.iv.next.prol, %for.body.prol ], [ 0, %for.body.preheader ]
-; CHECK: %prol.iter.sub = sub i32 %prol.iter, 1
-; CHECK: %prol.iter.cmp = icmp ne i32 %prol.iter.sub, 0
-; CHECK: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.preheader.split, !llvm.loop !0
define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
entry:
@@ -39,8 +51,11 @@ for.end: ; preds = %for.body, %entry
; Still try to completely unroll loops with compile-time trip counts
; even if the -unroll-runtime is specified
-; CHECK: for.body:
-; CHECK-NOT: for.body.prol:
+; EPILOG: for.body:
+; EPILOG-NOT: for.body.epil:
+
+; PROLOG: for.body:
+; PROLOG-NOT: for.body.prol:
define i32 @test1(i32* nocapture %a) nounwind uwtable readonly {
entry:
@@ -64,7 +79,8 @@ for.end: ; preds = %for.body
; This is test 2007-05-09-UnknownTripCount.ll which can be unrolled now
; if the -unroll-runtime option is turned on
-; CHECK: bb72.2:
+; EPILOG: bb72.2:
+; PROLOG: bb72.2:
define void @foo(i32 %trips) {
entry:
@@ -86,8 +102,11 @@ cond_true138:
; Test run-time unrolling for a loop that counts down by -2.
-; CHECK: for.body.prol:
-; CHECK: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.preheader.split
+; EPILOG: for.body.epil:
+; EPILOG: br i1 %epil.iter.cmp, label %for.body.epil, label %for.cond.for.end_crit_edge.epilog-lcssa
+
+; PROLOG: for.body.prol:
+; PROLOG: br i1 %prol.iter.cmp, label %for.body.prol, label %for.body.prol.loopexit
define zeroext i16 @down(i16* nocapture %p, i32 %len) nounwind uwtable readonly {
entry:
@@ -116,8 +135,11 @@ for.end: ; preds = %for.cond.for.end_cr
}
; Test run-time unrolling disable metadata.
-; CHECK: for.body:
-; CHECK-NOT: for.body.prol:
+; EPILOG: for.body:
+; EPILOG-NOT: for.body.epil:
+
+; PROLOG: for.body:
+; PROLOG-NOT: for.body.prol:
define zeroext i16 @test2(i16* nocapture %p, i32 %len) nounwind uwtable readonly {
entry:
@@ -148,6 +170,8 @@ for.end: ; preds = %for.cond.for.end_cr
!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.unroll.runtime.disable"}
-; CHECK: !0 = distinct !{!0, !1}
-; CHECK: !1 = !{!"llvm.loop.unroll.disable"}
+; EPILOG: !0 = distinct !{!0, !1}
+; EPILOG: !1 = !{!"llvm.loop.unroll.disable"}
+; PROLOG: !0 = distinct !{!0, !1}
+; PROLOG: !1 = !{!"llvm.loop.unroll.disable"}
diff --git a/test/Transforms/LoopUnroll/runtime-loop1.ll b/test/Transforms/LoopUnroll/runtime-loop1.ll
index dcf159a09a1d..a2e2f8811d63 100644
--- a/test/Transforms/LoopUnroll/runtime-loop1.ll
+++ b/test/Transforms/LoopUnroll/runtime-loop1.ll
@@ -1,19 +1,35 @@
-; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=2 | FileCheck %s
+; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=2 | FileCheck %s -check-prefix=EPILOG
+; RUN: opt < %s -S -loop-unroll -unroll-runtime -unroll-count=2 -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
; This tests that setting the unroll count works
-; CHECK: for.body.preheader:
-; CHECK: br {{.*}} label %for.body.prol, label %for.body.preheader.split, !dbg [[PH_LOC:![0-9]+]]
-; CHECK: for.body.prol:
-; CHECK: br label %for.body.preheader.split, !dbg [[BODY_LOC:![0-9]+]]
-; CHECK: for.body.preheader.split:
-; CHECK: br {{.*}} label %for.end.loopexit, label %for.body.preheader.split.split, !dbg [[PH_LOC]]
-; CHECK: for.body:
-; CHECK: br i1 %exitcond.1, label %for.end.loopexit.unr-lcssa, label %for.body, !dbg [[BODY_LOC]]
-; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body
-; CHECK-DAG: [[PH_LOC]] = !DILocation(line: 101, column: 1, scope: !{{.*}})
-; CHECK-DAG: [[BODY_LOC]] = !DILocation(line: 102, column: 1, scope: !{{.*}})
+; EPILOG: for.body.preheader:
+; EPILOG: br i1 %1, label %for.end.loopexit.unr-lcssa, label %for.body.preheader.new, !dbg [[PH_LOC:![0-9]+]]
+; EPILOG: for.body:
+; EPILOG: br i1 %niter.ncmp.1, label %for.end.loopexit.unr-lcssa.loopexit, label %for.body, !dbg [[BODY_LOC:![0-9]+]]
+; EPILOG-NOT: br i1 %niter.ncmp.2, label %for.end.loopexit{{.*}}, label %for.body
+; EPILOG: for.body.epil.preheader:
+; EPILOG: br label %for.body.epil, !dbg [[EXIT_LOC:![0-9]+]]
+; EPILOG: for.body.epil:
+; EPILOG: br label %for.end.loopexit.epilog-lcssa, !dbg [[BODY_LOC:![0-9]+]]
+
+; EPILOG-DAG: [[PH_LOC]] = !DILocation(line: 101, column: 1, scope: !{{.*}})
+; EPILOG-DAG: [[BODY_LOC]] = !DILocation(line: 102, column: 1, scope: !{{.*}})
+; EPILOG-DAG: [[EXIT_LOC]] = !DILocation(line: 103, column: 1, scope: !{{.*}})
+
+; PROLOG: for.body.preheader:
+; PROLOG: br {{.*}} label %for.body.prol.preheader, label %for.body.prol.loopexit, !dbg [[PH_LOC:![0-9]+]]
+; PROLOG: for.body.prol:
+; PROLOG: br label %for.body.prol.loopexit, !dbg [[BODY_LOC:![0-9]+]]
+; PROLOG: for.body.prol.loopexit:
+; PROLOG: br {{.*}} label %for.end.loopexit, label %for.body.preheader.new, !dbg [[PH_LOC]]
+; PROLOG: for.body:
+; PROLOG: br i1 %exitcond.1, label %for.end.loopexit.unr-lcssa, label %for.body, !dbg [[BODY_LOC]]
+; PROLOG-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body
+
+; PROLOG-DAG: [[PH_LOC]] = !DILocation(line: 101, column: 1, scope: !{{.*}})
+; PROLOG-DAG: [[BODY_LOC]] = !DILocation(line: 102, column: 1, scope: !{{.*}})
define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly !dbg !6 {
entry:
@@ -37,6 +53,7 @@ for.end: ; preds = %for.body, %entry
}
!llvm.module.flags = !{!0, !1, !2}
+!llvm.dbg.cu = !{!11}
!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = !{i32 1, !"PIC Level", i32 2}
@@ -44,8 +61,12 @@ for.end: ; preds = %for.body, %entry
!3 = !{}
!4 = !DISubroutineType(types: !3)
!5 = !DIFile(filename: "test.cpp", directory: "/tmp")
-!6 = distinct !DISubprogram(name: "test", scope: !5, file: !5, line: 99, type: !4, isLocal: false, isDefinition: true, scopeLine: 100, flags: DIFlagPrototyped, isOptimized: false, variables: !3)
+!6 = distinct !DISubprogram(name: "test", scope: !5, file: !5, line: 99, type: !4, isLocal: false, isDefinition: true, scopeLine: 100, flags: DIFlagPrototyped, isOptimized: false, unit: !11, variables: !3)
!7 = !DILocation(line: 100, column: 1, scope: !6)
!8 = !DILocation(line: 101, column: 1, scope: !6)
!9 = !DILocation(line: 102, column: 1, scope: !6)
!10 = !DILocation(line: 103, column: 1, scope: !6)
+!11 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang",
+ file: !5,
+ isOptimized: true, flags: "-O2",
+ splitDebugFilename: "abc.debug", emissionKind: 2)
diff --git a/test/Transforms/LoopUnroll/runtime-loop2.ll b/test/Transforms/LoopUnroll/runtime-loop2.ll
index 7c6bb9690551..63b386c08d05 100644
--- a/test/Transforms/LoopUnroll/runtime-loop2.ll
+++ b/test/Transforms/LoopUnroll/runtime-loop2.ll
@@ -1,12 +1,18 @@
-; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-runtime -unroll-count=8 | FileCheck %s
+; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-runtime -unroll-count=8 | FileCheck %s -check-prefix=EPILOG
+; RUN: opt < %s -S -loop-unroll -unroll-threshold=25 -unroll-runtime -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
; Choose a smaller, power-of-two, unroll count if the loop is too large.
; This test makes sure we're not unrolling 'odd' counts
-; CHECK: for.body.prol:
-; CHECK: for.body:
-; CHECK: br i1 %exitcond.3, label %for.end.loopexit{{.*}}, label %for.body
-; CHECK-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body
+; EPILOG: for.body:
+; EPILOG: br i1 %niter.ncmp.3, label %for.end.loopexit.unr-lcssa.loopexit{{.*}}, label %for.body
+; EPILOG-NOT: br i1 %niter.ncmp.4, label %for.end.loopexit.unr-lcssa.loopexit{{.*}}, label %for.body
+; EPILOG: for.body.epil:
+
+; PROLOG: for.body.prol:
+; PROLOG: for.body:
+; PROLOG: br i1 %exitcond.3, label %for.end.loopexit{{.*}}, label %for.body
+; PROLOG-NOT: br i1 %exitcond.4, label %for.end.loopexit{{.*}}, label %for.body
define i32 @test(i32* nocapture %a, i32 %n) nounwind uwtable readonly {
entry:
diff --git a/test/Transforms/LoopUnroll/runtime-loop4.ll b/test/Transforms/LoopUnroll/runtime-loop4.ll
index 5014c739e4c9..8f1589134f75 100644
--- a/test/Transforms/LoopUnroll/runtime-loop4.ll
+++ b/test/Transforms/LoopUnroll/runtime-loop4.ll
@@ -1,13 +1,21 @@
-; RUN: opt < %s -S -O2 -unroll-runtime=true | FileCheck %s
+; RUN: opt < %s -S -O2 -unroll-runtime=true | FileCheck %s -check-prefix=EPILOG
+; RUN: opt < %s -S -O2 -unroll-runtime=true -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
; Check runtime unrolling prologue can be promoted by LICM pass.
-; CHECK: entry:
-; CHECK: %xtraiter
-; CHECK: %lcmp.mod
-; CHECK: loop1:
-; CHECK: br i1 %lcmp.mod
-; CHECK: loop2.prol:
+; EPILOG: entry:
+; EPILOG: %xtraiter
+; EPILOG: %lcmp.mod
+; EPILOG: loop1:
+; EPILOG: br i1 %lcmp.mod
+; EPILOG: loop2.epil:
+
+; PROLOG: entry:
+; PROLOG: %xtraiter
+; PROLOG: %lcmp.mod
+; PROLOG: loop1:
+; PROLOG: br i1 %lcmp.mod
+; PROLOG: loop2.prol:
define void @unroll(i32 %iter, i32* %addr1, i32* %addr2) nounwind {
entry:
diff --git a/test/Transforms/LoopUnroll/runtime-loop5.ll b/test/Transforms/LoopUnroll/runtime-loop5.ll
index e8d51775ce18..78a3eeb1a37a 100644
--- a/test/Transforms/LoopUnroll/runtime-loop5.ll
+++ b/test/Transforms/LoopUnroll/runtime-loop5.ll
@@ -11,9 +11,6 @@ entry:
%cmp1 = icmp eq i3 %n, 0
br i1 %cmp1, label %for.end, label %for.body
-; UNROLL-16-NOT: for.body.prol:
-; UNROLL-4: for.body.prol:
-
for.body: ; preds = %for.body, %entry
; UNROLL-16-LABEL: for.body:
; UNROLL-4-LABEL: for.body:
@@ -39,6 +36,10 @@ for.body: ; preds = %for.body, %entry
; UNROLL-16-LABEL: for.end
; UNROLL-4-LABEL: for.end
+
+; UNROLL-16-NOT: for.body.epil:
+; UNROLL-4: for.body.epil:
+
for.end: ; preds = %for.body, %entry
%sum.0.lcssa = phi i3 [ 0, %entry ], [ %add, %for.body ]
ret i3 %sum.0.lcssa
diff --git a/test/Transforms/LoopUnroll/tripcount-overflow.ll b/test/Transforms/LoopUnroll/tripcount-overflow.ll
index 052077cdd5dd..7156629af6d1 100644
--- a/test/Transforms/LoopUnroll/tripcount-overflow.ll
+++ b/test/Transforms/LoopUnroll/tripcount-overflow.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll | FileCheck %s
+; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll | FileCheck %s -check-prefix=EPILOG
+; RUN: opt < %s -S -unroll-runtime -unroll-count=2 -loop-unroll -unroll-runtime-epilog=false | FileCheck %s -check-prefix=PROLOG
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; This test case documents how runtime loop unrolling handles the case
@@ -9,17 +10,28 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
; is divisible by 2. The prologue then branches to the unrolled loop
; and executes the 2^32 iterations there, in groups of 2.
+; EPILOG: entry:
-; CHECK: entry:
-; CHECK-NEXT: %0 = add i32 %N, 1
-; CHECK-NEXT: %xtraiter = and i32 %0, 1
-; CHECK-NEXT: %lcmp.mod = icmp ne i32 %xtraiter, 0
-; CHECK-NEXT: br i1 %lcmp.mod, label %while.body.prol, label %entry.split
+; EPILOG-NEXT: %0 = add i32 %N, 1
+; EPILOG-NEXT: %xtraiter = and i32 %0, 1
+; EPILOG-NEXT: %1 = icmp ult i32 %N, 1
+; EPILOG-NEXT: br i1 %1, label %while.end.unr-lcssa, label %entry.new
+; EPILOG: while.body:
-; CHECK: while.body.prol:
-; CHECK: br label %entry.split
+; EPILOG: %lcmp.mod = icmp ne i32 %xtraiter, 0
+; EPILOG-NEXT: br i1 %lcmp.mod, label %while.body.epil.preheader, label %while.end
+; EPILOG: while.body.epil:
-; CHECK: entry.split:
+; PROLOG: entry:
+; PROLOG-NEXT: %0 = add i32 %N, 1
+; PROLOG-NEXT: %xtraiter = and i32 %0, 1
+; PROLOG-NEXT: %lcmp.mod = icmp ne i32 %xtraiter, 0
+; PROLOG-NEXT: br i1 %lcmp.mod, label %while.body.prol.preheader, label %while.body.prol.loopexit
+; PROLOG: while.body.prol:
+
+; PROLOG: %1 = icmp ult i32 %N, 1
+; PROLOG-NEXT: br i1 %1, label %while.end, label %entry.new
+; PROLOG: while.body:
; Function Attrs: nounwind readnone ssp uwtable
define i32 @foo(i32 %N) {
diff --git a/test/Transforms/LoopUnroll/unroll-cleanup.ll b/test/Transforms/LoopUnroll/unroll-cleanup.ll
index 1e42203876ec..163a469661c8 100644
--- a/test/Transforms/LoopUnroll/unroll-cleanup.ll
+++ b/test/Transforms/LoopUnroll/unroll-cleanup.ll
@@ -4,14 +4,14 @@
; RUN: opt < %s -O2 -S | FileCheck %s
; After loop unroll:
-; %dec18 = add nsw i32 %dec18.in, -1
+; %niter.nsub = add nsw i32 %niter, -1
; ...
-; %dec18.1 = add nsw i32 %dec18, -1
+; %niter.nsub.1 = add nsw i32 %niter.nsub, -1
; should be merged to:
-; %dec18.1 = add nsw i32 %dec18.in, -2
+; %dec18.1 = add nsw i32 %niter, -2
;
; CHECK-LABEL: @_Z3fn1v(
-; CHECK: %dec18.1 = add nsw i32 %dec18.in, -2
+; CHECK: %niter.nsub.1 = add i32 %niter, -2
; ModuleID = '<stdin>'
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/LoopUnroll/unroll-cleanuppad.ll b/test/Transforms/LoopUnroll/unroll-cleanuppad.ll
new file mode 100644
index 000000000000..67f3194f4741
--- /dev/null
+++ b/test/Transforms/LoopUnroll/unroll-cleanuppad.ll
@@ -0,0 +1,40 @@
+; RUN: opt -S -loop-unroll %s | FileCheck %s
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc18.0.0"
+
+define void @test1() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ br label %for.body
+
+for.body: ; preds = %entry, %for.inc
+ %phi = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ invoke void @callee(i32 %phi)
+ to label %for.inc unwind label %ehcleanup
+
+for.inc: ; preds = %for.body
+ %inc = add nuw nsw i32 %phi, 1
+ %cmp = icmp slt i32 %inc, 3
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.inc
+ call void @dtor()
+ ret void
+
+ehcleanup: ; preds = %for.body
+ %cp = cleanuppad within none []
+ call void @dtor() [ "funclet"(token %cp) ]
+ cleanupret from %cp unwind to caller
+}
+
+; CHECK-LABEL: define void @test1(
+; CHECK: invoke void @callee(i32 0
+
+; CHECK: invoke void @callee(i32 1
+
+; CHECK: invoke void @callee(i32 2
+
+declare void @callee(i32)
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare void @dtor()
diff --git a/test/Transforms/LoopUnroll/unroll-count.ll b/test/Transforms/LoopUnroll/unroll-count.ll
new file mode 100644
index 000000000000..f22c22bab0f1
--- /dev/null
+++ b/test/Transforms/LoopUnroll/unroll-count.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -S -loop-unroll -unroll-count=2 | FileCheck %s
+; Checks that "llvm.loop.unroll.disable" is set when
+; unroll with count set by user has been applied.
+;
+; CHECK-LABEL: @foo(
+; CHECK: llvm.loop.unroll.disable
+
+define void @foo(i32* nocapture %a) {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 64
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
diff --git a/test/Transforms/LoopUnroll/unroll-opt-attribute.ll b/test/Transforms/LoopUnroll/unroll-opt-attribute.ll
new file mode 100644
index 000000000000..f4a76c71368d
--- /dev/null
+++ b/test/Transforms/LoopUnroll/unroll-opt-attribute.ll
@@ -0,0 +1,130 @@
+; RUN: opt < %s -S -loop-unroll -unroll-count=4 | FileCheck -check-prefix=CHECK_COUNT4 %s
+; RUN: opt < %s -S -loop-unroll | FileCheck -check-prefix=CHECK_NOCOUNT %s
+
+
+;///////////////////// TEST 1 //////////////////////////////
+
+; This test shows that the loop is unrolled according to the specified
+; unroll factor.
+
+define void @Test1() nounwind {
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
+ %inc = add i32 %iv, 1
+ %exitcnd = icmp uge i32 %inc, 1024
+ br i1 %exitcnd, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; CHECK_COUNT4-LABEL: @Test1
+; CHECK_COUNT4: phi
+; CHECK_COUNT4-NEXT: add
+; CHECK_COUNT4-NEXT: add
+; CHECK_COUNT4-NEXT: add
+; CHECK_COUNT4-NEXT: add
+; CHECK_COUNT4-NEXT: icmp
+
+
+;///////////////////// TEST 2 //////////////////////////////
+
+; This test shows that with optnone attribute, the loop is not unrolled
+; even if an unroll factor was specified.
+
+define void @Test2() nounwind optnone noinline {
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %inc, %loop ]
+ %inc = add i32 %iv, 1
+ %exitcnd = icmp uge i32 %inc, 1024
+ br i1 %exitcnd, label %exit, label %loop
+
+exit:
+ ret void
+}
+
+; CHECK_COUNT4-LABEL: @Test2
+; CHECK_COUNT4: phi
+; CHECK_COUNT4-NEXT: add
+; CHECK_COUNT4-NEXT: icmp
+
+
+;///////////////////// TEST 3 //////////////////////////////
+
+; This test shows that this loop is fully unrolled by default.
+
+@tab = common global [24 x i32] zeroinitializer, align 4
+
+define i32 @Test3() {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds [24 x i32], [24 x i32]* @tab, i32 0, i32 %i.05
+ store i32 %i.05, i32* %arrayidx, align 4
+ %inc = add nuw nsw i32 %i.05, 1
+ %exitcond = icmp eq i32 %inc, 24
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret i32 42
+}
+
+; CHECK_NOCOUNT-LABEL: @Test3
+; CHECK_NOCOUNT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: store
+; CHECK_NOCOUNT-NEXT: ret
+
+
+;///////////////////// TEST 4 //////////////////////////////
+
+; This test shows that with optsize attribute, this loop is not unrolled.
+
+define i32 @Test4() optsize {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %i.05 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %arrayidx = getelementptr inbounds [24 x i32], [24 x i32]* @tab, i32 0, i32 %i.05
+ store i32 %i.05, i32* %arrayidx, align 4
+ %inc = add nuw nsw i32 %i.05, 1
+ %exitcond = icmp eq i32 %inc, 24
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret i32 42
+}
+
+; CHECK_NOCOUNT-LABEL: @Test4
+; CHECK_NOCOUNT: phi
+; CHECK_NOCOUNT: icmp
diff --git a/test/Transforms/LoopUnroll/unroll-pragmas.ll b/test/Transforms/LoopUnroll/unroll-pragmas.ll
index b915b4fdf489..5b405a030a16 100644
--- a/test/Transforms/LoopUnroll/unroll-pragmas.ll
+++ b/test/Transforms/LoopUnroll/unroll-pragmas.ll
@@ -108,6 +108,29 @@ for.end: ; preds = %for.body
!3 = !{!3, !4}
!4 = !{!"llvm.loop.unroll.full"}
+; #pragma clang loop unroll(full)
+; Loop should be fully unrolled, even for optsize.
+;
+; CHECK-LABEL: @loop64_with_full_optsize(
+; CHECK-NOT: br i1
+define void @loop64_with_full_optsize(i32* nocapture %a) optsize {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 64
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !3
+
+for.end: ; preds = %for.body
+ ret void
+}
+
; #pragma clang loop unroll_count(4)
; Loop should be unrolled 4 times.
;
@@ -171,10 +194,6 @@ for.end: ; preds = %for.body, %entry
; should be duplicated (original and 4x unrolled).
;
; CHECK-LABEL: @runtime_loop_with_count4(
-; CHECK: for.body.prol:
-; CHECK: store
-; CHECK-NOT: store
-; CHECK: br i1
; CHECK: for.body
; CHECK: store
; CHECK: store
@@ -182,6 +201,10 @@ for.end: ; preds = %for.body, %entry
; CHECK: store
; CHECK-NOT: store
; CHECK: br i1
+; CHECK: for.body.epil:
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: br i1
define void @runtime_loop_with_count4(i32* nocapture %a, i32 %b) {
entry:
%cmp3 = icmp sgt i32 %b, 0
@@ -287,10 +310,6 @@ for.end: ; preds = %for.body
; (original and 8x).
;
; CHECK-LABEL: @runtime_loop_with_enable(
-; CHECK: for.body.prol:
-; CHECK: store
-; CHECK-NOT: store
-; CHECK: br i1
; CHECK: for.body:
; CHECK: store i32
; CHECK: store i32
@@ -302,6 +321,10 @@ for.end: ; preds = %for.body
; CHECK: store i32
; CHECK-NOT: store i32
; CHECK: br i1
+; CHECK: for.body.epil:
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: br i1
define void @runtime_loop_with_enable(i32* nocapture %a, i32 %b) {
entry:
%cmp3 = icmp sgt i32 %b, 0
@@ -322,3 +345,40 @@ for.end: ; preds = %for.body, %entry
ret void
}
!15 = !{!15, !14}
+
+; #pragma clang loop unroll_count(3)
+; Loop has a runtime trip count. Runtime unrolling should occur and loop
+; should be duplicated (original and 3x unrolled).
+;
+; CHECK-LABEL: @runtime_loop_with_count3(
+; CHECK: for.body
+; CHECK: store
+; CHECK: store
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: br i1
+; CHECK: for.body.epil:
+; CHECK: store
+; CHECK-NOT: store
+; CHECK: br i1
+define void @runtime_loop_with_count3(i32* nocapture %a, i32 %b) {
+entry:
+ %cmp3 = icmp sgt i32 %b, 0
+ br i1 %cmp3, label %for.body, label %for.end, !llvm.loop !16
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %b
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !16
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+!16 = !{!16, !17}
+!17 = !{!"llvm.loop.unroll.count", i32 3}
diff --git a/test/Transforms/LoopUnswitch/2011-09-26-EHCrash.ll b/test/Transforms/LoopUnswitch/2011-09-26-EHCrash.ll
index 1a929d68573a..5d763a9b3e70 100644
--- a/test/Transforms/LoopUnswitch/2011-09-26-EHCrash.ll
+++ b/test/Transforms/LoopUnswitch/2011-09-26-EHCrash.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -scalarrepl-ssa -loop-unswitch -disable-output
+; RUN: opt < %s -sroa -loop-unswitch -disable-output
; PR11016
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
target triple = "x86_64-apple-macosx10.7.2"
diff --git a/test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll b/test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll
index d536da1e8b60..a215be9d4877 100644
--- a/test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll
+++ b/test/Transforms/LoopUnswitch/2015-06-17-Metadata.ll
@@ -16,23 +16,23 @@ for.body: ; preds = %for.inc, %for.body.
%cmp1 = icmp eq i32 %a, 12345
br i1 %cmp1, label %if.then, label %if.else, !prof !0
; CHECK: %cmp1 = icmp eq i32 %a, 12345
-; CHECK-NEXT: br i1 %cmp1, label %if.then.us, label %if.else, !prof !0
+; CHECK-NEXT: br i1 %cmp1, label %for.body.us, label %for.body, !prof !0
if.then: ; preds = %for.body
-; CHECK: if.then.us:
+; CHECK: for.body.us:
; CHECK: add nsw i32 %{{.*}}, 123
; CHECK: %exitcond.us = icmp eq i32 %inc.us, %b
-; CHECK: br i1 %exitcond.us, label %for.cond.cleanup, label %if.then.us
+; CHECK: br i1 %exitcond.us, label %for.cond.cleanup, label %for.body.us
%add = add nsw i32 %add.i, 123
br label %for.inc
if.else: ; preds = %for.body
%mul = mul nsw i32 %mul.i, %b
br label %for.inc
-; CHECK: if.else:
+; CHECK: for.body:
; CHECK: %mul = mul nsw i32 %mul.i, %b
; CHECK: %inc = add nuw nsw i32 %inc.i, 1
; CHECK: %exitcond = icmp eq i32 %inc, %b
-; CHECK: br i1 %exitcond, label %for.cond.cleanup, label %if.else
+; CHECK: br i1 %exitcond, label %for.cond.cleanup, label %for.body
for.inc: ; preds = %if.then, %if.else
%mul.p = phi i32 [ %b, %if.then ], [ %mul, %if.else ]
%add.p = phi i32 [ %add, %if.then ], [ %a, %if.else ]
diff --git a/test/Transforms/LoopUnswitch/exponential-behavior.ll b/test/Transforms/LoopUnswitch/exponential-behavior.ll
new file mode 100644
index 000000000000..fb5a1ccf87b8
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/exponential-behavior.ll
@@ -0,0 +1,51 @@
+; RUN: opt -loop-unswitch -S < %s | FileCheck %s
+
+define void @f(i32 %n, i32* %ptr) {
+; CHECK-LABEL: @f(
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.inc, %be ]
+ %iv.inc = add i32 %iv, 1
+ %unswitch_cond_root = icmp ne i32 %iv.inc, 42
+ %us.0 = and i1 %unswitch_cond_root, %unswitch_cond_root
+ %us.1 = and i1 %us.0, %us.0
+ %us.2 = and i1 %us.1, %us.1
+ %us.3 = and i1 %us.2, %us.2
+ %us.4 = and i1 %us.3, %us.3
+ %us.5 = and i1 %us.4, %us.4
+ %us.6 = and i1 %us.5, %us.5
+ %us.7 = and i1 %us.6, %us.6
+ %us.8 = and i1 %us.7, %us.7
+ %us.9 = and i1 %us.8, %us.8
+ %us.10 = and i1 %us.9, %us.9
+ %us.11 = and i1 %us.10, %us.10
+ %us.12 = and i1 %us.11, %us.11
+ %us.13 = and i1 %us.12, %us.12
+ %us.14 = and i1 %us.13, %us.13
+ %us.15 = and i1 %us.14, %us.14
+ %us.16 = and i1 %us.15, %us.15
+ %us.17 = and i1 %us.16, %us.16
+ %us.18 = and i1 %us.17, %us.17
+ %us.19 = and i1 %us.18, %us.18
+ %us.20 = and i1 %us.19, %us.19
+ %us.21 = and i1 %us.20, %us.20
+ %us.22 = and i1 %us.21, %us.21
+ %us.23 = and i1 %us.22, %us.22
+ %us.24 = and i1 %us.23, %us.23
+ %us.25 = and i1 %us.24, %us.24
+ %us.26 = and i1 %us.25, %us.25
+ %us.27 = and i1 %us.26, %us.26
+ %us.28 = and i1 %us.27, %us.27
+ %us.29 = and i1 %us.28, %us.28
+ br i1 %us.29, label %leave, label %be
+
+be:
+ store volatile i32 0, i32* %ptr
+ %becond = icmp ult i32 %iv.inc, %n
+ br i1 %becond, label %leave, label %loop
+
+leave:
+ ret void
+}
diff --git a/test/Transforms/LoopUnswitch/guards.ll b/test/Transforms/LoopUnswitch/guards.ll
new file mode 100644
index 000000000000..558853389602
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/guards.ll
@@ -0,0 +1,97 @@
+; RUN: opt -S -loop-unswitch < %s | FileCheck %s
+
+declare void @llvm.experimental.guard(i1, ...)
+
+define void @f_0(i32 %n, i32* %ptr, i1 %c) {
+; CHECK-LABEL: @f_0(
+; CHECK: loop.us:
+; CHECK-NOT: guard
+; CHECK: loop:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.inc, %loop ]
+ %iv.inc = add i32 %iv, 1
+ call void(i1, ...) @llvm.experimental.guard(i1 %c) [ "deopt"() ]
+ store volatile i32 0, i32* %ptr
+ %becond = icmp ult i32 %iv.inc, %n
+ br i1 %becond, label %leave, label %loop
+
+leave:
+ ret void
+}
+
+define void @f_1(i32 %n, i32* %ptr, i1 %c_0, i1 %c_1) {
+; CHECK-LABEL: @f_1(
+; CHECK: loop.us.us:
+; CHECK-NOT: guard
+; CHECK: loop.us:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"(i32 2) ]
+; CHECK-NOT: guard
+; CHECK: loop.us1:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"(i32 1) ]
+; CHECK-NOT: guard
+; CHECK: loop:
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"(i32 1) ]
+; CHECK: call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"(i32 2) ]
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.inc, %loop ]
+ %iv.inc = add i32 %iv, 1
+ call void(i1, ...) @llvm.experimental.guard(i1 %c_0) [ "deopt"(i32 1) ]
+ store volatile i32 0, i32* %ptr
+ call void(i1, ...) @llvm.experimental.guard(i1 %c_1) [ "deopt"(i32 2) ]
+ %becond = icmp ult i32 %iv.inc, %n
+ br i1 %becond, label %leave, label %loop
+
+leave:
+ ret void
+}
+
+; Basic negative test
+
+define void @f_3(i32 %n, i32* %ptr, i1* %c_ptr) {
+; CHECK-LABEL: @f_3(
+; CHECK-NOT: loop.us:
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.inc, %loop ]
+ %iv.inc = add i32 %iv, 1
+ %c = load volatile i1, i1* %c_ptr
+ call void(i1, ...) @llvm.experimental.guard(i1 %c) [ "deopt"() ]
+ store volatile i32 0, i32* %ptr
+ %becond = icmp ult i32 %iv.inc, %n
+ br i1 %becond, label %leave, label %loop
+
+leave:
+ ret void
+}
+
+define void @f_4(i32 %n, i32* %ptr, i1 %c) {
+; CHECK-LABEL: @f_4(
+;
+; Demonstrate that unswitching on one guard can cause another guard to
+; be erased (this has implications on what guards we can keep raw
+; pointers to).
+entry:
+ br label %loop
+
+loop:
+ %iv = phi i32 [ 0, %entry ], [ %iv.inc, %loop ]
+ %iv.inc = add i32 %iv, 1
+ call void(i1, ...) @llvm.experimental.guard(i1 %c) [ "deopt"(i32 1) ]
+ store volatile i32 0, i32* %ptr
+ %neg = xor i1 %c, 1
+ call void(i1, ...) @llvm.experimental.guard(i1 %neg) [ "deopt"(i32 2) ]
+ %becond = icmp ult i32 %iv.inc, %n
+ br i1 %becond, label %leave, label %loop
+
+leave:
+ ret void
+}
diff --git a/test/Transforms/LoopUnswitch/infinite-loop.ll b/test/Transforms/LoopUnswitch/infinite-loop.ll
index 3d1c895edec9..0aef9092a1fe 100644
--- a/test/Transforms/LoopUnswitch/infinite-loop.ll
+++ b/test/Transforms/LoopUnswitch/infinite-loop.ll
@@ -16,10 +16,10 @@
; CHECK-NEXT: br i1 %a, label %entry.split, label %abort0.split
; CHECK: entry.split:
-; CHECK-NEXT: br i1 %b, label %cond.end, label %abort1.split
+; CHECK-NEXT: br i1 %b, label %for.body, label %abort1.split
-; CHECK: cond.end:
-; CHECK-NEXT: br label %cond.end
+; CHECK: for.body:
+; CHECK-NEXT: br label %for.body
; CHECK: abort0.split:
; CHECK-NEXT: call void @end0() [[NOR_NUW:#[0-9]+]]
diff --git a/test/Transforms/LoopUnswitch/msan.ll b/test/Transforms/LoopUnswitch/msan.ll
new file mode 100644
index 000000000000..a5e10e828a7b
--- /dev/null
+++ b/test/Transforms/LoopUnswitch/msan.ll
@@ -0,0 +1,153 @@
+; RUN: opt < %s -loop-unswitch -verify-loop-info -S < %s 2>&1 | FileCheck %s
+
+@sink = global i32 0, align 4
+@y = global i64 0, align 8
+
+; The following is approximately:
+; void f(bool x, int p, int q) {
+; volatile bool x2 = x;
+; for (int i = 0; i < 1; ++i) {
+; if (x2) {
+; if (y)
+; sink = p;
+; else
+; sink = q;
+; }
+; }
+; }
+; With MemorySanitizer, the loop can not be unswitched on "y", because "y" could
+; be uninitialized when x == false.
+; Test that the branch on "y" is inside the loop (after the first unconditional
+; branch).
+
+define void @may_not_execute(i1 zeroext %x, i32 %p, i32 %q) sanitize_memory {
+; CHECK-LABEL: @may_not_execute(
+entry:
+; CHECK: %[[Y:.*]] = load i64, i64* @y, align 8
+; CHECK: %[[YB:.*]] = icmp eq i64 %[[Y]], 0
+; CHECK-NOT: br i1
+; CHECK: br label
+; CHECK: br i1 %[[YB]]
+
+ %x2 = alloca i8, align 1
+ %frombool1 = zext i1 %x to i8
+ store volatile i8 %frombool1, i8* %x2, align 1
+ %0 = load i64, i64* @y, align 8
+ %tobool3 = icmp eq i64 %0, 0
+ br label %for.body
+
+for.body:
+ %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %x2.0. = load volatile i8, i8* %x2, align 1
+ %tobool2 = icmp eq i8 %x2.0., 0
+ br i1 %tobool2, label %for.inc, label %if.then
+
+if.then:
+ br i1 %tobool3, label %if.else, label %if.then4
+
+if.then4:
+ store volatile i32 %p, i32* @sink, align 4
+ br label %for.inc
+
+if.else:
+ store volatile i32 %q, i32* @sink, align 4
+ br label %for.inc
+
+for.inc:
+ %inc = add nsw i32 %i.01, 1
+ %cmp = icmp slt i32 %inc, 1
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+
+; The same as above, but "y" is a function parameter instead of a global.
+; This shows that it is not enough to suppress hoisting of load instructions,
+; the actual problem is in the speculative branching.
+
+define void @may_not_execute2(i1 zeroext %x, i1 zeroext %y, i32 %p, i32 %q) sanitize_memory {
+; CHECK-LABEL: @may_not_execute2(
+entry:
+; CHECK-NOT: br i1
+; CHECK: br label
+; CHECK: br i1 %y,
+ %x2 = alloca i8, align 1
+ %frombool2 = zext i1 %x to i8
+ store volatile i8 %frombool2, i8* %x2, align 1
+ br label %for.body
+
+for.body:
+ %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ %x2.0. = load volatile i8, i8* %x2, align 1
+ %tobool3 = icmp eq i8 %x2.0., 0
+ br i1 %tobool3, label %for.inc, label %if.then
+
+if.then:
+ br i1 %y, label %if.then5, label %if.else
+
+if.then5:
+ store volatile i32 %p, i32* @sink, align 4
+ br label %for.inc
+
+if.else:
+ store volatile i32 %q, i32* @sink, align 4
+ br label %for.inc
+
+for.inc:
+ %inc = add nsw i32 %i.01, 1
+ %cmp = icmp slt i32 %inc, 1
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+
+; The following is approximately:
+; void f(bool x, int p, int q) {
+; volatile bool x2 = x;
+; for (int i = 0; i < 1; ++i) {
+; if (y)
+; sink = p;
+; else
+; sink = q;
+; }
+; }
+; "if (y)" is guaranteed to execute; the loop can be unswitched.
+
+define void @must_execute(i1 zeroext %x, i32 %p, i32 %q) sanitize_memory {
+; CHECK-LABEL: @must_execute(
+entry:
+; CHECK: %[[Y:.*]] = load i64, i64* @y, align 8
+; CHECK-NEXT: %[[YB:.*]] = icmp eq i64 %[[Y]], 0
+; CHECK-NEXT: br i1 %[[YB]],
+
+ %x2 = alloca i8, align 1
+ %frombool1 = zext i1 %x to i8
+ store volatile i8 %frombool1, i8* %x2, align 1
+ %0 = load i64, i64* @y, align 8
+ %tobool2 = icmp eq i64 %0, 0
+ br label %for.body
+
+for.body:
+ %i.01 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
+ br i1 %tobool2, label %if.else, label %if.then
+
+if.then:
+ store volatile i32 %p, i32* @sink, align 4
+ br label %for.inc
+
+if.else:
+ store volatile i32 %q, i32* @sink, align 4
+ br label %for.inc
+
+for.inc:
+ %inc = add nsw i32 %i.01, 1
+ %cmp = icmp slt i32 %inc, 1
+ br i1 %cmp, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll b/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll
new file mode 100644
index 000000000000..aba47f6c628f
--- /dev/null
+++ b/test/Transforms/LoopVectorize/AArch64/backedge-overflow.ll
@@ -0,0 +1,166 @@
+; RUN: opt -mtriple=aarch64--linux-gnueabi -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 < %s -S | FileCheck %s
+
+; The following tests contain loops for which SCEV cannot determine the backedge
+; taken count. This is because the backedge taken condition is produced by an
+; icmp with one of the sides being a loop varying non-AddRec expression.
+; However, there is a possibility to normalize this to an AddRec expression
+; using SCEV predicates. This allows us to compute a 'guarded' backedge count.
+; The Loop Vectorizer is able to version to loop in order to use this guarded
+; backedge count and vectorize more loops.
+
+
+; CHECK-LABEL: test_sge
+; CHECK-LABEL: vector.scevcheck
+; CHECK-LABEL: vector.body
+define void @test_sge(i32* noalias %A,
+ i32* noalias %B,
+ i32* noalias %C, i32 %N) {
+entry:
+ %cmp13 = icmp eq i32 %N, 0
+ br i1 %cmp13, label %for.end, label %for.body.preheader
+
+for.body.preheader:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ 0, %for.body.preheader ]
+ %indvars.next = add i16 %indvars.iv, 1
+ %indvars.ext = zext i16 %indvars.iv to i32
+
+ %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext
+ %1 = load i32, i32* %arrayidx3, align 4
+
+ %mul4 = mul i32 %1, %0
+
+ %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext
+ store i32 %mul4, i32* %arrayidx7, align 4
+
+ %exitcond = icmp sge i32 %indvars.ext, %N
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ ret void
+}
+
+; CHECK-LABEL: test_uge
+; CHECK-LABEL: vector.scevcheck
+; CHECK-LABEL: vector.body
+define void @test_uge(i32* noalias %A,
+ i32* noalias %B,
+ i32* noalias %C, i32 %N, i32 %Offset) {
+entry:
+ %cmp13 = icmp eq i32 %N, 0
+ br i1 %cmp13, label %for.end, label %for.body.preheader
+
+for.body.preheader:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ 0, %for.body.preheader ]
+ %indvars.next = add i16 %indvars.iv, 1
+
+ %indvars.ext = sext i16 %indvars.iv to i32
+ %indvars.access = add i32 %Offset, %indvars.ext
+
+ %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.access
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.access
+ %1 = load i32, i32* %arrayidx3, align 4
+
+ %mul4 = add i32 %1, %0
+
+ %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.access
+ store i32 %mul4, i32* %arrayidx7, align 4
+
+ %exitcond = icmp uge i32 %indvars.ext, %N
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ ret void
+}
+
+; CHECK-LABEL: test_ule
+; CHECK-LABEL: vector.scevcheck
+; CHECK-LABEL: vector.body
+define void @test_ule(i32* noalias %A,
+ i32* noalias %B,
+ i32* noalias %C, i32 %N,
+ i16 %M) {
+entry:
+ %cmp13 = icmp eq i32 %N, 0
+ br i1 %cmp13, label %for.end, label %for.body.preheader
+
+for.body.preheader:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ %M, %for.body.preheader ]
+ %indvars.next = sub i16 %indvars.iv, 1
+ %indvars.ext = zext i16 %indvars.iv to i32
+
+ %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext
+ %1 = load i32, i32* %arrayidx3, align 4
+
+ %mul4 = mul i32 %1, %0
+
+ %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext
+ store i32 %mul4, i32* %arrayidx7, align 4
+
+ %exitcond = icmp ule i32 %indvars.ext, %N
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ ret void
+}
+
+; CHECK-LABEL: test_sle
+; CHECK-LABEL: vector.scevcheck
+; CHECK-LABEL: vector.body
+define void @test_sle(i32* noalias %A,
+ i32* noalias %B,
+ i32* noalias %C, i32 %N,
+ i16 %M) {
+entry:
+ %cmp13 = icmp eq i32 %N, 0
+ br i1 %cmp13, label %for.end, label %for.body.preheader
+
+for.body.preheader:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i16 [ %indvars.next, %for.body ], [ %M, %for.body.preheader ]
+ %indvars.next = sub i16 %indvars.iv, 1
+ %indvars.ext = sext i16 %indvars.iv to i32
+
+ %arrayidx = getelementptr inbounds i32, i32* %B, i32 %indvars.ext
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx3 = getelementptr inbounds i32, i32* %C, i32 %indvars.ext
+ %1 = load i32, i32* %arrayidx3, align 4
+
+ %mul4 = mul i32 %1, %0
+
+ %arrayidx7 = getelementptr inbounds i32, i32* %A, i32 %indvars.ext
+ store i32 %mul4, i32* %arrayidx7, align 4
+
+ %exitcond = icmp sle i32 %indvars.ext, %N
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll b/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
new file mode 100644
index 000000000000..5129568075f0
--- /dev/null
+++ b/test/Transforms/LoopVectorize/AArch64/first-order-recurrence.ll
@@ -0,0 +1,299 @@
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -dce -instcombine -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -dce -instcombine -S | FileCheck %s --check-prefix=UNROLL
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+
+; CHECK-LABEL: @recurrence_1
+;
+; void recurrence_1(int *a, int *b, int n) {
+; for(int i = 0; i < n; i++)
+; b[i] = a[i] + a[i - 1]
+; }
+;
+; CHECK: vector.ph:
+; CHECK: %vector.recur.init = insertelement <4 x i32> undef, i32 %pre_load, i32 3
+;
+; CHECK: vector.body:
+; CHECK: %vector.recur = phi <4 x i32> [ %vector.recur.init, %vector.ph ], [ [[L1:%[a-zA-Z0-9.]+]], %vector.body ]
+; CHECK: [[L1]] = load <4 x i32>
+; CHECK: {{.*}} = shufflevector <4 x i32> %vector.recur, <4 x i32> [[L1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+;
+; CHECK: middle.block:
+; CHECK: %vector.recur.extract = extractelement <4 x i32> [[L1]], i32 3
+;
+; CHECK: scalar.ph:
+; CHECK: %scalar.recur.init = phi i32 [ %vector.recur.extract, %middle.block ], [ %pre_load, %vector.memcheck ], [ %pre_load, %min.iters.checked ], [ %pre_load, %for.preheader ]
+;
+; CHECK: scalar.body:
+; CHECK: %scalar.recur = phi i32 [ %scalar.recur.init, %scalar.ph ], [ {{.*}}, %scalar.body ]
+;
+; UNROLL: vector.body:
+; UNROLL: %vector.recur = phi <4 x i32> [ %vector.recur.init, %vector.ph ], [ [[L2:%[a-zA-Z0-9.]+]], %vector.body ]
+; UNROLL: [[L1:%[a-zA-Z0-9.]+]] = load <4 x i32>
+; UNROLL: [[L2]] = load <4 x i32>
+; UNROLL: {{.*}} = shufflevector <4 x i32> %vector.recur, <4 x i32> [[L1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; UNROLL: {{.*}} = shufflevector <4 x i32> [[L1]], <4 x i32> [[L2]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+;
+; UNROLL: middle.block:
+; UNROLL: %vector.recur.extract = extractelement <4 x i32> [[L2]], i32 3
+;
+define void @recurrence_1(i32* nocapture readonly %a, i32* nocapture %b, i32 %n) {
+entry:
+ br label %for.preheader
+
+for.preheader:
+ %arrayidx.phi.trans.insert = getelementptr inbounds i32, i32* %a, i64 0
+ %pre_load = load i32, i32* %arrayidx.phi.trans.insert
+ br label %scalar.body
+
+scalar.body:
+ %0 = phi i32 [ %pre_load, %for.preheader ], [ %1, %scalar.body ]
+ %indvars.iv = phi i64 [ 0, %for.preheader ], [ %indvars.iv.next, %scalar.body ]
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %arrayidx32 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
+ %1 = load i32, i32* %arrayidx32
+ %arrayidx34 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+ %add35 = add i32 %1, %0
+ store i32 %add35, i32* %arrayidx34
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.exit, label %scalar.body
+
+for.exit:
+ ret void
+}
+
+; CHECK-LABEL: @recurrence_2
+;
+; int recurrence_2(int *a, int n) {
+; int minmax;
+; for (int i = 0; i < n; ++i)
+; minmax = min(minmax, max(a[i] - a[i-1], 0));
+; return minmax;
+; }
+;
+; CHECK: vector.ph:
+; CHECK: %vector.recur.init = insertelement <4 x i32> undef, i32 %.pre, i32 3
+;
+; CHECK: vector.body:
+; CHECK: %vector.recur = phi <4 x i32> [ %vector.recur.init, %vector.ph ], [ [[L1:%[a-zA-Z0-9.]+]], %vector.body ]
+; CHECK: [[L1]] = load <4 x i32>
+; CHECK: {{.*}} = shufflevector <4 x i32> %vector.recur, <4 x i32> [[L1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+;
+; CHECK: middle.block:
+; CHECK: %vector.recur.extract = extractelement <4 x i32> [[L1]], i32 3
+;
+; CHECK: scalar.ph:
+; CHECK: %scalar.recur.init = phi i32 [ %vector.recur.extract, %middle.block ], [ %.pre, %min.iters.checked ], [ %.pre, %for.preheader ]
+;
+; CHECK: scalar.body:
+; CHECK: %scalar.recur = phi i32 [ %scalar.recur.init, %scalar.ph ], [ {{.*}}, %scalar.body ]
+;
+; UNROLL: vector.body:
+; UNROLL: %vector.recur = phi <4 x i32> [ %vector.recur.init, %vector.ph ], [ [[L2:%[a-zA-Z0-9.]+]], %vector.body ]
+; UNROLL: [[L1:%[a-zA-Z0-9.]+]] = load <4 x i32>
+; UNROLL: [[L2]] = load <4 x i32>
+; UNROLL: {{.*}} = shufflevector <4 x i32> %vector.recur, <4 x i32> [[L1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; UNROLL: {{.*}} = shufflevector <4 x i32> [[L1]], <4 x i32> [[L2]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+;
+; UNROLL: middle.block:
+; UNROLL: %vector.recur.extract = extractelement <4 x i32> [[L2]], i32 3
+;
+define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) {
+entry:
+ %cmp27 = icmp sgt i32 %n, 0
+ br i1 %cmp27, label %for.preheader, label %for.cond.cleanup
+
+for.preheader:
+ %arrayidx2.phi.trans.insert = getelementptr inbounds i32, i32* %a, i64 -1
+ %.pre = load i32, i32* %arrayidx2.phi.trans.insert, align 4
+ br label %scalar.body
+
+for.cond.cleanup.loopexit:
+ %minmax.0.cond.lcssa = phi i32 [ %minmax.0.cond, %scalar.body ]
+ br label %for.cond.cleanup
+
+for.cond.cleanup:
+ %minmax.0.lcssa = phi i32 [ undef, %entry ], [ %minmax.0.cond.lcssa, %for.cond.cleanup.loopexit ]
+ ret i32 %minmax.0.lcssa
+
+scalar.body:
+ %0 = phi i32 [ %.pre, %for.preheader ], [ %1, %scalar.body ]
+ %indvars.iv = phi i64 [ 0, %for.preheader ], [ %indvars.iv.next, %scalar.body ]
+ %minmax.028 = phi i32 [ undef, %for.preheader ], [ %minmax.0.cond, %scalar.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
+ %1 = load i32, i32* %arrayidx, align 4
+ %sub3 = sub nsw i32 %1, %0
+ %cmp4 = icmp sgt i32 %sub3, 0
+ %cond = select i1 %cmp4, i32 %sub3, i32 0
+ %cmp5 = icmp slt i32 %minmax.028, %cond
+ %minmax.0.cond = select i1 %cmp5, i32 %minmax.028, i32 %cond
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.cond.cleanup.loopexit, label %scalar.body
+}
+
+; CHECK-LABEL: @recurrence_3
+;
+; void recurrence_3(short *a, double *b, int n, float f, short p) {
+; b[0] = (double)a[0] - f * (double)p;
+; for (int i = 1; i < n; i++)
+; b[i] = (double)a[i] - f * (double)a[i - 1];
+; }
+;
+;
+; CHECK: vector.ph:
+; CHECK: %vector.recur.init = insertelement <4 x i16> undef, i16 %0, i32 3
+;
+; CHECK: vector.body:
+; CHECK: %vector.recur = phi <4 x i16> [ %vector.recur.init, %vector.ph ], [ [[L1:%[a-zA-Z0-9.]+]], %vector.body ]
+; CHECK: [[L1]] = load <4 x i16>
+; CHECK: {{.*}} = shufflevector <4 x i16> %vector.recur, <4 x i16> [[L1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+;
+; CHECK: middle.block:
+; CHECK: %vector.recur.extract = extractelement <4 x i16> [[L1]], i32 3
+;
+; CHECK: scalar.ph:
+; CHECK: %scalar.recur.init = phi i16 [ %vector.recur.extract, %middle.block ], [ %0, %vector.memcheck ], [ %0, %min.iters.checked ], [ %0, %for.preheader ]
+;
+; CHECK: scalar.body:
+; CHECK: %scalar.recur = phi i16 [ %scalar.recur.init, %scalar.ph ], [ {{.*}}, %scalar.body ]
+;
+; UNROLL: vector.body:
+; UNROLL: %vector.recur = phi <4 x i16> [ %vector.recur.init, %vector.ph ], [ [[L2:%[a-zA-Z0-9.]+]], %vector.body ]
+; UNROLL: [[L1:%[a-zA-Z0-9.]+]] = load <4 x i16>
+; UNROLL: [[L2]] = load <4 x i16>
+; UNROLL: {{.*}} = shufflevector <4 x i16> %vector.recur, <4 x i16> [[L1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+; UNROLL: {{.*}} = shufflevector <4 x i16> [[L1]], <4 x i16> [[L2]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
+;
+; UNROLL: middle.block:
+; UNROLL: %vector.recur.extract = extractelement <4 x i16> [[L2]], i32 3
+;
+define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 %n, float %f, i16 %p) {
+entry:
+ %0 = load i16, i16* %a, align 2
+ %conv = sitofp i16 %0 to double
+ %conv1 = fpext float %f to double
+ %conv2 = sitofp i16 %p to double
+ %mul = fmul fast double %conv2, %conv1
+ %sub = fsub fast double %conv, %mul
+ store double %sub, double* %b, align 8
+ %cmp25 = icmp sgt i32 %n, 1
+ br i1 %cmp25, label %for.preheader, label %for.end
+
+for.preheader:
+ br label %scalar.body
+
+scalar.body:
+ %1 = phi i16 [ %0, %for.preheader ], [ %2, %scalar.body ]
+ %advars.iv = phi i64 [ %advars.iv.next, %scalar.body ], [ 1, %for.preheader ]
+ %arrayidx5 = getelementptr inbounds i16, i16* %a, i64 %advars.iv
+ %2 = load i16, i16* %arrayidx5, align 2
+ %conv6 = sitofp i16 %2 to double
+ %conv11 = sitofp i16 %1 to double
+ %mul12 = fmul fast double %conv11, %conv1
+ %sub13 = fsub fast double %conv6, %mul12
+ %arrayidx15 = getelementptr inbounds double, double* %b, i64 %advars.iv
+ store double %sub13, double* %arrayidx15, align 8
+ %advars.iv.next = add nuw nsw i64 %advars.iv, 1
+ %lftr.wideiv = trunc i64 %advars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %n
+ br i1 %exitcond, label %for.end.loopexit, label %scalar.body
+
+for.end.loopexit:
+ br label %for.end
+
+for.end:
+ ret void
+}
+
+; CHECK-LABEL: @PR26734
+;
+; void PR26734(short *a, int *b, int *c, int d, short *e) {
+; for (; d != 21; d++) {
+; *b &= *c;
+; *e = *a - 6;
+; *c = *e;
+; }
+; }
+;
+; CHECK-NOT: vector.ph:
+;
+define void @PR26734(i16* %a, i32* %b, i32* %c, i32 %d, i16* %e) {
+entry:
+ %cmp4 = icmp eq i32 %d, 21
+ br i1 %cmp4, label %entry.for.end_crit_edge, label %for.body.lr.ph
+
+entry.for.end_crit_edge:
+ %.pre = load i32, i32* %b, align 4
+ br label %for.end
+
+for.body.lr.ph:
+ %0 = load i16, i16* %a, align 2
+ %sub = add i16 %0, -6
+ %conv2 = sext i16 %sub to i32
+ %c.promoted = load i32, i32* %c, align 4
+ %b.promoted = load i32, i32* %b, align 4
+ br label %for.body
+
+for.body:
+ %inc7 = phi i32 [ %d, %for.body.lr.ph ], [ %inc, %for.body ]
+ %and6 = phi i32 [ %b.promoted, %for.body.lr.ph ], [ %and, %for.body ]
+ %conv25 = phi i32 [ %c.promoted, %for.body.lr.ph ], [ %conv2, %for.body ]
+ %and = and i32 %and6, %conv25
+ %inc = add nsw i32 %inc7, 1
+ %cmp = icmp eq i32 %inc, 21
+ br i1 %cmp, label %for.cond.for.end_crit_edge, label %for.body
+
+for.cond.for.end_crit_edge:
+ %and.lcssa = phi i32 [ %and, %for.body ]
+ store i32 %conv2, i32* %c, align 4
+ store i32 %and.lcssa, i32* %b, align 4
+ store i16 %sub, i16* %e, align 2
+ br label %for.end
+
+for.end:
+ ret void
+}
+
+; CHECK-LABEL: @PR27246
+;
+; int PR27246() {
+; unsigned int e, n;
+; for (int i = 1; i < 49; ++i) {
+; for (int k = i; k > 1; --k)
+; e = k;
+; n = e;
+; }
+; return n;
+; }
+;
+; CHECK-NOT: vector.ph:
+;
+define i32 @PR27246() {
+entry:
+ br label %for.cond1.preheader
+
+for.cond1.preheader:
+ %i.016 = phi i32 [ 1, %entry ], [ %inc, %for.cond.cleanup3 ]
+ %e.015 = phi i32 [ undef, %entry ], [ %e.1.lcssa, %for.cond.cleanup3 ]
+ br label %for.cond1
+
+for.cond.cleanup:
+ %e.1.lcssa.lcssa = phi i32 [ %e.1.lcssa, %for.cond.cleanup3 ]
+ ret i32 %e.1.lcssa.lcssa
+
+for.cond1:
+ %e.1 = phi i32 [ %k.0, %for.cond1 ], [ %e.015, %for.cond1.preheader ]
+ %k.0 = phi i32 [ %dec, %for.cond1 ], [ %i.016, %for.cond1.preheader ]
+ %cmp2 = icmp sgt i32 %k.0, 1
+ %dec = add nsw i32 %k.0, -1
+ br i1 %cmp2, label %for.cond1, label %for.cond.cleanup3
+
+for.cond.cleanup3:
+ %e.1.lcssa = phi i32 [ %e.1, %for.cond1 ]
+ %inc = add nuw nsw i32 %i.016, 1
+ %exitcond = icmp eq i32 %inc, 49
+ br i1 %exitcond, label %for.cond.cleanup, label %for.cond1.preheader
+}
diff --git a/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll b/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll
index a0e741a3cdbe..df1f9c619408 100644
--- a/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll
+++ b/test/Transforms/LoopVectorize/AArch64/interleaved_cost.ll
@@ -14,6 +14,7 @@ entry:
; 8xi8 and 16xi8 are valid i8 vector types, so the cost of the interleaved
; access group is 2.
+; CHECK: LV: Checking a loop in "test_byte_interleaved_cost"
; CHECK: LV: Found an estimated cost of 2 for VF 8 For instruction: %tmp = load i8, i8* %arrayidx0, align 4
; CHECK: LV: Found an estimated cost of 2 for VF 16 For instruction: %tmp = load i8, i8* %arrayidx0, align 4
@@ -37,3 +38,44 @@ for.body: ; preds = %for.body, %entry
for.end: ; preds = %for.body
ret void
}
+
+%ig.factor.8 = type { double*, double, double, double, double, double, double, double }
+define double @wide_interleaved_group(%ig.factor.8* %s, double %a, double %b, i32 %n) {
+entry:
+ br label %for.body
+
+; Check the default cost of a strided load with a factor that is greater than
+; the maximum allowed. In this test, the interleave factor would be 8, which is
+; not supported.
+
+; CHECK: LV: Checking a loop in "wide_interleaved_group"
+; CHECK: LV: Found an estimated cost of 6 for VF 2 For instruction: %1 = load double, double* %0, align 8
+; CHECK: LV: Found an estimated cost of 0 for VF 2 For instruction: %5 = load double, double* %4, align 8
+; CHECK: LV: Found an estimated cost of 10 for VF 2 For instruction: store double %9, double* %10, align 8
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+ %r = phi double [ 0.000000e+00, %entry ], [ %12, %for.body ]
+ %0 = getelementptr inbounds %ig.factor.8, %ig.factor.8* %s, i64 %i, i32 2
+ %1 = load double, double* %0, align 8
+ %2 = fcmp fast olt double %1, %a
+ %3 = select i1 %2, double 0.000000e+00, double %1
+ %4 = getelementptr inbounds %ig.factor.8, %ig.factor.8* %s, i64 %i, i32 6
+ %5 = load double, double* %4, align 8
+ %6 = fcmp fast olt double %5, %a
+ %7 = select i1 %6, double 0.000000e+00, double %5
+ %8 = fmul fast double %7, %b
+ %9 = fadd fast double %8, %3
+ %10 = getelementptr inbounds %ig.factor.8, %ig.factor.8* %s, i64 %i, i32 3
+ store double %9, double* %10, align 8
+ %11 = fmul fast double %9, %9
+ %12 = fadd fast double %11, %r
+ %i.next = add nuw nsw i64 %i, 1
+ %13 = trunc i64 %i.next to i32
+ %cond = icmp eq i32 %13, %n
+ br i1 %cond, label %for.exit, label %for.body
+
+for.exit:
+ %r.lcssa = phi double [ %12, %for.body ]
+ ret double %r.lcssa
+}
diff --git a/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll b/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
index 51f899c2f645..c7ced757581a 100644
--- a/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
+++ b/test/Transforms/LoopVectorize/AArch64/loop-vectorization-factors.ll
@@ -205,5 +205,63 @@ for.body: ; preds = %for.body, %for.body
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
+; CHECK-LABEL: @add_phifail(
+; CHECK: load <16 x i8>, <16 x i8>*
+; CHECK: add nuw nsw <16 x i32>
+; CHECK: store <16 x i8>
+; Function Attrs: nounwind
+define void @add_phifail(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %len) #0 {
+entry:
+ %cmp8 = icmp sgt i32 %len, 0
+ br i1 %cmp8, label %for.body, label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ ret void
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %a_phi = phi i32 [ %conv, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
+ %0 = load i8, i8* %arrayidx
+ %conv = zext i8 %0 to i32
+ %add = add nuw nsw i32 %conv, 2
+ %conv1 = trunc i32 %add to i8
+ %arrayidx3 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
+ store i8 %conv1, i8* %arrayidx3
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %len
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+; CHECK-LABEL: @add_phifail2(
+; CHECK: load <16 x i8>, <16 x i8>*
+; CHECK: add nuw nsw <16 x i32>
+; CHECK: store <16 x i8>
+; Function Attrs: nounwind
+define i8 @add_phifail2(i8* noalias nocapture readonly %p, i8* noalias nocapture %q, i32 %len) #0 {
+entry:
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ %ret = trunc i32 %a_phi to i8
+ ret i8 %ret
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %a_phi = phi i32 [ %conv, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i8, i8* %p, i64 %indvars.iv
+ %0 = load i8, i8* %arrayidx
+ %conv = zext i8 %0 to i32
+ %add = add nuw nsw i32 %conv, 2
+ %conv1 = trunc i32 %add to i8
+ %arrayidx3 = getelementptr inbounds i8, i8* %q, i64 %indvars.iv
+ store i8 %conv1, i8* %arrayidx3
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %len
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
attributes #0 = { nounwind }
+
diff --git a/test/Transforms/LoopVectorize/AArch64/max-vf-for-interleaved.ll b/test/Transforms/LoopVectorize/AArch64/max-vf-for-interleaved.ll
new file mode 100644
index 000000000000..8b9589aebba4
--- /dev/null
+++ b/test/Transforms/LoopVectorize/AArch64/max-vf-for-interleaved.ll
@@ -0,0 +1,56 @@
+; RUN: opt < %s -force-vector-interleave=1 -store-to-load-forwarding-conflict-detection=false -loop-vectorize -dce -instcombine -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+%struct.pair = type { i32, i32 }
+
+; Check vectorization of interleaved access groups with positive dependence
+; distances. In this test, the maximum safe dependence distance for
+; vectorization is 16 bytes. Normally, this would lead to a maximum VF of 4.
+; However, for interleaved groups, the effective VF is VF * IF, where IF is the
+; interleave factor. Here, the maximum safe dependence distance is recomputed
+; as 16 / IF bytes, resulting in VF=2. Since IF=2, we should generate <4 x i32>
+; loads and stores instead of <8 x i32> accesses.
+;
+; Note: LAA's conflict detection optimization has to be disabled for this test
+; to be vectorized.
+
+; struct pair {
+; int x;
+; int y;
+; };
+;
+; void max_vf(struct pair *restrict p) {
+; for (int i = 0; i < 1000; i++) {
+; p[i + 2].x = p[i].x
+; p[i + 2].y = p[i].y
+; }
+; }
+
+; CHECK-LABEL: @max_vf
+; CHECK: load <4 x i32>
+; CHECK: store <4 x i32>
+
+define void @max_vf(%struct.pair* noalias nocapture %p) {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+ %0 = add nuw nsw i64 %i, 2
+ %p_i.x = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %i, i32 0
+ %p_i_plus_2.x = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %0, i32 0
+ %1 = load i32, i32* %p_i.x, align 4
+ store i32 %1, i32* %p_i_plus_2.x, align 4
+ %p_i.y = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %i, i32 1
+ %p_i_plus_2.y = getelementptr inbounds %struct.pair, %struct.pair* %p, i64 %0, i32 1
+ %2 = load i32, i32* %p_i.y, align 4
+ store i32 %2, i32* %p_i_plus_2.y, align 4
+ %i.next = add nuw nsw i64 %i, 1
+ %cond = icmp eq i64 %i.next, 1000
+ br i1 %cond, label %for.exit, label %for.body
+
+for.exit:
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll b/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll
new file mode 100644
index 000000000000..ffe8480138d0
--- /dev/null
+++ b/test/Transforms/LoopVectorize/AArch64/type-shrinkage-insertelt.ll
@@ -0,0 +1,47 @@
+; RUN: opt -S < %s -loop-vectorize -force-vector-width=4 | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+; CHECK-LABEL: test0
+define void @test0(i16* noalias %M3) {
+entry:
+ br label %if.then1165.us
+
+if.then1165.us: ; preds = %if.then1165.us, %entry
+ %indvars.iv1783 = phi i64 [ 0, %entry ], [ %indvars.iv.next1784, %if.then1165.us ]
+ %conv1177.us = zext i16 undef to i32
+ %add1178.us = add nsw i32 %conv1177.us, undef
+ %conv1179.us = trunc i32 %add1178.us to i16
+ %idxprom1181.us = ashr exact i64 undef, 32
+ %arrayidx1185.us = getelementptr inbounds i16, i16* %M3, i64 %idxprom1181.us
+ store i16 %conv1179.us, i16* %arrayidx1185.us, align 2
+ %indvars.iv.next1784 = add nuw nsw i64 %indvars.iv1783, 1
+ %exitcond1785 = icmp eq i64 %indvars.iv.next1784, 16
+ br i1 %exitcond1785, label %for.inc1286.loopexit, label %if.then1165.us
+
+for.inc1286.loopexit: ; preds = %if.then1165.us
+ ret void
+}
+
+; CHECK-LABEL: test1
+define void @test1(i16* noalias %M3) {
+entry:
+ br label %if.then1165.us
+
+if.then1165.us: ; preds = %if.then1165.us, %entry
+ %indvars.iv1783 = phi i64 [ 0, %entry ], [ %indvars.iv.next1784, %if.then1165.us ]
+ %fptr = load i32, i32* undef, align 4
+ %conv1177.us = zext i16 undef to i32
+ %add1178.us = add nsw i32 %conv1177.us, %fptr
+ %conv1179.us = trunc i32 %add1178.us to i16
+ %idxprom1181.us = ashr exact i64 undef, 32
+ %arrayidx1185.us = getelementptr inbounds i16, i16* %M3, i64 %idxprom1181.us
+ store i16 %conv1179.us, i16* %arrayidx1185.us, align 2
+ %indvars.iv.next1784 = add nuw nsw i64 %indvars.iv1783, 1
+ %exitcond1785 = icmp eq i64 %indvars.iv.next1784, 16
+ br i1 %exitcond1785, label %for.inc1286.loopexit, label %if.then1165.us
+
+for.inc1286.loopexit: ; preds = %if.then1165.us
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll b/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll
new file mode 100644
index 000000000000..369568f6dfaa
--- /dev/null
+++ b/test/Transforms/LoopVectorize/ARM/arm-ieee-vectorize.ll
@@ -0,0 +1,330 @@
+; RUN: opt -mtriple armv7-linux-gnueabihf -loop-vectorize -S %s -debug-only=loop-vectorize -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=LINUX
+; RUN: opt -mtriple armv8-linux-gnu -loop-vectorize -S %s -debug-only=loop-vectorize -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=LINUX
+; RUN: opt -mtriple armv7-unknwon-darwin -loop-vectorize -S %s -debug-only=loop-vectorize -o /dev/null 2>&1 | FileCheck %s --check-prefix=CHECK --check-prefix=DARWIN
+; REQUIRES: asserts
+
+; Testing the ability of the loop vectorizer to tell when SIMD is safe or not
+; regarding IEEE 754 standard.
+; On Linux, we only want the vectorizer to work when -ffast-math flag is set,
+; because NEON is not IEEE compliant.
+; Darwin, on the other hand, doesn't support subnormals, and all optimizations
+; are allowed, even without -ffast-math.
+
+; Integer loops are always vectorizeable
+; CHECK: Checking a loop in "sumi"
+; CHECK: We can vectorize this loop!
+define void @sumi(i32* noalias nocapture readonly %A, i32* noalias nocapture readonly %B, i32* noalias nocapture %C, i32 %N) {
+entry:
+ %cmp5 = icmp eq i32 %N, 0
+ br i1 %cmp5, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.06 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.06
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %B, i32 %i.06
+ %1 = load i32, i32* %arrayidx1, align 4
+ %mul = mul nsw i32 %1, %0
+ %arrayidx2 = getelementptr inbounds i32, i32* %C, i32 %i.06
+ store i32 %mul, i32* %arrayidx2, align 4
+ %inc = add nuw nsw i32 %i.06, 1
+ %exitcond = icmp eq i32 %inc, %N
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+}
+
+; Floating-point loops need fast-math to be vectorizeable
+; LINUX: Checking a loop in "sumf"
+; LINUX: Potentially unsafe FP op prevents vectorization
+; DARWIN: Checking a loop in "sumf"
+; DARWIN: We can vectorize this loop!
+define void @sumf(float* noalias nocapture readonly %A, float* noalias nocapture readonly %B, float* noalias nocapture %C, i32 %N) {
+entry:
+ %cmp5 = icmp eq i32 %N, 0
+ br i1 %cmp5, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.06 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds float, float* %A, i32 %i.06
+ %0 = load float, float* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds float, float* %B, i32 %i.06
+ %1 = load float, float* %arrayidx1, align 4
+ %mul = fmul float %0, %1
+ %arrayidx2 = getelementptr inbounds float, float* %C, i32 %i.06
+ store float %mul, float* %arrayidx2, align 4
+ %inc = add nuw nsw i32 %i.06, 1
+ %exitcond = icmp eq i32 %inc, %N
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+}
+
+; Integer loops are always vectorizeable
+; CHECK: Checking a loop in "redi"
+; CHECK: We can vectorize this loop!
+define i32 @redi(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32 %N) {
+entry:
+ %cmp5 = icmp eq i32 %N, 0
+ br i1 %cmp5, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %Red.06 = phi i32 [ %add, %for.body ], [ undef, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.07
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %b, i32 %i.07
+ %1 = load i32, i32* %arrayidx1, align 4
+ %mul = mul nsw i32 %1, %0
+ %add = add nsw i32 %mul, %Red.06
+ %inc = add nuw nsw i32 %i.07, 1
+ %exitcond = icmp eq i32 %inc, %N
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ %add.lcssa = phi i32 [ %add, %for.body ]
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ %Red.0.lcssa = phi i32 [ undef, %entry ], [ %add.lcssa, %for.end.loopexit ]
+ ret i32 %Red.0.lcssa
+}
+
+; Floating-point loops need fast-math to be vectorizeable
+; LINUX: Checking a loop in "redf"
+; LINUX: Potentially unsafe FP op prevents vectorization
+; DARWIN: Checking a loop in "redf"
+; DARWIN: We can vectorize this loop!
+define float @redf(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i32 %N) {
+entry:
+ %cmp5 = icmp eq i32 %N, 0
+ br i1 %cmp5, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %Red.06 = phi float [ %add, %for.body ], [ undef, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds float, float* %a, i32 %i.07
+ %0 = load float, float* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds float, float* %b, i32 %i.07
+ %1 = load float, float* %arrayidx1, align 4
+ %mul = fmul float %0, %1
+ %add = fadd float %Red.06, %mul
+ %inc = add nuw nsw i32 %i.07, 1
+ %exitcond = icmp eq i32 %inc, %N
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ %add.lcssa = phi float [ %add, %for.body ]
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ %Red.0.lcssa = phi float [ undef, %entry ], [ %add.lcssa, %for.end.loopexit ]
+ ret float %Red.0.lcssa
+}
+
+; Make sure calls that turn into builtins are also covered
+; LINUX: Checking a loop in "fabs"
+; LINUX: Potentially unsafe FP op prevents vectorization
+; DARWIN: Checking a loop in "fabs"
+; DARWIN: We can vectorize this loop!
+define void @fabs(float* noalias nocapture readonly %A, float* noalias nocapture readonly %B, float* noalias nocapture %C, i32 %N) {
+entry:
+ %cmp10 = icmp eq i32 %N, 0
+ br i1 %cmp10, label %for.end, label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float, float* %A, i32 %i.011
+ %0 = load float, float* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds float, float* %B, i32 %i.011
+ %1 = load float, float* %arrayidx1, align 4
+ %fabsf = tail call float @fabsf(float %1) #1
+ %conv3 = fmul float %0, %fabsf
+ %arrayidx4 = getelementptr inbounds float, float* %C, i32 %i.011
+ store float %conv3, float* %arrayidx4, align 4
+ %inc = add nuw nsw i32 %i.011, 1
+ %exitcond = icmp eq i32 %inc, %N
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+; Integer loops are always vectorizeable
+; CHECK: Checking a loop in "sumi_fast"
+; CHECK: We can vectorize this loop!
+define void @sumi_fast(i32* noalias nocapture readonly %A, i32* noalias nocapture readonly %B, i32* noalias nocapture %C, i32 %N) {
+entry:
+ %cmp5 = icmp eq i32 %N, 0
+ br i1 %cmp5, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.06 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i32 %i.06
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %B, i32 %i.06
+ %1 = load i32, i32* %arrayidx1, align 4
+ %mul = mul nsw i32 %1, %0
+ %arrayidx2 = getelementptr inbounds i32, i32* %C, i32 %i.06
+ store i32 %mul, i32* %arrayidx2, align 4
+ %inc = add nuw nsw i32 %i.06, 1
+ %exitcond = icmp eq i32 %inc, %N
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+}
+
+; Floating-point loops can be vectorizeable with fast-math
+; CHECK: Checking a loop in "sumf_fast"
+; CHECK: We can vectorize this loop!
+define void @sumf_fast(float* noalias nocapture readonly %A, float* noalias nocapture readonly %B, float* noalias nocapture %C, i32 %N) {
+entry:
+ %cmp5 = icmp eq i32 %N, 0
+ br i1 %cmp5, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.06 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds float, float* %A, i32 %i.06
+ %0 = load float, float* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds float, float* %B, i32 %i.06
+ %1 = load float, float* %arrayidx1, align 4
+ %mul = fmul fast float %1, %0
+ %arrayidx2 = getelementptr inbounds float, float* %C, i32 %i.06
+ store float %mul, float* %arrayidx2, align 4
+ %inc = add nuw nsw i32 %i.06, 1
+ %exitcond = icmp eq i32 %inc, %N
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+}
+
+; Integer loops are always vectorizeable
+; CHECK: Checking a loop in "redi_fast"
+; CHECK: We can vectorize this loop!
+define i32 @redi_fast(i32* noalias nocapture readonly %a, i32* noalias nocapture readonly %b, i32 %N) {
+entry:
+ %cmp5 = icmp eq i32 %N, 0
+ br i1 %cmp5, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %Red.06 = phi i32 [ %add, %for.body ], [ undef, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i32, i32* %a, i32 %i.07
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds i32, i32* %b, i32 %i.07
+ %1 = load i32, i32* %arrayidx1, align 4
+ %mul = mul nsw i32 %1, %0
+ %add = add nsw i32 %mul, %Red.06
+ %inc = add nuw nsw i32 %i.07, 1
+ %exitcond = icmp eq i32 %inc, %N
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ %add.lcssa = phi i32 [ %add, %for.body ]
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ %Red.0.lcssa = phi i32 [ undef, %entry ], [ %add.lcssa, %for.end.loopexit ]
+ ret i32 %Red.0.lcssa
+}
+
+; Floating-point loops can be vectorizeable with fast-math
+; CHECK: Checking a loop in "redf_fast"
+; CHECK: We can vectorize this loop!
+define float @redf_fast(float* noalias nocapture readonly %a, float* noalias nocapture readonly %b, i32 %N) {
+entry:
+ %cmp5 = icmp eq i32 %N, 0
+ br i1 %cmp5, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %i.07 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %Red.06 = phi float [ %add, %for.body ], [ undef, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds float, float* %a, i32 %i.07
+ %0 = load float, float* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds float, float* %b, i32 %i.07
+ %1 = load float, float* %arrayidx1, align 4
+ %mul = fmul fast float %1, %0
+ %add = fadd fast float %mul, %Red.06
+ %inc = add nuw nsw i32 %i.07, 1
+ %exitcond = icmp eq i32 %inc, %N
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ %add.lcssa = phi float [ %add, %for.body ]
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ %Red.0.lcssa = phi float [ undef, %entry ], [ %add.lcssa, %for.end.loopexit ]
+ ret float %Red.0.lcssa
+}
+
+; Make sure calls that turn into builtins are also covered
+; CHECK: Checking a loop in "fabs_fast"
+; CHECK: We can vectorize this loop!
+define void @fabs_fast(float* noalias nocapture readonly %A, float* noalias nocapture readonly %B, float* noalias nocapture %C, i32 %N) {
+entry:
+ %cmp10 = icmp eq i32 %N, 0
+ br i1 %cmp10, label %for.end, label %for.body
+
+for.body: ; preds = %entry, %for.body
+ %i.011 = phi i32 [ %inc, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds float, float* %A, i32 %i.011
+ %0 = load float, float* %arrayidx, align 4
+ %arrayidx1 = getelementptr inbounds float, float* %B, i32 %i.011
+ %1 = load float, float* %arrayidx1, align 4
+ %fabsf = tail call fast float @fabsf(float %1) #2
+ %conv3 = fmul fast float %fabsf, %0
+ %arrayidx4 = getelementptr inbounds float, float* %C, i32 %i.011
+ store float %conv3, float* %arrayidx4, align 4
+ %inc = add nuw nsw i32 %i.011, 1
+ %exitcond = icmp eq i32 %inc, %N
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body, %entry
+ ret void
+}
+
+declare float @fabsf(float)
+
+attributes #1 = { nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a8" "target-features"="+dsp,+neon,+vfp3" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind readnone "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="cortex-a8" "target-features"="+dsp,+neon,+vfp3" "unsafe-fp-math"="true" "use-soft-float"="false" }
diff --git a/test/Transforms/LoopVectorize/PowerPC/large-loop-rdx.ll b/test/Transforms/LoopVectorize/PowerPC/large-loop-rdx.ll
index de6595f64ed3..c88b496e4e9c 100644
--- a/test/Transforms/LoopVectorize/PowerPC/large-loop-rdx.ll
+++ b/test/Transforms/LoopVectorize/PowerPC/large-loop-rdx.ll
@@ -12,7 +12,9 @@
; CHECK-NEXT: fadd
; CHECK-NEXT: fadd
; CHECK-NEXT: fadd
-; CHECK-NEXT-NOT: fadd
+; CHECK-NEXT: =
+; CHECK-NOT: fadd
+; CHECK-SAME: >
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-ibm-linux-gnu"
diff --git a/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll b/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll
index 2898af2986d3..cdd5f042350a 100644
--- a/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll
+++ b/test/Transforms/LoopVectorize/PowerPC/small-loop-rdx.ll
@@ -12,7 +12,9 @@
; CHECK-NEXT: fadd
; CHECK-NEXT: fadd
; CHECK-NEXT: fadd
-; CHECK-NEXT-NOT: fadd
+; CHECK-NEXT: =
+; CHECK-NOT: fadd
+; CHECK-SAME: >
target datalayout = "e-m:e-i64:64-n32:64"
target triple = "powerpc64le-ibm-linux-gnu"
diff --git a/test/Transforms/LoopVectorize/PowerPC/vectorize-only-for-real.ll b/test/Transforms/LoopVectorize/PowerPC/vectorize-only-for-real.ll
new file mode 100644
index 000000000000..8abc25ece35c
--- /dev/null
+++ b/test/Transforms/LoopVectorize/PowerPC/vectorize-only-for-real.ll
@@ -0,0 +1,62 @@
+; RUN: opt -S -loop-vectorize < %s | FileCheck %s
+target datalayout = "E-m:e-i64:64-n32:64"
+target triple = "powerpc64-bgq-linux"
+
+; Function Attrs: nounwind
+define zeroext i32 @test() #0 {
+; CHECK-LABEL: @test
+; CHECK-NOT: x i32>
+
+entry:
+ %a = alloca [1600 x i32], align 4
+ %c = alloca [1600 x i32], align 4
+ %0 = bitcast [1600 x i32]* %a to i8*
+ call void @llvm.lifetime.start(i64 6400, i8* %0) #3
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body
+ %1 = bitcast [1600 x i32]* %c to i8*
+ call void @llvm.lifetime.start(i64 6400, i8* %1) #3
+ %arraydecay = getelementptr inbounds [1600 x i32], [1600 x i32]* %a, i64 0, i64 0
+ %arraydecay1 = getelementptr inbounds [1600 x i32], [1600 x i32]* %c, i64 0, i64 0
+ %call = call signext i32 @bar(i32* %arraydecay, i32* %arraydecay1) #3
+ br label %for.body6
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv25 = phi i64 [ 0, %entry ], [ %indvars.iv.next26, %for.body ]
+ %arrayidx = getelementptr inbounds [1600 x i32], [1600 x i32]* %a, i64 0, i64 %indvars.iv25
+ %2 = trunc i64 %indvars.iv25 to i32
+ store i32 %2, i32* %arrayidx, align 4
+ %indvars.iv.next26 = add nuw nsw i64 %indvars.iv25, 1
+ %exitcond27 = icmp eq i64 %indvars.iv.next26, 1600
+ br i1 %exitcond27, label %for.cond.cleanup, label %for.body
+
+for.cond.cleanup5: ; preds = %for.body6
+ call void @llvm.lifetime.end(i64 6400, i8* nonnull %1) #3
+ call void @llvm.lifetime.end(i64 6400, i8* %0) #3
+ ret i32 %add
+
+for.body6: ; preds = %for.body6, %for.cond.cleanup
+ %indvars.iv = phi i64 [ 0, %for.cond.cleanup ], [ %indvars.iv.next, %for.body6 ]
+ %s.022 = phi i32 [ 0, %for.cond.cleanup ], [ %add, %for.body6 ]
+ %arrayidx8 = getelementptr inbounds [1600 x i32], [1600 x i32]* %c, i64 0, i64 %indvars.iv
+ %3 = load i32, i32* %arrayidx8, align 4
+ %add = add i32 %3, %s.022
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1600
+ br i1 %exitcond, label %for.cond.cleanup5, label %for.body6
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #1
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) #1
+
+declare signext i32 @bar(i32*, i32*) #2
+
+attributes #0 = { nounwind "target-cpu"="a2q" "target-features"="+qpx,-altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-vsx" }
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { "target-cpu"="a2q" "target-features"="+qpx,-altivec,-bpermd,-crypto,-direct-move,-extdiv,-power8-vector,-vsx" }
+attributes #3 = { nounwind }
+
diff --git a/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll b/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll
index 65b3919585e3..fed186b9b675 100644
--- a/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll
+++ b/test/Transforms/LoopVectorize/PowerPC/vsx-tsvc-s173.ll
@@ -43,7 +43,7 @@ for.end12: ; preds = %for.end, %entry
; CHECK-LABEL: @s173
; CHECK: load <4 x float>, <4 x float>*
-; CHECK: add i64 %index, 16000
+; CHECK: add nsw i64 %1, 16000
; CHECK: ret i32 0
}
diff --git a/test/Transforms/LoopVectorize/X86/avx1.ll b/test/Transforms/LoopVectorize/X86/avx1.ll
index 37977c43ac30..d384a8162ba1 100644
--- a/test/Transforms/LoopVectorize/X86/avx1.ll
+++ b/test/Transforms/LoopVectorize/X86/avx1.ll
@@ -1,11 +1,12 @@
-; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -mattr=avx,+slow-unaligned-mem-32 -S | FileCheck %s --check-prefix=SLOWMEM32 --check-prefix=CHECK
+; RUN: opt < %s -loop-vectorize -mattr=avx,-slow-unaligned-mem-32 -S | FileCheck %s --check-prefix=FASTMEM32 --check-prefix=CHECK
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
-;CHECK-LABEL: @read_mod_write_single_ptr(
-;CHECK: load <8 x float>
-;CHECK: ret i32
+; CHECK-LABEL: @read_mod_write_single_ptr(
+; CHECK: load <8 x float>
+; CHECK: ret i32
define i32 @read_mod_write_single_ptr(float* nocapture %a, i32 %n) nounwind uwtable ssp {
%1 = icmp sgt i32 %n, 0
br i1 %1, label %.lr.ph, label %._crit_edge
@@ -26,9 +27,10 @@ define i32 @read_mod_write_single_ptr(float* nocapture %a, i32 %n) nounwind uwta
}
-;CHECK-LABEL: @read_mod_i64(
-;CHECK: load <2 x i64>
-;CHECK: ret i32
+; CHECK-LABEL: @read_mod_i64(
+; SLOWMEM32: load <2 x i64>
+; FASTMEM32: load <4 x i64>
+; CHECK: ret i32
define i32 @read_mod_i64(i64* nocapture %a, i32 %n) nounwind uwtable ssp {
%1 = icmp sgt i32 %n, 0
br i1 %1, label %.lr.ph, label %._crit_edge
@@ -47,3 +49,4 @@ define i32 @read_mod_i64(i64* nocapture %a, i32 %n) nounwind uwtable ssp {
._crit_edge: ; preds = %.lr.ph, %0
ret i32 undef
}
+
diff --git a/test/Transforms/LoopVectorize/X86/cost-model.ll b/test/Transforms/LoopVectorize/X86/cost-model.ll
index 013657102e6b..699dd5bf0354 100644
--- a/test/Transforms/LoopVectorize/X86/cost-model.ll
+++ b/test/Transforms/LoopVectorize/X86/cost-model.ll
@@ -39,3 +39,44 @@ for.body: ; preds = %for.body, %entry
for.end: ; preds = %for.body
ret void
}
+
+; This function uses a stride that is generally too big to benefit from vectorization without
+; really good support for a gather load. We were not computing an accurate cost for the
+; vectorization and subsequent scalarization of the pointer induction variables.
+
+define float @PR27826(float* nocapture readonly %a, float* nocapture readonly %b, i32 %n) {
+; CHECK-LABEL: @PR27826(
+; CHECK-NOT: <4 x float>
+; CHECK-NOT: <8 x float>
+; CHECK: ret float %s.0.lcssa
+
+entry:
+ %cmp = icmp sgt i32 %n, 0
+ br i1 %cmp, label %preheader, label %for.end
+
+preheader:
+ %t0 = sext i32 %n to i64
+ br label %for
+
+for:
+ %indvars.iv = phi i64 [ 0, %preheader ], [ %indvars.iv.next, %for ]
+ %s.02 = phi float [ 0.0, %preheader ], [ %add4, %for ]
+ %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
+ %t1 = load float, float* %arrayidx, align 4
+ %arrayidx3 = getelementptr inbounds float, float* %b, i64 %indvars.iv
+ %t2 = load float, float* %arrayidx3, align 4
+ %add = fadd fast float %t1, %s.02
+ %add4 = fadd fast float %add, %t2
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 8
+ %cmp1 = icmp slt i64 %indvars.iv.next, %t0
+ br i1 %cmp1, label %for, label %loopexit
+
+loopexit:
+ %add4.lcssa = phi float [ %add4, %for ]
+ br label %for.end
+
+for.end:
+ %s.0.lcssa = phi float [ 0.0, %entry ], [ %add4.lcssa, %loopexit ]
+ ret float %s.0.lcssa
+}
+
diff --git a/test/Transforms/LoopVectorize/X86/force-ifcvt.ll b/test/Transforms/LoopVectorize/X86/force-ifcvt.ll
new file mode 100644
index 000000000000..00764943556d
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/force-ifcvt.ll
@@ -0,0 +1,41 @@
+; RUN: opt -loop-vectorize -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: norecurse nounwind uwtable
+define void @Test(i32* nocapture %res, i32* nocapture readnone %c, i32* nocapture readonly %d, i32* nocapture readonly %p) #0 {
+entry:
+ br label %for.body
+
+; CHECK-LABEL: @Test
+; CHECK: <4 x i32>
+
+for.body: ; preds = %cond.end, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %cond.end ]
+ %arrayidx = getelementptr inbounds i32, i32* %p, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !0
+ %cmp1 = icmp eq i32 %0, 0
+ %arrayidx3 = getelementptr inbounds i32, i32* %res, i64 %indvars.iv
+ %1 = load i32, i32* %arrayidx3, align 4, !llvm.mem.parallel_loop_access !0
+ br i1 %cmp1, label %cond.end, label %cond.false
+
+cond.false: ; preds = %for.body
+ %arrayidx7 = getelementptr inbounds i32, i32* %d, i64 %indvars.iv
+ %2 = load i32, i32* %arrayidx7, align 4, !llvm.mem.parallel_loop_access !0
+ %add = add nsw i32 %2, %1
+ br label %cond.end
+
+cond.end: ; preds = %for.body, %cond.false
+ %cond = phi i32 [ %add, %cond.false ], [ %1, %for.body ]
+ store i32 %cond, i32* %arrayidx3, align 4, !llvm.mem.parallel_loop_access !0
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 16
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
+
+for.end: ; preds = %cond.end
+ ret void
+}
+
+attributes #0 = { norecurse nounwind uwtable "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" }
+
+!0 = distinct !{!0}
diff --git a/test/Transforms/LoopVectorize/X86/funclet.ll b/test/Transforms/LoopVectorize/X86/funclet.ll
new file mode 100644
index 000000000000..88f15e7e1485
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/funclet.ll
@@ -0,0 +1,45 @@
+; RUN: opt -S -loop-vectorize < %s | FileCheck %s
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686-pc-windows-msvc18.0.0"
+
+define void @test1() #0 personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ invoke void @_CxxThrowException(i8* null, i8* null)
+ to label %unreachable unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %entry
+ %0 = catchswitch within none [label %catch] unwind to caller
+
+catch: ; preds = %catch.dispatch
+ %1 = catchpad within %0 [i8* null, i32 64, i8* null]
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body
+ catchret from %1 to label %try.cont
+
+for.body: ; preds = %for.body, %catch
+ %i.07 = phi i32 [ 0, %catch ], [ %inc, %for.body ]
+ %call = call double @floor(double 1.0) #1 [ "funclet"(token %1) ]
+ %inc = add nuw nsw i32 %i.07, 1
+ %exitcond = icmp eq i32 %inc, 1024
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+
+try.cont: ; preds = %for.cond.cleanup
+ ret void
+
+unreachable: ; preds = %entry
+ unreachable
+}
+
+; CHECK-LABEL: define void @test1(
+; CHECK: %[[cpad:.*]] = catchpad within {{.*}} [i8* null, i32 64, i8* null]
+; CHECK: call <16 x double> @llvm.floor.v16f64(<16 x double> {{.*}}) [ "funclet"(token %[[cpad]]) ]
+
+declare x86_stdcallcc void @_CxxThrowException(i8*, i8*)
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare double @floor(double) #1
+
+attributes #0 = { "target-features"="+sse2" }
+attributes #1 = { nounwind readnone }
diff --git a/test/Transforms/LoopVectorize/X86/gather_scatter.ll b/test/Transforms/LoopVectorize/X86/gather_scatter.ll
new file mode 100644
index 000000000000..222dd7eef6b6
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/gather_scatter.ll
@@ -0,0 +1,236 @@
+; RUN: opt < %s -O3 -mcpu=knl -S | FileCheck %s -check-prefix=AVX512
+
+;AVX1-NOT: llvm.masked
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc_linux"
+
+; The source code:
+;
+;void foo1(float * __restrict__ in, float * __restrict__ out, int * __restrict__ trigger, int * __restrict__ index) {
+;
+; for (int i=0; i < SIZE; ++i) {
+; if (trigger[i] > 0) {
+; out[i] = in[index[i]] + (float) 0.5;
+; }
+; }
+;}
+
+;AVX512-LABEL: @foo1
+;AVX512: llvm.masked.load.v16i32
+;AVX512: llvm.masked.gather.v16f32
+;AVX512: llvm.masked.store.v16f32
+;AVX512: ret void
+
+; Function Attrs: nounwind uwtable
+define void @foo1(float* noalias %in, float* noalias %out, i32* noalias %trigger, i32* noalias %index) {
+entry:
+ %in.addr = alloca float*, align 8
+ %out.addr = alloca float*, align 8
+ %trigger.addr = alloca i32*, align 8
+ %index.addr = alloca i32*, align 8
+ %i = alloca i32, align 4
+ store float* %in, float** %in.addr, align 8
+ store float* %out, float** %out.addr, align 8
+ store i32* %trigger, i32** %trigger.addr, align 8
+ store i32* %index, i32** %index.addr, align 8
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32, i32* %i, align 4
+ %cmp = icmp slt i32 %0, 4096
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32, i32* %i, align 4
+ %idxprom = sext i32 %1 to i64
+ %2 = load i32*, i32** %trigger.addr, align 8
+ %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom
+ %3 = load i32, i32* %arrayidx, align 4
+ %cmp1 = icmp sgt i32 %3, 0
+ br i1 %cmp1, label %if.then, label %if.end
+
+if.then: ; preds = %for.body
+ %4 = load i32, i32* %i, align 4
+ %idxprom2 = sext i32 %4 to i64
+ %5 = load i32*, i32** %index.addr, align 8
+ %arrayidx3 = getelementptr inbounds i32, i32* %5, i64 %idxprom2
+ %6 = load i32, i32* %arrayidx3, align 4
+ %idxprom4 = sext i32 %6 to i64
+ %7 = load float*, float** %in.addr, align 8
+ %arrayidx5 = getelementptr inbounds float, float* %7, i64 %idxprom4
+ %8 = load float, float* %arrayidx5, align 4
+ %add = fadd float %8, 5.000000e-01
+ %9 = load i32, i32* %i, align 4
+ %idxprom6 = sext i32 %9 to i64
+ %10 = load float*, float** %out.addr, align 8
+ %arrayidx7 = getelementptr inbounds float, float* %10, i64 %idxprom6
+ store float %add, float* %arrayidx7, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %for.body
+ br label %for.inc
+
+for.inc: ; preds = %if.end
+ %11 = load i32, i32* %i, align 4
+ %inc = add nsw i32 %11, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
+
+; The source code
+;void foo2 (In * __restrict__ in, float * __restrict__ out, int * __restrict__ trigger) {
+;
+; for (int i=0; i<SIZE; ++i) {
+; if (trigger[i] > 0) {
+; out[i] = in[i].b + (float) 0.5;
+; }
+; }
+;}
+
+%struct.In = type { float, float }
+
+;AVX512-LABEL: @foo2
+;AVX512: getelementptr inbounds %struct.In, %struct.In* %in, <16 x i64> %{{.*}}, i32 1
+;AVX512: llvm.masked.gather.v16f32
+;AVX512: llvm.masked.store.v16f32
+;AVX512: ret void
+define void @foo2(%struct.In* noalias %in, float* noalias %out, i32* noalias %trigger, i32* noalias %index) #0 {
+entry:
+ %in.addr = alloca %struct.In*, align 8
+ %out.addr = alloca float*, align 8
+ %trigger.addr = alloca i32*, align 8
+ %index.addr = alloca i32*, align 8
+ %i = alloca i32, align 4
+ store %struct.In* %in, %struct.In** %in.addr, align 8
+ store float* %out, float** %out.addr, align 8
+ store i32* %trigger, i32** %trigger.addr, align 8
+ store i32* %index, i32** %index.addr, align 8
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32, i32* %i, align 4
+ %cmp = icmp slt i32 %0, 4096
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32, i32* %i, align 4
+ %idxprom = sext i32 %1 to i64
+ %2 = load i32*, i32** %trigger.addr, align 8
+ %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom
+ %3 = load i32, i32* %arrayidx, align 4
+ %cmp1 = icmp sgt i32 %3, 0
+ br i1 %cmp1, label %if.then, label %if.end
+
+if.then: ; preds = %for.body
+ %4 = load i32, i32* %i, align 4
+ %idxprom2 = sext i32 %4 to i64
+ %5 = load %struct.In*, %struct.In** %in.addr, align 8
+ %arrayidx3 = getelementptr inbounds %struct.In, %struct.In* %5, i64 %idxprom2
+ %b = getelementptr inbounds %struct.In, %struct.In* %arrayidx3, i32 0, i32 1
+ %6 = load float, float* %b, align 4
+ %add = fadd float %6, 5.000000e-01
+ %7 = load i32, i32* %i, align 4
+ %idxprom4 = sext i32 %7 to i64
+ %8 = load float*, float** %out.addr, align 8
+ %arrayidx5 = getelementptr inbounds float, float* %8, i64 %idxprom4
+ store float %add, float* %arrayidx5, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %for.body
+ br label %for.inc
+
+for.inc: ; preds = %if.end
+ %9 = load i32, i32* %i, align 4
+ %inc = add nsw i32 %9, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
+
+; The source code
+;struct Out {
+; float a;
+; float b;
+;};
+;void foo3 (In * __restrict__ in, Out * __restrict__ out, int * __restrict__ trigger) {
+;
+; for (int i=0; i<SIZE; ++i) {
+; if (trigger[i] > 0) {
+; out[i].b = in[i].b + (float) 0.5;
+; }
+; }
+;}
+
+;AVX512-LABEL: @foo3
+;AVX512: getelementptr inbounds %struct.In, %struct.In* %in, <16 x i64> %{{.*}}, i32 1
+;AVX512: llvm.masked.gather.v16f32
+;AVX512: fadd <16 x float>
+;AVX512: getelementptr inbounds %struct.Out, %struct.Out* %out, <16 x i64> %{{.*}}, i32 1
+;AVX512: llvm.masked.scatter.v16f32
+;AVX512: ret void
+
+%struct.Out = type { float, float }
+
+define void @foo3(%struct.In* noalias %in, %struct.Out* noalias %out, i32* noalias %trigger) {
+entry:
+ %in.addr = alloca %struct.In*, align 8
+ %out.addr = alloca %struct.Out*, align 8
+ %trigger.addr = alloca i32*, align 8
+ %i = alloca i32, align 4
+ store %struct.In* %in, %struct.In** %in.addr, align 8
+ store %struct.Out* %out, %struct.Out** %out.addr, align 8
+ store i32* %trigger, i32** %trigger.addr, align 8
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32, i32* %i, align 4
+ %cmp = icmp slt i32 %0, 4096
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32, i32* %i, align 4
+ %idxprom = sext i32 %1 to i64
+ %2 = load i32*, i32** %trigger.addr, align 8
+ %arrayidx = getelementptr inbounds i32, i32* %2, i64 %idxprom
+ %3 = load i32, i32* %arrayidx, align 4
+ %cmp1 = icmp sgt i32 %3, 0
+ br i1 %cmp1, label %if.then, label %if.end
+
+if.then: ; preds = %for.body
+ %4 = load i32, i32* %i, align 4
+ %idxprom2 = sext i32 %4 to i64
+ %5 = load %struct.In*, %struct.In** %in.addr, align 8
+ %arrayidx3 = getelementptr inbounds %struct.In, %struct.In* %5, i64 %idxprom2
+ %b = getelementptr inbounds %struct.In, %struct.In* %arrayidx3, i32 0, i32 1
+ %6 = load float, float* %b, align 4
+ %add = fadd float %6, 5.000000e-01
+ %7 = load i32, i32* %i, align 4
+ %idxprom4 = sext i32 %7 to i64
+ %8 = load %struct.Out*, %struct.Out** %out.addr, align 8
+ %arrayidx5 = getelementptr inbounds %struct.Out, %struct.Out* %8, i64 %idxprom4
+ %b6 = getelementptr inbounds %struct.Out, %struct.Out* %arrayidx5, i32 0, i32 1
+ store float %add, float* %b6, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %for.body
+ br label %for.inc
+
+for.inc: ; preds = %if.end
+ %9 = load i32, i32* %i, align 4
+ %inc = add nsw i32 %9, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
+declare void @llvm.masked.scatter.v16f32(<16 x float>, <16 x float*>, i32, <16 x i1>)
diff --git a/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll b/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll
new file mode 100644
index 000000000000..ee0a245c5e0d
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/imprecise-through-phis.ll
@@ -0,0 +1,75 @@
+; RUN: opt -S -loop-vectorize -mtriple=x86_64-apple-darwin %s | FileCheck %s
+
+; Two mostly identical functions. The only difference is the presence of
+; fast-math flags on the second. The loop is a pretty simple reduction:
+
+; for (int i = 0; i < 32; ++i)
+; if (arr[i] != 42)
+; tot += arr[i];
+
+define double @sumIfScalar(double* nocapture readonly %arr) {
+; CHECK-LABEL: define double @sumIfScalar
+; CHECK-NOT: <2 x double>
+
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [0, %entry], [%i.next, %next.iter]
+ %tot = phi double [0.0, %entry], [%tot.next, %next.iter]
+
+ %addr = getelementptr double, double* %arr, i32 %i
+ %nextval = load double, double* %addr
+
+ %tst = fcmp une double %nextval, 42.0
+ br i1 %tst, label %do.add, label %no.add
+
+do.add:
+ %tot.new = fadd double %tot, %nextval
+ br label %next.iter
+
+no.add:
+ br label %next.iter
+
+next.iter:
+ %tot.next = phi double [%tot, %no.add], [%tot.new, %do.add]
+ %i.next = add i32 %i, 1
+ %again = icmp ult i32 %i.next, 32
+ br i1 %again, label %loop, label %done
+
+done:
+ ret double %tot.next
+}
+
+define double @sumIfVector(double* nocapture readonly %arr) {
+; CHECK-LABEL: define double @sumIfVector
+; CHECK: <2 x double>
+entry:
+ br label %loop
+
+loop:
+ %i = phi i32 [0, %entry], [%i.next, %next.iter]
+ %tot = phi double [0.0, %entry], [%tot.next, %next.iter]
+
+ %addr = getelementptr double, double* %arr, i32 %i
+ %nextval = load double, double* %addr
+
+ %tst = fcmp fast une double %nextval, 42.0
+ br i1 %tst, label %do.add, label %no.add
+
+do.add:
+ %tot.new = fadd fast double %tot, %nextval
+ br label %next.iter
+
+no.add:
+ br label %next.iter
+
+next.iter:
+ %tot.next = phi double [%tot, %no.add], [%tot.new, %do.add]
+ %i.next = add i32 %i, 1
+ %again = icmp ult i32 %i.next, 32
+ br i1 %again, label %loop, label %done
+
+done:
+ ret double %tot.next
+}
diff --git a/test/Transforms/LoopVectorize/X86/masked_load_store.ll b/test/Transforms/LoopVectorize/X86/masked_load_store.ll
index abe7d6de3f35..1227344daff6 100644
--- a/test/Transforms/LoopVectorize/X86/masked_load_store.ll
+++ b/test/Transforms/LoopVectorize/X86/masked_load_store.ll
@@ -1,9 +1,7 @@
-; RUN: opt < %s -O3 -mcpu=corei7-avx -S | FileCheck %s -check-prefix=AVX1
-; RUN: opt < %s -O3 -mcpu=core-avx2 -S | FileCheck %s -check-prefix=AVX2
+; RUN: opt < %s -O3 -mcpu=corei7-avx -S | FileCheck %s -check-prefix=AVX -check-prefix=AVX1
+; RUN: opt < %s -O3 -mcpu=core-avx2 -S | FileCheck %s -check-prefix=AVX -check-prefix=AVX2
; RUN: opt < %s -O3 -mcpu=knl -S | FileCheck %s -check-prefix=AVX512
-;AVX1-NOT: llvm.masked
-
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc_linux"
@@ -18,18 +16,18 @@ target triple = "x86_64-pc_linux"
; }
;}
-;AVX2-LABEL: @foo1
-;AVX2: icmp slt <8 x i32> %wide.load, <i32 100, i32 100, i32 100
-;AVX2: call <8 x i32> @llvm.masked.load.v8i32
-;AVX2: add nsw <8 x i32>
-;AVX2: call void @llvm.masked.store.v8i32
-;AVX2: ret void
+;AVX-LABEL: @foo1
+;AVX: icmp slt <8 x i32> %wide.load, <i32 100, i32 100, i32 100
+;AVX: call <8 x i32> @llvm.masked.load.v8i32.p0v8i32
+;AVX: add nsw <8 x i32>
+;AVX: call void @llvm.masked.store.v8i32.p0v8i32
+;AVX: ret void
;AVX512-LABEL: @foo1
;AVX512: icmp slt <16 x i32> %wide.load, <i32 100, i32 100, i32 100
-;AVX512: call <16 x i32> @llvm.masked.load.v16i32
+;AVX512: call <16 x i32> @llvm.masked.load.v16i32.p0v16i32
;AVX512: add nsw <16 x i32>
-;AVX512: call void @llvm.masked.store.v16i32
+;AVX512: call void @llvm.masked.store.v16i32.p0v16i32
;AVX512: ret void
; Function Attrs: nounwind uwtable
@@ -91,6 +89,81 @@ for.end: ; preds = %for.cond
ret void
}
+; The same as @foo1 but all the pointers are address space 1 pointers.
+
+;AVX-LABEL: @foo1_addrspace1
+;AVX: icmp slt <8 x i32> %wide.load, <i32 100, i32 100, i32 100
+;AVX: call <8 x i32> @llvm.masked.load.v8i32.p1v8i32
+;AVX: add nsw <8 x i32>
+;AVX: call void @llvm.masked.store.v8i32.p1v8i32
+;AVX: ret void
+
+;AVX512-LABEL: @foo1_addrspace1
+;AVX512: icmp slt <16 x i32> %wide.load, <i32 100, i32 100, i32 100
+;AVX512: call <16 x i32> @llvm.masked.load.v16i32.p1v16i32
+;AVX512: add nsw <16 x i32>
+;AVX512: call void @llvm.masked.store.v16i32.p1v16i32
+;AVX512: ret void
+
+; Function Attrs: nounwind uwtable
+define void @foo1_addrspace1(i32 addrspace(1)* %A, i32 addrspace(1)* %B, i32 addrspace(1)* %trigger) {
+entry:
+ %A.addr = alloca i32 addrspace(1)*, align 8
+ %B.addr = alloca i32 addrspace(1)*, align 8
+ %trigger.addr = alloca i32 addrspace(1)*, align 8
+ %i = alloca i32, align 4
+ store i32 addrspace(1)* %A, i32 addrspace(1)** %A.addr, align 8
+ store i32 addrspace(1)* %B, i32 addrspace(1)** %B.addr, align 8
+ store i32 addrspace(1)* %trigger, i32 addrspace(1)** %trigger.addr, align 8
+ store i32 0, i32* %i, align 4
+ br label %for.cond
+
+for.cond: ; preds = %for.inc, %entry
+ %0 = load i32, i32* %i, align 4
+ %cmp = icmp slt i32 %0, 10000
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body: ; preds = %for.cond
+ %1 = load i32, i32* %i, align 4
+ %idxprom = sext i32 %1 to i64
+ %2 = load i32 addrspace(1)*, i32 addrspace(1)** %trigger.addr, align 8
+ %arrayidx = getelementptr inbounds i32, i32 addrspace(1)* %2, i64 %idxprom
+ %3 = load i32, i32 addrspace(1)* %arrayidx, align 4
+ %cmp1 = icmp slt i32 %3, 100
+ br i1 %cmp1, label %if.then, label %if.end
+
+if.then: ; preds = %for.body
+ %4 = load i32, i32* %i, align 4
+ %idxprom2 = sext i32 %4 to i64
+ %5 = load i32 addrspace(1)*, i32 addrspace(1)** %B.addr, align 8
+ %arrayidx3 = getelementptr inbounds i32, i32 addrspace(1)* %5, i64 %idxprom2
+ %6 = load i32, i32 addrspace(1)* %arrayidx3, align 4
+ %7 = load i32, i32* %i, align 4
+ %idxprom4 = sext i32 %7 to i64
+ %8 = load i32 addrspace(1)*, i32 addrspace(1)** %trigger.addr, align 8
+ %arrayidx5 = getelementptr inbounds i32, i32 addrspace(1)* %8, i64 %idxprom4
+ %9 = load i32, i32 addrspace(1)* %arrayidx5, align 4
+ %add = add nsw i32 %6, %9
+ %10 = load i32, i32* %i, align 4
+ %idxprom6 = sext i32 %10 to i64
+ %11 = load i32 addrspace(1)*, i32 addrspace(1)** %A.addr, align 8
+ %arrayidx7 = getelementptr inbounds i32, i32 addrspace(1)* %11, i64 %idxprom6
+ store i32 %add, i32 addrspace(1)* %arrayidx7, align 4
+ br label %if.end
+
+if.end: ; preds = %if.then, %for.body
+ br label %for.inc
+
+for.inc: ; preds = %if.end
+ %12 = load i32, i32* %i, align 4
+ %inc = add nsw i32 %12, 1
+ store i32 %inc, i32* %i, align 4
+ br label %for.cond
+
+for.end: ; preds = %for.cond
+ ret void
+}
+
; The source code:
;
;void foo2(float *A, float *B, int *trigger) {
@@ -102,18 +175,18 @@ for.end: ; preds = %for.cond
; }
;}
-;AVX2-LABEL: @foo2
-;AVX2: icmp slt <8 x i32> %wide.load, <i32 100, i32 100, i32 100
-;AVX2: call <8 x float> @llvm.masked.load.v8f32
-;AVX2: fadd <8 x float>
-;AVX2: call void @llvm.masked.store.v8f32
-;AVX2: ret void
+;AVX-LABEL: @foo2
+;AVX: icmp slt <8 x i32> %wide.load, <i32 100, i32 100, i32 100
+;AVX: call <8 x float> @llvm.masked.load.v8f32.p0v8f32
+;AVX: fadd <8 x float>
+;AVX: call void @llvm.masked.store.v8f32.p0v8f32
+;AVX: ret void
;AVX512-LABEL: @foo2
;AVX512: icmp slt <16 x i32> %wide.load, <i32 100, i32 100, i32 100
-;AVX512: call <16 x float> @llvm.masked.load.v16f32
+;AVX512: call <16 x float> @llvm.masked.load.v16f32.p0v16f32
;AVX512: fadd <16 x float>
-;AVX512: call void @llvm.masked.store.v16f32
+;AVX512: call void @llvm.masked.store.v16f32.p0v16f32
;AVX512: ret void
; Function Attrs: nounwind uwtable
@@ -187,20 +260,20 @@ for.end: ; preds = %for.cond
; }
;}
-;AVX2-LABEL: @foo3
-;AVX2: icmp slt <4 x i32> %wide.load, <i32 100, i32 100,
-;AVX2: call <4 x double> @llvm.masked.load.v4f64
-;AVX2: sitofp <4 x i32> %wide.load to <4 x double>
-;AVX2: fadd <4 x double>
-;AVX2: call void @llvm.masked.store.v4f64
-;AVX2: ret void
+;AVX-LABEL: @foo3
+;AVX: icmp slt <4 x i32> %wide.load, <i32 100, i32 100,
+;AVX: call <4 x double> @llvm.masked.load.v4f64.p0v4f64
+;AVX: sitofp <4 x i32> %wide.load to <4 x double>
+;AVX: fadd <4 x double>
+;AVX: call void @llvm.masked.store.v4f64.p0v4f64
+;AVX: ret void
;AVX512-LABEL: @foo3
;AVX512: icmp slt <8 x i32> %wide.load, <i32 100, i32 100,
-;AVX512: call <8 x double> @llvm.masked.load.v8f64
+;AVX512: call <8 x double> @llvm.masked.load.v8f64.p0v8f64
;AVX512: sitofp <8 x i32> %wide.load to <8 x double>
;AVX512: fadd <8 x double>
-;AVX512: call void @llvm.masked.store.v8f64
+;AVX512: call void @llvm.masked.store.v8f64.p0v8f64
;AVX512: ret void
@@ -275,12 +348,13 @@ for.end: ; preds = %for.cond
; }
;}
-;AVX2-LABEL: @foo4
-;AVX2-NOT: llvm.masked
-;AVX2: ret void
+;AVX-LABEL: @foo4
+;AVX-NOT: llvm.masked
+;AVX: ret void
;AVX512-LABEL: @foo4
-;AVX512-NOT: llvm.masked
+;AVX512-NOT: llvm.masked.load
+;AVX512: llvm.masked.gather
;AVX512: ret void
; Function Attrs: nounwind uwtable
@@ -349,10 +423,10 @@ for.end: ; preds = %for.cond
; The loop here should not be vectorized due to trapping
; constant expression
-;AVX2-LABEL: @foo5
-;AVX2-NOT: llvm.masked
-;AVX2: store i32 sdiv
-;AVX2: ret void
+;AVX-LABEL: @foo5
+;AVX-NOT: llvm.masked
+;AVX: store i32 sdiv
+;AVX: ret void
;AVX512-LABEL: @foo5
;AVX512-NOT: llvm.masked
@@ -430,17 +504,17 @@ for.end: ; preds = %for.cond
;AVX2-LABEL: @foo6
;AVX2: icmp sgt <4 x i32> %reverse, zeroinitializer
;AVX2: shufflevector <4 x i1>{{.*}}<4 x i32> <i32 3, i32 2, i32 1, i32 0>
-;AVX2: call <4 x double> @llvm.masked.load.v4f64
+;AVX2: call <4 x double> @llvm.masked.load.v4f64.p0v4f64
;AVX2: fadd <4 x double>
-;AVX2: call void @llvm.masked.store.v4f64
+;AVX2: call void @llvm.masked.store.v4f64.p0v4f64
;AVX2: ret void
;AVX512-LABEL: @foo6
;AVX512: icmp sgt <8 x i32> %reverse, zeroinitializer
;AVX512: shufflevector <8 x i1>{{.*}}<8 x i32> <i32 7, i32 6, i32 5, i32 4
-;AVX512: call <8 x double> @llvm.masked.load.v8f64
+;AVX512: call <8 x double> @llvm.masked.load.v8f64.p0v8f64
;AVX512: fadd <8 x double>
-;AVX512: call void @llvm.masked.store.v8f64
+;AVX512: call void @llvm.masked.store.v8f64.p0v8f64
;AVX512: ret void
@@ -508,8 +582,8 @@ for.end: ; preds = %for.cond
; }
;AVX512-LABEL: @foo7
-;AVX512: call <8 x double*> @llvm.masked.load.v8p0f64(<8 x double*>*
-;AVX512: call void @llvm.masked.store.v8f64
+;AVX512: call <8 x double*> @llvm.masked.load.v8p0f64.p0v8p0f64(<8 x double*>*
+;AVX512: call void @llvm.masked.store.v8f64.p0v8f64
;AVX512: ret void
define void @foo7(double* noalias %out, double** noalias %in, i8* noalias %trigger, i32 %size) #0 {
@@ -580,8 +654,8 @@ for.end: ; preds = %for.cond
;}
;AVX512-LABEL: @foo8
-;AVX512: call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f(<8 x i32 ()*>* %
-;AVX512: call void @llvm.masked.store.v8f64
+;AVX512: call <8 x i32 ()*> @llvm.masked.load.v8p0f_i32f.p0v8p0f_i32f(<8 x i32 ()*>* %
+;AVX512: call void @llvm.masked.store.v8f64.p0v8f64
;AVX512: ret void
define void @foo8(double* noalias %out, i32 ()** noalias %in, i8* noalias %trigger, i32 %size) #0 {
diff --git a/test/Transforms/LoopVectorize/X86/max-mstore.ll b/test/Transforms/LoopVectorize/X86/max-mstore.ll
new file mode 100644
index 000000000000..a9ac04d45606
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/max-mstore.ll
@@ -0,0 +1,46 @@
+; RUN: opt -basicaa -loop-vectorize -force-vector-interleave=1 -S -mcpu=core-avx2
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@b = common global [256 x i32] zeroinitializer, align 16
+@a = common global [256 x i32] zeroinitializer, align 16
+
+; unsigned int a[256], b[256];
+; void foo() {
+; for (i = 0; i < 256; i++) {
+; if (b[i] > a[i])
+; a[i] = b[i];
+; }
+; }
+
+; CHECK-LABEL: foo
+; CHECK: load <8 x i32>
+; CHECK: icmp ugt <8 x i32>
+; CHECK: masked.store
+
+define void @foo() {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.inc, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
+ %arrayidx = getelementptr inbounds [256 x i32], [256 x i32]* @b, i64 0, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %arrayidx2 = getelementptr inbounds [256 x i32], [256 x i32]* @a, i64 0, i64 %indvars.iv
+ %1 = load i32, i32* %arrayidx2, align 4
+ %cmp3 = icmp ugt i32 %0, %1
+ br i1 %cmp3, label %if.then, label %for.inc
+
+if.then: ; preds = %for.body
+ store i32 %0, i32* %arrayidx2, align 4
+ br label %for.inc
+
+for.inc: ; preds = %for.body, %if.then
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 256
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.inc
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/no_fpmath.ll b/test/Transforms/LoopVectorize/X86/no_fpmath.ll
index 0bb78ce177fe..055d81a390b0 100644
--- a/test/Transforms/LoopVectorize/X86/no_fpmath.ll
+++ b/test/Transforms/LoopVectorize/X86/no_fpmath.ll
@@ -71,6 +71,7 @@ for.body: ; preds = %for.body.preheader,
attributes #0 = { nounwind }
+!llvm.dbg.cu = !{!28}
!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}
@@ -78,7 +79,7 @@ attributes #0 = { nounwind }
!1 = !{i32 1, !"PIC Level", i32 2}
!2 = !{!"clang version 3.7.0"}
!3 = !DILocation(line: 5, column: 20, scope: !4)
-!4 = distinct !DISubprogram(name: "cond_sum", scope: !5, file: !5, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, variables: !7)
+!4 = distinct !DISubprogram(name: "cond_sum", scope: !5, file: !5, line: 1, type: !6, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !28, variables: !7)
!5 = !DIFile(filename: "no_fpmath.c", directory: "")
!6 = !DISubroutineType(types: !7)
!7 = !{}
@@ -94,7 +95,7 @@ attributes #0 = { nounwind }
!17 = distinct !{!17, !18}
!18 = !{!"llvm.loop.unroll.disable"}
!19 = !DILocation(line: 16, column: 20, scope: !20)
-!20 = distinct !DISubprogram(name: "cond_sum_loop_hint", scope: !5, file: !5, line: 12, type: !6, isLocal: false, isDefinition: true, scopeLine: 12, flags: DIFlagPrototyped, isOptimized: true, variables: !7)
+!20 = distinct !DISubprogram(name: "cond_sum_loop_hint", scope: !5, file: !5, line: 12, type: !6, isLocal: false, isDefinition: true, scopeLine: 12, flags: DIFlagPrototyped, isOptimized: true, unit: !28, variables: !7)
!21 = !DILocation(line: 16, column: 3, scope: !20)
!22 = !DILocation(line: 17, column: 14, scope: !20)
!23 = !DILocation(line: 20, column: 3, scope: !20)
@@ -102,3 +103,7 @@ attributes #0 = { nounwind }
!25 = !DILocation(line: 17, column: 11, scope: !20)
!26 = distinct !{!26, !27, !18}
!27 = !{!"llvm.loop.vectorize.enable", i1 true}
+!28 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang",
+ file: !5,
+ isOptimized: true, flags: "-O2",
+ splitDebugFilename: "abc.debug", emissionKind: 2)
diff --git a/test/Transforms/LoopVectorize/X86/propagate-metadata.ll b/test/Transforms/LoopVectorize/X86/propagate-metadata.ll
new file mode 100644
index 000000000000..2825ddbac9d8
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/propagate-metadata.ll
@@ -0,0 +1,25 @@
+; RUN: opt -S -mtriple="x86_64-unknown-linux-gnu" -loop-vectorize < %s | FileCheck %s
+
+; Don't crash on unknown metadata
+; CHECK-LABEL: @no_propagate_range_metadata(
+; CHECK: load <16 x i8>
+; CHECK: store <16 x i8>
+define void @no_propagate_range_metadata(i8* readonly %first.coerce, i8* readnone %last.coerce, i8* nocapture %result) {
+for.body.preheader:
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.body.preheader
+ %result.addr.05 = phi i8* [ %incdec.ptr, %for.body ], [ %result, %for.body.preheader ]
+ %first.sroa.0.04 = phi i8* [ %incdec.ptr.i.i.i, %for.body ], [ %first.coerce, %for.body.preheader ]
+ %0 = load i8, i8* %first.sroa.0.04, align 1, !range !0
+ store i8 %0, i8* %result.addr.05, align 1
+ %incdec.ptr.i.i.i = getelementptr inbounds i8, i8* %first.sroa.0.04, i64 1
+ %incdec.ptr = getelementptr inbounds i8, i8* %result.addr.05, i64 1
+ %lnot.i = icmp eq i8* %incdec.ptr.i.i.i, %last.coerce
+ br i1 %lnot.i, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ ret void
+}
+
+!0 = !{i8 0, i8 2}
diff --git a/test/Transforms/LoopVectorize/X86/reg-usage.ll b/test/Transforms/LoopVectorize/X86/reg-usage.ll
new file mode 100644
index 000000000000..47a6e1029eda
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/reg-usage.ll
@@ -0,0 +1,71 @@
+; RUN: opt < %s -debug-only=loop-vectorize -loop-vectorize -vectorizer-maximize-bandwidth -O2 -S 2>&1 | FileCheck %s
+; REQUIRES: asserts
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@a = global [1024 x i8] zeroinitializer, align 16
+@b = global [1024 x i8] zeroinitializer, align 16
+
+define i32 @foo() {
+; This function has a loop of SAD pattern. Here we check when VF = 16 the
+; register usage doesn't exceed 16.
+;
+; CHECK-LABEL: foo
+; CHECK: LV(REG): VF = 4
+; CHECK-NEXT: LV(REG): Found max usage: 4
+; CHECK: LV(REG): VF = 8
+; CHECK-NEXT: LV(REG): Found max usage: 7
+; CHECK: LV(REG): VF = 16
+; CHECK-NEXT: LV(REG): Found max usage: 13
+
+entry:
+ br label %for.body
+
+for.cond.cleanup:
+ %add.lcssa = phi i32 [ %add, %for.body ]
+ ret i32 %add.lcssa
+
+for.body:
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %s.015 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %indvars.iv
+ %0 = load i8, i8* %arrayidx, align 1
+ %conv = zext i8 %0 to i32
+ %arrayidx2 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %indvars.iv
+ %1 = load i8, i8* %arrayidx2, align 1
+ %conv3 = zext i8 %1 to i32
+ %sub = sub nsw i32 %conv, %conv3
+ %ispos = icmp sgt i32 %sub, -1
+ %neg = sub nsw i32 0, %sub
+ %2 = select i1 %ispos, i32 %sub, i32 %neg
+ %add = add nsw i32 %2, %s.015
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 1024
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
+
+define i64 @bar(i64* nocapture %a) {
+; CHECK-LABEL: bar
+; CHECK: LV(REG): VF = 2
+; CHECK: LV(REG): Found max usage: 4
+;
+entry:
+ br label %for.body
+
+for.cond.cleanup:
+ %add2.lcssa = phi i64 [ %add2, %for.body ]
+ ret i64 %add2.lcssa
+
+for.body:
+ %i.012 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+ %s.011 = phi i64 [ 0, %entry ], [ %add2, %for.body ]
+ %arrayidx = getelementptr inbounds i64, i64* %a, i64 %i.012
+ %0 = load i64, i64* %arrayidx, align 8
+ %add = add nsw i64 %0, %i.012
+ store i64 %add, i64* %arrayidx, align 8
+ %add2 = add nsw i64 %add, %s.011
+ %inc = add nuw nsw i64 %i.012, 1
+ %exitcond = icmp eq i64 %inc, 1024
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body
+}
diff --git a/test/Transforms/LoopVectorize/X86/register-assumption.ll b/test/Transforms/LoopVectorize/X86/register-assumption.ll
new file mode 100644
index 000000000000..1add87db611e
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/register-assumption.ll
@@ -0,0 +1,32 @@
+; RUN: opt < %s -loop-vectorize -instcombine -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @test1() {
+entry:
+ %alloca = alloca float, align 4
+ br label %loop_exit.dim.11.critedge
+
+loop_exit.dim.11.critedge: ; preds = %loop_body.dim.0
+ %ptrint = ptrtoint float* %alloca to i64
+ %maskedptr = and i64 %ptrint, 4
+ %maskcond = icmp eq i64 %maskedptr, 0
+ br label %loop_header.dim.017.preheader
+
+loop_header.dim.017.preheader: ; preds = %loop_exit.dim.016, %loop_exit.dim.11.critedge
+ br label %loop_body.dim.018
+
+loop_body.dim.018: ; preds = %loop_body.dim.018, %loop_header.dim.017.preheader
+ %invar_address.dim.019.0135 = phi i64 [ 0, %loop_header.dim.017.preheader ], [ %0, %loop_body.dim.018 ]
+ call void @llvm.assume(i1 %maskcond)
+; CHECK: call void @llvm.assume(
+; CHECK-NOT: call void @llvm.assume(
+ %0 = add nuw nsw i64 %invar_address.dim.019.0135, 1
+ %1 = icmp eq i64 %0, 256
+ br i1 %1, label %loop_header.dim.017.preheader, label %loop_body.dim.018
+}
+
+; Function Attrs: nounwind
+declare void @llvm.assume(i1) #0
+
+attributes #0 = { nounwind }
diff --git a/test/Transforms/LoopVectorize/X86/scatter_crash.ll b/test/Transforms/LoopVectorize/X86/scatter_crash.ll
new file mode 100755
index 000000000000..d5c882858ebd
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/scatter_crash.ll
@@ -0,0 +1,218 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -loop-vectorize -S | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+; This test checks vector GEP before scatter.
+; The code bellow crashed due to destroyed SSA while incorrect vectorization of
+; the GEP.
+
+@d = global [10 x [10 x i32]] zeroinitializer, align 16
+@c = external global i32, align 4
+@a = external global i32, align 4
+@b = external global i64, align 8
+
+; Function Attrs: norecurse nounwind ssp uwtable
+define void @_Z3fn1v() #0 {
+; CHECK-LABEL: @_Z3fn1v(
+; CHECK: vector.body:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX:%.*]].next, %vector.body ]
+; CHECK-NEXT: [[VEC_IND:%.*]] = phi <16 x i64> [
+; CHECK-NEXT: [[VEC_IND3:%.*]] = phi <16 x i64> [
+; CHECK-NEXT: [[STEP_ADD:%.*]] = add <16 x i64> [[VEC_IND]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
+; CHECK-NEXT: [[STEP_ADD4:%.*]] = add <16 x i64> [[VEC_IND3]], <i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32, i64 32>
+; CHECK-NEXT: [[TMP10:%.*]] = sub nsw <16 x i64> <i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8, i64 8>, [[VEC_IND]]
+; CHECK-NEXT: [[TMP11:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 0
+; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP11]]
+; CHECK-NEXT: [[TMP13:%.*]] = insertelement <16 x [10 x i32]*> undef, [10 x i32]* [[TMP12]], i32 0
+; CHECK-NEXT: [[TMP14:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 1
+; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP14]]
+; CHECK-NEXT: [[TMP16:%.*]] = insertelement <16 x [10 x i32]*> [[TMP13]], [10 x i32]* [[TMP15]], i32 1
+; CHECK-NEXT: [[TMP17:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 2
+; CHECK-NEXT: [[TMP18:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP17]]
+; CHECK-NEXT: [[TMP19:%.*]] = insertelement <16 x [10 x i32]*> [[TMP16]], [10 x i32]* [[TMP18]], i32 2
+; CHECK-NEXT: [[TMP20:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 3
+; CHECK-NEXT: [[TMP21:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP20]]
+; CHECK-NEXT: [[TMP22:%.*]] = insertelement <16 x [10 x i32]*> [[TMP19]], [10 x i32]* [[TMP21]], i32 3
+; CHECK-NEXT: [[TMP23:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 4
+; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP23]]
+; CHECK-NEXT: [[TMP25:%.*]] = insertelement <16 x [10 x i32]*> [[TMP22]], [10 x i32]* [[TMP24]], i32 4
+; CHECK-NEXT: [[TMP26:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 5
+; CHECK-NEXT: [[TMP27:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP26]]
+; CHECK-NEXT: [[TMP28:%.*]] = insertelement <16 x [10 x i32]*> [[TMP25]], [10 x i32]* [[TMP27]], i32 5
+; CHECK-NEXT: [[TMP29:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 6
+; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP29]]
+; CHECK-NEXT: [[TMP31:%.*]] = insertelement <16 x [10 x i32]*> [[TMP28]], [10 x i32]* [[TMP30]], i32 6
+; CHECK-NEXT: [[TMP32:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 7
+; CHECK-NEXT: [[TMP33:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP32]]
+; CHECK-NEXT: [[TMP34:%.*]] = insertelement <16 x [10 x i32]*> [[TMP31]], [10 x i32]* [[TMP33]], i32 7
+; CHECK-NEXT: [[TMP35:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 8
+; CHECK-NEXT: [[TMP36:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP35]]
+; CHECK-NEXT: [[TMP37:%.*]] = insertelement <16 x [10 x i32]*> [[TMP34]], [10 x i32]* [[TMP36]], i32 8
+; CHECK-NEXT: [[TMP38:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 9
+; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP38]]
+; CHECK-NEXT: [[TMP40:%.*]] = insertelement <16 x [10 x i32]*> [[TMP37]], [10 x i32]* [[TMP39]], i32 9
+; CHECK-NEXT: [[TMP41:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 10
+; CHECK-NEXT: [[TMP42:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP41]]
+; CHECK-NEXT: [[TMP43:%.*]] = insertelement <16 x [10 x i32]*> [[TMP40]], [10 x i32]* [[TMP42]], i32 10
+; CHECK-NEXT: [[TMP44:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 11
+; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP44]]
+; CHECK-NEXT: [[TMP46:%.*]] = insertelement <16 x [10 x i32]*> [[TMP43]], [10 x i32]* [[TMP45]], i32 11
+; CHECK-NEXT: [[TMP47:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 12
+; CHECK-NEXT: [[TMP48:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP47]]
+; CHECK-NEXT: [[TMP49:%.*]] = insertelement <16 x [10 x i32]*> [[TMP46]], [10 x i32]* [[TMP48]], i32 12
+; CHECK-NEXT: [[TMP50:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 13
+; CHECK-NEXT: [[TMP51:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP50]]
+; CHECK-NEXT: [[TMP52:%.*]] = insertelement <16 x [10 x i32]*> [[TMP49]], [10 x i32]* [[TMP51]], i32 13
+; CHECK-NEXT: [[TMP53:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 14
+; CHECK-NEXT: [[TMP54:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP53]]
+; CHECK-NEXT: [[TMP55:%.*]] = insertelement <16 x [10 x i32]*> [[TMP52]], [10 x i32]* [[TMP54]], i32 14
+; CHECK-NEXT: [[TMP56:%.*]] = extractelement <16 x i64> [[VEC_IND]], i32 15
+; CHECK-NEXT: [[TMP57:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 [[TMP56]]
+; CHECK-NEXT: [[TMP58:%.*]] = insertelement <16 x [10 x i32]*> [[TMP55]], [10 x i32]* [[TMP57]], i32 15
+; CHECK-NEXT: [[TMP59:%.*]] = add nsw <16 x i64> [[TMP10]], [[VEC_IND3]]
+; CHECK-NEXT: [[TMP60:%.*]] = extractelement <16 x [10 x i32]*> [[TMP58]], i32 0
+; CHECK-NEXT: [[TMP61:%.*]] = extractelement <16 x i64> [[TMP59]], i32 0
+; CHECK-NEXT: [[TMP62:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP60]], i64 [[TMP61]], i64 0
+; CHECK-NEXT: [[TMP63:%.*]] = insertelement <16 x i32*> undef, i32* [[TMP62]], i32 0
+; CHECK-NEXT: [[TMP64:%.*]] = extractelement <16 x [10 x i32]*> [[TMP58]], i32 1
+; CHECK-NEXT: [[TMP65:%.*]] = extractelement <16 x i64> [[TMP59]], i32 1
+; CHECK-NEXT: [[TMP66:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP64]], i64 [[TMP65]], i64 0
+; CHECK-NEXT: [[TMP67:%.*]] = insertelement <16 x i32*> [[TMP63]], i32* [[TMP66]], i32 1
+; CHECK-NEXT: [[TMP68:%.*]] = extractelement <16 x [10 x i32]*> [[TMP58]], i32 2
+; CHECK-NEXT: [[TMP69:%.*]] = extractelement <16 x i64> [[TMP59]], i32 2
+; CHECK-NEXT: [[TMP70:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP68]], i64 [[TMP69]], i64 0
+; CHECK-NEXT: [[TMP71:%.*]] = insertelement <16 x i32*> [[TMP67]], i32* [[TMP70]], i32 2
+; CHECK-NEXT: [[TMP72:%.*]] = extractelement <16 x [10 x i32]*> [[TMP58]], i32 3
+; CHECK-NEXT: [[TMP73:%.*]] = extractelement <16 x i64> [[TMP59]], i32 3
+; CHECK-NEXT: [[TMP74:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP72]], i64 [[TMP73]], i64 0
+; CHECK-NEXT: [[TMP75:%.*]] = insertelement <16 x i32*> [[TMP71]], i32* [[TMP74]], i32 3
+; CHECK-NEXT: [[TMP76:%.*]] = extractelement <16 x [10 x i32]*> [[TMP58]], i32 4
+; CHECK-NEXT: [[TMP77:%.*]] = extractelement <16 x i64> [[TMP59]], i32 4
+; CHECK-NEXT: [[TMP78:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP76]], i64 [[TMP77]], i64 0
+; CHECK-NEXT: [[TMP79:%.*]] = insertelement <16 x i32*> [[TMP75]], i32* [[TMP78]], i32 4
+; CHECK-NEXT: [[TMP80:%.*]] = extractelement <16 x [10 x i32]*> [[TMP58]], i32 5
+; CHECK-NEXT: [[TMP81:%.*]] = extractelement <16 x i64> [[TMP59]], i32 5
+; CHECK-NEXT: [[TMP82:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP80]], i64 [[TMP81]], i64 0
+; CHECK-NEXT: [[TMP83:%.*]] = insertelement <16 x i32*> [[TMP79]], i32* [[TMP82]], i32 5
+; CHECK-NEXT: [[TMP84:%.*]] = extractelement <16 x [10 x i32]*> [[TMP58]], i32 6
+; CHECK-NEXT: [[TMP85:%.*]] = extractelement <16 x i64> [[TMP59]], i32 6
+; CHECK-NEXT: [[TMP86:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP84]], i64 [[TMP85]], i64 0
+; CHECK-NEXT: [[TMP87:%.*]] = insertelement <16 x i32*> [[TMP83]], i32* [[TMP86]], i32 6
+; CHECK-NEXT: [[TMP88:%.*]] = extractelement <16 x [10 x i32]*> [[TMP58]], i32 7
+; CHECK-NEXT: [[TMP89:%.*]] = extractelement <16 x i64> [[TMP59]], i32 7
+; CHECK-NEXT: [[TMP90:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP88]], i64 [[TMP89]], i64 0
+; CHECK-NEXT: [[TMP91:%.*]] = insertelement <16 x i32*> [[TMP87]], i32* [[TMP90]], i32 7
+; CHECK-NEXT: [[TMP92:%.*]] = extractelement <16 x [10 x i32]*> [[TMP58]], i32 8
+; CHECK-NEXT: [[TMP93:%.*]] = extractelement <16 x i64> [[TMP59]], i32 8
+; CHECK-NEXT: [[TMP94:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP92]], i64 [[TMP93]], i64 0
+; CHECK-NEXT: [[TMP95:%.*]] = insertelement <16 x i32*> [[TMP91]], i32* [[TMP94]], i32 8
+; CHECK-NEXT: [[TMP96:%.*]] = extractelement <16 x [10 x i32]*> [[TMP58]], i32 9
+; CHECK-NEXT: [[TMP97:%.*]] = extractelement <16 x i64> [[TMP59]], i32 9
+; CHECK-NEXT: [[TMP98:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP96]], i64 [[TMP97]], i64 0
+; CHECK-NEXT: [[TMP99:%.*]] = insertelement <16 x i32*> [[TMP95]], i32* [[TMP98]], i32 9
+; CHECK-NEXT: [[TMP100:%.*]] = extractelement <16 x [10 x i32]*> [[TMP58]], i32 10
+; CHECK-NEXT: [[TMP101:%.*]] = extractelement <16 x i64> [[TMP59]], i32 10
+; CHECK-NEXT: [[TMP102:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP100]], i64 [[TMP101]], i64 0
+; CHECK-NEXT: [[TMP103:%.*]] = insertelement <16 x i32*> [[TMP99]], i32* [[TMP102]], i32 10
+; CHECK-NEXT: [[TMP104:%.*]] = extractelement <16 x [10 x i32]*> [[TMP58]], i32 11
+; CHECK-NEXT: [[TMP105:%.*]] = extractelement <16 x i64> [[TMP59]], i32 11
+; CHECK-NEXT: [[TMP106:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP104]], i64 [[TMP105]], i64 0
+; CHECK-NEXT: [[TMP107:%.*]] = insertelement <16 x i32*> [[TMP103]], i32* [[TMP106]], i32 11
+; CHECK-NEXT: [[TMP108:%.*]] = extractelement <16 x [10 x i32]*> [[TMP58]], i32 12
+; CHECK-NEXT: [[TMP109:%.*]] = extractelement <16 x i64> [[TMP59]], i32 12
+; CHECK-NEXT: [[TMP110:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP108]], i64 [[TMP109]], i64 0
+; CHECK-NEXT: [[TMP111:%.*]] = insertelement <16 x i32*> [[TMP107]], i32* [[TMP110]], i32 12
+; CHECK-NEXT: [[TMP112:%.*]] = extractelement <16 x [10 x i32]*> [[TMP58]], i32 13
+; CHECK-NEXT: [[TMP113:%.*]] = extractelement <16 x i64> [[TMP59]], i32 13
+; CHECK-NEXT: [[TMP114:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP112]], i64 [[TMP113]], i64 0
+; CHECK-NEXT: [[TMP115:%.*]] = insertelement <16 x i32*> [[TMP111]], i32* [[TMP114]], i32 13
+; CHECK-NEXT: [[TMP116:%.*]] = extractelement <16 x [10 x i32]*> [[TMP58]], i32 14
+; CHECK-NEXT: [[TMP117:%.*]] = extractelement <16 x i64> [[TMP59]], i32 14
+; CHECK-NEXT: [[TMP118:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP116]], i64 [[TMP117]], i64 0
+; CHECK-NEXT: [[TMP119:%.*]] = insertelement <16 x i32*> [[TMP115]], i32* [[TMP118]], i32 14
+; CHECK-NEXT: [[TMP120:%.*]] = extractelement <16 x [10 x i32]*> [[TMP58]], i32 15
+; CHECK-NEXT: [[TMP121:%.*]] = extractelement <16 x i64> [[TMP59]], i32 15
+; CHECK-NEXT: [[TMP122:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP120]], i64 [[TMP121]], i64 0
+; CHECK-NEXT: [[TMP123:%.*]] = insertelement <16 x i32*> [[TMP119]], i32* [[TMP122]], i32 15
+; CHECK-NEXT: [[VECTORGEP:%.*]] = getelementptr inbounds [10 x i32], <16 x [10 x i32]*> [[TMP58]], <16 x i64> [[TMP59]], i64 0
+; CHECK-NEXT: call void @llvm.masked.scatter.v16i32(<16 x i32> <i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8, i32 8>, <16 x i32*> [[VECTORGEP]], i32 16, <16 x i1> <i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true, i1 true>)
+entry:
+ %0 = load i32, i32* @c, align 4
+ %cmp34 = icmp sgt i32 %0, 8
+ br i1 %cmp34, label %for.body.lr.ph, label %for.cond.cleanup
+
+for.body.lr.ph: ; preds = %entry
+ %1 = load i32, i32* @a, align 4
+ %tobool = icmp eq i32 %1, 0
+ %2 = load i64, i64* @b, align 8
+ %mul = mul i64 %2, 4063299859190
+ %tobool6 = icmp eq i64 %mul, 0
+ %3 = sext i32 %0 to i64
+ br i1 %tobool, label %for.body.us.preheader, label %for.body.preheader
+
+for.body.preheader: ; preds = %for.body.lr.ph
+ br label %for.body
+
+for.body.us.preheader: ; preds = %for.body.lr.ph
+ br label %for.body.us
+
+for.body.us: ; preds = %for.body.us.preheader, %for.cond.cleanup4.us-lcssa.us.us
+ %indvars.iv78 = phi i64 [ %indvars.iv.next79, %for.cond.cleanup4.us-lcssa.us.us ], [ 8, %for.body.us.preheader ]
+ %indvars.iv70 = phi i64 [ %indvars.iv.next71, %for.cond.cleanup4.us-lcssa.us.us ], [ 0, %for.body.us.preheader ]
+ %4 = sub nsw i64 8, %indvars.iv78
+ %add.ptr.us = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 %indvars.iv78
+ %5 = add nsw i64 %4, %indvars.iv70
+ %arraydecay.us.us.us = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr.us, i64 %5, i64 0
+ br i1 %tobool6, label %for.body5.us.us.us.preheader, label %for.body5.us.us48.preheader
+
+for.body5.us.us48.preheader: ; preds = %for.body.us
+ store i32 8, i32* %arraydecay.us.us.us, align 16
+ %indvars.iv.next66 = or i64 %indvars.iv70, 1
+ %6 = add nsw i64 %4, %indvars.iv.next66
+ %arraydecay.us.us55.1 = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr.us, i64 %6, i64 0
+ store i32 8, i32* %arraydecay.us.us55.1, align 8
+ br label %for.cond.cleanup4.us-lcssa.us.us
+
+for.body5.us.us.us.preheader: ; preds = %for.body.us
+ store i32 7, i32* %arraydecay.us.us.us, align 16
+ %indvars.iv.next73 = or i64 %indvars.iv70, 1
+ %7 = add nsw i64 %4, %indvars.iv.next73
+ %arraydecay.us.us.us.1 = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr.us, i64 %7, i64 0
+ store i32 7, i32* %arraydecay.us.us.us.1, align 8
+ br label %for.cond.cleanup4.us-lcssa.us.us
+
+for.cond.cleanup4.us-lcssa.us.us: ; preds = %for.body5.us.us48.preheader, %for.body5.us.us.us.preheader
+ %indvars.iv.next79 = add nuw nsw i64 %indvars.iv78, 2
+ %cmp.us = icmp slt i64 %indvars.iv.next79, %3
+ %indvars.iv.next71 = add nuw nsw i64 %indvars.iv70, 2
+ br i1 %cmp.us, label %for.body.us, label %for.cond.cleanup.loopexit
+
+for.cond.cleanup.loopexit: ; preds = %for.cond.cleanup4.us-lcssa.us.us
+ br label %for.cond.cleanup
+
+for.cond.cleanup.loopexit99: ; preds = %for.body
+ br label %for.cond.cleanup
+
+for.cond.cleanup: ; preds = %for.cond.cleanup.loopexit99, %for.cond.cleanup.loopexit, %entry
+ ret void
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv95 = phi i64 [ %indvars.iv.next96, %for.body ], [ 8, %for.body.preheader ]
+ %indvars.iv87 = phi i64 [ %indvars.iv.next88, %for.body ], [ 0, %for.body.preheader ]
+ %8 = sub nsw i64 8, %indvars.iv95
+ %add.ptr = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* @d, i64 0, i64 %indvars.iv95
+ %9 = add nsw i64 %8, %indvars.iv87
+ %arraydecay.us31 = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr, i64 %9, i64 0
+ store i32 8, i32* %arraydecay.us31, align 16
+ %indvars.iv.next90 = or i64 %indvars.iv87, 1
+ %10 = add nsw i64 %8, %indvars.iv.next90
+ %arraydecay.us31.1 = getelementptr inbounds [10 x i32], [10 x i32]* %add.ptr, i64 %10, i64 0
+ store i32 8, i32* %arraydecay.us31.1, align 8
+ %indvars.iv.next96 = add nuw nsw i64 %indvars.iv95, 2
+ %cmp = icmp slt i64 %indvars.iv.next96, %3
+ %indvars.iv.next88 = add nuw nsw i64 %indvars.iv87, 2
+ br i1 %cmp, label %for.body, label %for.cond.cleanup.loopexit99
+}
+
+attributes #0 = { norecurse nounwind ssp uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="knl" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" }
diff --git a/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll b/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll
index 38af11c443d0..387eec4d5ede 100644
--- a/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll
+++ b/test/Transforms/LoopVectorize/X86/uint64_to_fp64-cost-model.ll
@@ -5,8 +5,8 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
target triple = "x86_64-apple-macosx10.8.0"
-; CHECK: cost of 20 for VF 2 For instruction: %conv = uitofp i64 %tmp to double
-; CHECK: cost of 40 for VF 4 For instruction: %conv = uitofp i64 %tmp to double
+; CHECK: cost of 10 for VF 2 For instruction: %conv = uitofp i64 %tmp to double
+; CHECK: cost of 20 for VF 4 For instruction: %conv = uitofp i64 %tmp to double
define void @uint64_to_double_cost(i64* noalias nocapture %a, double* noalias nocapture readonly %b) nounwind {
entry:
br label %for.body
diff --git a/test/Transforms/LoopVectorize/X86/uniform-phi.ll b/test/Transforms/LoopVectorize/X86/uniform-phi.ll
new file mode 100644
index 000000000000..1759cb819760
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/uniform-phi.ll
@@ -0,0 +1,50 @@
+; RUN: opt < %s -loop-vectorize -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -debug-only=loop-vectorize -S 2>&1 | FileCheck %s
+; REQUIRES: asserts
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK-LABEL: test
+; CHECK-DAG: LV: Found uniform instruction: %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+; CHECK-DAG: LV: Found uniform instruction: %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+; CHECK-DAG: LV: Found uniform instruction: %exitcond = icmp eq i64 %indvars.iv, 1599
+
+define void @test(float* noalias nocapture %a, float* noalias nocapture readonly %b) #0 {
+entry:
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds float, float* %b, i64 %indvars.iv
+ %tmp0 = load float, float* %arrayidx, align 4
+ %add = fadd float %tmp0, 1.000000e+00
+ %arrayidx5 = getelementptr inbounds float, float* %a, i64 %indvars.iv
+ store float %add, float* %arrayidx5, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv, 1599
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+; CHECK-LABEL: foo
+; CHECK-DAG: LV: Found uniform instruction: %cond = icmp eq i64 %i.next, %n
+; CHECK-DAG: LV: Found uniform instruction: %tmp1 = getelementptr inbounds i32, i32* %a, i32 %tmp0
+; CHECK-NOT: LV: Found uniform instruction: %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
+
+define void @foo(i32* %a, i64 %n) {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
+ %tmp0 = trunc i64 %i to i32
+ %tmp1 = getelementptr inbounds i32, i32* %a, i32 %tmp0
+ store i32 %tmp0, i32* %tmp1, align 4
+ %i.next = add nuw nsw i64 %i, 1
+ %cond = icmp eq i64 %i.next, %n
+ br i1 %cond, label %for.end, label %for.body
+
+for.end:
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/uniform_load.ll b/test/Transforms/LoopVectorize/X86/uniform_load.ll
new file mode 100644
index 000000000000..e71292265c2a
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/uniform_load.ll
@@ -0,0 +1,47 @@
+; RUN: opt -basicaa -loop-vectorize -S -mcpu=core-avx2 < %s | FileCheck %s
+
+;float inc = 0.5;
+;void foo(float *A, unsigned N) {
+;
+; for (unsigned i=0; i<N; i++){
+; A[i] += inc;
+; }
+;}
+
+; CHECK-LABEL: foo
+; CHECK: vector.body
+; CHECK: load <8 x float>
+; CHECK: fadd <8 x float>
+; CHECK: store <8 x float>
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@inc = global float 5.000000e-01, align 4
+
+define void @foo(float* nocapture %A, i32 %N) #0 {
+entry:
+ %cmp3 = icmp eq i32 %N, 0
+ br i1 %cmp3, label %for.end, label %for.body.preheader
+
+for.body.preheader: ; preds = %entry
+ br label %for.body
+
+for.body: ; preds = %for.body.preheader, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %0 = load float, float* @inc, align 4
+ %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
+ %1 = load float, float* %arrayidx, align 4
+ %add = fadd float %0, %1
+ store float %add, float* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %N
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll b/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll
index e6dc39c2afad..fe9d59efc8b3 100644
--- a/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll
+++ b/test/Transforms/LoopVectorize/X86/vector_max_bandwidth.ll
@@ -16,7 +16,7 @@ target triple = "x86_64-unknown-linux-gnu"
; -vectorizer-maximize-bandwidth is indicated.
;
; CHECK-label: foo
-; CHECK: LV: Selecting VF: 16.
+; CHECK: LV: Selecting VF: 32.
define void @foo() {
entry:
br label %for.body
diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks-loopid-dbg.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks-loopid-dbg.ll
new file mode 100644
index 000000000000..1d51b9c4beaa
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks-loopid-dbg.ll
@@ -0,0 +1,74 @@
+; RUN: opt < %s -loop-vectorize -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck -check-prefix=VECTORIZED %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-interleave=4 -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck -check-prefix=UNROLLED %s
+; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-interleave=1 -mtriple=x86_64-unknown-linux -S -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck -check-prefix=NONE %s
+
+; RUN: llc < %s -mtriple x86_64-pc-linux-gnu -o - | FileCheck -check-prefix=DEBUG-OUTPUT %s
+; DEBUG-OUTPUT-NOT: .loc
+; DEBUG-OUTPUT-NOT: {{.*}}.debug_info
+
+; VECTORIZED: remark: vectorization-remarks.c:17:8: vectorized loop (vectorization width: 4, interleaved count: 1)
+; UNROLLED: remark: vectorization-remarks.c:17:8: interleaved loop (interleaved count: 4)
+; NONE: remark: vectorization-remarks.c:17:8: loop not vectorized: vectorization and interleaving are explicitly disabled, or vectorize width and interleave count are both set to 1
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+define i32 @foo(i32 %n) #0 !dbg !4 {
+entry:
+ %diff = alloca i32, align 4
+ %cb = alloca [16 x i8], align 16
+ %cc = alloca [16 x i8], align 16
+ store i32 0, i32* %diff, align 4, !tbaa !11
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %add8 = phi i32 [ 0, %entry ], [ %add, %for.body ]
+ %arrayidx = getelementptr inbounds [16 x i8], [16 x i8]* %cb, i64 0, i64 %indvars.iv
+ %0 = load i8, i8* %arrayidx, align 1, !tbaa !21
+ %conv = sext i8 %0 to i32
+ %arrayidx2 = getelementptr inbounds [16 x i8], [16 x i8]* %cc, i64 0, i64 %indvars.iv
+ %1 = load i8, i8* %arrayidx2, align 1, !tbaa !21
+ %conv3 = sext i8 %1 to i32
+ %sub = sub i32 %conv, %conv3
+ %add = add nsw i32 %sub, %add8
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 16
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !25
+
+for.end: ; preds = %for.body
+ store i32 %add, i32* %diff, align 4, !tbaa !11
+ call void @ibar(i32* %diff) #2
+ ret i32 0
+}
+
+declare void @ibar(i32*) #1
+
+!llvm.module.flags = !{!7, !8}
+!llvm.ident = !{!9}
+!llvm.dbg.cu = !{!24}
+
+!1 = !DIFile(filename: "vectorization-remarks.c", directory: ".")
+!2 = !{}
+!3 = !{!4}
+!4 = distinct !DISubprogram(name: "foo", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !24, scopeLine: 6, file: !1, scope: !5, type: !6, variables: !2)
+!5 = !DIFile(filename: "vectorization-remarks.c", directory: ".")
+!6 = !DISubroutineType(types: !2)
+!7 = !{i32 2, !"Dwarf Version", i32 4}
+!8 = !{i32 1, !"Debug Info Version", i32 3}
+!9 = !{!"clang version 3.5.0 "}
+!10 = !DILocation(line: 8, column: 3, scope: !4)
+!11 = !{!12, !12, i64 0}
+!12 = !{!"int", !13, i64 0}
+!13 = !{!"omnipotent char", !14, i64 0}
+!14 = !{!"Simple C/C++ TBAA"}
+!15 = !DILocation(line: 17, column: 8, scope: !16)
+!16 = distinct !DILexicalBlock(line: 17, column: 8, file: !1, scope: !17)
+!17 = distinct !DILexicalBlock(line: 17, column: 8, file: !1, scope: !18)
+!18 = distinct !DILexicalBlock(line: 17, column: 3, file: !1, scope: !4)
+!19 = !DILocation(line: 18, column: 5, scope: !20)
+!20 = distinct !DILexicalBlock(line: 17, column: 27, file: !1, scope: !18)
+!21 = !{!13, !13, i64 0}
+!22 = !DILocation(line: 20, column: 3, scope: !4)
+!23 = !DILocation(line: 21, column: 3, scope: !4)
+!24 = distinct !DICompileUnit(language: DW_LANG_C89, file: !1, emissionKind: NoDebug)
+!25 = !{!25, !15}
diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
index 02fab4447341..419f2e02456b 100644
--- a/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
+++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks-missed.ll
@@ -54,8 +54,9 @@ for.body: ; preds = %entry, %for.body
%indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
%arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv, !dbg !16
%0 = trunc i64 %indvars.iv to i32, !dbg !16
+ %ld = load i32, i32* %arrayidx, align 4
store i32 %0, i32* %arrayidx, align 4, !dbg !16, !tbaa !18
- %cmp3 = icmp sle i32 %0, %Length, !dbg !22
+ %cmp3 = icmp sle i32 %ld, %Length, !dbg !22
%indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !12
%1 = trunc i64 %indvars.iv.next to i32
%cmp = icmp slt i32 %1, %Length, !dbg !12
@@ -122,15 +123,14 @@ attributes #0 = { nounwind }
!llvm.module.flags = !{!9, !10}
!llvm.ident = !{!11}
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, runtimeVersion: 6, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, runtimeVersion: 6, emissionKind: LineTablesOnly, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "source.cpp", directory: ".")
!2 = !{}
-!3 = !{!4, !7, !8}
-!4 = distinct !DISubprogram(name: "test", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
+!4 = distinct !DISubprogram(name: "test", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "source.cpp", directory: ".")
!6 = !DISubroutineType(types: !2)
-!7 = distinct !DISubprogram(name: "test_disabled", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 10, file: !1, scope: !5, type: !6, variables: !2)
-!8 = distinct !DISubprogram(name: "test_array_bounds", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 16, file: !1, scope: !5, type: !6, variables: !2)
+!7 = distinct !DISubprogram(name: "test_disabled", line: 10, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 10, file: !1, scope: !5, type: !6, variables: !2)
+!8 = distinct !DISubprogram(name: "test_array_bounds", line: 16, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 16, file: !1, scope: !5, type: !6, variables: !2)
!9 = !{i32 2, !"Dwarf Version", i32 2}
!10 = !{i32 2, !"Debug Info Version", i32 3}
!11 = !{!"clang version 3.5.0"}
diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks-profitable.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks-profitable.ll
index df8c668f1262..fc9f97328fb7 100644
--- a/test/Transforms/LoopVectorize/X86/vectorization-remarks-profitable.ll
+++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks-profitable.ll
@@ -82,13 +82,12 @@ attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fp
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 250016)", isOptimized: false, runtimeVersion: 0, emissionKind: 2, enums: !2, subprograms: !3)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 250016)", isOptimized: false, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2)
!1 = !DIFile(filename: "vectorization-remarks-profitable.c", directory: "")
!2 = !{}
-!3 = !{!4, !6}
-!4 = distinct !DISubprogram(name: "do_not_interleave", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!4 = distinct !DISubprogram(name: "do_not_interleave", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
!5 = !DISubroutineType(types: !2)
-!6 = distinct !DISubprogram(name: "interleave_not_profitable", scope: !1, file: !1, line: 8, type: !5, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!6 = distinct !DISubprogram(name: "interleave_not_profitable", scope: !1, file: !1, line: 8, type: !5, isLocal: false, isDefinition: true, scopeLine: 8, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
!7 = !{i32 2, !"Dwarf Version", i32 4}
!8 = !{i32 2, !"Debug Info Version", i32 3}
!9 = !{!"clang version 3.8.0 (trunk 250016)"}
diff --git a/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll b/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
index 77a405ebb434..c14a2cb91b60 100644
--- a/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
+++ b/test/Transforms/LoopVectorize/X86/vectorization-remarks.ll
@@ -2,9 +2,6 @@
; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-interleave=4 -mtriple=x86_64-unknown-linux -S -pass-remarks='loop-vectorize' 2>&1 | FileCheck -check-prefix=UNROLLED %s
; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-interleave=1 -mtriple=x86_64-unknown-linux -S -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck -check-prefix=NONE %s
-; This code has all the !dbg annotations needed to track source line information,
-; but is missing the llvm.dbg.cu annotation. This prevents code generation from
-; emitting debug info in the final output.
; RUN: llc < %s -mtriple x86_64-pc-linux-gnu -o - | FileCheck -check-prefix=DEBUG-OUTPUT %s
; DEBUG-OUTPUT-NOT: .loc
; DEBUG-OUTPUT-NOT: {{.*}}.debug_info
@@ -48,11 +45,12 @@ declare void @ibar(i32*) #1
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
+!llvm.dbg.cu = !{!24}
!1 = !DIFile(filename: "vectorization-remarks.c", directory: ".")
!2 = !{}
!3 = !{!4}
-!4 = distinct !DISubprogram(name: "foo", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 6, file: !1, scope: !5, type: !6, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !24, scopeLine: 6, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "vectorization-remarks.c", directory: ".")
!6 = !DISubroutineType(types: !2)
!7 = !{i32 2, !"Dwarf Version", i32 4}
@@ -72,3 +70,4 @@ declare void @ibar(i32*) #1
!21 = !{!13, !13, i64 0}
!22 = !DILocation(line: 20, column: 3, scope: !4)
!23 = !DILocation(line: 21, column: 3, scope: !4)
+!24 = distinct !DICompileUnit(language: DW_LANG_C89, file: !1, emissionKind: NoDebug)
diff --git a/test/Transforms/LoopVectorize/X86/vectorize-only-for-real.ll b/test/Transforms/LoopVectorize/X86/vectorize-only-for-real.ll
new file mode 100644
index 000000000000..d1473552c98a
--- /dev/null
+++ b/test/Transforms/LoopVectorize/X86/vectorize-only-for-real.ll
@@ -0,0 +1,39 @@
+; RUN: opt -S -basicaa -loop-vectorize < %s | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+define i32 @accum(i32* nocapture readonly %x, i32 %N) #0 {
+entry:
+; CHECK-LABEL: @accum
+; CHECK-NOT: x i32>
+
+ %cmp1 = icmp sgt i32 %N, 0
+ br i1 %cmp1, label %for.inc.preheader, label %for.end
+
+for.inc.preheader:
+ br label %for.inc
+
+for.inc:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.inc ], [ 0, %for.inc.preheader ]
+ %sum.02 = phi i32 [ %add, %for.inc ], [ 0, %for.inc.preheader ]
+ %arrayidx = getelementptr inbounds i32, i32* %x, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %0, %sum.02
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %N
+ br i1 %exitcond, label %for.end.loopexit, label %for.inc
+
+for.end.loopexit:
+ %add.lcssa = phi i32 [ %add, %for.inc ]
+ br label %for.end
+
+for.end:
+ %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add.lcssa, %for.end.loopexit ]
+ ret i32 %sum.0.lcssa
+
+; CHECK: ret i32
+}
+
+attributes #0 = { "target-cpu"="core2" "target-features"="+sse,-avx,-avx2,-sse2" }
+
diff --git a/test/Transforms/LoopVectorize/cast-induction.ll b/test/Transforms/LoopVectorize/cast-induction.ll
index fae89976a7bc..54f68b7bd076 100644
--- a/test/Transforms/LoopVectorize/cast-induction.ll
+++ b/test/Transforms/LoopVectorize/cast-induction.ll
@@ -8,7 +8,7 @@ target triple = "x86_64-apple-macosx10.8.0"
@a = common global [2048 x i32] zeroinitializer, align 16
;CHECK-LABEL: @example12(
-;CHECK: trunc i64
+;CHECK: %vec.ind1 = phi <4 x i32>
;CHECK: store <4 x i32>
;CHECK: ret void
define void @example12() nounwind uwtable ssp {
diff --git a/test/Transforms/LoopVectorize/conditional-assignment.ll b/test/Transforms/LoopVectorize/conditional-assignment.ll
index 8d820e277b26..0115b09582f9 100644
--- a/test/Transforms/LoopVectorize/conditional-assignment.ll
+++ b/test/Transforms/LoopVectorize/conditional-assignment.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -loop-vectorize -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=loop-vectorize -S -pass-remarks-missed='loop-vectorize' -pass-remarks-analysis='loop-vectorize' 2>&1 | FileCheck %s
; CHECK: remark: source.c:2:8: loop not vectorized: store that is conditionally executed prevents vectorization
@@ -36,11 +37,10 @@ attributes #0 = { nounwind }
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0", isOptimized: true, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.6.0", isOptimized: true, emissionKind: LineTablesOnly, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "source.c", directory: ".")
!2 = !{}
-!3 = !{!4}
-!4 = distinct !DISubprogram(name: "conditional_store", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
+!4 = distinct !DISubprogram(name: "conditional_store", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "source.c", directory: ".")
!6 = !DISubroutineType(types: !2)
!7 = !{i32 2, !"Dwarf Version", i32 2}
diff --git a/test/Transforms/LoopVectorize/control-flow.ll b/test/Transforms/LoopVectorize/control-flow.ll
index a2fc69a6e907..78ce29eff527 100644
--- a/test/Transforms/LoopVectorize/control-flow.ll
+++ b/test/Transforms/LoopVectorize/control-flow.ll
@@ -55,11 +55,10 @@ attributes #0 = { nounwind }
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, runtimeVersion: 6, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, runtimeVersion: 6, emissionKind: LineTablesOnly, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "source.cpp", directory: ".")
!2 = !{}
-!3 = !{!4}
-!4 = distinct !DISubprogram(name: "test", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !2)
+!4 = distinct !DISubprogram(name: "test", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "source.cpp", directory: ".")
!6 = !DISubroutineType(types: !2)
!7 = !{i32 2, !"Dwarf Version", i32 2}
diff --git a/test/Transforms/LoopVectorize/dbg.value.ll b/test/Transforms/LoopVectorize/dbg.value.ll
index f68b6865b072..d7d3ff6d9f96 100644
--- a/test/Transforms/LoopVectorize/dbg.value.ll
+++ b/test/Transforms/LoopVectorize/dbg.value.ll
@@ -44,10 +44,9 @@ attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!26}
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang", isOptimized: true, emissionKind: 0, file: !25, enums: !1, retainedTypes: !1, subprograms: !2, globals: !11)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang", isOptimized: true, emissionKind: FullDebug, file: !25, enums: !1, retainedTypes: !1, globals: !11)
!1 = !{}
-!2 = !{!3}
-!3 = distinct !DISubprogram(name: "test", linkageName: "test", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !25, scope: !4, type: !5, variables: !8)
+!3 = distinct !DISubprogram(name: "test", linkageName: "test", line: 5, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 5, file: !25, scope: !4, type: !5, variables: !8)
!4 = !DIFile(filename: "test", directory: "/path/to/somewhere")
!5 = !DISubroutineType(types: !6)
!6 = !{!7}
diff --git a/test/Transforms/LoopVectorize/debugloc.ll b/test/Transforms/LoopVectorize/debugloc.ll
index 0214f1c4847c..45cb9a2baeaf 100644
--- a/test/Transforms/LoopVectorize/debugloc.ll
+++ b/test/Transforms/LoopVectorize/debugloc.ll
@@ -63,11 +63,10 @@ attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!18, !27}
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 185038) (llvm/trunk 185097)", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 185038) (llvm/trunk 185097)", isOptimized: true, emissionKind: FullDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "-", directory: "/Volumes/Data/backedup/dev/os/llvm/debug")
!2 = !{}
-!3 = !{!4}
-!4 = distinct !DISubprogram(name: "f", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 3, file: !5, scope: !6, type: !7, variables: !12)
+!4 = distinct !DISubprogram(name: "f", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 3, file: !5, scope: !6, type: !7, variables: !12)
!5 = !DIFile(filename: "<stdin>", directory: "/Volumes/Data/backedup/dev/os/llvm/debug")
!6 = !DIFile(filename: "<stdin>", directory: "/Volumes/Data/backedup/dev/os/llvm/debug")
!7 = !DISubroutineType(types: !8)
diff --git a/test/Transforms/LoopVectorize/gcc-examples.ll b/test/Transforms/LoopVectorize/gcc-examples.ll
index 188090186158..95b0d16d57f9 100644
--- a/test/Transforms/LoopVectorize/gcc-examples.ll
+++ b/test/Transforms/LoopVectorize/gcc-examples.ll
@@ -368,7 +368,7 @@ define void @example11() nounwind uwtable ssp {
}
;CHECK-LABEL: @example12(
-;CHECK: trunc i64
+;CHECK: %vec.ind1 = phi <4 x i32>
;CHECK: store <4 x i32>
;CHECK: ret void
define void @example12() nounwind uwtable ssp {
diff --git a/test/Transforms/LoopVectorize/gep_with_bitcast.ll b/test/Transforms/LoopVectorize/gep_with_bitcast.ll
index ab2fd5e4e1c6..e73b6eacbe17 100644
--- a/test/Transforms/LoopVectorize/gep_with_bitcast.ll
+++ b/test/Transforms/LoopVectorize/gep_with_bitcast.ll
@@ -12,10 +12,11 @@ target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
; CHECK-LABEL: @foo
; CHECK: vector.body
-; CHECK: %0 = getelementptr inbounds double*, double** %in, i64 %index
-; CHECK: %1 = bitcast double** %0 to <4 x i64>*
-; CHECK: %wide.load = load <4 x i64>, <4 x i64>* %1, align 8
-; CHECK: %2 = icmp eq <4 x i64> %wide.load, zeroinitializer
+; CHECK: %[[IV:.+]] = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; CHECK: %[[v0:.+]] = getelementptr inbounds double*, double** %in, i64 %[[IV]]
+; CHECK: %[[v1:.+]] = bitcast double** %[[v0]] to <4 x i64>*
+; CHECK: %wide.load = load <4 x i64>, <4 x i64>* %[[v1]], align 8
+; CHECK: icmp eq <4 x i64> %wide.load, zeroinitializer
; CHECK: br i1
define void @foo(double** noalias nocapture readonly %in, double** noalias nocapture readnone %out, i8* noalias nocapture %res) #0 {
@@ -37,4 +38,4 @@ for.body:
for.end:
ret void
-} \ No newline at end of file
+}
diff --git a/test/Transforms/LoopVectorize/global_alias.ll b/test/Transforms/LoopVectorize/global_alias.ll
index 84fa48cd5148..0da841bcbbd0 100644
--- a/test/Transforms/LoopVectorize/global_alias.ll
+++ b/test/Transforms/LoopVectorize/global_alias.ll
@@ -12,7 +12,7 @@ target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
@PA = external global i32*
-;; === First, the tests that should always vectorize, wither statically or by adding run-time checks ===
+;; === First, the tests that should always vectorize, whether statically or by adding run-time checks ===
; /// Different objects, positive induction, constant distance
@@ -387,7 +387,7 @@ for.end: ; preds = %for.cond
; return Foo.A[a];
; }
; CHECK-LABEL: define i32 @noAlias08(
-; CHECK: sub <4 x i32>
+; CHECK: sub nuw nsw <4 x i32>
; CHECK: ret
define i32 @noAlias08(i32 %a) #0 {
@@ -439,7 +439,7 @@ for.end: ; preds = %for.cond
; return Foo.A[a];
; }
; CHECK-LABEL: define i32 @noAlias09(
-; CHECK: sub <4 x i32>
+; CHECK: sub nuw nsw <4 x i32>
; CHECK: ret
define i32 @noAlias09(i32 %a) #0 {
@@ -721,7 +721,7 @@ for.end: ; preds = %for.cond
; return Foo.A[a];
; }
; CHECK-LABEL: define i32 @noAlias14(
-; CHECK: sub <4 x i32>
+; CHECK: sub nuw nsw <4 x i32>
; CHECK: ret
define i32 @noAlias14(i32 %a) #0 {
diff --git a/test/Transforms/LoopVectorize/hints-trans.ll b/test/Transforms/LoopVectorize/hints-trans.ll
new file mode 100644
index 000000000000..ec5ddbb03ce1
--- /dev/null
+++ b/test/Transforms/LoopVectorize/hints-trans.ll
@@ -0,0 +1,30 @@
+; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -instsimplify -simplifycfg < %s | FileCheck %s
+; Note: -instsimplify -simplifycfg remove the (now dead) original loop, making
+; it easy to test that the llvm.loop.unroll.disable hint is still present.
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: norecurse nounwind uwtable
+define void @foo(i32* nocapture %b) #0 {
+entry:
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body
+ ret void
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
+ store i32 1, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 16
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body, !llvm.loop !0
+}
+
+; CHECK-LABEL: @foo
+; CHECK: = !{!"llvm.loop.unroll.disable"}
+
+attributes #0 = { norecurse nounwind uwtable }
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.unroll.disable"}
diff --git a/test/Transforms/LoopVectorize/if-conversion.ll b/test/Transforms/LoopVectorize/if-conversion.ll
index fb5416976525..daa8f147e210 100644
--- a/test/Transforms/LoopVectorize/if-conversion.ll
+++ b/test/Transforms/LoopVectorize/if-conversion.ll
@@ -73,7 +73,7 @@ for.end:
;CHECK-LABEL: @reduction_func(
;CHECK: load <4 x i32>
;CHECK: add <4 x i32>
-;CHECK: icmp sle <4 x i32>
+;CHECK: icmp slt <4 x i32>
;CHECK: select <4 x i1>
;CHECK: ret i32
define i32 @reduction_func(i32* nocapture %A, i32 %n) nounwind uwtable readonly ssp {
diff --git a/test/Transforms/LoopVectorize/if-pred-stores.ll b/test/Transforms/LoopVectorize/if-pred-stores.ll
index 0d70f557f834..f39e774bb895 100644
--- a/test/Transforms/LoopVectorize/if-pred-stores.ll
+++ b/test/Transforms/LoopVectorize/if-pred-stores.ll
@@ -1,7 +1,7 @@
-; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize -simplifycfg < %s | FileCheck %s --check-prefix=UNROLL
-; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize < %s | FileCheck %s --check-prefix=UNROLL-NOSIMPLIFY
-; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -enable-cond-stores-vec -simplifycfg < %s | FileCheck %s --check-prefix=VEC
-; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -enable-cond-stores-vec -simplifycfg -instcombine < %s | FileCheck %s --check-prefix=VEC-IC
+; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize -verify-loop-info -simplifycfg < %s | FileCheck %s --check-prefix=UNROLL
+; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=1 -force-vector-interleave=2 -loop-vectorize -verify-loop-info < %s | FileCheck %s --check-prefix=UNROLL-NOSIMPLIFY
+; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -enable-cond-stores-vec -verify-loop-info -simplifycfg < %s | FileCheck %s --check-prefix=VEC
+; RUN: opt -S -vectorize-num-stores-pred=1 -force-vector-width=2 -force-vector-interleave=1 -loop-vectorize -enable-cond-stores-vec -verify-loop-info -simplifycfg -instcombine < %s | FileCheck %s --check-prefix=VEC-IC
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.9.0"
diff --git a/test/Transforms/LoopVectorize/induction-step.ll b/test/Transforms/LoopVectorize/induction-step.ll
new file mode 100644
index 000000000000..f56456e82dfa
--- /dev/null
+++ b/test/Transforms/LoopVectorize/induction-step.ll
@@ -0,0 +1,124 @@
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=8 -S | FileCheck %s
+
+; int int_inc;
+;
+;int induction_with_global(int init, int *restrict A, int N) {
+; int x = init;
+; for (int i=0;i<N;i++){
+; A[i] = x;
+; x += int_inc;
+; }
+; return x;
+;}
+
+; CHECK-LABEL: @induction_with_global(
+; CHECK: %[[INT_INC:.*]] = load i32, i32* @int_inc, align 4
+; CHECK: vector.body:
+; CHECK: %[[VAR1:.*]] = insertelement <8 x i32> undef, i32 %[[INT_INC]], i32 0
+; CHECK: %[[VAR2:.*]] = shufflevector <8 x i32> %[[VAR1]], <8 x i32> undef, <8 x i32> zeroinitializer
+; CHECK: mul <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, %[[VAR2]]
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+
+@int_inc = common global i32 0, align 4
+
+define i32 @induction_with_global(i32 %init, i32* noalias nocapture %A, i32 %N) {
+entry:
+ %cmp4 = icmp sgt i32 %N, 0
+ br i1 %cmp4, label %for.body.lr.ph, label %for.end
+
+for.body.lr.ph: ; preds = %entry
+ %0 = load i32, i32* @int_inc, align 4
+ %1 = mul i32 %0, %N
+ br label %for.body
+
+for.body: ; preds = %for.body, %for.body.lr.ph
+ %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
+ %x.05 = phi i32 [ %init, %for.body.lr.ph ], [ %add, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ store i32 %x.05, i32* %arrayidx, align 4
+ %add = add nsw i32 %0, %x.05
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %N
+ br i1 %exitcond, label %for.end.loopexit, label %for.body
+
+for.end.loopexit: ; preds = %for.body
+ %2 = add i32 %1, %init
+ br label %for.end
+
+for.end: ; preds = %for.end.loopexit, %entry
+ %x.0.lcssa = phi i32 [ %init, %entry ], [ %2, %for.end.loopexit ]
+ ret i32 %x.0.lcssa
+}
+
+
+;int induction_with_loop_inv(int init, int *restrict A, int N, int M) {
+; int x = init;
+; for (int j = 0; j < M; j++) {
+; for (int i=0; i<N; i++){
+; A[i] = x;
+; x += j; // induction step is a loop invariant variable
+; }
+; }
+; return x;
+;}
+
+; CHECK-LABEL: @induction_with_loop_inv(
+; CHECK: for.cond1.preheader:
+; CHECK: %[[INDVAR0:.*]] = phi i32 [ 0,
+; CHECK: %[[INDVAR1:.*]] = phi i32 [ 0,
+; CHECK: vector.body:
+; CHECK: %[[VAR1:.*]] = insertelement <8 x i32> undef, i32 %[[INDVAR1]], i32 0
+; CHECK: %[[VAR2:.*]] = shufflevector <8 x i32> %[[VAR1]], <8 x i32> undef, <8 x i32> zeroinitializer
+; CHECK: mul <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>, %[[VAR2]]
+
+define i32 @induction_with_loop_inv(i32 %init, i32* noalias nocapture %A, i32 %N, i32 %M) {
+entry:
+ %cmp10 = icmp sgt i32 %M, 0
+ br i1 %cmp10, label %for.cond1.preheader.lr.ph, label %for.end6
+
+for.cond1.preheader.lr.ph: ; preds = %entry
+ %cmp27 = icmp sgt i32 %N, 0
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.inc4, %for.cond1.preheader.lr.ph
+ %indvars.iv15 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %indvars.iv.next16, %for.inc4 ]
+ %j.012 = phi i32 [ 0, %for.cond1.preheader.lr.ph ], [ %inc5, %for.inc4 ]
+ %x.011 = phi i32 [ %init, %for.cond1.preheader.lr.ph ], [ %x.1.lcssa, %for.inc4 ]
+ br i1 %cmp27, label %for.body3.preheader, label %for.inc4
+
+for.body3.preheader: ; preds = %for.cond1.preheader
+ br label %for.body3
+
+for.body3: ; preds = %for.body3.preheader, %for.body3
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body3 ], [ 0, %for.body3.preheader ]
+ %x.18 = phi i32 [ %add, %for.body3 ], [ %x.011, %for.body3.preheader ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ store i32 %x.18, i32* %arrayidx, align 4
+ %add = add nsw i32 %x.18, %j.012
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %N
+ br i1 %exitcond, label %for.inc4.loopexit, label %for.body3
+
+for.inc4.loopexit: ; preds = %for.body3
+ %0 = add i32 %x.011, %indvars.iv15
+ br label %for.inc4
+
+for.inc4: ; preds = %for.inc4.loopexit, %for.cond1.preheader
+ %x.1.lcssa = phi i32 [ %x.011, %for.cond1.preheader ], [ %0, %for.inc4.loopexit ]
+ %inc5 = add nuw nsw i32 %j.012, 1
+ %indvars.iv.next16 = add i32 %indvars.iv15, %N
+ %exitcond17 = icmp eq i32 %inc5, %M
+ br i1 %exitcond17, label %for.end6.loopexit, label %for.cond1.preheader
+
+for.end6.loopexit: ; preds = %for.inc4
+ %x.1.lcssa.lcssa = phi i32 [ %x.1.lcssa, %for.inc4 ]
+ br label %for.end6
+
+for.end6: ; preds = %for.end6.loopexit, %entry
+ %x.0.lcssa = phi i32 [ %init, %entry ], [ %x.1.lcssa.lcssa, %for.end6.loopexit ]
+ ret i32 %x.0.lcssa
+}
diff --git a/test/Transforms/LoopVectorize/induction.ll b/test/Transforms/LoopVectorize/induction.ll
index 59ee66a4a35d..c1f0bd95dbd7 100644
--- a/test/Transforms/LoopVectorize/induction.ll
+++ b/test/Transforms/LoopVectorize/induction.ll
@@ -1,4 +1,8 @@
; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=UNROLL
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=2 -S | FileCheck %s --check-prefix=UNROLL-NO-IC
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -enable-interleaved-mem-accesses -instcombine -S | FileCheck %s --check-prefix=INTERLEAVE
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
@@ -27,8 +31,6 @@ for.end:
ret void
}
-; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND
-
; Make sure we remove unneeded vectorization of induction variables.
; In order for instcombine to cleanup the vectorized induction variables that we
; create in the loop vectorizer we need to perform some form of redundancy
@@ -66,6 +68,185 @@ loopexit:
ret void
}
+; Make sure we don't create a vector induction phi node that is unused.
+; Scalarize the step vectors instead.
+;
+; for (int i = 0; i < n; ++i)
+; sum += a[i];
+;
+; CHECK-LABEL: @scalarize_induction_variable_01(
+; CHECK: vector.body:
+; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; CHECK: %[[i0:.+]] = add i64 %index, 0
+; CHECK: %[[i1:.+]] = add i64 %index, 1
+; CHECK: getelementptr inbounds i64, i64* %a, i64 %[[i0]]
+; CHECK: getelementptr inbounds i64, i64* %a, i64 %[[i1]]
+;
+; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_01(
+; UNROLL-NO-IC: vector.body:
+; UNROLL-NO-IC: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; UNROLL-NO-IC: %[[i0:.+]] = add i64 %index, 0
+; UNROLL-NO-IC: %[[i1:.+]] = add i64 %index, 1
+; UNROLL-NO-IC: %[[i2:.+]] = add i64 %index, 2
+; UNROLL-NO-IC: %[[i3:.+]] = add i64 %index, 3
+; UNROLL-NO-IC: getelementptr inbounds i64, i64* %a, i64 %[[i0]]
+; UNROLL-NO-IC: getelementptr inbounds i64, i64* %a, i64 %[[i1]]
+; UNROLL-NO-IC: getelementptr inbounds i64, i64* %a, i64 %[[i2]]
+; UNROLL-NO-IC: getelementptr inbounds i64, i64* %a, i64 %[[i3]]
+;
+; IND-LABEL: @scalarize_induction_variable_01(
+; IND: vector.body:
+; IND: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; IND-NOT: add i64 {{.*}}, 2
+; IND: getelementptr inbounds i64, i64* %a, i64 %index
+;
+; UNROLL-LABEL: @scalarize_induction_variable_01(
+; UNROLL: vector.body:
+; UNROLL: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; UNROLL-NOT: add i64 {{.*}}, 4
+; UNROLL: %[[g1:.+]] = getelementptr inbounds i64, i64* %a, i64 %index
+; UNROLL: getelementptr i64, i64* %[[g1]], i64 2
+
+define i64 @scalarize_induction_variable_01(i64 *%a, i64 %n) {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
+ %sum = phi i64 [ %2, %for.body ], [ 0, %entry ]
+ %0 = getelementptr inbounds i64, i64* %a, i64 %i
+ %1 = load i64, i64* %0, align 8
+ %2 = add i64 %1, %sum
+ %i.next = add nuw nsw i64 %i, 1
+ %cond = icmp slt i64 %i.next, %n
+ br i1 %cond, label %for.body, label %for.end
+
+for.end:
+ %3 = phi i64 [ %2, %for.body ]
+ ret i64 %3
+}
+
+; Make sure we scalarize the step vectors used for the pointer arithmetic. We
+; can't easily simplify vectorized step vectors.
+;
+; float s = 0;
+; for (int i = 0; i < n; i += 8)
+; s += (a[i] + b[i] + 1.0f);
+;
+; CHECK-LABEL: @scalarize_induction_variable_02(
+; CHECK: vector.body:
+; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; CHECK: %offset.idx = shl i64 %index, 3
+; CHECK: %[[i0:.+]] = add i64 %offset.idx, 0
+; CHECK: %[[i1:.+]] = add i64 %offset.idx, 8
+; CHECK: getelementptr inbounds float, float* %a, i64 %[[i0]]
+; CHECK: getelementptr inbounds float, float* %a, i64 %[[i1]]
+; CHECK: getelementptr inbounds float, float* %b, i64 %[[i0]]
+; CHECK: getelementptr inbounds float, float* %b, i64 %[[i1]]
+;
+; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_02(
+; UNROLL-NO-IC: vector.body:
+; UNROLL-NO-IC: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; UNROLL-NO-IC: %offset.idx = shl i64 %index, 3
+; UNROLL-NO-IC: %[[i0:.+]] = add i64 %offset.idx, 0
+; UNROLL-NO-IC: %[[i1:.+]] = add i64 %offset.idx, 8
+; UNROLL-NO-IC: %[[i2:.+]] = add i64 %offset.idx, 16
+; UNROLL-NO-IC: %[[i3:.+]] = add i64 %offset.idx, 24
+; UNROLL-NO-IC: getelementptr inbounds float, float* %a, i64 %[[i0]]
+; UNROLL-NO-IC: getelementptr inbounds float, float* %a, i64 %[[i1]]
+; UNROLL-NO-IC: getelementptr inbounds float, float* %a, i64 %[[i2]]
+; UNROLL-NO-IC: getelementptr inbounds float, float* %a, i64 %[[i3]]
+; UNROLL-NO-IC: getelementptr inbounds float, float* %b, i64 %[[i0]]
+; UNROLL-NO-IC: getelementptr inbounds float, float* %b, i64 %[[i1]]
+; UNROLL-NO-IC: getelementptr inbounds float, float* %b, i64 %[[i2]]
+; UNROLL-NO-IC: getelementptr inbounds float, float* %b, i64 %[[i3]]
+;
+; IND-LABEL: @scalarize_induction_variable_02(
+; IND: vector.body:
+; IND: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; IND: %[[i0:.+]] = shl i64 %index, 3
+; IND: %[[i1:.+]] = or i64 %[[i0]], 8
+; IND: getelementptr inbounds float, float* %a, i64 %[[i0]]
+; IND: getelementptr inbounds float, float* %a, i64 %[[i1]]
+;
+; UNROLL-LABEL: @scalarize_induction_variable_02(
+; UNROLL: vector.body:
+; UNROLL: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; UNROLL: %[[i0:.+]] = shl i64 %index, 3
+; UNROLL: %[[i1:.+]] = or i64 %[[i0]], 8
+; UNROLL: %[[i2:.+]] = or i64 %[[i0]], 16
+; UNROLL: %[[i3:.+]] = or i64 %[[i0]], 24
+; UNROLL: getelementptr inbounds float, float* %a, i64 %[[i0]]
+; UNROLL: getelementptr inbounds float, float* %a, i64 %[[i1]]
+; UNROLL: getelementptr inbounds float, float* %a, i64 %[[i2]]
+; UNROLL: getelementptr inbounds float, float* %a, i64 %[[i3]]
+
+define float @scalarize_induction_variable_02(float* %a, float* %b, i64 %n) {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
+ %s = phi float [ 0.0, %entry ], [ %6, %for.body ]
+ %0 = getelementptr inbounds float, float* %a, i64 %i
+ %1 = load float, float* %0, align 4
+ %2 = getelementptr inbounds float, float* %b, i64 %i
+ %3 = load float, float* %2, align 4
+ %4 = fadd fast float %s, 1.0
+ %5 = fadd fast float %4, %1
+ %6 = fadd fast float %5, %3
+ %i.next = add nuw nsw i64 %i, 8
+ %cond = icmp slt i64 %i.next, %n
+ br i1 %cond, label %for.body, label %for.end
+
+for.end:
+ %s.lcssa = phi float [ %6, %for.body ]
+ ret float %s.lcssa
+}
+
+; Make sure we scalarize the step vectors used for the pointer arithmetic. We
+; can't easily simplify vectorized step vectors. (Interleaved accesses.)
+;
+; for (int i = 0; i < n; ++i)
+;   p[i].y ^= y;
+;
+; INTERLEAVE-LABEL: @scalarize_induction_variable_03(
+; INTERLEAVE: vector.body:
+; INTERLEAVE: %[[i0:.+]] = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; INTERLEAVE: %[[i1:.+]] = or i64 %[[i0]], 1
+; INTERLEAVE: %[[i2:.+]] = or i64 %[[i0]], 2
+; INTERLEAVE: %[[i3:.+]] = or i64 %[[i0]], 3
+; INTERLEAVE: %[[i4:.+]] = or i64 %[[i0]], 4
+; INTERLEAVE: %[[i5:.+]] = or i64 %[[i0]], 5
+; INTERLEAVE: %[[i6:.+]] = or i64 %[[i0]], 6
+; INTERLEAVE: %[[i7:.+]] = or i64 %[[i0]], 7
+; INTERLEAVE: getelementptr inbounds %pair, %pair* %p, i64 %[[i0]], i32 1
+; INTERLEAVE: getelementptr inbounds %pair, %pair* %p, i64 %[[i1]], i32 1
+; INTERLEAVE: getelementptr inbounds %pair, %pair* %p, i64 %[[i2]], i32 1
+; INTERLEAVE: getelementptr inbounds %pair, %pair* %p, i64 %[[i3]], i32 1
+; INTERLEAVE: getelementptr inbounds %pair, %pair* %p, i64 %[[i4]], i32 1
+; INTERLEAVE: getelementptr inbounds %pair, %pair* %p, i64 %[[i5]], i32 1
+; INTERLEAVE: getelementptr inbounds %pair, %pair* %p, i64 %[[i6]], i32 1
+; INTERLEAVE: getelementptr inbounds %pair, %pair* %p, i64 %[[i7]], i32 1
+
+%pair = type { i32, i32 }
+define void @scalarize_induction_variable_03(%pair *%p, i32 %y, i64 %n) {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
+ %f = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
+ %0 = load i32, i32* %f, align 8
+ %1 = xor i32 %0, %y
+ store i32 %1, i32* %f, align 8
+ %i.next = add nuw nsw i64 %i, 1
+ %cond = icmp slt i64 %i.next, %n
+ br i1 %cond, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
; Make sure that the loop exit count computation does not overflow for i8 and
; i16. The exit count of these loops is i8/i16 max + 1. If we don't cast the
@@ -114,9 +295,11 @@ define i32 @i16_loop() nounwind readnone ssp uwtable {
; CHECK-LABEL: max_i32_backedgetaken
; CHECK: br i1 true, label %scalar.ph, label %min.iters.checked
+; CHECK: middle.block:
+; CHECK: %[[v9:.+]] = extractelement <2 x i32> %bin.rdx, i32 0
; CHECK: scalar.ph:
-; CHECK: %bc.resume.val = phi i32 [ 0, %middle.block ], [ 0, %0 ]
-; CHECK: %bc.merge.rdx = phi i32 [ 1, %0 ], [ 1, %min.iters.checked ], [ %5, %middle.block ]
+; CHECK: %bc.resume.val = phi i32 [ 0, %middle.block ], [ 0, %[[v0:.+]] ]
+; CHECK: %bc.merge.rdx = phi i32 [ 1, %[[v0:.+]] ], [ 1, %min.iters.checked ], [ %[[v9]], %middle.block ]
define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable {
@@ -166,3 +349,186 @@ cond.end.i:
loopexit:
ret i32 %and.i
}
+
+; The SCEV expression of %sphi is (zext i8 {%t,+,1}<%loop> to i32)
+; In order to recognize %sphi as an induction PHI and vectorize this loop,
+; we need to convert the SCEV expression into an AddRecExpr.
+; The expression gets converted to {zext i8 %t to i32,+,1}.
+
+; CHECK-LABEL: wrappingindvars1
+; CHECK-LABEL: vector.scevcheck
+; CHECK-LABEL: vector.ph
+; CHECK: %[[START:.*]] = add <2 x i32> %{{.*}}, <i32 0, i32 1>
+; CHECK-LABEL: vector.body
+; CHECK: %[[PHI:.*]] = phi <2 x i32> [ %[[START]], %vector.ph ], [ %[[STEP:.*]], %vector.body ]
+; CHECK: %[[STEP]] = add <2 x i32> %[[PHI]], <i32 2, i32 2>
+define void @wrappingindvars1(i8 %t, i32 %len, i32 *%A) {
+ entry:
+ %st = zext i8 %t to i16
+ %ext = zext i8 %t to i32
+ %ecmp = icmp ult i16 %st, 42
+ br i1 %ecmp, label %loop, label %exit
+
+ loop:
+
+ %idx = phi i8 [ %t, %entry ], [ %idx.inc, %loop ]
+ %idx.b = phi i32 [ 0, %entry ], [ %idx.b.inc, %loop ]
+ %sphi = phi i32 [ %ext, %entry ], [%idx.inc.ext, %loop]
+
+ %ptr = getelementptr inbounds i32, i32* %A, i8 %idx
+ store i32 %sphi, i32* %ptr
+
+ %idx.inc = add i8 %idx, 1
+ %idx.inc.ext = zext i8 %idx.inc to i32
+ %idx.b.inc = add nuw nsw i32 %idx.b, 1
+
+ %c = icmp ult i32 %idx.b, %len
+ br i1 %c, label %loop, label %exit
+
+ exit:
+ ret void
+}
+
+; The SCEV expression of %sphi is (4 * (zext i8 {%t,+,1}<%loop> to i32))
+; In order to recognize %sphi as an induction PHI and vectorize this loop,
+; we need to convert the SCEV expression into an AddRecExpr.
+; The expression gets converted to ({4 * (zext %t to i32),+,4}).
+; CHECK-LABEL: wrappingindvars2
+; CHECK-LABEL: vector.scevcheck
+; CHECK-LABEL: vector.ph
+; CHECK: %[[START:.*]] = add <2 x i32> %{{.*}}, <i32 0, i32 4>
+; CHECK-LABEL: vector.body
+; CHECK: %[[PHI:.*]] = phi <2 x i32> [ %[[START]], %vector.ph ], [ %[[STEP:.*]], %vector.body ]
+; CHECK: %[[STEP]] = add <2 x i32> %[[PHI]], <i32 8, i32 8>
+define void @wrappingindvars2(i8 %t, i32 %len, i32 *%A) {
+
+entry:
+ %st = zext i8 %t to i16
+ %ext = zext i8 %t to i32
+ %ext.mul = mul i32 %ext, 4
+
+ %ecmp = icmp ult i16 %st, 42
+ br i1 %ecmp, label %loop, label %exit
+
+ loop:
+
+ %idx = phi i8 [ %t, %entry ], [ %idx.inc, %loop ]
+ %sphi = phi i32 [ %ext.mul, %entry ], [%mul, %loop]
+ %idx.b = phi i32 [ 0, %entry ], [ %idx.b.inc, %loop ]
+
+ %ptr = getelementptr inbounds i32, i32* %A, i8 %idx
+ store i32 %sphi, i32* %ptr
+
+ %idx.inc = add i8 %idx, 1
+ %idx.inc.ext = zext i8 %idx.inc to i32
+ %mul = mul i32 %idx.inc.ext, 4
+ %idx.b.inc = add nuw nsw i32 %idx.b, 1
+
+ %c = icmp ult i32 %idx.b, %len
+ br i1 %c, label %loop, label %exit
+
+ exit:
+ ret void
+}
+
+; Check that we generate vectorized IVs in the pre-header
+; instead of widening the scalar IV inside the loop, when
+; we know how to do that.
+; IND-LABEL: veciv
+; IND: vector.body:
+; IND: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; IND: %vec.ind = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %step.add, %vector.body ]
+; IND: %step.add = add <2 x i32> %vec.ind, <i32 2, i32 2>
+; IND: %index.next = add i32 %index, 2
+; IND: %[[CMP:.*]] = icmp eq i32 %index.next
+; IND: br i1 %[[CMP]]
+; UNROLL-LABEL: veciv
+; UNROLL: vector.body:
+; UNROLL: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; UNROLL: %vec.ind = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %step.add1, %vector.body ]
+; UNROLL: %step.add = add <2 x i32> %vec.ind, <i32 2, i32 2>
+; UNROLL: %step.add1 = add <2 x i32> %vec.ind, <i32 4, i32 4>
+; UNROLL: %index.next = add i32 %index, 4
+; UNROLL: %[[CMP:.*]] = icmp eq i32 %index.next
+; UNROLL: br i1 %[[CMP]]
+define void @veciv(i32* nocapture %a, i32 %start, i32 %k) {
+for.body.preheader:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i32, i32* %a, i32 %indvars.iv
+ store i32 %indvars.iv, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
+ %exitcond = icmp eq i32 %indvars.iv.next, %k
+ br i1 %exitcond, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+; IND-LABEL: trunciv
+; IND: vector.body:
+; IND: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; IND: %[[VECIND:.*]] = phi <2 x i32> [ <i32 0, i32 1>, %vector.ph ], [ %[[STEPADD:.*]], %vector.body ]
+; IND: %[[STEPADD]] = add <2 x i32> %[[VECIND]], <i32 2, i32 2>
+; IND: %index.next = add i64 %index, 2
+; IND: %[[CMP:.*]] = icmp eq i64 %index.next
+; IND: br i1 %[[CMP]]
+define void @trunciv(i32* nocapture %a, i32 %start, i64 %k) {
+for.body.preheader:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
+ %trunc.iv = trunc i64 %indvars.iv to i32
+ %arrayidx = getelementptr inbounds i32, i32* %a, i32 %trunc.iv
+ store i32 %trunc.iv, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, %k
+ br i1 %exitcond, label %exit, label %for.body
+
+exit:
+ ret void
+}
+
+; IND-LABEL: nonprimary
+; IND-LABEL: vector.ph
+; IND: %[[INSERT:.*]] = insertelement <2 x i32> undef, i32 %i, i32 0
+; IND: %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> undef, <2 x i32> zeroinitializer
+; IND: %[[START:.*]] = add <2 x i32> %[[SPLAT]], <i32 0, i32 42>
+; IND-LABEL: vector.body:
+; IND: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; IND: %vec.ind = phi <2 x i32> [ %[[START]], %vector.ph ], [ %step.add, %vector.body ]
+; IND: %step.add = add <2 x i32> %vec.ind, <i32 84, i32 84>
+; IND: %index.next = add i32 %index, 2
+; IND: %[[CMP:.*]] = icmp eq i32 %index.next
+; IND: br i1 %[[CMP]]
+; UNROLL-LABEL: nonprimary
+; UNROLL-LABEL: vector.ph
+; UNROLL: %[[INSERT:.*]] = insertelement <2 x i32> undef, i32 %i, i32 0
+; UNROLL: %[[SPLAT:.*]] = shufflevector <2 x i32> %[[INSERT]], <2 x i32> undef, <2 x i32> zeroinitializer
+; UNROLL: %[[START:.*]] = add <2 x i32> %[[SPLAT]], <i32 0, i32 42>
+; UNROLL-LABEL: vector.body:
+; UNROLL: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; UNROLL: %vec.ind = phi <2 x i32> [ %[[START]], %vector.ph ], [ %step.add1, %vector.body ]
+; UNROLL: %step.add = add <2 x i32> %vec.ind, <i32 84, i32 84>
+; UNROLL: %step.add1 = add <2 x i32> %vec.ind, <i32 168, i32 168>
+; UNROLL: %index.next = add i32 %index, 4
+; UNROLL: %[[CMP:.*]] = icmp eq i32 %index.next
+; UNROLL: br i1 %[[CMP]]
+define void @nonprimary(i32* nocapture %a, i32 %start, i32 %i, i32 %k) {
+for.body.preheader:
+ br label %for.body
+
+for.body:
+ %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ %i, %for.body.preheader ]
+ %arrayidx = getelementptr inbounds i32, i32* %a, i32 %indvars.iv
+ store i32 %indvars.iv, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i32 %indvars.iv, 42
+ %exitcond = icmp eq i32 %indvars.iv.next, %k
+ br i1 %exitcond, label %exit, label %for.body
+
+exit:
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/induction_plus.ll b/test/Transforms/LoopVectorize/induction_plus.ll
index 7c4c8f2edcbf..5e96d4196cae 100644
--- a/test/Transforms/LoopVectorize/induction_plus.ll
+++ b/test/Transforms/LoopVectorize/induction_plus.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -instcombine -S | FileCheck %s
+; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S | FileCheck %s
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.8.0"
@@ -6,8 +6,11 @@ target triple = "x86_64-apple-macosx10.8.0"
@array = common global [1024 x i32] zeroinitializer, align 16
;CHECK-LABEL: @array_at_plus_one(
-;CHECK: add i64 %index, 12
-;CHECK: trunc i64
+;CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+;CHECK: %vec.ind = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %vector.ph ], [ %step.add, %vector.body ]
+;CHECK: %vec.ind1 = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, %vector.ph ], [ %step.add2, %vector.body ]
+;CHECK: add <4 x i64> %vec.ind, <i64 4, i64 4, i64 4, i64 4>
+;CHECK: add nsw <4 x i64> %vec.ind, <i64 12, i64 12, i64 12, i64 12>
;CHECK: ret i32
define i32 @array_at_plus_one(i32 %n) nounwind uwtable ssp {
%1 = icmp sgt i32 %n, 0
diff --git a/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll b/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll
new file mode 100644
index 000000000000..9ee6e6d529ad
--- /dev/null
+++ b/test/Transforms/LoopVectorize/interleaved-accesses-pred-stores.ll
@@ -0,0 +1,164 @@
+; RUN: opt -S -loop-vectorize -instcombine -force-vector-width=2 -force-vector-interleave=1 -enable-interleaved-mem-accesses -vectorize-num-stores-pred=1 -enable-cond-stores-vec < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+%pair = type { i64, i64 }
+
+; Ensure that we vectorize the interleaved load group even though the loop
+; contains a conditional store. The store group contains gaps and is not
+; vectorized.
+;
+; CHECK-LABEL: @interleaved_with_cond_store_0(
+;
+; CHECK: min.iters.checked
+; CHECK: %n.mod.vf = and i64 %[[N:.+]], 1
+; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
+; CHECK: %[[R:.+]] = select i1 %[[IsZero]], i64 2, i64 %n.mod.vf
+; CHECK: %n.vec = sub i64 %[[N]], %[[R]]
+;
+; CHECK: vector.body:
+; CHECK: %wide.vec = load <4 x i64>, <4 x i64>* %{{.*}}
+; CHECK: %strided.vec = shufflevector <4 x i64> %wide.vec, <4 x i64> undef, <2 x i32> <i32 0, i32 2>
+;
+; CHECK: pred.store.if
+; CHECK: %[[X1:.+]] = extractelement <4 x i64> %wide.vec, i32 0
+; CHECK: store i64 %[[X1]], {{.*}}
+;
+; CHECK: pred.store.if
+; CHECK: %[[X2:.+]] = extractelement <4 x i64> %wide.vec, i32 2
+; CHECK: store i64 %[[X2]], {{.*}}
+
+define void @interleaved_with_cond_store_0(%pair *%p, i64 %x, i64 %n) {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ %i.next, %if.merge ], [ 0, %entry ]
+ %p.1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
+ %0 = load i64, i64* %p.1, align 8
+ %1 = icmp eq i64 %0, %x
+ br i1 %1, label %if.then, label %if.merge
+
+if.then:
+ store i64 %0, i64* %p.1, align 8
+ br label %if.merge
+
+if.merge:
+ %i.next = add nuw nsw i64 %i, 1
+ %cond = icmp slt i64 %i.next, %n
+ br i1 %cond, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+; Ensure that we don't form a single interleaved group for the two loads. The
+; conditional store prevents the second load from being hoisted. The two load
+; groups are separately vectorized. The store group contains gaps and is not
+; vectorized.
+;
+; CHECK-LABEL: @interleaved_with_cond_store_1(
+;
+; CHECK: min.iters.checked
+; CHECK: %n.mod.vf = and i64 %[[N:.+]], 1
+; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
+; CHECK: %[[R:.+]] = select i1 %[[IsZero]], i64 2, i64 %n.mod.vf
+; CHECK: %n.vec = sub i64 %[[N]], %[[R]]
+;
+; CHECK: vector.body:
+; CHECK: %[[L1:.+]] = load <4 x i64>, <4 x i64>* %{{.*}}
+; CHECK: %strided.vec = shufflevector <4 x i64> %[[L1]], <4 x i64> undef, <2 x i32> <i32 0, i32 2>
+;
+; CHECK: pred.store.if
+; CHECK: %[[X1:.+]] = extractelement <4 x i64> %wide.vec, i32 0
+; CHECK: store i64 %[[X1]], {{.*}}
+;
+; CHECK: pred.store.if
+; CHECK: %[[X2:.+]] = extractelement <4 x i64> %wide.vec, i32 2
+; CHECK: store i64 %[[X2]], {{.*}}
+;
+; CHECK: pred.store.continue
+; CHECK: %[[L2:.+]] = load <4 x i64>, <4 x i64>* {{.*}}
+; CHECK: %[[X3:.+]] = extractelement <4 x i64> %[[L2]], i32 0
+; CHECK: store i64 %[[X3]], {{.*}}
+; CHECK: %[[X4:.+]] = extractelement <4 x i64> %[[L2]], i32 2
+; CHECK: store i64 %[[X4]], {{.*}}
+
+define void @interleaved_with_cond_store_1(%pair *%p, i64 %x, i64 %n) {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ %i.next, %if.merge ], [ 0, %entry ]
+ %p.0 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
+ %p.1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
+ %0 = load i64, i64* %p.1, align 8
+ %1 = icmp eq i64 %0, %x
+ br i1 %1, label %if.then, label %if.merge
+
+if.then:
+ store i64 %0, i64* %p.0, align 8
+ br label %if.merge
+
+if.merge:
+ %2 = load i64, i64* %p.0, align 8
+ store i64 %2, i64 *%p.1, align 8
+ %i.next = add nuw nsw i64 %i, 1
+ %cond = icmp slt i64 %i.next, %n
+ br i1 %cond, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+; Ensure that we don't create a single interleaved group for the two stores.
+; The second store is conditional and we can't sink the first store inside the
+; predicated block. The load group is vectorized, and the store groups contain
+; gaps and are not vectorized.
+;
+; CHECK-LABEL: @interleaved_with_cond_store_2(
+;
+; CHECK: min.iters.checked
+; CHECK: %n.mod.vf = and i64 %[[N:.+]], 1
+; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
+; CHECK: %[[R:.+]] = select i1 %[[IsZero]], i64 2, i64 %n.mod.vf
+; CHECK: %n.vec = sub i64 %[[N]], %[[R]]
+;
+; CHECK: vector.body:
+; CHECK: %[[L1:.+]] = load <4 x i64>, <4 x i64>* %{{.*}}
+; CHECK: %strided.vec = shufflevector <4 x i64> %[[L1]], <4 x i64> undef, <2 x i32> <i32 0, i32 2>
+; CHECK: store i64 %x, {{.*}}
+; CHECK: store i64 %x, {{.*}}
+;
+; CHECK: pred.store.if
+; CHECK: %[[X1:.+]] = extractelement <4 x i64> %wide.vec, i32 0
+; CHECK: store i64 %[[X1]], {{.*}}
+;
+; CHECK: pred.store.if
+; CHECK: %[[X2:.+]] = extractelement <4 x i64> %wide.vec, i32 2
+; CHECK: store i64 %[[X2]], {{.*}}
+
+define void @interleaved_with_cond_store_2(%pair *%p, i64 %x, i64 %n) {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ %i.next, %if.merge ], [ 0, %entry ]
+ %p.0 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
+ %p.1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
+ %0 = load i64, i64* %p.1, align 8
+ store i64 %x, i64* %p.0, align 8
+ %1 = icmp eq i64 %0, %x
+ br i1 %1, label %if.then, label %if.merge
+
+if.then:
+ store i64 %0, i64* %p.1, align 8
+ br label %if.merge
+
+if.merge:
+ %i.next = add nuw nsw i64 %i, 1
+ %cond = icmp slt i64 %i.next, %n
+ br i1 %cond, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/interleaved-accesses.ll b/test/Transforms/LoopVectorize/interleaved-accesses.ll
index 54ce3e29293a..868c3a2cdabf 100644
--- a/test/Transforms/LoopVectorize/interleaved-accesses.ll
+++ b/test/Transforms/LoopVectorize/interleaved-accesses.ll
@@ -284,18 +284,24 @@ for.body: ; preds = %for.body, %entry
}
; Check vectorization on an interleaved load group of factor 2 with 1 gap
-; (missing the load of odd elements).
+; (missing the load of odd elements). Because the vectorized loop would
+; speculatively access memory out-of-bounds, we must execute at least one
+; iteration of the scalar loop.
-; void even_load(int *A, int *B) {
+; void even_load_static_tc(int *A, int *B) {
; for (unsigned i = 0; i < 1024; i+=2)
; B[i/2] = A[i] * 2;
; }
-; CHECK-LABEL: @even_load(
-; CHECK-NOT: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
-; CHECK-NOT: %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK-LABEL: @even_load_static_tc(
+; CHECK: vector.body:
+; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
+; CHECK: %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK: icmp eq i64 %index.next, 508
+; CHECK: middle.block:
+; CHECK: br i1 false, label %for.cond.cleanup, label %scalar.ph
-define void @even_load(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) {
+define void @even_load_static_tc(i32* noalias nocapture readonly %A, i32* noalias nocapture %B) {
entry:
br label %for.body
@@ -315,6 +321,93 @@ for.body: ; preds = %for.body, %entry
br i1 %cmp, label %for.body, label %for.cond.cleanup
}
+; Check vectorization on an interleaved load group of factor 2 with 1 gap
+; (missing the load of odd elements). Because the vectorized loop would
+; speculatively access memory out-of-bounds, we must execute at least one
+; iteration of the scalar loop.
+
+; void even_load_dynamic_tc(int *A, int *B, unsigned N) {
+; for (unsigned i = 0; i < N; i+=2)
+; B[i/2] = A[i] * 2;
+; }
+
+; CHECK-LABEL: @even_load_dynamic_tc(
+; CHECK: min.iters.checked:
+; CHECK: %n.mod.vf = and i64 %[[N:[a-zA-Z0-9]+]], 3
+; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
+; CHECK: %[[R:[a-zA-Z0-9]+]] = select i1 %[[IsZero]], i64 4, i64 %n.mod.vf
+; CHECK: %n.vec = sub i64 %[[N]], %[[R]]
+; CHECK: vector.body:
+; CHECK: %wide.vec = load <8 x i32>, <8 x i32>* %{{.*}}, align 4
+; CHECK: %strided.vec = shufflevector <8 x i32> %wide.vec, <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK: icmp eq i64 %index.next, %n.vec
+; CHECK: middle.block:
+; CHECK: br i1 false, label %for.cond.cleanup, label %scalar.ph
+
+define void @even_load_dynamic_tc(i32* noalias nocapture readonly %A, i32* noalias nocapture %B, i64 %N) {
+entry:
+ br label %for.body
+
+for.cond.cleanup: ; preds = %for.body
+ ret void
+
+for.body: ; preds = %for.body, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
+ %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
+ %tmp = load i32, i32* %arrayidx, align 4
+ %mul = shl nsw i32 %tmp, 1
+ %tmp1 = lshr exact i64 %indvars.iv, 1
+ %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %tmp1
+ store i32 %mul, i32* %arrayidx2, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 2
+ %cmp = icmp ult i64 %indvars.iv.next, %N
+ br i1 %cmp, label %for.body, label %for.cond.cleanup
+}
+
+; Check vectorization on a reverse interleaved load group of factor 2 with 1
+; gap and a reverse interleaved store group of factor 2. The interleaved load
+; group should be removed since it has a gap and is reverse.
+
+; struct pair {
+; int x;
+; int y;
+; };
+;
+; void load_gap_reverse(struct pair *P1, struct pair *P2, int X) {
+; for (int i = 1023; i >= 0; i--) {
+; int a = X + i;
+;     int b = P2[i].y - i;
+;     P1[i].x = a;
+;     P2[i].y = b;
+; }
+; }
+
+; CHECK-LABEL: @load_gap_reverse(
+; CHECK-NOT: %wide.vec = load <8 x i64>, <8 x i64>* %{{.*}}, align 8
+; CHECK-NOT: %strided.vec = shufflevector <8 x i64> %wide.vec, <8 x i64> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+
+%pair = type { i64, i64 }
+define void @load_gap_reverse(%pair* noalias nocapture readonly %P1, %pair* noalias nocapture readonly %P2, i64 %X) {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ 1023, %entry ], [ %i.next, %for.body ]
+ %0 = add nsw i64 %X, %i
+ %1 = getelementptr inbounds %pair, %pair* %P1, i64 %i, i32 0
+ %2 = getelementptr inbounds %pair, %pair* %P2, i64 %i, i32 1
+ %3 = load i64, i64* %2, align 8
+ %4 = sub nsw i64 %3, %i
+ store i64 %0, i64* %1, align 8
+ store i64 %4, i64* %2, align 8
+ %i.next = add nsw i64 %i, -1
+ %cond = icmp sgt i64 %i, 0
+ br i1 %cond, label %for.body, label %for.exit
+
+for.exit:
+ ret void
+}
+
; Check vectorization on interleaved access groups identified from mixed
; loads/stores.
; void mixed_load2_store2(int *A, int *B) {
@@ -462,4 +555,309 @@ for.body: ; preds = %for.body, %entry
br i1 %exitcond, label %for.cond.cleanup, label %for.body
}
+; Check vectorization of interleaved access groups in the presence of
+; dependences (PR27626). The following tests check that we don't reorder
+; dependent loads and stores when generating code for interleaved access
+; groups. Stores should be scalarized because the required code motion would
+; break dependences, and the remaining interleaved load groups should have
+; gaps.
+
+; PR27626_0: Ensure a strided store is not moved after a dependent (zero
+; distance) strided load.
+
+; void PR27626_0(struct pair *p, int z, int n) {
+; for (int i = 0; i < n; i++) {
+; p[i].x = z;
+; p[i].y = p[i].x;
+; }
+; }
+
+; CHECK-LABEL: @PR27626_0(
+; CHECK: min.iters.checked:
+; CHECK: %n.mod.vf = and i64 %[[N:.+]], 3
+; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
+; CHECK: %[[R:[a-zA-Z0-9]+]] = select i1 %[[IsZero]], i64 4, i64 %n.mod.vf
+; CHECK: %n.vec = sub i64 %[[N]], %[[R]]
+; CHECK: vector.body:
+; CHECK: %[[L1:.+]] = load <8 x i32>, <8 x i32>* {{.*}}
+; CHECK: %[[X1:.+]] = extractelement <8 x i32> %[[L1]], i32 0
+; CHECK: store i32 %[[X1]], {{.*}}
+; CHECK: %[[X2:.+]] = extractelement <8 x i32> %[[L1]], i32 2
+; CHECK: store i32 %[[X2]], {{.*}}
+; CHECK: %[[X3:.+]] = extractelement <8 x i32> %[[L1]], i32 4
+; CHECK: store i32 %[[X3]], {{.*}}
+; CHECK: %[[X4:.+]] = extractelement <8 x i32> %[[L1]], i32 6
+; CHECK: store i32 %[[X4]], {{.*}}
+
+%pair.i32 = type { i32, i32 }
+define void @PR27626_0(%pair.i32 *%p, i32 %z, i64 %n) {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
+ %p_i.x = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 0
+ %p_i.y = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 1
+ store i32 %z, i32* %p_i.x, align 4
+ %0 = load i32, i32* %p_i.x, align 4
+ store i32 %0, i32 *%p_i.y, align 4
+ %i.next = add nuw nsw i64 %i, 1
+ %cond = icmp slt i64 %i.next, %n
+ br i1 %cond, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+; PR27626_1: Ensure a strided load is not moved before a dependent (zero
+; distance) strided store.
+
+; void PR27626_1(struct pair *p, int n) {
+; int s = 0;
+; for (int i = 0; i < n; i++) {
+; p[i].y = p[i].x;
+; s += p[i].y
+; }
+; }
+
+; CHECK-LABEL: @PR27626_1(
+; CHECK: min.iters.checked:
+; CHECK: %n.mod.vf = and i64 %[[N:.+]], 3
+; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
+; CHECK: %[[R:[a-zA-Z0-9]+]] = select i1 %[[IsZero]], i64 4, i64 %n.mod.vf
+; CHECK: %n.vec = sub i64 %[[N]], %[[R]]
+; CHECK: vector.body:
+; CHECK: %[[Phi:.+]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ {{.*}}, %vector.body ]
+; CHECK: %[[L1:.+]] = load <8 x i32>, <8 x i32>* {{.*}}
+; CHECK: %[[X1:.+]] = extractelement <8 x i32> %[[L1:.+]], i32 0
+; CHECK: store i32 %[[X1:.+]], {{.*}}
+; CHECK: %[[X2:.+]] = extractelement <8 x i32> %[[L1:.+]], i32 2
+; CHECK: store i32 %[[X2:.+]], {{.*}}
+; CHECK: %[[X3:.+]] = extractelement <8 x i32> %[[L1:.+]], i32 4
+; CHECK: store i32 %[[X3:.+]], {{.*}}
+; CHECK: %[[X4:.+]] = extractelement <8 x i32> %[[L1:.+]], i32 6
+; CHECK: store i32 %[[X4:.+]], {{.*}}
+; CHECK: %[[L2:.+]] = load <8 x i32>, <8 x i32>* {{.*}}
+; CHECK: %[[S1:.+]] = shufflevector <8 x i32> %[[L2]], <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK: add nsw <4 x i32> %[[S1]], %[[Phi]]
+
+define i32 @PR27626_1(%pair.i32 *%p, i64 %n) {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
+ %s = phi i32 [ %2, %for.body ], [ 0, %entry ]
+ %p_i.x = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 0
+ %p_i.y = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 1
+ %0 = load i32, i32* %p_i.x, align 4
+ store i32 %0, i32* %p_i.y, align 4
+ %1 = load i32, i32* %p_i.y, align 4
+ %2 = add nsw i32 %1, %s
+ %i.next = add nuw nsw i64 %i, 1
+ %cond = icmp slt i64 %i.next, %n
+ br i1 %cond, label %for.body, label %for.end
+
+for.end:
+ %3 = phi i32 [ %2, %for.body ]
+ ret i32 %3
+}
+
+; PR27626_2: Ensure a strided store is not moved after a dependent (negative
+; distance) strided load.
+
+; void PR27626_2(struct pair *p, int z, int n) {
+; for (int i = 0; i < n; i++) {
+; p[i].x = z;
+; p[i].y = p[i - 1].x;
+; }
+; }
+
+; CHECK-LABEL: @PR27626_2(
+; CHECK: min.iters.checked:
+; CHECK: %n.mod.vf = and i64 %[[N:.+]], 3
+; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
+; CHECK: %[[R:[a-zA-Z0-9]+]] = select i1 %[[IsZero]], i64 4, i64 %n.mod.vf
+; CHECK: %n.vec = sub i64 %[[N]], %[[R]]
+; CHECK: vector.body:
+; CHECK: %[[L1:.+]] = load <8 x i32>, <8 x i32>* {{.*}}
+; CHECK: %[[X1:.+]] = extractelement <8 x i32> %[[L1]], i32 0
+; CHECK: store i32 %[[X1]], {{.*}}
+; CHECK: %[[X2:.+]] = extractelement <8 x i32> %[[L1]], i32 2
+; CHECK: store i32 %[[X2]], {{.*}}
+; CHECK: %[[X3:.+]] = extractelement <8 x i32> %[[L1]], i32 4
+; CHECK: store i32 %[[X3]], {{.*}}
+; CHECK: %[[X4:.+]] = extractelement <8 x i32> %[[L1]], i32 6
+; CHECK: store i32 %[[X4]], {{.*}}
+
+define void @PR27626_2(%pair.i32 *%p, i64 %n, i32 %z) {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
+ %i_minus_1 = add nuw nsw i64 %i, -1
+ %p_i.x = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 0
+ %p_i_minus_1.x = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i_minus_1, i32 0
+ %p_i.y = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 1
+ store i32 %z, i32* %p_i.x, align 4
+ %0 = load i32, i32* %p_i_minus_1.x, align 4
+ store i32 %0, i32 *%p_i.y, align 4
+ %i.next = add nuw nsw i64 %i, 1
+ %cond = icmp slt i64 %i.next, %n
+ br i1 %cond, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+; PR27626_3: Ensure a strided load is not moved before a dependent (negative
+; distance) strided store.
+
+; void PR27626_3(struct pair *p, int z, int n) {
+; for (int i = 0; i < n; i++) {
+; p[i + 1].y = p[i].x;
+; s += p[i].y;
+; }
+; }
+
+; CHECK-LABEL: @PR27626_3(
+; CHECK: min.iters.checked:
+; CHECK: %n.mod.vf = and i64 %[[N:.+]], 3
+; CHECK: %[[IsZero:[a-zA-Z0-9]+]] = icmp eq i64 %n.mod.vf, 0
+; CHECK: %[[R:[a-zA-Z0-9]+]] = select i1 %[[IsZero]], i64 4, i64 %n.mod.vf
+; CHECK: %n.vec = sub i64 %[[N]], %[[R]]
+; CHECK: vector.body:
+; CHECK: %[[Phi:.+]] = phi <4 x i32> [ zeroinitializer, %vector.ph ], [ {{.*}}, %vector.body ]
+; CHECK: %[[L1:.+]] = load <8 x i32>, <8 x i32>* {{.*}}
+; CHECK: %[[X1:.+]] = extractelement <8 x i32> %[[L1:.+]], i32 0
+; CHECK: store i32 %[[X1:.+]], {{.*}}
+; CHECK: %[[X2:.+]] = extractelement <8 x i32> %[[L1:.+]], i32 2
+; CHECK: store i32 %[[X2:.+]], {{.*}}
+; CHECK: %[[X3:.+]] = extractelement <8 x i32> %[[L1:.+]], i32 4
+; CHECK: store i32 %[[X3:.+]], {{.*}}
+; CHECK: %[[X4:.+]] = extractelement <8 x i32> %[[L1:.+]], i32 6
+; CHECK: store i32 %[[X4:.+]], {{.*}}
+; CHECK: %[[L2:.+]] = load <8 x i32>, <8 x i32>* {{.*}}
+; CHECK: %[[S1:.+]] = shufflevector <8 x i32> %[[L2]], <8 x i32> undef, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
+; CHECK: add nsw <4 x i32> %[[S1]], %[[Phi]]
+
+define i32 @PR27626_3(%pair.i32 *%p, i64 %n, i32 %z) {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
+ %s = phi i32 [ %2, %for.body ], [ 0, %entry ]
+ %i_plus_1 = add nuw nsw i64 %i, 1
+ %p_i.x = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 0
+ %p_i.y = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 1
+ %p_i_plus_1.y = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i_plus_1, i32 1
+ %0 = load i32, i32* %p_i.x, align 4
+ store i32 %0, i32* %p_i_plus_1.y, align 4
+ %1 = load i32, i32* %p_i.y, align 4
+ %2 = add nsw i32 %1, %s
+ %i.next = add nuw nsw i64 %i, 1
+ %cond = icmp slt i64 %i.next, %n
+ br i1 %cond, label %for.body, label %for.end
+
+for.end:
+ %3 = phi i32 [ %2, %for.body ]
+ ret i32 %3
+}
+
+; PR27626_4: Ensure we form an interleaved group for strided stores in the
+; presence of a write-after-write dependence. We create a group for
+; (2) and (3) while excluding (1).
+
+; void PR27626_4(int *a, int x, int y, int z, int n) {
+; for (int i = 0; i < n; i += 2) {
+; a[i] = x; // (1)
+; a[i] = y; // (2)
+; a[i + 1] = z; // (3)
+; }
+; }
+
+; CHECK-LABEL: @PR27626_4(
+; CHECK: vector.ph:
+; CHECK: %[[INS_Y:.+]] = insertelement <4 x i32> undef, i32 %y, i32 0
+; CHECK: %[[SPLAT_Y:.+]] = shufflevector <4 x i32> %[[INS_Y]], <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK: %[[INS_Z:.+]] = insertelement <4 x i32> undef, i32 %z, i32 0
+; CHECK: %[[SPLAT_Z:.+]] = shufflevector <4 x i32> %[[INS_Z]], <4 x i32> undef, <4 x i32> zeroinitializer
+; CHECK: vector.body:
+; CHECK: store i32 %x, {{.*}}
+; CHECK: store i32 %x, {{.*}}
+; CHECK: store i32 %x, {{.*}}
+; CHECK: store i32 %x, {{.*}}
+; CHECK: %[[VEC:.+]] = shufflevector <4 x i32> %[[SPLAT_Y]], <4 x i32> %[[SPLAT_Z]], <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
+; CHECK: store <8 x i32> %[[VEC]], {{.*}}
+
+define void @PR27626_4(i32 *%a, i32 %x, i32 %y, i32 %z, i64 %n) {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
+ %i_plus_1 = add i64 %i, 1
+ %a_i = getelementptr inbounds i32, i32* %a, i64 %i
+ %a_i_plus_1 = getelementptr inbounds i32, i32* %a, i64 %i_plus_1
+ store i32 %x, i32* %a_i, align 4
+ store i32 %y, i32* %a_i, align 4
+ store i32 %z, i32* %a_i_plus_1, align 4
+ %i.next = add nuw nsw i64 %i, 2
+ %cond = icmp slt i64 %i.next, %n
+ br i1 %cond, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
+; PR27626_5: Ensure we do not form an interleaved group for strided stores in
+; the presence of a write-after-write dependence.
+
+; void PR27626_5(int *a, int x, int y, int z, int n) {
+; for (int i = 3; i < n; i += 2) {
+; a[i - 1] = x;
+; a[i - 3] = y;
+; a[i] = z;
+; }
+; }
+
+; CHECK-LABEL: @PR27626_5(
+; CHECK: vector.body:
+; CHECK: store i32 %x, {{.*}}
+; CHECK: store i32 %x, {{.*}}
+; CHECK: store i32 %x, {{.*}}
+; CHECK: store i32 %x, {{.*}}
+; CHECK: store i32 %y, {{.*}}
+; CHECK: store i32 %y, {{.*}}
+; CHECK: store i32 %y, {{.*}}
+; CHECK: store i32 %y, {{.*}}
+; CHECK: store i32 %z, {{.*}}
+; CHECK: store i32 %z, {{.*}}
+; CHECK: store i32 %z, {{.*}}
+; CHECK: store i32 %z, {{.*}}
+
+define void @PR27626_5(i32 *%a, i32 %x, i32 %y, i32 %z, i64 %n) {
+entry:
+ br label %for.body
+
+for.body:
+ %i = phi i64 [ %i.next, %for.body ], [ 3, %entry ]
+ %i_minus_1 = sub i64 %i, 1
+ %i_minus_3 = sub i64 %i_minus_1, 2
+ %a_i = getelementptr inbounds i32, i32* %a, i64 %i
+ %a_i_minus_1 = getelementptr inbounds i32, i32* %a, i64 %i_minus_1
+ %a_i_minus_3 = getelementptr inbounds i32, i32* %a, i64 %i_minus_3
+ store i32 %x, i32* %a_i_minus_1, align 4
+ store i32 %y, i32* %a_i_minus_3, align 4
+ store i32 %z, i32* %a_i, align 4
+ %i.next = add nuw nsw i64 %i, 2
+ %cond = icmp slt i64 %i.next, %n
+ br i1 %cond, label %for.body, label %for.end
+
+for.end:
+ ret void
+}
+
attributes #0 = { "unsafe-fp-math"="true" }
diff --git a/test/Transforms/LoopVectorize/iv_outside_user.ll b/test/Transforms/LoopVectorize/iv_outside_user.ll
new file mode 100644
index 000000000000..d536d1023f41
--- /dev/null
+++ b/test/Transforms/LoopVectorize/iv_outside_user.ll
@@ -0,0 +1,135 @@
+; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 < %s | FileCheck %s
+
+; CHECK-LABEL: @postinc
+; CHECK-LABEL: scalar.ph:
+; CHECK: %bc.resume.val = phi i32 [ %n.vec, %middle.block ], [ 0, %entry ]
+; CHECK-LABEL: for.end:
+; CHECK: %[[RET:.*]] = phi i32 [ {{.*}}, %for.body ], [ %n.vec, %middle.block ]
+; CHECK: ret i32 %[[RET]]
+define i32 @postinc(i32 %k) {
+entry:
+ br label %for.body
+
+for.body:
+ %inc.phi = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %inc = add nsw i32 %inc.phi, 1
+ %cmp = icmp eq i32 %inc, %k
+ br i1 %cmp, label %for.end, label %for.body
+
+for.end:
+ ret i32 %inc
+}
+
+; CHECK-LABEL: @preinc
+; CHECK-LABEL: middle.block:
+; CHECK: %[[v3:.+]] = sub i32 %n.vec, 1
+; CHECK: %ind.escape = add i32 0, %[[v3]]
+; CHECK-LABEL: scalar.ph:
+; CHECK: %bc.resume.val = phi i32 [ %n.vec, %middle.block ], [ 0, %entry ]
+; CHECK-LABEL: for.end:
+; CHECK: %[[RET:.*]] = phi i32 [ {{.*}}, %for.body ], [ %ind.escape, %middle.block ]
+; CHECK: ret i32 %[[RET]]
+define i32 @preinc(i32 %k) {
+entry:
+ br label %for.body
+
+for.body:
+ %inc.phi = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %inc = add nsw i32 %inc.phi, 1
+ %cmp = icmp eq i32 %inc, %k
+ br i1 %cmp, label %for.end, label %for.body
+
+for.end:
+ ret i32 %inc.phi
+}
+
+; CHECK-LABEL: @constpre
+; CHECK-LABEL: for.end:
+; CHECK: %[[RET:.*]] = phi i32 [ {{.*}}, %for.body ], [ 2, %middle.block ]
+; CHECK: ret i32 %[[RET]]
+define i32 @constpre() {
+entry:
+ br label %for.body
+
+for.body:
+ %inc.phi = phi i32 [ 32, %entry ], [ %inc, %for.body ]
+ %inc = sub nsw i32 %inc.phi, 2
+ %cmp = icmp eq i32 %inc, 0
+ br i1 %cmp, label %for.end, label %for.body
+
+for.end:
+ ret i32 %inc.phi
+}
+
+; CHECK-LABEL: @geppre
+; CHECK-LABEL: middle.block:
+; CHECK: %ind.escape = getelementptr i32, i32* %ptr, i64 124
+; CHECK-LABEL: for.end:
+; CHECK: %[[RET:.*]] = phi i32* [ {{.*}}, %for.body ], [ %ind.escape, %middle.block ]
+; CHECK: ret i32* %[[RET]]
+define i32* @geppre(i32* %ptr) {
+entry:
+ br label %for.body
+
+for.body:
+ %inc.phi = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %ptr.phi = phi i32* [ %ptr, %entry ], [ %inc.ptr, %for.body ]
+ %inc = add nsw i32 %inc.phi, 1
+ %inc.ptr = getelementptr i32, i32* %ptr.phi, i32 4
+ %cmp = icmp eq i32 %inc, 32
+ br i1 %cmp, label %for.end, label %for.body
+
+for.end:
+ ret i32* %ptr.phi
+}
+
+; CHECK-LABEL: @both
+; CHECK-LABEL: middle.block:
+; CHECK: %[[END:.*]] = sub i64 %n.vec, 1
+; CHECK: %ind.escape = getelementptr i32, i32* %base, i64 %[[END]]
+; CHECK-LABEL: for.end:
+; CHECK: %[[RET:.*]] = phi i32* [ %inc.lag1, %for.body ], [ %ind.escape, %middle.block ]
+; CHECK: ret i32* %[[RET]]
+
+define i32* @both(i32 %k) {
+entry:
+ %base = getelementptr inbounds i32, i32* undef, i64 1
+ br label %for.body
+
+for.body:
+ %inc.phi = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %inc.lag1 = phi i32* [ %base, %entry ], [ %tmp, %for.body]
+ %inc.lag2 = phi i32* [ undef, %entry ], [ %inc.lag1, %for.body]
+ %tmp = getelementptr inbounds i32, i32* %inc.lag1, i64 1
+ %inc = add nsw i32 %inc.phi, 1
+ %cmp = icmp eq i32 %inc, %k
+ br i1 %cmp, label %for.end, label %for.body
+
+for.end:
+ ret i32* %inc.lag1
+}
+
+; CHECK-LABEL: @multiphi
+; CHECK-LABEL: scalar.ph:
+; CHECK: %bc.resume.val = phi i32 [ %n.vec, %middle.block ], [ 0, %entry ]
+; CHECK-LABEL: for.end:
+; CHECK: %phi = phi i32 [ {{.*}}, %for.body ], [ %n.vec, %middle.block ]
+; CHECK: %phi2 = phi i32 [ {{.*}}, %for.body ], [ %n.vec, %middle.block ]
+; CHECK: store i32 %phi2, i32* %p
+; CHECK: ret i32 %phi
+define i32 @multiphi(i32 %k, i32* %p) {
+entry:
+ br label %for.body
+
+for.body:
+ %inc.phi = phi i32 [ 0, %entry ], [ %inc, %for.body ]
+ %inc = add nsw i32 %inc.phi, 1
+ %cmp = icmp eq i32 %inc, %k
+ br i1 %cmp, label %for.end, label %for.body
+
+for.end:
+ %phi = phi i32 [ %inc, %for.body ]
+ %phi2 = phi i32 [ %inc, %for.body ]
+ store i32 %phi2, i32* %p
+ ret i32 %phi
+}
diff --git a/test/Transforms/LoopVectorize/multiple-strides-vectorization.ll b/test/Transforms/LoopVectorize/multiple-strides-vectorization.ll
new file mode 100644
index 000000000000..adadbfc9e1dc
--- /dev/null
+++ b/test/Transforms/LoopVectorize/multiple-strides-vectorization.ll
@@ -0,0 +1,65 @@
+; RUN: opt -loop-vectorize -force-vector-width=4 -S < %s | FileCheck %s
+
+; This is the test case from PR26314.
+; When we were retrying dependence checking with memchecks only,
+; the loop-invariant access in the inner loop was incorrectly determined to be wrapping
+; because it was not strided in the inner loop.
+; Improved wrapping detection allows vectorization in the following case.
+
+; #define Z 32
+; typedef struct s {
+; int v1[Z];
+; int v2[Z];
+; int v3[Z][Z];
+; } s;
+;
+; void slow_function (s* const obj) {
+; for (int j=0; j<Z; j++) {
+; for (int k=0; k<Z; k++) {
+; int x = obj->v1[k] + obj->v2[j];
+; obj->v3[j][k] += x;
+; }
+; }
+; }
+
+; CHECK-LABEL: Test
+; CHECK: <4 x i64>
+; CHECK: <4 x i32>, <4 x i32>
+; CHECK: llvm.loop.vectorize.width
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.s = type { [32 x i32], [32 x i32], [32 x [32 x i32]] }
+
+define void @Test(%struct.s* nocapture %obj) #0 {
+ br label %.outer.preheader
+
+
+.outer.preheader:
+ %i = phi i64 [ 0, %0 ], [ %i.next, %.outer ]
+ %1 = getelementptr inbounds %struct.s, %struct.s* %obj, i64 0, i32 1, i64 %i
+ br label %.inner
+
+.exit:
+ ret void
+
+.outer:
+ %i.next = add nuw nsw i64 %i, 1
+ %exitcond.outer = icmp eq i64 %i.next, 32
+ br i1 %exitcond.outer, label %.exit, label %.outer.preheader
+
+.inner:
+ %j = phi i64 [ 0, %.outer.preheader ], [ %j.next, %.inner ]
+ %2 = getelementptr inbounds %struct.s, %struct.s* %obj, i64 0, i32 0, i64 %j
+ %3 = load i32, i32* %2
+ %4 = load i32, i32* %1
+ %5 = add nsw i32 %4, %3
+ %6 = getelementptr inbounds %struct.s, %struct.s* %obj, i64 0, i32 2, i64 %i, i64 %j
+ %7 = load i32, i32* %6
+ %8 = add nsw i32 %5, %7
+ store i32 %8, i32* %6
+ %j.next = add nuw nsw i64 %j, 1
+ %exitcond.inner = icmp eq i64 %j.next, 32
+ br i1 %exitcond.inner, label %.outer, label %.inner
+}
diff --git a/test/Transforms/LoopVectorize/no_array_bounds.ll b/test/Transforms/LoopVectorize/no_array_bounds.ll
index 13cec71fc455..44412bce27fb 100644
--- a/test/Transforms/LoopVectorize/no_array_bounds.ll
+++ b/test/Transforms/LoopVectorize/no_array_bounds.ll
@@ -72,11 +72,10 @@ attributes #0 = { nounwind }
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, emissionKind: LineTablesOnly, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "no_array_bounds.cpp", directory: ".")
!2 = !{}
-!3 = !{!4}
-!4 = distinct !DISubprogram(name: "test", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !2)
+!4 = distinct !DISubprogram(name: "test", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 2, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "no_array_bounds.cpp", directory: ".")
!6 = !DISubroutineType(types: !2)
!7 = !{i32 2, !"Dwarf Version", i32 2}
diff --git a/test/Transforms/LoopVectorize/no_outside_user.ll b/test/Transforms/LoopVectorize/no_outside_user.ll
index 2683b42dc717..39363f6034c4 100644
--- a/test/Transforms/LoopVectorize/no_outside_user.ll
+++ b/test/Transforms/LoopVectorize/no_outside_user.ll
@@ -1,7 +1,6 @@
; RUN: opt -S -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 < %s 2>&1 | FileCheck %s
; CHECK: remark: {{.*}}: loop not vectorized: value could not be identified as an induction or reduction variable
-; CHECK: remark: {{.*}}: loop not vectorized: use of induction value outside of the loop is not handled by vectorizer
target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S128"
@@ -41,34 +40,3 @@ f1.exit.loopexit:
%.lcssa = phi i32 [ %tmp17, %bb16 ]
ret i32 %.lcssa
}
-
-; Don't vectorize this loop. Its phi node (induction variable) has an outside
-; loop user. We currently don't handle this case.
-; PR17179
-
-; CHECK-LABEL: @test2(
-; CHECK-NOT: <2 x
-
-@x1 = common global i32 0, align 4
-@x2 = common global i32 0, align 4
-@x0 = common global i32 0, align 4
-
-define i32 @test2() {
-entry:
- store i32 0, i32* @x1, align 4
- %0 = load i32, i32* @x0, align 4
- br label %for.cond1.preheader
-
-for.cond1.preheader:
- %inc7 = phi i32 [ 0, %entry ], [ %inc, %for.cond1.preheader ]
- %inc = add nsw i32 %inc7, 1
- %cmp = icmp eq i32 %inc, 52
- br i1 %cmp, label %for.end5, label %for.cond1.preheader
-
-for.end5:
- %inc7.lcssa = phi i32 [ %inc7, %for.cond1.preheader ]
- %xor = xor i32 %inc7.lcssa, %0
- store i32 52, i32* @x1, align 4
- store i32 1, i32* @x2, align 4
- ret i32 %xor
-}
diff --git a/test/Transforms/LoopVectorize/no_switch.ll b/test/Transforms/LoopVectorize/no_switch.ll
index 842d262d3192..181304a409ee 100644
--- a/test/Transforms/LoopVectorize/no_switch.ll
+++ b/test/Transforms/LoopVectorize/no_switch.ll
@@ -67,11 +67,10 @@ attributes #0 = { nounwind }
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, runtimeVersion: 6, emissionKind: 2, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5.0", isOptimized: true, runtimeVersion: 6, emissionKind: LineTablesOnly, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "source.cpp", directory: ".")
!2 = !{}
-!3 = !{!4}
-!4 = distinct !DISubprogram(name: "test_switch", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
+!4 = distinct !DISubprogram(name: "test_switch", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "source.cpp", directory: ".")
!6 = !DISubroutineType(types: !2)
!7 = !{i32 2, !"Dwarf Version", i32 2}
diff --git a/test/Transforms/LoopVectorize/noalias-md-licm.ll b/test/Transforms/LoopVectorize/noalias-md-licm.ll
new file mode 100644
index 000000000000..233d530dc102
--- /dev/null
+++ b/test/Transforms/LoopVectorize/noalias-md-licm.ll
@@ -0,0 +1,59 @@
+; RUN: opt -basicaa -scoped-noalias -loop-vectorize -licm -force-vector-width=2 \
+; RUN: -force-vector-interleave=1 -S < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; In order to vectorize the inner loop, it needs to be versioned with
+; memchecks between {A} x {B, C} first:
+;
+; for (i = 0; i < n; i++)
+; for (j = 0; j < m; j++)
+; A[j] += B[i] + C[j];
+;
+; Since in the versioned vector loop A and B can no longer alias, B[i] can be
+; LICM'ed from the inner loop.
+
+
+define void @f(i32* %a, i32* %b, i32* %c) {
+entry:
+ br label %outer
+
+outer:
+ %i.2 = phi i64 [ 0, %entry ], [ %i, %inner.end ]
+ %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %i.2
+ br label %inner.ph
+
+inner.ph:
+; CHECK: vector.ph:
+; CHECK: load i32, i32* %arrayidxB,
+; CHECK: br label %vector.body
+ br label %inner
+
+inner:
+ %j.2 = phi i64 [ 0, %inner.ph ], [ %j, %inner ]
+
+ %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %j.2
+ %loadA = load i32, i32* %arrayidxA, align 4
+
+ %loadB = load i32, i32* %arrayidxB, align 4
+
+ %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %j.2
+ %loadC = load i32, i32* %arrayidxC, align 4
+
+ %add = add nuw i32 %loadA, %loadB
+ %add2 = add nuw i32 %add, %loadC
+
+ store i32 %add2, i32* %arrayidxA, align 4
+
+ %j = add nuw nsw i64 %j.2, 1
+ %cond1 = icmp eq i64 %j, 20
+ br i1 %cond1, label %inner.end, label %inner
+
+inner.end:
+ %i = add nuw nsw i64 %i.2, 1
+ %cond2 = icmp eq i64 %i, 30
+ br i1 %cond2, label %outer.end, label %outer
+
+outer.end:
+ ret void
+}
diff --git a/test/Transforms/LoopVectorize/noalias-md.ll b/test/Transforms/LoopVectorize/noalias-md.ll
new file mode 100644
index 000000000000..787ea88f9457
--- /dev/null
+++ b/test/Transforms/LoopVectorize/noalias-md.ll
@@ -0,0 +1,78 @@
+; RUN: opt -basicaa -loop-vectorize -force-vector-width=2 \
+; RUN: -force-vector-interleave=1 -S < %s \
+; RUN: | FileCheck %s -check-prefix=BOTH -check-prefix=LV
+; RUN: opt -basicaa -scoped-noalias -loop-vectorize -dse -force-vector-width=2 \
+; RUN: -force-vector-interleave=1 -S < %s \
+; RUN: | FileCheck %s -check-prefix=BOTH -check-prefix=DSE
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; This loop needs to be versioned with memchecks between {A, B} x {C} before
+; it can be vectorized.
+;
+; for (i = 0; i < n; i++) {
+; C[i] = A[i] + 1;
+; C[i] += B[i];
+; }
+;
+; Check that the corresponding noalias metadata is added to the vector loop
+; but not to the scalar loop.
+;
+; Since in the versioned vector loop C and B can no longer alias, the first
+; store to C[i] can be DSE'd.
+
+
+define void @f(i32* %a, i32* %b, i32* %c) {
+entry:
+ br label %for.body
+
+; BOTH: vector.memcheck:
+; BOTH: vector.body:
+for.body: ; preds = %for.body, %entry
+ %ind = phi i64 [ 0, %entry ], [ %inc, %for.body ]
+
+ %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
+; Scope 1
+; LV: = load {{.*}} !alias.scope !0
+ %loadA = load i32, i32* %arrayidxA, align 4
+
+ %add = add nuw i32 %loadA, 2
+
+ %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
+; Noalias with scope 1 and 6
+; LV: store {{.*}} !alias.scope !3, !noalias !5
+; DSE-NOT: store
+ store i32 %add, i32* %arrayidxC, align 4
+
+ %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
+; Scope 6
+; LV: = load {{.*}} !alias.scope !7
+ %loadB = load i32, i32* %arrayidxB, align 4
+
+ %add2 = add nuw i32 %add, %loadB
+
+; Noalias with scope 1 and 6
+; LV: store {{.*}} !alias.scope !3, !noalias !5
+; DSE: store
+ store i32 %add2, i32* %arrayidxC, align 4
+
+ %inc = add nuw nsw i64 %ind, 1
+ %exitcond = icmp eq i64 %inc, 20
+ br i1 %exitcond, label %for.end, label %for.body
+
+; BOTH: for.body:
+; BOTH-NOT: !alias.scope
+; BOTH-NOT: !noalias
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+; LV: !0 = !{!1}
+; LV: !1 = distinct !{!1, !2}
+; LV: !2 = distinct !{!2, !"LVerDomain"}
+; LV: !3 = !{!4}
+; LV: !4 = distinct !{!4, !2}
+; LV: !5 = !{!1, !6}
+; LV: !6 = distinct !{!6, !2}
+; LV: !7 = !{!6}
diff --git a/test/Transforms/LoopVectorize/phi-hang.ll b/test/Transforms/LoopVectorize/phi-hang.ll
index bbce239afa71..eb1aaeffde87 100644
--- a/test/Transforms/LoopVectorize/phi-hang.ll
+++ b/test/Transforms/LoopVectorize/phi-hang.ll
@@ -18,7 +18,7 @@ bb4: ; preds = %bb3
bb5: ; preds = %bb4, %bb1
%tmp6 = phi i32 [ 0, %bb4 ], [ %tmp, %bb1 ]
- %tmp7 = phi i32 [ 0, %bb4 ], [ %tmp6, %bb1 ]
+ %tmp7 = phi i32 [ 0, %bb4 ], [ %tmp, %bb1 ]
%tmp8 = phi i32 [ 0, %bb4 ], [ %tmp, %bb1 ]
%tmp9 = add nsw i32 %tmp2, 1
%tmp10 = icmp eq i32 %tmp9, 0
diff --git a/test/Transforms/LoopVectorize/pr25281.ll b/test/Transforms/LoopVectorize/pr25281.ll
new file mode 100644
index 000000000000..6001a200c94e
--- /dev/null
+++ b/test/Transforms/LoopVectorize/pr25281.ll
@@ -0,0 +1,59 @@
+; RUN: opt < %s -scev-aa -loop-vectorize -print-alias-sets -S -o - 2>&1 | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; PR25281
+; Just check that we don't crash on this test.
+; CHECK-LABEL: @foo
+define void @foo(float** noalias nocapture readonly %in, i32* noalias nocapture readonly %isCompressed, float* noalias nocapture readonly %out) {
+entry_block:
+ %tmp = getelementptr float*, float** %in, i32 0
+ %in_0 = load float*, float** %tmp, !alias.scope !0
+ %tmp1 = getelementptr i32, i32* %isCompressed, i32 0
+ %isCompressed_0 = load i32, i32* %tmp1, !alias.scope !1
+ %tmp2 = getelementptr float*, float** %in, i32 1
+ %in_1 = load float*, float** %tmp2, !alias.scope !2
+ %tmp3 = getelementptr i32, i32* %isCompressed, i32 1
+ %isCompressed_1 = load i32, i32* %tmp3, !alias.scope !3
+ br label %for_each_frames
+
+for_each_frames:
+ %frameIndex = phi i32 [ 0, %entry_block ], [ %nextFrameIndex, %for_each_frames_end ]
+ %nextFrameIndex = add nuw nsw i32 %frameIndex, 2
+ br label %for_each_channel
+
+for_each_channel:
+ %channelIndex = phi i32 [ 0, %for_each_frames ], [ %nextChannelIndex, %for_each_channel ]
+ %nextChannelIndex = add nuw nsw i32 %channelIndex, 1
+ %tmp4 = add i32 %frameIndex, %channelIndex
+ %tmp5 = xor i32 %isCompressed_0, 1
+ %tmp6 = mul i32 %frameIndex, %tmp5
+ %offset0 = add i32 %tmp6, %channelIndex
+ %tmp7 = getelementptr float, float* %in_0, i32 %offset0
+ %in_0_index = load float, float* %tmp7, align 4, !alias.scope !4
+ %tmp8 = xor i32 %isCompressed_1, 1
+ %tmp9 = mul i32 %frameIndex, %tmp8
+ %offset1 = add i32 %tmp9, %channelIndex
+ %tmp10 = getelementptr float, float* %in_1, i32 %offset1
+ %in_1_index = load float, float* %tmp10, align 4, !alias.scope !5
+ %tmp11 = fadd float %in_0_index, %in_1_index
+ %tmp12 = getelementptr float, float* %out, i32 %tmp4
+ store float %tmp11, float* %tmp12, align 4, !alias.noalias !6
+ %tmp13 = icmp eq i32 %nextChannelIndex, 2
+ br i1 %tmp13, label %for_each_frames_end, label %for_each_channel
+
+for_each_frames_end:
+ %tmp14 = icmp eq i32 %nextFrameIndex, 512
+ br i1 %tmp14, label %return, label %for_each_frames
+
+return:
+ ret void
+}
+
+!0 = distinct !{!0}
+!1 = distinct !{!1, !0}
+!2 = distinct !{!2, !0}
+!3 = distinct !{!3, !0}
+!4 = distinct !{!4, !0}
+!5 = distinct !{!5, !0}
+!6 = !{!2, !3, !4, !5, !1}
diff --git a/test/Transforms/LoopVectorize/reverse_induction.ll b/test/Transforms/LoopVectorize/reverse_induction.ll
index 88dd2e4d66ca..24ffb6167de3 100644
--- a/test/Transforms/LoopVectorize/reverse_induction.ll
+++ b/test/Transforms/LoopVectorize/reverse_induction.ll
@@ -5,9 +5,16 @@ target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f3
; Make sure consecutive vector generates correct negative indices.
; PR15882
-; CHECK-LABEL: @reverse_induction_i64(
-; CHECK: add <4 x i64> %[[SPLAT:.*]], <i64 0, i64 -1, i64 -2, i64 -3>
-; CHECK: add <4 x i64> %[[SPLAT]], <i64 -4, i64 -5, i64 -6, i64 -7>
+; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; CHECK: %offset.idx = sub i64 %startval, %index
+; CHECK: %[[a0:.+]] = add i64 %offset.idx, 0
+; CHECK: %[[a1:.+]] = add i64 %offset.idx, -1
+; CHECK: %[[a2:.+]] = add i64 %offset.idx, -2
+; CHECK: %[[a3:.+]] = add i64 %offset.idx, -3
+; CHECK: %[[a4:.+]] = add i64 %offset.idx, -4
+; CHECK: %[[a5:.+]] = add i64 %offset.idx, -5
+; CHECK: %[[a6:.+]] = add i64 %offset.idx, -6
+; CHECK: %[[a7:.+]] = add i64 %offset.idx, -7
define i32 @reverse_induction_i64(i64 %startval, i32 * %ptr) {
entry:
@@ -30,8 +37,17 @@ loopend:
}
; CHECK-LABEL: @reverse_induction_i128(
-; CHECK: add <4 x i128> %[[SPLAT:.*]], <i128 0, i128 -1, i128 -2, i128 -3>
-; CHECK: add <4 x i128> %[[SPLAT]], <i128 -4, i128 -5, i128 -6, i128 -7>
+; CHECK: %index = phi i128 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; CHECK: %offset.idx = sub i128 %startval, %index
+; CHECK: %[[a0:.+]] = add i128 %offset.idx, 0
+; CHECK: %[[a1:.+]] = add i128 %offset.idx, -1
+; CHECK: %[[a2:.+]] = add i128 %offset.idx, -2
+; CHECK: %[[a3:.+]] = add i128 %offset.idx, -3
+; CHECK: %[[a4:.+]] = add i128 %offset.idx, -4
+; CHECK: %[[a5:.+]] = add i128 %offset.idx, -5
+; CHECK: %[[a6:.+]] = add i128 %offset.idx, -6
+; CHECK: %[[a7:.+]] = add i128 %offset.idx, -7
+
define i32 @reverse_induction_i128(i128 %startval, i32 * %ptr) {
entry:
br label %for.body
@@ -53,8 +69,16 @@ loopend:
}
; CHECK-LABEL: @reverse_induction_i16(
-; CHECK: add <4 x i16> %[[SPLAT:.*]], <i16 0, i16 -1, i16 -2, i16 -3>
-; CHECK: add <4 x i16> %[[SPLAT]], <i16 -4, i16 -5, i16 -6, i16 -7>
+; CHECK: %index = phi i32 [ 0, %vector.ph ], [ %index.next, %vector.body ]
+; CHECK: %offset.idx = sub i16 %startval, {{.*}}
+; CHECK: %[[a0:.+]] = add i16 %offset.idx, 0
+; CHECK: %[[a1:.+]] = add i16 %offset.idx, -1
+; CHECK: %[[a2:.+]] = add i16 %offset.idx, -2
+; CHECK: %[[a3:.+]] = add i16 %offset.idx, -3
+; CHECK: %[[a4:.+]] = add i16 %offset.idx, -4
+; CHECK: %[[a5:.+]] = add i16 %offset.idx, -5
+; CHECK: %[[a6:.+]] = add i16 %offset.idx, -6
+; CHECK: %[[a7:.+]] = add i16 %offset.idx, -7
define i32 @reverse_induction_i16(i16 %startval, i32 * %ptr) {
entry:
@@ -96,7 +120,8 @@ loopend:
; CHECK-LABEL: @reverse_forward_induction_i64_i8(
; CHECK: vector.body
; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-; CHECK: %offset.idx = sub i64 1023, %index
+; CHECK: %vec.ind = phi <4 x i64> [ <i64 1023, i64 1022, i64 1021, i64 1020>, %vector.ph ]
+; CHECK: %step.add = add <4 x i64> %vec.ind, <i64 -4, i64 -4, i64 -4, i64 -4>
; CHECK: trunc i64 %index to i8
define void @reverse_forward_induction_i64_i8() {
@@ -122,7 +147,8 @@ while.end:
; CHECK-LABEL: @reverse_forward_induction_i64_i8_signed(
; CHECK: vector.body:
; CHECK: %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
-; CHECK: %offset.idx = sub i64 1023, %index
+; CHECK: %vec.ind = phi <4 x i64> [ <i64 1023, i64 1022, i64 1021, i64 1020>, %vector.ph ]
+; CHECK: %step.add = add <4 x i64> %vec.ind, <i64 -4, i64 -4, i64 -4, i64 -4>
define void @reverse_forward_induction_i64_i8_signed() {
entry:
diff --git a/test/Transforms/LoopVectorize/runtime-check.ll b/test/Transforms/LoopVectorize/runtime-check.ll
index 3673b71db30d..2bd8b43820df 100644
--- a/test/Transforms/LoopVectorize/runtime-check.ll
+++ b/test/Transforms/LoopVectorize/runtime-check.ll
@@ -67,13 +67,20 @@ loopexit:
; CHECK: [[BODY_LOC]] = !DILocation(line: 101, column: 1, scope: !{{.*}})
!llvm.module.flags = !{!0, !1}
+!llvm.dbg.cu = !{!9}
!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 2, !"Debug Info Version", i32 3}
!2 = !{}
!3 = !DISubroutineType(types: !2)
!4 = !DIFile(filename: "test.cpp", directory: "/tmp")
-!5 = distinct !DISubprogram(name: "foo", scope: !4, file: !4, line: 99, type: !3, isLocal: false, isDefinition: true, scopeLine: 100, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!5 = distinct !DISubprogram(name: "foo", scope: !4, file: !4, line: 99, type: !3, isLocal: false, isDefinition: true, scopeLine: 100, flags: DIFlagPrototyped, isOptimized: false, unit: !9, variables: !2)
!6 = !DILocation(line: 100, column: 1, scope: !5)
!7 = !DILocation(line: 101, column: 1, scope: !5)
!8 = !DILocation(line: 102, column: 1, scope: !5)
+!9 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang",
+ file: !10,
+ isOptimized: true, flags: "-O2",
+ splitDebugFilename: "abc.debug", emissionKind: 2)
+!10 = !DIFile(filename: "path/to/file", directory: "/path/to/dir")
+!11 = !{i32 2, !"Debug Info Version", i32 3}
diff --git a/test/Transforms/LoopVectorize/same-base-access.ll b/test/Transforms/LoopVectorize/same-base-access.ll
index 31cff0ee653f..53fad8afdad8 100644
--- a/test/Transforms/LoopVectorize/same-base-access.ll
+++ b/test/Transforms/LoopVectorize/same-base-access.ll
@@ -62,11 +62,9 @@ define i32 @kernel11(double* %x, double* %y, i32 %n) nounwind uwtable ssp {
}
-
-; We don't vectorize this function because A[i*7] is scalarized, and the
-; different scalars can in theory wrap around and overwrite other scalar
-; elements. At the moment we only allow read/write access to arrays
-; that are consecutive.
+; A[i*7] is scalarized, and the different scalars can in theory wrap
+; around and overwrite other scalar elements. However we can still
+; vectorize because we can version the loop to avoid this case.
;
; void foo(int *a) {
; for (int i=0; i<256; ++i) {
@@ -78,7 +76,7 @@ define i32 @kernel11(double* %x, double* %y, i32 %n) nounwind uwtable ssp {
; }
; CHECK-LABEL: @func2(
-; CHECK-NOT: <4 x i32>
+; CHECK: <4 x i32>
; CHECK: ret
define i32 @func2(i32* nocapture %a) nounwind uwtable ssp {
br label %1
diff --git a/test/Transforms/LoopVectorize/unsafe-dep-remark.ll b/test/Transforms/LoopVectorize/unsafe-dep-remark.ll
new file mode 100644
index 000000000000..5fff82554f4c
--- /dev/null
+++ b/test/Transforms/LoopVectorize/unsafe-dep-remark.ll
@@ -0,0 +1,74 @@
+; RUN: opt -loop-vectorize -force-vector-width=2 -pass-remarks-analysis=loop-vectorize < %s 2>&1 | FileCheck %s
+
+; ModuleID = '/tmp/kk.c'
+source_filename = "/tmp/kk.c"
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+; 1 void success (char *A, char *B, char *C, char *D, char *E, int N) {
+; 2 for(int i = 0; i < N; i++) {
+; 3 A[i + 1] = A[i] + B[i];
+; 4 C[i] = D[i] * E[i];
+; 5 }
+; 6 }
+
+; CHECK: remark: /tmp/kk.c:3:16: loop not vectorized: unsafe dependent memory operations in loop. Use #pragma loop distribute(enable) to allow loop distribution to attempt to isolate the offending operations into a separate loop
+
+define void @success(i8* nocapture %A, i8* nocapture readonly %B, i8* nocapture %C, i8* nocapture readonly %D, i8* nocapture readonly %E, i32 %N) !dbg !6 {
+entry:
+ %cmp28 = icmp sgt i32 %N, 0, !dbg !8
+ br i1 %cmp28, label %for.body, label %for.cond.cleanup, !dbg !9
+
+for.body: ; preds = %entry, %for.body
+ %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
+ %arrayidx = getelementptr inbounds i8, i8* %A, i64 %indvars.iv, !dbg !11
+ %0 = load i8, i8* %arrayidx, align 1, !dbg !11, !tbaa !12
+ %arrayidx2 = getelementptr inbounds i8, i8* %B, i64 %indvars.iv, !dbg !15
+ %1 = load i8, i8* %arrayidx2, align 1, !dbg !15, !tbaa !12
+ %add = add i8 %1, %0, !dbg !16
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !9
+ %arrayidx7 = getelementptr inbounds i8, i8* %A, i64 %indvars.iv.next, !dbg !17
+ store i8 %add, i8* %arrayidx7, align 1, !dbg !18, !tbaa !12
+ %arrayidx9 = getelementptr inbounds i8, i8* %D, i64 %indvars.iv, !dbg !19
+ %2 = load i8, i8* %arrayidx9, align 1, !dbg !19, !tbaa !12
+ %arrayidx12 = getelementptr inbounds i8, i8* %E, i64 %indvars.iv, !dbg !20
+ %3 = load i8, i8* %arrayidx12, align 1, !dbg !20, !tbaa !12
+ %mul = mul i8 %3, %2, !dbg !21
+ %arrayidx16 = getelementptr inbounds i8, i8* %C, i64 %indvars.iv, !dbg !22
+ store i8 %mul, i8* %arrayidx16, align 1, !dbg !23, !tbaa !12
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32, !dbg !9
+ %exitcond = icmp eq i32 %lftr.wideiv, %N, !dbg !9
+ br i1 %exitcond, label %for.cond.cleanup, label %for.body, !dbg !9
+
+for.cond.cleanup: ; preds = %for.body, %entry
+ ret void, !dbg !10
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.9.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2)
+!1 = !DIFile(filename: "/tmp/kk.c", directory: "/tmp")
+!2 = !{}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"PIC Level", i32 2}
+!5 = !{!"clang version 3.9.0 "}
+!6 = distinct !DISubprogram(name: "success", scope: !1, file: !1, line: 1, type: !7, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !2)
+!7 = !DISubroutineType(types: !2)
+!8 = !DILocation(line: 2, column: 20, scope: !6)
+!9 = !DILocation(line: 2, column: 3, scope: !6)
+!10 = !DILocation(line: 6, column: 1, scope: !6)
+!11 = !DILocation(line: 3, column: 16, scope: !6)
+!12 = !{!13, !13, i64 0}
+!13 = !{!"omnipotent char", !14, i64 0}
+!14 = !{!"Simple C/C++ TBAA"}
+!15 = !DILocation(line: 3, column: 23, scope: !6)
+!16 = !DILocation(line: 3, column: 21, scope: !6)
+!17 = !DILocation(line: 3, column: 5, scope: !6)
+!18 = !DILocation(line: 3, column: 14, scope: !6)
+!19 = !DILocation(line: 4, column: 12, scope: !6)
+!20 = !DILocation(line: 4, column: 19, scope: !6)
+!21 = !DILocation(line: 4, column: 17, scope: !6)
+!22 = !DILocation(line: 4, column: 5, scope: !6)
+!23 = !DILocation(line: 4, column: 10, scope: !6)
diff --git a/test/Transforms/LoopVersioning/basic.ll b/test/Transforms/LoopVersioning/basic.ll
new file mode 100644
index 000000000000..f59caecadae7
--- /dev/null
+++ b/test/Transforms/LoopVersioning/basic.ll
@@ -0,0 +1,47 @@
+; RUN: opt -basicaa -loop-versioning -S < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+; Version this loop with overlap checks between a, c and b, c.
+
+define void @f(i32* %a, i32* %b, i32* %c) {
+entry:
+ br label %for.body
+
+; CHECK: for.body.lver.check:
+; CHECK: icmp
+; CHECK: icmp
+; CHECK: icmp
+; CHECK: icmp
+; CHECK-NOT: icmp
+; CHECK: br i1 %memcheck.conflict, label %for.body.ph.lver.orig, label %for.body.ph
+
+; CHECK: for.body.ph.lver.orig:
+; CHECK: for.body.lver.orig:
+; CHECK: br i1 %exitcond.lver.orig, label %for.end, label %for.body.lver.orig
+; CHECK: for.body.ph:
+; CHECK: for.body:
+; CHECK: br i1 %exitcond, label %for.end, label %for.body
+; CHECK: for.end:
+
+for.body: ; preds = %for.body, %entry
+ %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+
+ %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
+ %loadA = load i32, i32* %arrayidxA, align 4
+
+ %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
+ %loadB = load i32, i32* %arrayidxB, align 4
+
+ %mulC = mul i32 %loadA, %loadB
+
+ %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
+ store i32 %mulC, i32* %arrayidxC, align 4
+
+ %add = add nuw nsw i64 %ind, 1
+ %exitcond = icmp eq i64 %add, 20
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
diff --git a/test/Transforms/LoopVersioning/incorrect-phi.ll b/test/Transforms/LoopVersioning/incorrect-phi.ll
new file mode 100644
index 000000000000..fcecdb349003
--- /dev/null
+++ b/test/Transforms/LoopVersioning/incorrect-phi.ll
@@ -0,0 +1,62 @@
+; RUN: opt -loop-versioning -S < %s | FileCheck %s
+
+; Make sure all PHIs are properly updated in the exit block. Based on
+; PR28037.
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@x = external global [2 x [3 x [5 x i16]]]
+
+; CHECK-LABEL: @phi_with_undef
+define void @phi_with_undef() {
+bb6.lr.ph: ; preds = %bb5.preheader
+ br label %bb6
+
+bb6: ; preds = %bb6.lr.ph, %bb6
+ %_tmp1423 = phi i16 [ undef, %bb6.lr.ph ], [ %_tmp142, %bb6 ]
+ %_tmp123 = getelementptr [2 x [3 x [5 x i16]]], [2 x [3 x [5 x i16]]]* @x, i16 0, i64 undef
+ %_tmp125 = sext i16 %_tmp1423 to i64
+ %_tmp126 = getelementptr [3 x [5 x i16]], [3 x [5 x i16]]* %_tmp123, i16 0, i64 %_tmp125
+ %_tmp129 = getelementptr [5 x i16], [5 x i16]* %_tmp126, i16 0, i64 undef
+ %_tmp130 = load i16, i16* %_tmp129
+ store i16 undef, i16* getelementptr ([2 x [3 x [5 x i16]]], [2 x [3 x [5 x i16]]]* @x, i64 0, i64 undef, i64 undef, i64 undef)
+ %_tmp142 = add i16 %_tmp1423, 1
+ br i1 false, label %bb6, label %loop.exit
+
+loop.exit: ; preds = %bb6
+ %_tmp142.lcssa = phi i16 [ %_tmp142, %bb6 ]
+ %split = phi i16 [ undef, %bb6 ]
+; CHECK: %split = phi i16 [ undef, %bb6 ], [ undef, %bb6.lver.orig ]
+ br label %bb9
+
+bb9: ; preds = %bb9.loopexit, %bb1
+ ret void
+}
+
+; CHECK-LABEL: @phi_with_non_loop_defined_value
+define void @phi_with_non_loop_defined_value() {
+bb6.lr.ph: ; preds = %bb5.preheader
+ %t = add i16 1, 1
+ br label %bb6
+
+bb6: ; preds = %bb6.lr.ph, %bb6
+ %_tmp1423 = phi i16 [ undef, %bb6.lr.ph ], [ %_tmp142, %bb6 ]
+ %_tmp123 = getelementptr [2 x [3 x [5 x i16]]], [2 x [3 x [5 x i16]]]* @x, i16 0, i64 undef
+ %_tmp125 = sext i16 %_tmp1423 to i64
+ %_tmp126 = getelementptr [3 x [5 x i16]], [3 x [5 x i16]]* %_tmp123, i16 0, i64 %_tmp125
+ %_tmp129 = getelementptr [5 x i16], [5 x i16]* %_tmp126, i16 0, i64 undef
+ %_tmp130 = load i16, i16* %_tmp129
+ store i16 undef, i16* getelementptr ([2 x [3 x [5 x i16]]], [2 x [3 x [5 x i16]]]* @x, i64 0, i64 undef, i64 undef, i64 undef)
+ %_tmp142 = add i16 %_tmp1423, 1
+ br i1 false, label %bb6, label %loop.exit
+
+loop.exit: ; preds = %bb6
+ %_tmp142.lcssa = phi i16 [ %_tmp142, %bb6 ]
+ %split = phi i16 [ %t, %bb6 ]
+; CHECK: %split = phi i16 [ %t, %bb6 ], [ %t, %bb6.lver.orig ]
+ br label %bb9
+
+bb9: ; preds = %bb9.loopexit, %bb1
+ ret void
+}
diff --git a/test/Transforms/LoopVersioning/lcssa.ll b/test/Transforms/LoopVersioning/lcssa.ll
new file mode 100644
index 000000000000..2cd4662c371a
--- /dev/null
+++ b/test/Transforms/LoopVersioning/lcssa.ll
@@ -0,0 +1,35 @@
+; RUN: opt -basicaa -loop-versioning -S < %s | FileCheck %s
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @fill(i8** %ls1.20, i8** %ls2.21, i8* %cse3.22) {
+; CHECK: bb1.lver.check:
+; CHECK: br i1 %memcheck.conflict, label %bb1.ph.lver.orig, label %bb1.ph
+bb1.ph:
+ %ls1.20.promoted = load i8*, i8** %ls1.20
+ %ls2.21.promoted = load i8*, i8** %ls2.21
+ br label %bb1
+
+bb1:
+ %_tmp302 = phi i8* [ %ls2.21.promoted, %bb1.ph ], [ %_tmp30, %bb1 ]
+ %_tmp281 = phi i8* [ %ls1.20.promoted, %bb1.ph ], [ %_tmp28, %bb1 ]
+ %_tmp14 = getelementptr i8, i8* %_tmp281, i16 -1
+ %_tmp15 = load i8, i8* %_tmp14
+ %add = add i8 %_tmp15, 1
+ store i8 %add, i8* %_tmp281
+ store i8 %add, i8* %_tmp302
+ %_tmp28 = getelementptr i8, i8* %_tmp281, i16 1
+ %_tmp30 = getelementptr i8, i8* %_tmp302, i16 1
+ br i1 false, label %bb1, label %bb3.loopexit
+
+bb3.loopexit:
+ %_tmp30.lcssa = phi i8* [ %_tmp30, %bb1 ]
+ %_tmp15.lcssa = phi i8 [ %_tmp15, %bb1 ]
+ %_tmp28.lcssa = phi i8* [ %_tmp28, %bb1 ]
+ store i8* %_tmp28.lcssa, i8** %ls1.20
+ store i8 %_tmp15.lcssa, i8* %cse3.22
+ store i8* %_tmp30.lcssa, i8** %ls2.21
+ br label %bb3
+
+bb3:
+ ret void
+}
diff --git a/test/Transforms/LoopVersioning/noalias-version-twice.ll b/test/Transforms/LoopVersioning/noalias-version-twice.ll
new file mode 100644
index 000000000000..81ec0c0dc9f6
--- /dev/null
+++ b/test/Transforms/LoopVersioning/noalias-version-twice.ll
@@ -0,0 +1,106 @@
+; RUN: opt -basicaa -loop-distribute -scoped-noalias -loop-versioning -S < %s | FileCheck %s
+
+; Test the metadata generated when versioning an already versioned loop. Here
+; we invoke loop distribution to perform the first round of versioning. It
+; adds memchecks for accesses that can alias across the distribution boundary.
+; Then we further version the distributed loops to fully disambiguate accesses
+; within each.
+;
+; So as an example, we add noalias between C and A during the versioning
+; within loop distribution and then add noalias between C and D during the
+; second explicit versioning step:
+;
+; for (i = 0; i < n; i++) {
+; A[i + 1] = A[i] * B[i];
+; -------------------------------
+; C[i] = D[i] * E[i];
+; }
+
+; To make it easier to see what's going on, I expanded every noalias/scope
+; metadata reference below in a comment. For a scope I use the format scope(domain),
+; e.g. scope 17 in domain 15 is written as 17(15).
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+@B = common global i32* null, align 8
+@A = common global i32* null, align 8
+@C = common global i32* null, align 8
+@D = common global i32* null, align 8
+@E = common global i32* null, align 8
+
+define void @f() {
+entry:
+ %a = load i32*, i32** @A, align 8
+ %b = load i32*, i32** @B, align 8
+ %c = load i32*, i32** @C, align 8
+ %d = load i32*, i32** @D, align 8
+ %e = load i32*, i32** @E, align 8
+ br label %for.body
+
+for.body: ; preds = %for.body, %entry
+ %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+
+ %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
+
+; CHECK: %loadA.ldist1 = {{.*}} !noalias !25
+; A noalias C: !25 -> { 17(15), 18(15), 19(15), 26(24) }
+; ^^^^^^
+ %loadA = load i32, i32* %arrayidxA, align 4
+
+ %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
+ %loadB = load i32, i32* %arrayidxB, align 4
+
+ %mulA = mul i32 %loadB, %loadA
+
+ %add = add nuw nsw i64 %ind, 1
+ %arrayidxA_plus_4 = getelementptr inbounds i32, i32* %a, i64 %add
+ store i32 %mulA, i32* %arrayidxA_plus_4, align 4
+
+; CHECK: for.body:
+
+ %arrayidxD = getelementptr inbounds i32, i32* %d, i64 %ind
+
+; CHECK: %loadD = {{.*}} !alias.scope !31
+; D's scope: !31 -> { 18(15), 32(33) }
+; ^^^^^^
+ %loadD = load i32, i32* %arrayidxD, align 4
+
+ %arrayidxE = getelementptr inbounds i32, i32* %e, i64 %ind
+
+; CHECK: %loadE = {{.*}} !alias.scope !34
+; E's scope: !34 -> { 19(15), 35(33) }
+; ^^^^^^
+ %loadE = load i32, i32* %arrayidxE, align 4
+
+ %mulC = mul i32 %loadD, %loadE
+
+ %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
+
+; CHECK: store i32 %mulC, {{.*}} !alias.scope !36, !noalias !38
+; C's scope: !36 -> { 17(15), 37(33) }
+; ^^^^^^
+; C noalias D and E: !38 -> { 21(15), 32(33), 35(33) }
+; ^^^^^^ ^^^^^^
+ store i32 %mulC, i32* %arrayidxC, align 4
+
+ %exitcond = icmp eq i64 %add, 20
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+
+; Domain for the second loop versioning for the top loop after
+; distribution.
+; CHECK: !15 = distinct !{!15, !"LVerDomain"}
+; CHECK: !17 = distinct !{!17, !15}
+; CHECK: !25 = !{!17, !18, !19, !26}
+; CHECK: !31 = !{!18, !32}
+; CHECK: !32 = distinct !{!32, !33}
+; Domain for the second loop versioning for the bottom loop after
+; distribution.
+; CHECK: !33 = distinct !{!33, !"LVerDomain"}
+; CHECK: !34 = !{!19, !35}
+; CHECK: !35 = distinct !{!35, !33}
+; CHECK: !36 = !{!17, !37}
+; CHECK: !38 = !{!21, !32, !35}
diff --git a/test/Transforms/LoopVersioning/noalias.ll b/test/Transforms/LoopVersioning/noalias.ll
new file mode 100644
index 000000000000..c2539726db4d
--- /dev/null
+++ b/test/Transforms/LoopVersioning/noalias.ll
@@ -0,0 +1,54 @@
+; RUN: opt -basicaa -loop-versioning -S < %s | FileCheck %s
+
+; A very simple case. After versioning the %loadA and %loadB can't alias with
+; the store.
+;
+; To make it easier to see what's going on, I expanded every noalias/scope
+; metadata reference below in a comment. For a scope I use the format scope(domain),
+; e.g. scope 17 in domain 15 is written as 17(15).
+
+; CHECK-LABEL: @f(
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+
+define void @f(i32* %a, i32* %b, i32* %c) {
+entry:
+ br label %for.body
+
+; CHECK: for.body.lver.orig:
+; CHECK: for.body:
+for.body: ; preds = %for.body, %entry
+ %ind = phi i64 [ 0, %entry ], [ %add, %for.body ]
+
+ %arrayidxA = getelementptr inbounds i32, i32* %a, i64 %ind
+; CHECK: %loadA = {{.*}} !alias.scope !0
+; A's scope: !0 -> { 1(2) }
+ %loadA = load i32, i32* %arrayidxA, align 4
+
+ %arrayidxB = getelementptr inbounds i32, i32* %b, i64 %ind
+; CHECK: %loadB = {{.*}} !alias.scope !3
+; B's scope: !3 -> { 4(2) }
+ %loadB = load i32, i32* %arrayidxB, align 4
+
+ %mulC = mul i32 %loadA, %loadB
+
+ %arrayidxC = getelementptr inbounds i32, i32* %c, i64 %ind
+; CHECK: store {{.*}} !alias.scope !5, !noalias !7
+; C noalias A and B: !7 -> { 1(2), 4(2) }
+ store i32 %mulC, i32* %arrayidxC, align 4
+
+ %add = add nuw nsw i64 %ind, 1
+ %exitcond = icmp eq i64 %add, 20
+ br i1 %exitcond, label %for.end, label %for.body
+
+for.end: ; preds = %for.body
+ ret void
+}
+; CHECK: !0 = !{!1}
+; CHECK: !1 = distinct !{!1, !2}
+; CHECK: !2 = distinct !{!2, !"LVerDomain"}
+; CHECK: !3 = !{!4}
+; CHECK: !4 = distinct !{!4, !2}
+; CHECK: !5 = !{!6}
+; CHECK: !6 = distinct !{!6, !2}
+; CHECK: !7 = !{!1, !4}
diff --git a/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll b/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll
new file mode 100644
index 000000000000..9eacbde7710a
--- /dev/null
+++ b/test/Transforms/LoopVersioningLICM/loopversioningLICM1.ll
@@ -0,0 +1,67 @@
+; RUN: opt < %s -O1 -S -loop-versioning-licm -licm -debug-only=loop-versioning-licm 2>&1 | FileCheck %s
+; REQUIRES: asserts
+;
+; Test to confirm loop is a candidate for LoopVersioningLICM.
+; It also confirms invariant moved out of loop.
+;
+; CHECK: Loop: Loop at depth 2 containing: %for.body3<header><latch><exiting>
+; CHECK-NEXT: Loop Versioning found to be beneficial
+;
+; CHECK: for.body3:
+; CHECK-NEXT: %add86 = phi i32 [ %arrayidx7.promoted, %for.body3.ph ], [ %add8, %for.body3 ]
+; CHECK-NEXT: %j.113 = phi i32 [ %j.016, %for.body3.ph ], [ %inc, %for.body3 ]
+; CHECK-NEXT: %idxprom = zext i32 %j.113 to i64
+; CHECK-NEXT: %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %idxprom
+; CHECK-NEXT: store i32 %add, i32* %arrayidx, align 4, !alias.scope !6, !noalias !6
+; CHECK-NEXT: %add8 = add nsw i32 %add86, %add
+; CHECK-NEXT: %inc = add nuw i32 %j.113, 1
+; CHECK-NEXT: %cmp2 = icmp ult i32 %inc, %itr
+; CHECK-NEXT: br i1 %cmp2, label %for.body3, label %for.inc11.loopexit.loopexit5, !llvm.loop !7
+define i32 @foo(i32* nocapture %var1, i32* nocapture readnone %var2, i32* nocapture %var3, i32 %itr) #0 {
+entry:
+ %cmp14 = icmp eq i32 %itr, 0
+ br i1 %cmp14, label %for.end13, label %for.cond1.preheader.preheader
+
+for.cond1.preheader.preheader: ; preds = %entry
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc11
+ %j.016 = phi i32 [ %j.1.lcssa, %for.inc11 ], [ 0, %for.cond1.preheader.preheader ]
+ %i.015 = phi i32 [ %inc12, %for.inc11 ], [ 0, %for.cond1.preheader.preheader ]
+ %cmp212 = icmp ult i32 %j.016, %itr
+ br i1 %cmp212, label %for.body3.lr.ph, label %for.inc11
+
+for.body3.lr.ph: ; preds = %for.cond1.preheader
+ %add = add i32 %i.015, %itr
+ %idxprom6 = zext i32 %i.015 to i64
+ %arrayidx7 = getelementptr inbounds i32, i32* %var3, i64 %idxprom6
+ br label %for.body3
+
+for.body3: ; preds = %for.body3.lr.ph, %for.body3
+ %j.113 = phi i32 [ %j.016, %for.body3.lr.ph ], [ %inc, %for.body3 ]
+ %idxprom = zext i32 %j.113 to i64
+ %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %idxprom
+ store i32 %add, i32* %arrayidx, align 4
+ %0 = load i32, i32* %arrayidx7, align 4
+ %add8 = add nsw i32 %0, %add
+ store i32 %add8, i32* %arrayidx7, align 4
+ %inc = add nuw i32 %j.113, 1
+ %cmp2 = icmp ult i32 %inc, %itr
+ br i1 %cmp2, label %for.body3, label %for.inc11.loopexit
+
+for.inc11.loopexit: ; preds = %for.body3
+ br label %for.inc11
+
+for.inc11: ; preds = %for.inc11.loopexit, %for.cond1.preheader
+ %j.1.lcssa = phi i32 [ %j.016, %for.cond1.preheader ], [ %itr, %for.inc11.loopexit ]
+ %inc12 = add nuw i32 %i.015, 1
+ %cmp = icmp ult i32 %inc12, %itr
+ br i1 %cmp, label %for.cond1.preheader, label %for.end13.loopexit
+
+for.end13.loopexit: ; preds = %for.inc11
+ br label %for.end13
+
+for.end13: ; preds = %for.end13.loopexit, %entry
+ ret i32 0
+}
+
diff --git a/test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll b/test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll
new file mode 100644
index 000000000000..a48ee890ac50
--- /dev/null
+++ b/test/Transforms/LoopVersioningLICM/loopversioningLICM2.ll
@@ -0,0 +1,52 @@
+; RUN: opt < %s -O1 -S -loop-versioning-licm -licm -debug-only=loop-versioning-licm -disable-loop-unrolling 2>&1 | FileCheck %s
+; REQUIRES: asserts
+;
+; Test to confirm loop is a good candidate for LoopVersioningLICM
+; It also confirms invariant moved out of loop.
+;
+; CHECK: Loop: Loop at depth 2 containing: %for.body3.us<header><latch><exiting>
+; CHECK-NEXT: Loop Versioning found to be beneficial
+;
+; CHECK: for.cond1.for.inc17_crit_edge.us.loopexit5: ; preds = %for.body3.us
+; CHECK-NEXT: %add14.us.lcssa = phi float [ %add14.us, %for.body3.us ]
+; CHECK-NEXT: store float %add14.us.lcssa, float* %arrayidx.us, align 4, !alias.scope !7, !noalias !8
+; CHECK-NEXT: br label %for.cond1.for.inc17_crit_edge.us
+;
+define i32 @foo(float* nocapture %var2, float** nocapture readonly %var3, i32 %itr) #0 {
+entry:
+ %cmp38 = icmp sgt i32 %itr, 1
+ br i1 %cmp38, label %for.body3.lr.ph.us, label %for.end19
+
+for.body3.us: ; preds = %for.body3.us, %for.body3.lr.ph.us
+ %0 = phi float [ %.pre, %for.body3.lr.ph.us ], [ %add14.us, %for.body3.us ]
+ %indvars.iv = phi i64 [ 1, %for.body3.lr.ph.us ], [ %indvars.iv.next, %for.body3.us ]
+ %1 = trunc i64 %indvars.iv to i32
+ %conv.us = sitofp i32 %1 to float
+ %add.us = fadd float %conv.us, %0
+ %arrayidx7.us = getelementptr inbounds float, float* %3, i64 %indvars.iv
+ store float %add.us, float* %arrayidx7.us, align 4
+ %2 = load float, float* %arrayidx.us, align 4
+ %add14.us = fadd float %2, %add.us
+ store float %add14.us, float* %arrayidx.us, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %itr
+ br i1 %exitcond, label %for.cond1.for.inc17_crit_edge.us, label %for.body3.us
+
+for.body3.lr.ph.us: ; preds = %entry, %for.cond1.for.inc17_crit_edge.us
+ %indvars.iv40 = phi i64 [ %indvars.iv.next41, %for.cond1.for.inc17_crit_edge.us ], [ 1, %entry ]
+ %arrayidx.us = getelementptr inbounds float, float* %var2, i64 %indvars.iv40
+ %arrayidx6.us = getelementptr inbounds float*, float** %var3, i64 %indvars.iv40
+ %3 = load float*, float** %arrayidx6.us, align 8
+ %.pre = load float, float* %arrayidx.us, align 4
+ br label %for.body3.us
+
+for.cond1.for.inc17_crit_edge.us: ; preds = %for.body3.us
+ %indvars.iv.next41 = add nuw nsw i64 %indvars.iv40, 1
+ %lftr.wideiv42 = trunc i64 %indvars.iv.next41 to i32
+ %exitcond43 = icmp eq i32 %lftr.wideiv42, %itr
+ br i1 %exitcond43, label %for.end19, label %for.body3.lr.ph.us
+
+for.end19: ; preds = %for.cond1.for.inc17_crit_edge.us, %entry
+ ret i32 0
+}
diff --git a/test/Transforms/LoopVersioningLICM/loopversioningLICM3.ll b/test/Transforms/LoopVersioningLICM/loopversioningLICM3.ll
new file mode 100644
index 000000000000..8e39fa6ed0c1
--- /dev/null
+++ b/test/Transforms/LoopVersioningLICM/loopversioningLICM3.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -O1 -S -loop-versioning-licm -debug-only=loop-versioning-licm 2>&1 | FileCheck %s
+; REQUIRES: asserts
+;
+; Test to confirm loop is not a candidate for LoopVersioningLICM.
+;
+; CHECK: Loop: Loop at depth 2 containing: %for.body3<header><latch><exiting>
+; CHECK-NEXT: LAA: Runtime check not found !!
+; CHECK-NEXT: Loop instructions not suitable for LoopVersioningLICM
+
+define i32 @foo(i32* nocapture %var1, i32 %itr) #0 {
+entry:
+ %cmp18 = icmp eq i32 %itr, 0
+ br i1 %cmp18, label %for.end8, label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %entry, %for.inc6
+ %j.020 = phi i32 [ %j.1.lcssa, %for.inc6 ], [ 0, %entry ]
+ %i.019 = phi i32 [ %inc7, %for.inc6 ], [ 0, %entry ]
+ %cmp216 = icmp ult i32 %j.020, %itr
+ br i1 %cmp216, label %for.body3.lr.ph, label %for.inc6
+
+for.body3.lr.ph: ; preds = %for.cond1.preheader
+ %0 = zext i32 %j.020 to i64
+ br label %for.body3
+
+for.body3: ; preds = %for.body3, %for.body3.lr.ph
+ %indvars.iv = phi i64 [ %0, %for.body3.lr.ph ], [ %indvars.iv.next, %for.body3 ]
+ %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %indvars.iv
+ %1 = load i32, i32* %arrayidx, align 4
+ %add = add nsw i32 %1, %itr
+ store i32 %add, i32* %arrayidx, align 4
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %lftr.wideiv = trunc i64 %indvars.iv.next to i32
+ %exitcond = icmp eq i32 %lftr.wideiv, %itr
+ br i1 %exitcond, label %for.inc6, label %for.body3
+
+for.inc6: ; preds = %for.body3, %for.cond1.preheader
+ %j.1.lcssa = phi i32 [ %j.020, %for.cond1.preheader ], [ %itr, %for.body3 ]
+ %inc7 = add nuw i32 %i.019, 1
+ %exitcond21 = icmp eq i32 %inc7, %itr
+ br i1 %exitcond21, label %for.end8, label %for.cond1.preheader
+
+for.end8: ; preds = %for.inc6, %entry
+ ret i32 0
+}
+
diff --git a/test/Transforms/LoopVersioningLICM/metadata.ll b/test/Transforms/LoopVersioningLICM/metadata.ll
new file mode 100644
index 000000000000..5a592f6b7af1
--- /dev/null
+++ b/test/Transforms/LoopVersioningLICM/metadata.ll
@@ -0,0 +1,104 @@
+; RUN: opt < %s -O1 -S -loop-versioning-licm -licm 2>&1 | FileCheck %s
+
+; CHECK-LABEL: @without_metadata(
+define i32 @without_metadata(i32* nocapture %var1, i32* nocapture readnone %var2, i32* nocapture %var3, i32 %itr) #0 {
+entry:
+ %cmp14 = icmp eq i32 %itr, 0
+ br i1 %cmp14, label %for.end13, label %for.cond1.preheader.preheader
+
+for.cond1.preheader.preheader: ; preds = %entry
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc11
+ %j.016 = phi i32 [ %j.1.lcssa, %for.inc11 ], [ 0, %for.cond1.preheader.preheader ]
+ %i.015 = phi i32 [ %inc12, %for.inc11 ], [ 0, %for.cond1.preheader.preheader ]
+ %cmp212 = icmp ult i32 %j.016, %itr
+ br i1 %cmp212, label %for.body3.lr.ph, label %for.inc11
+
+for.body3.lr.ph: ; preds = %for.cond1.preheader
+ %add = add i32 %i.015, %itr
+ %idxprom6 = zext i32 %i.015 to i64
+ %arrayidx7 = getelementptr inbounds i32, i32* %var3, i64 %idxprom6
+ br label %for.body3
+
+for.body3: ; preds = %for.body3.lr.ph, %for.body3
+ %j.113 = phi i32 [ %j.016, %for.body3.lr.ph ], [ %inc, %for.body3 ]
+ %idxprom = zext i32 %j.113 to i64
+ %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %idxprom
+; CHECK: store i32 %add, i32* %arrayidx, align 4, !alias.scope {{.*}}, !noalias {{.*}}
+ store i32 %add, i32* %arrayidx, align 4
+ %0 = load i32, i32* %arrayidx7, align 4
+ %add8 = add nsw i32 %0, %add
+ store i32 %add8, i32* %arrayidx7, align 4
+ %inc = add nuw i32 %j.113, 1
+ %cmp2 = icmp ult i32 %inc, %itr
+ br i1 %cmp2, label %for.body3, label %for.inc11.loopexit
+
+for.inc11.loopexit: ; preds = %for.body3
+ br label %for.inc11
+
+for.inc11: ; preds = %for.inc11.loopexit, %for.cond1.preheader
+ %j.1.lcssa = phi i32 [ %j.016, %for.cond1.preheader ], [ %itr, %for.inc11.loopexit ]
+ %inc12 = add nuw i32 %i.015, 1
+ %cmp = icmp ult i32 %inc12, %itr
+ br i1 %cmp, label %for.cond1.preheader, label %for.end13.loopexit
+
+for.end13.loopexit: ; preds = %for.inc11
+ br label %for.end13
+
+for.end13: ; preds = %for.end13.loopexit, %entry
+ ret i32 0
+}
+
+; CHECK-LABEL: @with_metadata(
+define i32 @with_metadata(i32* nocapture %var1, i32* nocapture readnone %var2, i32* nocapture %var3, i32 %itr) #0 {
+entry:
+ %cmp14 = icmp eq i32 %itr, 0
+ br i1 %cmp14, label %for.end13, label %for.cond1.preheader.preheader
+
+for.cond1.preheader.preheader: ; preds = %entry
+ br label %for.cond1.preheader
+
+for.cond1.preheader: ; preds = %for.cond1.preheader.preheader, %for.inc11
+ %j.016 = phi i32 [ %j.1.lcssa, %for.inc11 ], [ 0, %for.cond1.preheader.preheader ]
+ %i.015 = phi i32 [ %inc12, %for.inc11 ], [ 0, %for.cond1.preheader.preheader ]
+ %cmp212 = icmp ult i32 %j.016, %itr
+ br i1 %cmp212, label %for.body3.lr.ph, label %for.inc11
+
+for.body3.lr.ph: ; preds = %for.cond1.preheader
+ %add = add i32 %i.015, %itr
+ %idxprom6 = zext i32 %i.015 to i64
+ %arrayidx7 = getelementptr inbounds i32, i32* %var3, i64 %idxprom6
+ br label %for.body3
+
+for.body3: ; preds = %for.body3.lr.ph, %for.body3
+ %j.113 = phi i32 [ %j.016, %for.body3.lr.ph ], [ %inc, %for.body3 ]
+ %idxprom = zext i32 %j.113 to i64
+ %arrayidx = getelementptr inbounds i32, i32* %var1, i64 %idxprom
+; CHECK-NOT: store i32 %add, i32* %arrayidx, align 4, !alias.scope {{.*}}, !noalias {{.*}}
+ store i32 %add, i32* %arrayidx, align 4
+ %0 = load i32, i32* %arrayidx7, align 4
+ %add8 = add nsw i32 %0, %add
+ store i32 %add8, i32* %arrayidx7, align 4
+ %inc = add nuw i32 %j.113, 1
+ %cmp2 = icmp ult i32 %inc, %itr
+ br i1 %cmp2, label %for.body3, label %for.inc11.loopexit, !llvm.loop !0
+
+for.inc11.loopexit: ; preds = %for.body3
+ br label %for.inc11
+
+for.inc11: ; preds = %for.inc11.loopexit, %for.cond1.preheader
+ %j.1.lcssa = phi i32 [ %j.016, %for.cond1.preheader ], [ %itr, %for.inc11.loopexit ]
+ %inc12 = add nuw i32 %i.015, 1
+ %cmp = icmp ult i32 %inc12, %itr
+ br i1 %cmp, label %for.cond1.preheader, label %for.end13.loopexit
+
+for.end13.loopexit: ; preds = %for.inc11
+ br label %for.end13
+
+for.end13: ; preds = %for.end13.loopexit, %entry
+ ret i32 0
+}
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.licm_versioning.disable"}
diff --git a/test/Transforms/LowerAtomic/atomic-load.ll b/test/Transforms/LowerAtomic/atomic-load.ll
index 1279bf72201c..e73417f3d407 100644
--- a/test/Transforms/LowerAtomic/atomic-load.ll
+++ b/test/Transforms/LowerAtomic/atomic-load.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -loweratomic -S | FileCheck %s
+; RUN: opt < %s -passes=loweratomic -S | FileCheck %s
define i8 @add() {
; CHECK-LABEL: @add(
diff --git a/test/Transforms/LowerBitSets/constant.ll b/test/Transforms/LowerBitSets/constant.ll
deleted file mode 100644
index 99c925914f72..000000000000
--- a/test/Transforms/LowerBitSets/constant.ll
+++ /dev/null
@@ -1,34 +0,0 @@
-; RUN: opt -S -lowerbitsets < %s | FileCheck %s
-
-target datalayout = "e-p:32:32"
-
-@a = constant i32 1
-@b = constant [2 x i32] [i32 2, i32 3]
-
-!0 = !{!"bitset1", i32* @a, i32 0}
-!1 = !{!"bitset1", [2 x i32]* @b, i32 4}
-
-!llvm.bitsets = !{ !0, !1 }
-
-declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
-
-; CHECK: @foo(
-define i1 @foo() {
- ; CHECK: ret i1 true
- %x = call i1 @llvm.bitset.test(i8* bitcast (i32* @a to i8*), metadata !"bitset1")
- ret i1 %x
-}
-
-; CHECK: @bar(
-define i1 @bar() {
- ; CHECK: ret i1 true
- %x = call i1 @llvm.bitset.test(i8* bitcast (i32* getelementptr ([2 x i32], [2 x i32]* @b, i32 0, i32 1) to i8*), metadata !"bitset1")
- ret i1 %x
-}
-
-; CHECK: @baz(
-define i1 @baz() {
- ; CHECK-NOT: ret i1 true
- %x = call i1 @llvm.bitset.test(i8* bitcast (i32* getelementptr ([2 x i32], [2 x i32]* @b, i32 0, i32 0) to i8*), metadata !"bitset1")
- ret i1 %x
-}
diff --git a/test/Transforms/LowerBitSets/layout.ll b/test/Transforms/LowerBitSets/layout.ll
deleted file mode 100644
index a0c6e77a57fe..000000000000
--- a/test/Transforms/LowerBitSets/layout.ll
+++ /dev/null
@@ -1,35 +0,0 @@
-; RUN: opt -S -lowerbitsets < %s | FileCheck %s
-
-target datalayout = "e-p:32:32"
-
-; Tests that this set of globals is laid out according to our layout algorithm
-; (see GlobalLayoutBuilder in include/llvm/Transforms/IPO/LowerBitSets.h).
-; The chosen layout in this case is a, e, b, d, c.
-
-; CHECK: private constant { i32, [0 x i8], i32, [0 x i8], i32, [0 x i8], i32, [0 x i8], i32 } { i32 1, [0 x i8] zeroinitializer, i32 5, [0 x i8] zeroinitializer, i32 2, [0 x i8] zeroinitializer, i32 4, [0 x i8] zeroinitializer, i32 3 }
-@a = constant i32 1
-@b = constant i32 2
-@c = constant i32 3
-@d = constant i32 4
-@e = constant i32 5
-
-!0 = !{!"bitset1", i32* @a, i32 0}
-!1 = !{!"bitset1", i32* @b, i32 0}
-!2 = !{!"bitset1", i32* @c, i32 0}
-
-!3 = !{!"bitset2", i32* @b, i32 0}
-!4 = !{!"bitset2", i32* @d, i32 0}
-
-!5 = !{!"bitset3", i32* @a, i32 0}
-!6 = !{!"bitset3", i32* @e, i32 0}
-
-!llvm.bitsets = !{ !0, !1, !2, !3, !4, !5, !6 }
-
-declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
-
-define void @foo() {
- %x = call i1 @llvm.bitset.test(i8* undef, metadata !"bitset1")
- %y = call i1 @llvm.bitset.test(i8* undef, metadata !"bitset2")
- %z = call i1 @llvm.bitset.test(i8* undef, metadata !"bitset3")
- ret void
-}
diff --git a/test/Transforms/LowerBitSets/nonglobal.ll b/test/Transforms/LowerBitSets/nonglobal.ll
deleted file mode 100644
index 7591e31e3524..000000000000
--- a/test/Transforms/LowerBitSets/nonglobal.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: opt -S -lowerbitsets < %s | FileCheck %s
-
-target datalayout = "e-p:32:32"
-
-; CHECK-NOT: @b = alias
-@a = constant i32 1
-@b = constant [2 x i32] [i32 2, i32 3]
-
-!0 = !{!"bitset1", i32* @a, i32 0}
-!1 = !{!"bitset1", i32* bitcast ([2 x i32]* @b to i32*), i32 0}
-
-!llvm.bitsets = !{ !0, !1 }
-
-declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
-
-define i1 @foo(i8* %p) {
- %x = call i1 @llvm.bitset.test(i8* %p, metadata !"bitset1")
- ret i1 %x
-}
diff --git a/test/Transforms/LowerBitSets/pr25902.ll b/test/Transforms/LowerBitSets/pr25902.ll
deleted file mode 100644
index b9a1203ec0e1..000000000000
--- a/test/Transforms/LowerBitSets/pr25902.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; PR25902: gold plugin crash.
-; RUN: opt -mtriple=i686-pc -S -lowerbitsets < %s
-
-define void @f(void ()* %p) {
-entry:
- %a = bitcast void ()* %p to i8*, !nosanitize !1
- %b = call i1 @llvm.bitset.test(i8* %a, metadata !"_ZTSFvvE"), !nosanitize !1
- ret void
-}
-
-define void @g() {
-entry:
- ret void
-}
-
-declare i1 @llvm.bitset.test(i8*, metadata)
-
-!llvm.bitsets = !{!0}
-
-!0 = !{!"_ZTSFvvE", void ()* @g, i64 0}
-!1 = !{}
diff --git a/test/Transforms/LowerBitSets/unnamed.ll b/test/Transforms/LowerBitSets/unnamed.ll
deleted file mode 100644
index 6f108e22d02d..000000000000
--- a/test/Transforms/LowerBitSets/unnamed.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: opt -S -lowerbitsets < %s | FileCheck %s
-
-target datalayout = "e-p:32:32"
-
-; CHECK: @{{[0-9]+}} = alias
-; CHECK: @{{[0-9]+}} = alias
-@0 = constant i32 1
-@1 = constant [2 x i32] [i32 2, i32 3]
-
-!0 = !{!"bitset1", i32* @0, i32 0}
-!1 = !{!"bitset1", [2 x i32]* @1, i32 4}
-
-!llvm.bitsets = !{ !0, !1 }
-
-declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
-
-define i1 @foo(i8* %p) {
- %x = call i1 @llvm.bitset.test(i8* %p, metadata !"bitset1")
- ret i1 %x
-}
diff --git a/test/Transforms/LowerExpectIntrinsic/basic.ll b/test/Transforms/LowerExpectIntrinsic/basic.ll
index 69e67cd7c1dd..562db86cefde 100644
--- a/test/Transforms/LowerExpectIntrinsic/basic.ll
+++ b/test/Transforms/LowerExpectIntrinsic/basic.ll
@@ -275,7 +275,7 @@ return: ; preds = %if.end, %if.then
declare i1 @llvm.expect.i1(i1, i1) nounwind readnone
-; CHECK: !0 = !{!"branch_weights", i32 64, i32 4}
-; CHECK: !1 = !{!"branch_weights", i32 4, i32 64}
-; CHECK: !2 = !{!"branch_weights", i32 4, i32 64, i32 4}
-; CHECK: !3 = !{!"branch_weights", i32 64, i32 4, i32 4}
+; CHECK: !0 = !{!"branch_weights", i32 2000, i32 1}
+; CHECK: !1 = !{!"branch_weights", i32 1, i32 2000}
+; CHECK: !2 = !{!"branch_weights", i32 1, i32 2000, i32 1}
+; CHECK: !3 = !{!"branch_weights", i32 2000, i32 1, i32 1}
diff --git a/test/Transforms/LowerGuardIntrinsic/basic.ll b/test/Transforms/LowerGuardIntrinsic/basic.ll
new file mode 100644
index 000000000000..7c3584a774e1
--- /dev/null
+++ b/test/Transforms/LowerGuardIntrinsic/basic.ll
@@ -0,0 +1,76 @@
+; RUN: opt -S -lower-guard-intrinsic < %s | FileCheck %s
+
+declare void @llvm.experimental.guard(i1, ...)
+
+define i8 @f_basic(i1* %c_ptr) {
+; CHECK-LABEL: @f_basic(
+
+ %c = load volatile i1, i1* %c_ptr
+ call void(i1, ...) @llvm.experimental.guard(i1 %c, i32 1) [ "deopt"(i32 1) ]
+ ret i8 5
+
+; CHECK: br i1 %c, label %guarded, label %deopt, !prof !0
+; CHECK: deopt:
+; CHECK-NEXT: %deoptcall = call i8 (...) @llvm.experimental.deoptimize.i8(i32 1) [ "deopt"(i32 1) ]
+; CHECK-NEXT: ret i8 %deoptcall
+; CHECK: guarded:
+; CHECK-NEXT: ret i8 5
+}
+
+define void @f_void_return_ty(i1* %c_ptr) {
+; CHECK-LABEL: @f_void_return_ty(
+
+ %c = load volatile i1, i1* %c_ptr
+ call void(i1, ...) @llvm.experimental.guard(i1 %c, i32 1) [ "deopt"() ]
+ ret void
+
+; CHECK: br i1 %c, label %guarded, label %deopt, !prof !0
+; CHECK: deopt:
+; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid(i32 1) [ "deopt"() ]
+; CHECK-NEXT: ret void
+; CHECK: guarded:
+; CHECK-NEXT: ret void
+}
+
+define void @f_multiple_args(i1* %c_ptr) {
+; CHECK-LABEL: @f_multiple_args(
+
+ %c = load volatile i1, i1* %c_ptr
+ call void(i1, ...) @llvm.experimental.guard(i1 %c, i32 1, i32 2, double 500.0) [ "deopt"(i32 2, i32 3) ]
+ ret void
+
+; CHECK: br i1 %c, label %guarded, label %deopt, !prof !0
+; CHECK: deopt:
+; CHECK-NEXT: call void (...) @llvm.experimental.deoptimize.isVoid(i32 1, i32 2, double 5.000000e+02) [ "deopt"(i32 2, i32 3) ]
+; CHECK-NEXT: ret void
+; CHECK: guarded:
+; CHECK-NEXT: ret void
+}
+
+define i32 @f_zero_args(i1* %c_ptr) {
+; CHECK-LABEL: @f_zero_args(
+ %c = load volatile i1, i1* %c_ptr
+ call void(i1, ...) @llvm.experimental.guard(i1 %c) [ "deopt"(i32 2, i32 3) ]
+ ret i32 500
+
+; CHECK: br i1 %c, label %guarded, label %deopt, !prof !0
+; CHECK: deopt:
+; CHECK-NEXT: %deoptcall = call i32 (...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 2, i32 3) ]
+; CHECK-NEXT: ret i32 %deoptcall
+; CHECK: guarded:
+; CHECK-NEXT: ret i32 500
+}
+
+define i8 @f_with_make_implicit_md(i32* %ptr) {
+; CHECK-LABEL: @f_with_make_implicit_md(
+; CHECK: br i1 %notNull, label %guarded, label %deopt, !prof !0, !make.implicit !1
+; CHECK: deopt:
+; CHECK-NEXT: %deoptcall = call i8 (...) @llvm.experimental.deoptimize.i8(i32 1) [ "deopt"(i32 1) ]
+; CHECK-NEXT: ret i8 %deoptcall
+
+ %notNull = icmp ne i32* %ptr, null
+ call void(i1, ...) @llvm.experimental.guard(i1 %notNull, i32 1) [ "deopt"(i32 1) ], !make.implicit !{}
+ ret i8 5
+}
+
+!0 = !{!"branch_weights", i32 1048576, i32 1}
diff --git a/test/Transforms/LowerGuardIntrinsic/with-calling-conv.ll b/test/Transforms/LowerGuardIntrinsic/with-calling-conv.ll
new file mode 100644
index 000000000000..9dce54ce6044
--- /dev/null
+++ b/test/Transforms/LowerGuardIntrinsic/with-calling-conv.ll
@@ -0,0 +1,15 @@
+; RUN: opt -S -lower-guard-intrinsic < %s | FileCheck %s
+
+declare cc99 void @llvm.experimental.guard(i1, ...)
+
+define i8 @f_basic(i1* %c_ptr) {
+; CHECK-LABEL: @f_basic(
+; CHECK: br i1 %c, label %guarded, label %deopt
+; CHECK: deopt:
+; CHECK-NEXT: %deoptcall = call cc99 i8 (...) @llvm.experimental.deoptimize.i8() [ "deopt"() ]
+; CHECK-NEXT: ret i8 %deoptcall
+
+ %c = load volatile i1, i1* %c_ptr
+ call cc99 void(i1, ...) @llvm.experimental.guard(i1 %c) [ "deopt"() ]
+ ret i8 6
+}
diff --git a/test/Transforms/LowerTypeTests/constant.ll b/test/Transforms/LowerTypeTests/constant.ll
new file mode 100644
index 000000000000..65b21184d221
--- /dev/null
+++ b/test/Transforms/LowerTypeTests/constant.ll
@@ -0,0 +1,33 @@
+; RUN: opt -S -lowertypetests < %s | FileCheck %s
+; RUN: opt -S -passes=lowertypetests < %s | FileCheck %s
+
+target datalayout = "e-p:32:32"
+
+@a = constant i32 1, !type !0
+@b = constant [2 x i32] [i32 2, i32 3], !type !1
+
+!0 = !{i32 0, !"typeid1"}
+!1 = !{i32 4, !"typeid1"}
+
+declare i1 @llvm.type.test(i8* %ptr, metadata %bitset) nounwind readnone
+
+; CHECK: @foo(
+define i1 @foo() {
+ ; CHECK: ret i1 true
+ %x = call i1 @llvm.type.test(i8* bitcast (i32* @a to i8*), metadata !"typeid1")
+ ret i1 %x
+}
+
+; CHECK: @bar(
+define i1 @bar() {
+ ; CHECK: ret i1 true
+ %x = call i1 @llvm.type.test(i8* bitcast (i32* getelementptr ([2 x i32], [2 x i32]* @b, i32 0, i32 1) to i8*), metadata !"typeid1")
+ ret i1 %x
+}
+
+; CHECK: @baz(
+define i1 @baz() {
+ ; CHECK-NOT: ret i1 true
+ %x = call i1 @llvm.type.test(i8* bitcast (i32* getelementptr ([2 x i32], [2 x i32]* @b, i32 0, i32 0) to i8*), metadata !"typeid1")
+ ret i1 %x
+}
diff --git a/test/Transforms/LowerBitSets/function-ext.ll b/test/Transforms/LowerTypeTests/function-ext.ll
index 2a83bef2f074..45dcc5e6de35 100644
--- a/test/Transforms/LowerBitSets/function-ext.ll
+++ b/test/Transforms/LowerTypeTests/function-ext.ll
@@ -1,22 +1,20 @@
-; RUN: opt -S -lowerbitsets < %s | FileCheck %s
+; RUN: opt -S -lowertypetests < %s | FileCheck %s
; Tests that we correctly handle external references, including the case where
; all functions in a bitset are external references.
target triple = "x86_64-unknown-linux-gnu"
-declare void @foo()
+declare !type !0 void @foo()
; CHECK: @[[JT:.*]] = private constant [1 x <{ i8, i32, i8, i8, i8 }>] [<{ i8, i32, i8, i8, i8 }> <{ i8 -23, i32 trunc (i64 sub (i64 sub (i64 ptrtoint (void ()* @foo to i64), i64 ptrtoint ([1 x <{ i8, i32, i8, i8, i8 }>]* @[[JT]] to i64)), i64 5) to i32), i8 -52, i8 -52, i8 -52 }>], section ".text"
define i1 @bar(i8* %ptr) {
; CHECK: icmp eq i64 {{.*}}, ptrtoint ([1 x <{ i8, i32, i8, i8, i8 }>]* @[[JT]] to i64)
- %p = call i1 @llvm.bitset.test(i8* %ptr, metadata !"void")
+ %p = call i1 @llvm.type.test(i8* %ptr, metadata !"void")
ret i1 %p
}
-declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
+declare i1 @llvm.type.test(i8* %ptr, metadata %bitset) nounwind readnone
-!0 = !{!"void", void ()* @foo, i64 0}
-
-!llvm.bitsets = !{!0}
+!0 = !{i64 0, !"void"}
diff --git a/test/Transforms/LowerBitSets/function.ll b/test/Transforms/LowerTypeTests/function.ll
index bf4043d61c41..662d1e2a1978 100644
--- a/test/Transforms/LowerBitSets/function.ll
+++ b/test/Transforms/LowerTypeTests/function.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -lowerbitsets < %s | FileCheck %s
+; RUN: opt -S -lowertypetests < %s | FileCheck %s
; Tests that we correctly create a jump table for bitsets containing 2 or more
; functions.
@@ -11,25 +11,22 @@ target datalayout = "e-p:64:64"
; CHECK: @f = alias void (), bitcast ([2 x <{ i8, i32, i8, i8, i8 }>]* @[[JT]] to void ()*)
; CHECK: @g = alias void (), bitcast (<{ i8, i32, i8, i8, i8 }>* getelementptr inbounds ([2 x <{ i8, i32, i8, i8, i8 }>], [2 x <{ i8, i32, i8, i8, i8 }>]* @[[JT]], i64 0, i64 1) to void ()*)
-; CHECK: define private void @[[FNAME]]() {
-define void @f() {
+; CHECK: define private void @[[FNAME]]()
+define void @f() !type !0 {
ret void
}
-; CHECK: define private void @[[GNAME]]() {
-define void @g() {
+; CHECK: define private void @[[GNAME]]()
+define void @g() !type !0 {
ret void
}
-!0 = !{!"bitset1", void ()* @f, i32 0}
-!1 = !{!"bitset1", void ()* @g, i32 0}
+!0 = !{i32 0, !"typeid1"}
-!llvm.bitsets = !{ !0, !1 }
-
-declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
+declare i1 @llvm.type.test(i8* %ptr, metadata %bitset) nounwind readnone
define i1 @foo(i8* %p) {
; CHECK: sub i64 {{.*}}, ptrtoint ([2 x <{ i8, i32, i8, i8, i8 }>]* @[[JT]] to i64)
- %x = call i1 @llvm.bitset.test(i8* %p, metadata !"bitset1")
+ %x = call i1 @llvm.type.test(i8* %p, metadata !"typeid1")
ret i1 %x
}
diff --git a/test/Transforms/LowerTypeTests/layout.ll b/test/Transforms/LowerTypeTests/layout.ll
new file mode 100644
index 000000000000..7075955790d9
--- /dev/null
+++ b/test/Transforms/LowerTypeTests/layout.ll
@@ -0,0 +1,27 @@
+; RUN: opt -S -lowertypetests < %s | FileCheck %s
+
+target datalayout = "e-p:32:32"
+
+; Tests that this set of globals is laid out according to our layout algorithm
+; (see GlobalLayoutBuilder in include/llvm/Transforms/IPO/LowerTypeTests.h).
+; The chosen layout in this case is a, e, b, d, c.
+
+; CHECK: private constant { i32, [0 x i8], i32, [0 x i8], i32, [0 x i8], i32, [0 x i8], i32 } { i32 1, [0 x i8] zeroinitializer, i32 5, [0 x i8] zeroinitializer, i32 2, [0 x i8] zeroinitializer, i32 4, [0 x i8] zeroinitializer, i32 3 }
+@a = constant i32 1, !type !0, !type !2
+@b = constant i32 2, !type !0, !type !1
+@c = constant i32 3, !type !0
+@d = constant i32 4, !type !1
+@e = constant i32 5, !type !2
+
+!0 = !{i32 0, !"typeid1"}
+!1 = !{i32 0, !"typeid2"}
+!2 = !{i32 0, !"typeid3"}
+
+declare i1 @llvm.type.test(i8* %ptr, metadata %bitset) nounwind readnone
+
+define void @foo() {
+ %x = call i1 @llvm.type.test(i8* undef, metadata !"typeid1")
+ %y = call i1 @llvm.type.test(i8* undef, metadata !"typeid2")
+ %z = call i1 @llvm.type.test(i8* undef, metadata !"typeid3")
+ ret void
+}
diff --git a/test/Transforms/LowerBitSets/nonstring.ll b/test/Transforms/LowerTypeTests/nonstring.ll
index e61c9123e086..306dd1f3db10 100644
--- a/test/Transforms/LowerBitSets/nonstring.ll
+++ b/test/Transforms/LowerTypeTests/nonstring.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -lowerbitsets < %s | FileCheck %s
+; RUN: opt -S -lowertypetests < %s | FileCheck %s
; Tests that non-string metadata nodes may be used as bitset identifiers.
@@ -7,28 +7,26 @@ target datalayout = "e-p:32:32"
; CHECK: @[[ANAME:.*]] = private constant { i32 }
; CHECK: @[[BNAME:.*]] = private constant { [2 x i32] }
-@a = constant i32 1
-@b = constant [2 x i32] [i32 2, i32 3]
+@a = constant i32 1, !type !0
+@b = constant [2 x i32] [i32 2, i32 3], !type !1
-!0 = !{!2, i32* @a, i32 0}
-!1 = !{!3, [2 x i32]* @b, i32 0}
+!0 = !{i32 0, !2}
+!1 = !{i32 0, !3}
!2 = distinct !{}
!3 = distinct !{}
-!llvm.bitsets = !{ !0, !1 }
-
-declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
+declare i1 @llvm.type.test(i8* %ptr, metadata %bitset) nounwind readnone
; CHECK-LABEL: @foo
define i1 @foo(i8* %p) {
; CHECK: icmp eq i32 {{.*}}, ptrtoint ({ i32 }* @[[ANAME]] to i32)
- %x = call i1 @llvm.bitset.test(i8* %p, metadata !2)
+ %x = call i1 @llvm.type.test(i8* %p, metadata !2)
ret i1 %x
}
; CHECK-LABEL: @bar
define i1 @bar(i8* %p) {
; CHECK: icmp eq i32 {{.*}}, ptrtoint ({ [2 x i32] }* @[[BNAME]] to i32)
- %x = call i1 @llvm.bitset.test(i8* %p, metadata !3)
+ %x = call i1 @llvm.type.test(i8* %p, metadata !3)
ret i1 %x
}
diff --git a/test/Transforms/LowerTypeTests/pr25902.ll b/test/Transforms/LowerTypeTests/pr25902.ll
new file mode 100644
index 000000000000..dda283ca6688
--- /dev/null
+++ b/test/Transforms/LowerTypeTests/pr25902.ll
@@ -0,0 +1,19 @@
+; PR25902: gold plugin crash.
+; RUN: opt -mtriple=i686-pc -S -lowertypetests < %s
+
+define void @f(void ()* %p) {
+entry:
+ %a = bitcast void ()* %p to i8*, !nosanitize !1
+ %b = call i1 @llvm.type.test(i8* %a, metadata !"_ZTSFvvE"), !nosanitize !1
+ ret void
+}
+
+define void @g() !type !0 {
+entry:
+ ret void
+}
+
+declare i1 @llvm.type.test(i8*, metadata)
+
+!0 = !{i64 0, !"_ZTSFvvE"}
+!1 = !{}
diff --git a/test/Transforms/LowerTypeTests/section.ll b/test/Transforms/LowerTypeTests/section.ll
new file mode 100644
index 000000000000..7884acfaec37
--- /dev/null
+++ b/test/Transforms/LowerTypeTests/section.ll
@@ -0,0 +1,25 @@
+; Test that functions with "section" attribute are accepted, and jumptables are
+; emitted in ".text".
+
+; RUN: opt -S -lowertypetests < %s | FileCheck %s
+
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: @[[A:.*]] = private constant {{.*}} section ".text"
+; CHECK: @f = alias void (), bitcast ({{.*}}* @[[A]] to void ()*)
+; CHECK: define private void {{.*}} section "xxx"
+
+define void @f() section "xxx" !type !0 {
+entry:
+ ret void
+}
+
+define i1 @g() {
+entry:
+ %0 = call i1 @llvm.type.test(i8* bitcast (void ()* @f to i8*), metadata !"_ZTSFvE")
+ ret i1 %0
+}
+
+declare i1 @llvm.type.test(i8*, metadata) nounwind readnone
+
+!0 = !{i64 0, !"_ZTSFvE"}
diff --git a/test/Transforms/LowerBitSets/simple.ll b/test/Transforms/LowerTypeTests/simple.ll
index a22d998e2008..0628951868cd 100644
--- a/test/Transforms/LowerBitSets/simple.ll
+++ b/test/Transforms/LowerTypeTests/simple.ll
@@ -1,42 +1,34 @@
-; RUN: opt -S -lowerbitsets < %s | FileCheck %s
-; RUN: opt -S -lowerbitsets -mtriple=x86_64-apple-macosx10.8.0 < %s | FileCheck -check-prefix=CHECK-DARWIN %s
+; RUN: opt -S -lowertypetests < %s | FileCheck %s
+; RUN: opt -S -lowertypetests -mtriple=x86_64-apple-macosx10.8.0 < %s | FileCheck -check-prefix=CHECK-DARWIN %s
; RUN: opt -S -O3 < %s | FileCheck -check-prefix=CHECK-NODISCARD %s
target datalayout = "e-p:32:32"
; CHECK: [[G:@[^ ]*]] = private constant { i32, [0 x i8], [63 x i32], [4 x i8], i32, [0 x i8], [2 x i32] } { i32 1, [0 x i8] zeroinitializer, [63 x i32] zeroinitializer, [4 x i8] zeroinitializer, i32 3, [0 x i8] zeroinitializer, [2 x i32] [i32 4, i32 5] }
-@a = constant i32 1
-@b = hidden constant [63 x i32] zeroinitializer
-@c = protected constant i32 3
-@d = constant [2 x i32] [i32 4, i32 5]
+@a = constant i32 1, !type !0, !type !2
+@b = hidden constant [63 x i32] zeroinitializer, !type !0, !type !1
+@c = protected constant i32 3, !type !1, !type !2
+@d = constant [2 x i32] [i32 4, i32 5], !type !3
+
+; CHECK-NODISCARD: !type
+; CHECK-NODISCARD: !type
+; CHECK-NODISCARD: !type
+; CHECK-NODISCARD: !type
+; CHECK-NODISCARD: !type
+; CHECK-NODISCARD: !type
+; CHECK-NODISCARD: !type
; CHECK: [[BA:@[^ ]*]] = private constant [68 x i8] c"\03\01\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\02\00\01"
; Offset 0, 4 byte alignment
-!0 = !{!"bitset1", i32* @a, i32 0}
-; CHECK-NODISCARD-DAG: !{!"bitset1", i32* @a, i32 0}
-!1 = !{!"bitset1", [63 x i32]* @b, i32 0}
-; CHECK-NODISCARD-DAG: !{!"bitset1", [63 x i32]* @b, i32 0}
-!2 = !{!"bitset1", [2 x i32]* @d, i32 4}
-; CHECK-NODISCARD-DAG: !{!"bitset1", [2 x i32]* @d, i32 4}
+!0 = !{i32 0, !"typeid1"}
+!3 = !{i32 4, !"typeid1"}
; Offset 4, 256 byte alignment
-!3 = !{!"bitset2", [63 x i32]* @b, i32 0}
-; CHECK-NODISCARD-DAG: !{!"bitset2", [63 x i32]* @b, i32 0}
-!4 = !{!"bitset2", i32* @c, i32 0}
-; CHECK-NODISCARD-DAG: !{!"bitset2", i32* @c, i32 0}
-
-; Entries whose second operand is null (the result of a global being DCE'd)
-; should be ignored.
-!5 = !{!"bitset2", null, i32 0}
+!1 = !{i32 0, !"typeid2"}
; Offset 0, 4 byte alignment
-!6 = !{!"bitset3", i32* @a, i32 0}
-; CHECK-NODISCARD-DAG: !{!"bitset3", i32* @a, i32 0}
-!7 = !{!"bitset3", i32* @c, i32 0}
-; CHECK-NODISCARD-DAG: !{!"bitset3", i32* @c, i32 0}
-
-!llvm.bitsets = !{ !0, !1, !2, !3, !4, !5, !6, !7 }
+!2 = !{i32 0, !"typeid3"}
; CHECK: @bits_use{{[0-9]*}} = private alias i8, i8* @bits{{[0-9]*}}
; CHECK: @bits_use.{{[0-9]*}} = private alias i8, i8* @bits{{[0-9]*}}
@@ -64,11 +56,11 @@ target datalayout = "e-p:32:32"
; CHECK: @bits{{[0-9]*}} = private alias i8, getelementptr inbounds ([68 x i8], [68 x i8]* [[BA]], i32 0, i32 0)
; CHECK: @bits.{{[0-9]*}} = private alias i8, getelementptr inbounds ([68 x i8], [68 x i8]* [[BA]], i32 0, i32 0)
-declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
+declare i1 @llvm.type.test(i8* %ptr, metadata %bitset) nounwind readnone
; CHECK: @foo(i32* [[A0:%[^ ]*]])
define i1 @foo(i32* %p) {
- ; CHECK-NOT: llvm.bitset.test
+ ; CHECK-NOT: llvm.type.test
; CHECK: [[R0:%[^ ]*]] = bitcast i32* [[A0]] to i8*
%pi8 = bitcast i32* %p to i8*
@@ -86,10 +78,10 @@ define i1 @foo(i32* %p) {
; CHECK: [[R11:%[^ ]*]] = icmp ne i8 [[R10]], 0
; CHECK: [[R16:%[^ ]*]] = phi i1 [ false, {{%[^ ]*}} ], [ [[R11]], {{%[^ ]*}} ]
- %x = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset1")
+ %x = call i1 @llvm.type.test(i8* %pi8, metadata !"typeid1")
- ; CHECK-NOT: llvm.bitset.test
- %y = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset1")
+ ; CHECK-NOT: llvm.type.test
+ %y = call i1 @llvm.type.test(i8* %pi8, metadata !"typeid1")
; CHECK: ret i1 [[R16]]
ret i1 %x
@@ -105,7 +97,7 @@ define i1 @bar(i32* %p) {
; CHECK: [[S4:%[^ ]*]] = shl i32 [[S2]], 24
; CHECK: [[S5:%[^ ]*]] = or i32 [[S3]], [[S4]]
; CHECK: [[S6:%[^ ]*]] = icmp ult i32 [[S5]], 2
- %x = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset2")
+ %x = call i1 @llvm.type.test(i8* %pi8, metadata !"typeid2")
; CHECK: ret i1 [[S6]]
ret i1 %x
@@ -123,15 +115,13 @@ define i1 @baz(i32* %p) {
; CHECK: [[T6:%[^ ]*]] = icmp ult i32 [[T5]], 66
; CHECK: br i1 [[T6]]
- ; CHECK: [[T8:%[^ ]*]] = getelementptr i8, i8* @bits_use.{{[0-9]*}}, i32 [[T5]]
+ ; CHECK: [[T8:%[^ ]*]] = getelementptr i8, i8* @bits_use{{(\.[0-9]*)?}}, i32 [[T5]]
; CHECK: [[T9:%[^ ]*]] = load i8, i8* [[T8]]
; CHECK: [[T10:%[^ ]*]] = and i8 [[T9]], 2
; CHECK: [[T11:%[^ ]*]] = icmp ne i8 [[T10]], 0
; CHECK: [[T16:%[^ ]*]] = phi i1 [ false, {{%[^ ]*}} ], [ [[T11]], {{%[^ ]*}} ]
- %x = call i1 @llvm.bitset.test(i8* %pi8, metadata !"bitset3")
+ %x = call i1 @llvm.type.test(i8* %pi8, metadata !"typeid3")
; CHECK: ret i1 [[T16]]
ret i1 %x
}
-
-; CHECK-NOT: !llvm.bitsets
diff --git a/test/Transforms/LowerBitSets/single-offset.ll b/test/Transforms/LowerTypeTests/single-offset.ll
index 57194f42e096..6dd37984df9c 100644
--- a/test/Transforms/LowerBitSets/single-offset.ll
+++ b/test/Transforms/LowerTypeTests/single-offset.ll
@@ -1,25 +1,22 @@
-; RUN: opt -S -lowerbitsets < %s | FileCheck %s
+; RUN: opt -S -lowertypetests < %s | FileCheck %s
target datalayout = "e-p:32:32"
; CHECK: [[G:@[^ ]*]] = private constant { i32, [0 x i8], i32 }
-@a = constant i32 1
-@b = constant i32 2
+@a = constant i32 1, !type !0, !type !1
+@b = constant i32 2, !type !0, !type !2
-!0 = !{!"bitset1", i32* @a, i32 0}
-!1 = !{!"bitset1", i32* @b, i32 0}
-!2 = !{!"bitset2", i32* @a, i32 0}
-!3 = !{!"bitset3", i32* @b, i32 0}
+!0 = !{i32 0, !"typeid1"}
+!1 = !{i32 0, !"typeid2"}
+!2 = !{i32 0, !"typeid3"}
-!llvm.bitsets = !{ !0, !1, !2, !3 }
-
-declare i1 @llvm.bitset.test(i8* %ptr, metadata %bitset) nounwind readnone
+declare i1 @llvm.type.test(i8* %ptr, metadata %bitset) nounwind readnone
; CHECK: @foo(i8* [[A0:%[^ ]*]])
define i1 @foo(i8* %p) {
; CHECK: [[R0:%[^ ]*]] = ptrtoint i8* [[A0]] to i32
; CHECK: [[R1:%[^ ]*]] = icmp eq i32 [[R0]], ptrtoint ({ i32, [0 x i8], i32 }* [[G]] to i32)
- %x = call i1 @llvm.bitset.test(i8* %p, metadata !"bitset2")
+ %x = call i1 @llvm.type.test(i8* %p, metadata !"typeid2")
; CHECK: ret i1 [[R1]]
ret i1 %x
}
@@ -28,13 +25,13 @@ define i1 @foo(i8* %p) {
define i1 @bar(i8* %p) {
; CHECK: [[S0:%[^ ]*]] = ptrtoint i8* [[B0]] to i32
; CHECK: [[S1:%[^ ]*]] = icmp eq i32 [[S0]], add (i32 ptrtoint ({ i32, [0 x i8], i32 }* [[G]] to i32), i32 4)
- %x = call i1 @llvm.bitset.test(i8* %p, metadata !"bitset3")
+ %x = call i1 @llvm.type.test(i8* %p, metadata !"typeid3")
; CHECK: ret i1 [[S1]]
ret i1 %x
}
; CHECK: @x(
define i1 @x(i8* %p) {
- %x = call i1 @llvm.bitset.test(i8* %p, metadata !"bitset1")
+ %x = call i1 @llvm.type.test(i8* %p, metadata !"typeid1")
ret i1 %x
}
diff --git a/test/Transforms/LowerTypeTests/unnamed.ll b/test/Transforms/LowerTypeTests/unnamed.ll
new file mode 100644
index 000000000000..4bb2fd972081
--- /dev/null
+++ b/test/Transforms/LowerTypeTests/unnamed.ll
@@ -0,0 +1,18 @@
+; RUN: opt -S -lowertypetests < %s | FileCheck %s
+
+target datalayout = "e-p:32:32"
+
+; CHECK: @{{[0-9]+}} = alias
+; CHECK: @{{[0-9]+}} = alias
+@0 = constant i32 1, !type !0
+@1 = constant [2 x i32] [i32 2, i32 3], !type !1
+
+!0 = !{i32 0, !"typeid1"}
+!1 = !{i32 4, !"typeid1"}
+
+declare i1 @llvm.type.test(i8* %ptr, metadata %bitset) nounwind readnone
+
+define i1 @foo(i8* %p) {
+ %x = call i1 @llvm.type.test(i8* %p, metadata !"typeid1")
+ ret i1 %x
+}
diff --git a/test/Transforms/Mem2Reg/ConvertDebugInfo.ll b/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
index 6aaf594b3056..4e3c772f6ca9 100644
--- a/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
+++ b/test/Transforms/Mem2Reg/ConvertDebugInfo.ll
@@ -36,9 +36,9 @@ declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
!llvm.module.flags = !{!14}
!0 = !DILocalVariable(name: "i", line: 2, arg: 1, scope: !1, file: !2, type: !7)
-!1 = distinct !DISubprogram(name: "testfunc", linkageName: "testfunc", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 2, file: !12, scope: !2, type: !4)
+!1 = distinct !DISubprogram(name: "testfunc", linkageName: "testfunc", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, unit: !3, scopeLine: 2, file: !12, scope: !2, type: !4)
!2 = !DIFile(filename: "testfunc.c", directory: "/tmp")
-!3 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !12, enums: !13, retainedTypes: !13, subprograms: !{!1})
+!3 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: FullDebug, file: !12, enums: !13, retainedTypes: !13)
!4 = !DISubroutineType(types: !5)
!5 = !{!6, !7, !6}
!6 = !DIBasicType(tag: DW_TAG_base_type, name: "double", size: 64, align: 64, encoding: DW_ATE_float)
diff --git a/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll b/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
index 071d708e1fe8..e2dd0e15f817 100644
--- a/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
+++ b/test/Transforms/Mem2Reg/ConvertDebugInfo2.ll
@@ -43,16 +43,16 @@ return: ; preds = %entry
!llvm.dbg.cu = !{!3}
!llvm.module.flags = !{!22}
!0 = !DILocalVariable(name: "a", line: 8, arg: 1, scope: !1, file: !2, type: !6)
-!1 = distinct !DISubprogram(name: "baz", linkageName: "baz", line: 8, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 8, file: !20, scope: !2, type: !4)
+!1 = distinct !DISubprogram(name: "baz", linkageName: "baz", line: 8, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, unit: !3, scopeLine: 8, file: !20, scope: !2, type: !4)
!2 = !DIFile(filename: "bar.c", directory: "/tmp/")
-!3 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !20, enums: !21, retainedTypes: !21, subprograms: !{!1})
+!3 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: FullDebug, file: !20, enums: !21, retainedTypes: !21)
!4 = !DISubroutineType(types: !5)
!5 = !{null, !6}
!6 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!7 = !DILocation(line: 8, scope: !1)
!8 = !DILocation(line: 9, scope: !1)
!9 = !DILocalVariable(name: "x", line: 4, arg: 1, scope: !10, file: !2, type: !6)
-!10 = distinct !DISubprogram(name: "bar", linkageName: "bar", line: 4, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: false, scopeLine: 4, file: !20, scope: !2, type: !11)
+!10 = distinct !DISubprogram(name: "bar", linkageName: "bar", line: 4, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: false, unit: !3, scopeLine: 4, file: !20, scope: !2, type: !11)
!11 = !DISubroutineType(types: !12)
!12 = !{null, !6, !13, !14}
!13 = !DIBasicType(tag: DW_TAG_base_type, name: "long int", size: 64, align: 64, encoding: DW_ATE_signed)
diff --git a/test/Transforms/Mem2Reg/pr24179.ll b/test/Transforms/Mem2Reg/pr24179.ll
index e4216ce4daa1..72a9e61938d7 100644
--- a/test/Transforms/Mem2Reg/pr24179.ll
+++ b/test/Transforms/Mem2Reg/pr24179.ll
@@ -1,4 +1,5 @@
; RUN: opt -mem2reg < %s -S | FileCheck %s
+; RUN: opt -passes=mem2reg < %s -S | FileCheck %s
declare i32 @def(i32)
declare i1 @use(i32)
diff --git a/test/Transforms/MemCpyOpt/callslot_throw.ll b/test/Transforms/MemCpyOpt/callslot_throw.ll
new file mode 100644
index 000000000000..1aa4c92efc72
--- /dev/null
+++ b/test/Transforms/MemCpyOpt/callslot_throw.ll
@@ -0,0 +1,34 @@
+; RUN: opt -S -memcpyopt < %s | FileCheck %s
+declare void @may_throw(i32* nocapture %x)
+
+; CHECK-LABEL: define void @test1(
+define void @test1(i32* nocapture noalias dereferenceable(4) %x) {
+entry:
+ %t = alloca i32, align 4
+ call void @may_throw(i32* nonnull %t)
+ %load = load i32, i32* %t, align 4
+ store i32 %load, i32* %x, align 4
+; CHECK: %[[t:.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @may_throw(i32* {{.*}} %[[t]])
+; CHECK-NEXT: %[[load:.*]] = load i32, i32* %[[t]], align 4
+; CHECK-NEXT: store i32 %[[load]], i32* %x, align 4
+ ret void
+}
+
+declare void @always_throws()
+
+; CHECK-LABEL: define void @test2(
+define void @test2(i32* nocapture noalias dereferenceable(4) %x) {
+entry:
+ %t = alloca i32, align 4
+ call void @may_throw(i32* nonnull %t) nounwind
+ %load = load i32, i32* %t, align 4
+ call void @always_throws()
+ store i32 %load, i32* %x, align 4
+; CHECK: %[[t:.*]] = alloca i32, align 4
+; CHECK-NEXT: call void @may_throw(i32* {{.*}} %[[t]])
+; CHECK-NEXT: %[[load:.*]] = load i32, i32* %[[t]], align 4
+; CHECK-NEXT: call void @always_throws()
+; CHECK-NEXT: store i32 %[[load]], i32* %x, align 4
+ ret void
+}
diff --git a/test/Transforms/MemCpyOpt/fca2memcpy.ll b/test/Transforms/MemCpyOpt/fca2memcpy.ll
index c8a126848b06..0215431ac350 100644
--- a/test/Transforms/MemCpyOpt/fca2memcpy.ll
+++ b/test/Transforms/MemCpyOpt/fca2memcpy.ll
@@ -73,16 +73,38 @@ define void @copyalias(%S* %src, %S* %dst) {
ret void
}
-
-; The GEP is present after the aliasing store, preventing to move the memcpy before
-; (without further analysis/transformation)
-define void @copyaliaswithproducerinbetween(%S* %src, %S* %dst) {
-; CHECK-LABEL: copyalias
-; CHECK-NEXT: [[LOAD:%[a-z0-9\.]+]] = load %S, %S* %src
-; CHECK-NOT: call
+; If the store address is computed in a complex manner, make
+; sure we lift the computation as well if needed and possible.
+define void @addrproducer(%S* %src, %S* %dst) {
+; CHECK-LABEL: addrproducer
+; CHECK: %dst2 = getelementptr %S, %S* %dst, i64 1
+; CHECK: call void @llvm.memmove.p0i8.p0i8.i64
+; CHECK-NEXT: store %S undef, %S* %dst
+; CHECK-NEXT: ret void
%1 = load %S, %S* %src
store %S undef, %S* %dst
%dst2 = getelementptr %S , %S* %dst, i64 1
store %S %1, %S* %dst2
ret void
}
+
+define void @aliasaddrproducer(%S* %src, %S* %dst, i32* %dstidptr) {
+; CHECK-LABEL: aliasaddrproducer
+ %1 = load %S, %S* %src
+ store %S undef, %S* %dst
+ %dstindex = load i32, i32* %dstidptr
+ %dst2 = getelementptr %S , %S* %dst, i32 %dstindex
+ store %S %1, %S* %dst2
+ ret void
+}
+
+define void @noaliasaddrproducer(%S* %src, %S* noalias %dst, i32* noalias %dstidptr) {
+; CHECK-LABEL: noaliasaddrproducer
+ %1 = load %S, %S* %src
+ store %S undef, %S* %src
+ %2 = load i32, i32* %dstidptr
+ %dstindex = or i32 %2, 1
+ %dst2 = getelementptr %S , %S* %dst, i32 %dstindex
+ store %S %1, %S* %dst2
+ ret void
+}
diff --git a/test/Transforms/MemCpyOpt/lifetime.ll b/test/Transforms/MemCpyOpt/lifetime.ll
new file mode 100644
index 000000000000..6a7e44692daa
--- /dev/null
+++ b/test/Transforms/MemCpyOpt/lifetime.ll
@@ -0,0 +1,25 @@
+; RUN: opt < %s -O1 -S | FileCheck %s
+
+; performCallSlotOptzn in MemCpy should not exchange the calls to
+; @llvm.lifetime.start and @llvm.memcpy.
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #1
+declare void @llvm.lifetime.start(i64, i8* nocapture) #1
+declare void @llvm.lifetime.end(i64, i8* nocapture) #1
+
+define void @_ZN4CordC2EOS_(i8* nocapture dereferenceable(16) %arg1) {
+bb:
+; CHECK-LABEL: @_ZN4CordC2EOS_
+; CHECK-NOT: call void @llvm.lifetime.start
+; CHECK: ret void
+ %tmp = alloca [8 x i8], align 8
+ %tmp5 = bitcast [8 x i8]* %tmp to i8*
+ call void @llvm.lifetime.start(i64 16, i8* %tmp5)
+ %tmp10 = getelementptr inbounds i8, i8* %tmp5, i64 7
+ store i8 0, i8* %tmp10, align 1
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %arg1, i8* %tmp5, i64 16, i32 8, i1 false)
+ call void @llvm.lifetime.end(i64 16, i8* %tmp5)
+ ret void
+}
+
+attributes #1 = { argmemonly nounwind }
diff --git a/test/Transforms/MemCpyOpt/loadstore-sret.ll b/test/Transforms/MemCpyOpt/loadstore-sret.ll
index 55cbe59651f4..4c6136cf6259 100644
--- a/test/Transforms/MemCpyOpt/loadstore-sret.ll
+++ b/test/Transforms/MemCpyOpt/loadstore-sret.ll
@@ -22,4 +22,4 @@ _ZNSt8auto_ptrIiED1Ev.exit:
ret void
}
-declare void @_Z3barv(%"class.std::auto_ptr"* nocapture sret)
+declare void @_Z3barv(%"class.std::auto_ptr"* nocapture sret) nounwind
diff --git a/test/Transforms/MemCpyOpt/profitable-memset.ll b/test/Transforms/MemCpyOpt/profitable-memset.ll
new file mode 100644
index 000000000000..bbef1244ee36
--- /dev/null
+++ b/test/Transforms/MemCpyOpt/profitable-memset.ll
@@ -0,0 +1,20 @@
+; RUN: opt < %s -memcpyopt -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+
+; CHECK-LABEL: @foo(
+; CHECK-NOT: store
+; CHECK: call void @llvm.memset.p0i8.i64(i8* %2, i8 0, i64 8, i32 2, i1 false)
+
+define void @foo(i64* nocapture %P) {
+entry:
+ %0 = bitcast i64* %P to i16*
+ %arrayidx = getelementptr inbounds i16, i16* %0, i64 1
+ %1 = bitcast i16* %arrayidx to i32*
+ %arrayidx1 = getelementptr inbounds i16, i16* %0, i64 3
+ store i16 0, i16* %0, align 2
+ store i32 0, i32* %1, align 4
+ store i16 0, i16* %arrayidx1, align 2
+ ret void
+}
+
diff --git a/test/Transforms/MemCpyOpt/smaller.ll b/test/Transforms/MemCpyOpt/smaller.ll
index 8f6cafa6c012..d0ec56e25ddc 100644
--- a/test/Transforms/MemCpyOpt/smaller.ll
+++ b/test/Transforms/MemCpyOpt/smaller.ll
@@ -1,4 +1,5 @@
; RUN: opt -memcpyopt -S < %s | FileCheck %s
+; RUN: opt -passes=memcpyopt -S < %s | FileCheck %s
; rdar://8875553
; Memcpyopt shouldn't optimize the second memcpy using the first
diff --git a/test/Transforms/MergeFunc/alloca.ll b/test/Transforms/MergeFunc/alloca.ll
index d9f66d0911da..165fc68365b1 100644
--- a/test/Transforms/MergeFunc/alloca.ll
+++ b/test/Transforms/MergeFunc/alloca.ll
@@ -1,14 +1,18 @@
; RUN: opt -mergefunc -S < %s | FileCheck %s
-;; Make sure that two different sized allocas are not treated as equal.
+;; Make sure that two different allocas are not treated as equal.
target datalayout = "e-m:w-p:32:32-i64:64-f80:32-n8:16:32-S32"
%kv1 = type { i32, i32 }
%kv2 = type { i8 }
+%kv3 = type { i64, i64 }
+; Size difference.
-define void @a(i8 *%f) {
+; CHECK-LABEL: define void @size1
+; CHECK-NOT: call void @
+define void @size1(i8 *%f) {
%v = alloca %kv1, align 8
%f_2 = bitcast i8* %f to void (%kv1 *)*
call void %f_2(%kv1 * %v)
@@ -18,11 +22,9 @@ define void @a(i8 *%f) {
ret void
}
-; CHECK-LABEL: define void @b
-; CHECK-NOT: call @a
-; CHECK: ret
-
-define void @b(i8 *%f) {
+; CHECK-LABEL: define void @size2
+; CHECK-NOT: call void @
+define void @size2(i8 *%f) {
%v = alloca %kv2, align 8
%f_2 = bitcast i8* %f to void (%kv2 *)*
call void %f_2(%kv2 * %v)
@@ -31,3 +33,29 @@ define void @b(i8 *%f) {
call void %f_2(%kv2 * %v)
ret void
}
+
+; Alignment difference.
+
+; CHECK-LABEL: define void @align1
+; CHECK-NOT: call void @
+define void @align1(i8 *%f) {
+ %v = alloca %kv3, align 8
+ %f_2 = bitcast i8* %f to void (%kv3 *)*
+ call void %f_2(%kv3 * %v)
+ call void %f_2(%kv3 * %v)
+ call void %f_2(%kv3 * %v)
+ call void %f_2(%kv3 * %v)
+ ret void
+}
+
+; CHECK-LABEL: define void @align2
+; CHECK-NOT: call void @
+define void @align2(i8 *%f) {
+ %v = alloca %kv3, align 16
+ %f_2 = bitcast i8* %f to void (%kv3 *)*
+ call void %f_2(%kv3 * %v)
+ call void %f_2(%kv3 * %v)
+ call void %f_2(%kv3 * %v)
+ call void %f_2(%kv3 * %v)
+ ret void
+}
diff --git a/test/Transforms/MergeFunc/merge-weak-crash.ll b/test/Transforms/MergeFunc/merge-weak-crash.ll
new file mode 100644
index 000000000000..9d2c5caa4e54
--- /dev/null
+++ b/test/Transforms/MergeFunc/merge-weak-crash.ll
@@ -0,0 +1,47 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+; CHECK-LABEL: define i32 @func1
+; CHECK: call i32 @func2
+; CHECK: ret
+
+; CHECK-LABEL: define i32 @func2
+; CHECK: call i32 @unknown
+; CHECK: ret
+
+; CHECK-LABEL: define i32 @func4
+; CHECK: call i32 @func2
+; CHECK: ret
+
+; CHECK-LABEL: define weak i32 @func3_weak
+; CHECK: call i32 @func1
+; CHECK: ret
+
+define i32 @func1(i32 %x, i32 %y) {
+ %sum = add i32 %x, %y
+ %sum2 = add i32 %sum, %y
+ %sum3 = call i32 @func4(i32 %sum, i32 %sum2)
+ ret i32 %sum3
+}
+
+define i32 @func4(i32 %x, i32 %y) {
+ %sum = add i32 %x, %y
+ %sum2 = add i32 %sum, %y
+ %sum3 = call i32 @unknown(i32 %sum, i32 %sum2)
+ ret i32 %sum3
+}
+
+define weak i32 @func3_weak(i32 %x, i32 %y) {
+ %sum = add i32 %x, %y
+ %sum2 = add i32 %sum, %y
+ %sum3 = call i32 @func2(i32 %sum, i32 %sum2)
+ ret i32 %sum3
+}
+
+define i32 @func2(i32 %x, i32 %y) {
+ %sum = add i32 %x, %y
+ %sum2 = add i32 %sum, %y
+ %sum3 = call i32 @unknown(i32 %sum, i32 %sum2)
+ ret i32 %sum3
+}
+
+declare i32 @unknown(i32 %x, i32 %y)
diff --git a/test/Transforms/MergeFunc/phi-check-blocks.ll b/test/Transforms/MergeFunc/phi-check-blocks.ll
new file mode 100644
index 000000000000..b2de9a0c0286
--- /dev/null
+++ b/test/Transforms/MergeFunc/phi-check-blocks.ll
@@ -0,0 +1,50 @@
+; RUN: opt -S -mergefunc < %s | FileCheck %s
+
+; Ensure that we do not merge functions that are identical with the
+; exception of the order of the incoming blocks to a phi.
+
+; CHECK-LABEL: define linkonce_odr hidden i1 @first(i2)
+define linkonce_odr hidden i1 @first(i2) {
+entry:
+; CHECK: switch i2
+ switch i2 %0, label %default [
+ i2 0, label %L1
+ i2 1, label %L2
+ i2 -2, label %L3
+ ]
+default:
+ unreachable
+L1:
+ br label %done
+L2:
+ br label %done
+L3:
+ br label %done
+done:
+ %result = phi i1 [ true, %L1 ], [ false, %L2 ], [ false, %L3 ]
+; CHECK: ret i1
+ ret i1 %result
+}
+
+; CHECK-LABEL: define linkonce_odr hidden i1 @second(i2)
+define linkonce_odr hidden i1 @second(i2) {
+entry:
+; CHECK: switch i2
+ switch i2 %0, label %default [
+ i2 0, label %L1
+ i2 1, label %L2
+ i2 -2, label %L3
+ ]
+default:
+ unreachable
+L1:
+ br label %done
+L2:
+ br label %done
+L3:
+ br label %done
+done:
+ %result = phi i1 [ true, %L3 ], [ false, %L2 ], [ false, %L1 ]
+; CHECK: ret i1
+ ret i1 %result
+}
diff --git a/test/Transforms/NameAnonFunctions/rename.ll b/test/Transforms/NameAnonFunctions/rename.ll
new file mode 100644
index 000000000000..851746f7a979
--- /dev/null
+++ b/test/Transforms/NameAnonFunctions/rename.ll
@@ -0,0 +1,27 @@
+; RUN: opt -S -name-anon-functions < %s | FileCheck %s
+
+
+; foo contributes to the unique hash for the module
+define void @foo() {
+ ret void
+}
+
+; bar is internal, and does not contribute to the unique hash for the module
+define internal void @bar() {
+ ret void
+}
+
+; CHECK: define void @anon.acbd18db4cc2f85cedef654fccc4a4d8.0()
+; CHECK: define void @anon.acbd18db4cc2f85cedef654fccc4a4d8.1()
+; CHECK: define void @anon.acbd18db4cc2f85cedef654fccc4a4d8.2()
+
+define void @0() {
+ ret void
+}
+define void @1() {
+ ret void
+}
+define void @2() {
+ ret void
+}
+
diff --git a/test/Transforms/ObjCARC/basic.ll b/test/Transforms/ObjCARC/basic.ll
index fc1d087794d6..c10c3b1381b7 100644
--- a/test/Transforms/ObjCARC/basic.ll
+++ b/test/Transforms/ObjCARC/basic.ll
@@ -4,6 +4,7 @@ target datalayout = "e-p:64:64:64"
declare i8* @objc_retain(i8*)
declare i8* @objc_retainAutoreleasedReturnValue(i8*)
+declare i8* @objc_unsafeClaimAutoreleasedReturnValue(i8*)
declare void @objc_release(i8*)
declare i8* @objc_autorelease(i8*)
declare i8* @objc_autoreleaseReturnValue(i8*)
@@ -2573,6 +2574,27 @@ return: ; preds = %if.then, %entry
ret i8* %retval
}
+; CHECK-LABEL: define i8* @test65d(
+; CHECK: if.then:
+; CHECK-NOT: @objc_autorelease
+; CHECK: return:
+; CHECK: call i8* @objc_autoreleaseReturnValue(
+; CHECK: }
+define i8* @test65d(i1 %x) {
+entry:
+ br i1 %x, label %return, label %if.then
+
+if.then: ; preds = %entry
+ %c = call i8* @returner()
+ %s = call i8* @objc_unsafeClaimAutoreleasedReturnValue(i8* %c) nounwind
+ br label %return
+
+return: ; preds = %if.then, %entry
+ %retval = phi i8* [ %s, %if.then ], [ null, %entry ]
+ %q = call i8* @objc_autoreleaseReturnValue(i8* %retval) nounwind
+ ret i8* %retval
+}
+
; An objc_retain can serve as a may-use for a different pointer.
; rdar://11931823
@@ -3015,10 +3037,17 @@ define void @test67(i8* %x) {
}
!llvm.module.flags = !{!1}
+!llvm.dbg.cu = !{!3}
!0 = !{}
!1 = !{i32 1, !"Debug Info Version", i32 3}
-!2 = distinct !DISubprogram()
+!2 = distinct !DISubprogram(unit: !3)
+!3 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang",
+ file: !4,
+ isOptimized: true, flags: "-O2",
+ splitDebugFilename: "abc.debug", emissionKind: 2)
+!4 = !DIFile(filename: "path/to/file", directory: "/path/to/dir")
+!5 = !{i32 2, !"Debug Info Version", i32 3}
; CHECK: attributes #0 = { nounwind readnone }
; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/comdat-ipo.ll b/test/Transforms/ObjCARC/comdat-ipo.ll
new file mode 100644
index 000000000000..0a5713e9ab6f
--- /dev/null
+++ b/test/Transforms/ObjCARC/comdat-ipo.ll
@@ -0,0 +1,53 @@
+; RUN: opt -S -objc-arc-apelim < %s | FileCheck %s
+
+; See PR26774
+
+@llvm.global_ctors = appending global [2 x { i32, void ()* }] [{ i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_x }, { i32, void ()* } { i32 65535, void ()* @_GLOBAL__I_y }]
+
+@x = global i32 0
+
+declare i32 @bar() nounwind
+
+define linkonce_odr i32 @foo() nounwind {
+entry:
+ ret i32 5
+}
+
+define internal void @__cxx_global_var_init() {
+entry:
+ %call = call i32 @foo()
+ store i32 %call, i32* @x, align 4
+ ret void
+}
+
+define internal void @__dxx_global_var_init() {
+entry:
+ %call = call i32 @bar()
+ store i32 %call, i32* @x, align 4
+ ret void
+}
+
+; CHECK-LABEL: define internal void @_GLOBAL__I_x() {
+define internal void @_GLOBAL__I_x() {
+entry:
+; CHECK: call i8* @objc_autoreleasePoolPush()
+; CHECK-NEXT: call void @__cxx_global_var_init()
+; CHECK-NEXT: call void @objc_autoreleasePoolPop(i8* %0)
+; CHECK-NEXT: ret void
+
+ %0 = call i8* @objc_autoreleasePoolPush() nounwind
+ call void @__cxx_global_var_init()
+ call void @objc_autoreleasePoolPop(i8* %0) nounwind
+ ret void
+}
+
+define internal void @_GLOBAL__I_y() {
+entry:
+ %0 = call i8* @objc_autoreleasePoolPush() nounwind
+ call void @__dxx_global_var_init()
+ call void @objc_autoreleasePoolPop(i8* %0) nounwind
+ ret void
+}
+
+declare i8* @objc_autoreleasePoolPush()
+declare void @objc_autoreleasePoolPop(i8*)
diff --git a/test/Transforms/ObjCARC/contract-marker.ll b/test/Transforms/ObjCARC/contract-marker.ll
index a8282607cb3b..bf70d4e9d044 100644
--- a/test/Transforms/ObjCARC/contract-marker.ll
+++ b/test/Transforms/ObjCARC/contract-marker.ll
@@ -1,9 +1,9 @@
; RUN: opt -S -objc-arc-contract < %s | FileCheck %s
-; CHECK: define void @foo() {
+; CHECK-LABEL: define void @foo() {
; CHECK: %call = tail call i32* @qux()
; CHECK-NEXT: %tcall = bitcast i32* %call to i8*
-; CHECK-NEXT: call void asm sideeffect "mov\09r7, r7\09\09@ marker for objc_retainAutoreleaseReturnValue", ""()
+; CHECK-NEXT: call void asm sideeffect "mov\09r7, r7\09\09@ marker for return value optimization", ""()
; CHECK-NEXT: %0 = tail call i8* @objc_retainAutoreleasedReturnValue(i8* %tcall) [[NUW:#[0-9]+]]
; CHECK: }
@@ -16,12 +16,30 @@ entry:
ret void
}
+; CHECK-LABEL: define void @foo2() {
+; CHECK: %call = tail call i32* @qux()
+; CHECK-NEXT: %tcall = bitcast i32* %call to i8*
+; CHECK-NEXT: call void asm sideeffect "mov\09r7, r7\09\09@ marker for return value optimization", ""()
+; CHECK-NEXT: %0 = tail call i8* @objc_unsafeClaimAutoreleasedReturnValue(i8* %tcall) [[NUW:#[0-9]+]]
+; CHECK: }
+
+define void @foo2() {
+entry:
+ %call = tail call i32* @qux()
+ %tcall = bitcast i32* %call to i8*
+ %0 = tail call i8* @objc_unsafeClaimAutoreleasedReturnValue(i8* %tcall) nounwind
+ tail call void @bar(i8* %0)
+ ret void
+}
+
+
declare i32* @qux()
declare i8* @objc_retainAutoreleasedReturnValue(i8*)
+declare i8* @objc_unsafeClaimAutoreleasedReturnValue(i8*)
declare void @bar(i8*)
!clang.arc.retainAutoreleasedReturnValueMarker = !{!0}
-!0 = !{!"mov\09r7, r7\09\09@ marker for objc_retainAutoreleaseReturnValue"}
+!0 = !{!"mov\09r7, r7\09\09@ marker for return value optimization"}
; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/contract-storestrong.ll b/test/Transforms/ObjCARC/contract-storestrong.ll
index aadc3a26539a..2b83bdb9bfbf 100644
--- a/test/Transforms/ObjCARC/contract-storestrong.ll
+++ b/test/Transforms/ObjCARC/contract-storestrong.ll
@@ -217,6 +217,32 @@ entry:
ret i1 %t
}
+; Make sure that we form the store strong even if there are bitcasts on
+; the pointers.
+; CHECK-LABEL: define void @test12(
+; CHECK: entry:
+; CHECK-NEXT: %p16 = bitcast i8** @x to i16**
+; CHECK-NEXT: %tmp16 = load i16*, i16** %p16, align 8
+; CHECK-NEXT: %tmp8 = bitcast i16* %tmp16 to i8*
+; CHECK-NEXT: %p32 = bitcast i8** @x to i32**
+; CHECK-NEXT: %v32 = bitcast i8* %p to i32*
+; CHECK-NEXT: %0 = bitcast i16** %p16 to i8**
+; CHECK-NEXT: tail call void @objc_storeStrong(i8** %0, i8* %p)
+; CHECK-NEXT: ret void
+; CHECK-NEXT: }
+define void @test12(i8* %p) {
+entry:
+ %retain = tail call i8* @objc_retain(i8* %p) nounwind
+ %p16 = bitcast i8** @x to i16**
+ %tmp16 = load i16*, i16** %p16, align 8
+ %tmp8 = bitcast i16* %tmp16 to i8*
+ %p32 = bitcast i8** @x to i32**
+ %v32 = bitcast i8* %retain to i32*
+ store i32* %v32, i32** %p32, align 8
+ tail call void @objc_release(i8* %tmp8) nounwind
+ ret void
+}
+
!0 = !{}
; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll b/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
index ef8d8e52d1cc..c856706d3f03 100644
--- a/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
+++ b/test/Transforms/ObjCARC/ensure-that-exception-unwind-path-is-visited.ll
@@ -113,10 +113,9 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!33, !34, !35, !36, !61}
-!0 = distinct !DICompileUnit(language: DW_LANG_ObjC, producer: "clang version 3.3 ", isOptimized: true, runtimeVersion: 2, emissionKind: 0, file: !60, enums: !1, retainedTypes: !1, subprograms: !3, globals: !1)
+!0 = distinct !DICompileUnit(language: DW_LANG_ObjC, producer: "clang version 3.3 ", isOptimized: true, runtimeVersion: 2, emissionKind: FullDebug, file: !60, enums: !1, retainedTypes: !1, globals: !1)
!1 = !{}
-!3 = !{!5, !27}
-!5 = distinct !DISubprogram(name: "main", line: 9, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, scopeLine: 10, file: !60, scope: !6, type: !7, variables: !11)
+!5 = distinct !DISubprogram(name: "main", line: 9, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, unit: !0, scopeLine: 10, file: !60, scope: !6, type: !7, variables: !11)
!6 = !DIFile(filename: "test.m", directory: "/Volumes/Files/gottesmmcab/Radar/12906997")
!7 = !DISubroutineType(types: !8)
!8 = !{!9}
@@ -137,7 +136,7 @@ declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnon
!24 = !DIBasicType(tag: DW_TAG_base_type, name: "signed char", size: 8, align: 8, encoding: DW_ATE_signed_char)
!25 = !DILocalVariable(name: "obj2", line: 15, scope: !26, file: !6, type: !14)
!26 = distinct !DILexicalBlock(line: 14, column: 0, file: !60, scope: !22)
-!27 = distinct !DISubprogram(name: "ThrowFunc", line: 4, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 5, file: !60, scope: !6, type: !28, variables: !31)
+!27 = distinct !DISubprogram(name: "ThrowFunc", line: 4, isLocal: true, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 5, file: !60, scope: !6, type: !28, variables: !31)
!28 = !DISubroutineType(types: !29)
!29 = !{null, !14}
!31 = !{!32}
diff --git a/test/Transforms/ObjCARC/tail-call-invariant-enforcement.ll b/test/Transforms/ObjCARC/tail-call-invariant-enforcement.ll
index 1ec61c848101..3073abf7bf5a 100644
--- a/test/Transforms/ObjCARC/tail-call-invariant-enforcement.ll
+++ b/test/Transforms/ObjCARC/tail-call-invariant-enforcement.ll
@@ -5,6 +5,7 @@ declare i8* @objc_retain(i8* %x)
declare i8* @objc_autorelease(i8* %x)
declare i8* @objc_autoreleaseReturnValue(i8* %x)
declare i8* @objc_retainAutoreleasedReturnValue(i8* %x)
+declare i8* @objc_unsafeClaimAutoreleasedReturnValue(i8* %x)
declare i8* @tmp(i8*)
; Never tail call objc_autorelease.
@@ -85,5 +86,19 @@ entry:
ret i8* %tmp0
}
+; Always tail call objc_unsafeClaimAutoreleasedReturnValue.
+; CHECK: define i8* @test6(i8* %x) [[NUW]] {
+; CHECK: %tmp0 = tail call i8* @objc_unsafeClaimAutoreleasedReturnValue(i8* %y) [[NUW]]
+; CHECK: %tmp1 = tail call i8* @objc_unsafeClaimAutoreleasedReturnValue(i8* %z) [[NUW]]
+; CHECK: }
+define i8* @test6(i8* %x) nounwind {
+entry:
+ %y = call i8* @tmp(i8* %x)
+ %tmp0 = call i8* @objc_unsafeClaimAutoreleasedReturnValue(i8* %y)
+ %z = call i8* @tmp(i8* %x)
+ %tmp1 = tail call i8* @objc_unsafeClaimAutoreleasedReturnValue(i8* %z)
+ ret i8* %x
+}
+
; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ObjCARC/unsafe-claim-rv.ll b/test/Transforms/ObjCARC/unsafe-claim-rv.ll
new file mode 100644
index 000000000000..addd0c8f9736
--- /dev/null
+++ b/test/Transforms/ObjCARC/unsafe-claim-rv.ll
@@ -0,0 +1,47 @@
+; RUN: opt -objc-arc -S < %s | FileCheck %s
+
+; Generated by compiling:
+;
+; id baz(void *X) { return (__bridge_transfer id)X; }
+;
+; void foo(id X) {
+; void *Y = 0;
+; if (X)
+; Y = (__bridge_retained void *)X;
+; baz(Y);
+; }
+;
+; clang -x objective-c -mllvm -enable-objc-arc-opts=0 -fobjc-arc -S -emit-llvm test.m
+;
+; And then hand-reduced further.
+
+declare i8* @objc_autoreleaseReturnValue(i8*)
+declare i8* @objc_unsafeClaimAutoreleasedReturnValue(i8*)
+declare i8* @objc_retain(i8*)
+declare void @objc_release(i8*)
+
+define void @foo(i8* %X) {
+entry:
+ %0 = tail call i8* @objc_retain(i8* %X)
+ %tobool = icmp eq i8* %0, null
+ br i1 %tobool, label %if.end, label %if.then
+
+if.then: ; preds = %entry
+ %1 = tail call i8* @objc_retain(i8* nonnull %0)
+ br label %if.end
+
+if.end: ; preds = %if.then, %entry
+ %Y.0 = phi i8* [ %1, %if.then ], [ null, %entry ]
+ %2 = tail call i8* @objc_autoreleaseReturnValue(i8* %Y.0)
+ %3 = tail call i8* @objc_unsafeClaimAutoreleasedReturnValue(i8* %2)
+ tail call void @objc_release(i8* %0)
+ ret void
+}
+
+; CHECK: if.then
+; CHECK: tail call i8* @objc_retain
+; CHECK-NEXT: call i8* @objc_autorelease
+; CHECK: %Y.0 = phi
+; CHECK-NEXT: tail call i8* @objc_unsafeClaimAutoreleasedReturnValue(i8* %Y.0)
+; CHECK-NEXT: tail call void @objc_release
+
diff --git a/test/Transforms/PGOProfile/Inputs/PR28219.proftext b/test/Transforms/PGOProfile/Inputs/PR28219.proftext
new file mode 100644
index 000000000000..7ebc13cd1283
--- /dev/null
+++ b/test/Transforms/PGOProfile/Inputs/PR28219.proftext
@@ -0,0 +1,10 @@
+# :ir is the flag to indicate this is IR level profile.
+:ir
+@bar
+256
+1
+2
+@foo
+512
+1
+3
diff --git a/test/Transforms/PGOProfile/Inputs/branch1.proftext b/test/Transforms/PGOProfile/Inputs/branch1.proftext
index 3e28112706f1..8ca9db9c0515 100644
--- a/test/Transforms/PGOProfile/Inputs/branch1.proftext
+++ b/test/Transforms/PGOProfile/Inputs/branch1.proftext
@@ -1,3 +1,5 @@
+# :ir is the flag to indicate this is IR level profile.
+:ir
test_br_1
25571299074
2
diff --git a/test/Transforms/PGOProfile/Inputs/branch2.proftext b/test/Transforms/PGOProfile/Inputs/branch2.proftext
index 7d9bd72b29f2..b5fee2b6f4fa 100644
--- a/test/Transforms/PGOProfile/Inputs/branch2.proftext
+++ b/test/Transforms/PGOProfile/Inputs/branch2.proftext
@@ -1,3 +1,5 @@
+# :ir is the flag to indicate this is IR level profile.
+:ir
test_br_2
29667547796
2
diff --git a/test/Transforms/PGOProfile/Inputs/criticaledge.proftext b/test/Transforms/PGOProfile/Inputs/criticaledge.proftext
index f369ba7c3504..7613b643b163 100644
--- a/test/Transforms/PGOProfile/Inputs/criticaledge.proftext
+++ b/test/Transforms/PGOProfile/Inputs/criticaledge.proftext
@@ -1,3 +1,5 @@
+# :ir is the flag to indicate this is IR level profile.
+:ir
test_criticalEdge
82323253069
8
diff --git a/test/Transforms/PGOProfile/Inputs/diag.proftext b/test/Transforms/PGOProfile/Inputs/diag.proftext
index aaa137e3a420..a38d7939ebdd 100644
--- a/test/Transforms/PGOProfile/Inputs/diag.proftext
+++ b/test/Transforms/PGOProfile/Inputs/diag.proftext
@@ -1,3 +1,5 @@
+# :ir is the flag to indicate this is IR level profile.
+:ir
foo
12884999999
1
diff --git a/test/Transforms/PGOProfile/Inputs/diag_FE.proftext b/test/Transforms/PGOProfile/Inputs/diag_FE.proftext
new file mode 100644
index 000000000000..aaa137e3a420
--- /dev/null
+++ b/test/Transforms/PGOProfile/Inputs/diag_FE.proftext
@@ -0,0 +1,5 @@
+foo
+12884999999
+1
+1
+
diff --git a/test/Transforms/PGOProfile/Inputs/indirect_call.proftext b/test/Transforms/PGOProfile/Inputs/indirect_call.proftext
new file mode 100644
index 000000000000..269d85c5fd91
--- /dev/null
+++ b/test/Transforms/PGOProfile/Inputs/indirect_call.proftext
@@ -0,0 +1,43 @@
+:ir
+bar
+# Func Hash:
+12884901887
+# Num Counters:
+1
+# Counter Values:
+140
+# Num Value Kinds:
+1
+# ValueKind = IPVK_IndirectCallTarget:
+0
+# NumValueSites:
+1
+3
+func2:80
+func1:40
+func3:20
+
+func1
+# Func Hash:
+12884901887
+# Num Counters:
+1
+# Counter Values:
+40
+
+func2
+# Func Hash:
+12884901887
+# Num Counters:
+1
+# Counter Values:
+80
+
+func3
+# Func Hash:
+12884901887
+# Num Counters:
+1
+# Counter Values:
+20
+
diff --git a/test/Transforms/PGOProfile/Inputs/landingpad.proftext b/test/Transforms/PGOProfile/Inputs/landingpad.proftext
index b2bd451611bf..07b1bf86fc7f 100644
--- a/test/Transforms/PGOProfile/Inputs/landingpad.proftext
+++ b/test/Transforms/PGOProfile/Inputs/landingpad.proftext
@@ -1,3 +1,5 @@
+# :ir is the flag to indicate this is IR level profile.
+:ir
foo
59130013419
4
diff --git a/test/Transforms/PGOProfile/Inputs/loop1.proftext b/test/Transforms/PGOProfile/Inputs/loop1.proftext
index 58c05fbe1676..c19737149ca2 100644
--- a/test/Transforms/PGOProfile/Inputs/loop1.proftext
+++ b/test/Transforms/PGOProfile/Inputs/loop1.proftext
@@ -1,3 +1,5 @@
+# :ir is the flag to indicate this is IR level profile.
+:ir
test_simple_for
34137660316
2
diff --git a/test/Transforms/PGOProfile/Inputs/loop2.proftext b/test/Transforms/PGOProfile/Inputs/loop2.proftext
index 1c429ea5d5f4..af3a71df1e6b 100644
--- a/test/Transforms/PGOProfile/Inputs/loop2.proftext
+++ b/test/Transforms/PGOProfile/Inputs/loop2.proftext
@@ -1,3 +1,5 @@
+# :ir is the flag to indicate this is IR level profile.
+:ir
test_nested_for
53929068288
3
diff --git a/test/Transforms/PGOProfile/Inputs/switch.proftext b/test/Transforms/PGOProfile/Inputs/switch.proftext
index 7b406b87ef70..bebd65fa4c9f 100644
--- a/test/Transforms/PGOProfile/Inputs/switch.proftext
+++ b/test/Transforms/PGOProfile/Inputs/switch.proftext
@@ -1,3 +1,5 @@
+# :ir is the flag to indicate this is IR level profile.
+:ir
test_switch
46200943743
4
diff --git a/test/Transforms/PGOProfile/Inputs/thinlto_indirect_call_promotion.ll b/test/Transforms/PGOProfile/Inputs/thinlto_indirect_call_promotion.ll
new file mode 100644
index 000000000000..c77eb5172d86
--- /dev/null
+++ b/test/Transforms/PGOProfile/Inputs/thinlto_indirect_call_promotion.ll
@@ -0,0 +1,7 @@
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @a() {
+entry:
+ ret void
+}
diff --git a/test/Transforms/PGOProfile/PR28219.ll b/test/Transforms/PGOProfile/PR28219.ll
new file mode 100644
index 000000000000..a0e1904c3331
--- /dev/null
+++ b/test/Transforms/PGOProfile/PR28219.ll
@@ -0,0 +1,12 @@
+; Test that we annotate the entire program's summary and not just this module's
+; RUN: llvm-profdata merge %S/Inputs/PR28219.proftext -o %t.profdata
+; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s
+
+define i32 @bar() {
+entry:
+ ret i32 1
+}
+; CHECK-DAG: {{![0-9]+}} = !{i32 1, !"ProfileSummary", {{![0-9]+}}}
+; CHECK-DAG: {{![0-9]+}} = !{!"NumFunctions", i64 2}
+; CHECK-DAG: {{![0-9]+}} = !{!"MaxFunctionCount", i64 3}
+
diff --git a/test/Transforms/TailDup/lit.local.cfg b/test/Transforms/PGOProfile/X86/lit.local.cfg
index c8625f4d9d24..e71f3cc4c41e 100644
--- a/test/Transforms/TailDup/lit.local.cfg
+++ b/test/Transforms/PGOProfile/X86/lit.local.cfg
@@ -1,2 +1,3 @@
if not 'X86' in config.root.targets:
config.unsupported = True
+
diff --git a/test/Transforms/PGOProfile/X86/macho.ll b/test/Transforms/PGOProfile/X86/macho.ll
new file mode 100644
index 000000000000..d2fe65f2fa7f
--- /dev/null
+++ b/test/Transforms/PGOProfile/X86/macho.ll
@@ -0,0 +1,10 @@
+; RUN: opt < %s -pgo-instr-gen -instrprof -S | llc | FileCheck %s --check-prefix=MACHO-DIRECTIVE
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+; MACHO-DIRECTIVE: .weak_definition ___llvm_profile_raw_version
+define i32 @test_macho(i32 %i) {
+entry:
+ ret i32 %i
+}
diff --git a/test/Transforms/PGOProfile/branch1.ll b/test/Transforms/PGOProfile/branch1.ll
index cc354d3425c6..b2c7bcc2c19d 100644
--- a/test/Transforms/PGOProfile/branch1.ll
+++ b/test/Transforms/PGOProfile/branch1.ll
@@ -1,12 +1,28 @@
-; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
+; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN --check-prefix=GEN-COMDAT
+; RUN: opt < %s -mtriple=x86_64-apple-darwin -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN --check-prefix=GEN-DARWIN-LINKONCE
+
+; New PM
+; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s --check-prefix=GEN --check-prefix=GEN-COMDAT
+; RUN: opt < %s -mtriple=x86_64-apple-darwin -passes=pgo-instr-gen -S | FileCheck %s --check-prefix=GEN --check-prefix=GEN-DARWIN-LINKONCE
+
; RUN: llvm-profdata merge %S/Inputs/branch1.proftext -o %t.profdata
; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
+
+; New PM
+; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
+
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
+; GEN-DARWIN-LINKONCE: target triple = "x86_64-apple-darwin"
+; GEN-COMDAT: $__llvm_profile_raw_version = comdat any
+; GEN-COMDAT: @__llvm_profile_raw_version = constant i64 {{[0-9]+}}, comdat
+; GEN-LINKONCE: @__llvm_profile_raw_version = linkonce constant i64 {{[0-9]+}}
; GEN: @__profn_test_br_1 = private constant [9 x i8] c"test_br_1"
define i32 @test_br_1(i32 %i) {
+; USE-LABEL: @test_br_1
+; USE-SAME: !prof ![[FUNC_ENTRY_COUNT:[0-9]+]]
entry:
; GEN: entry:
; GEN-NOT: llvm.instrprof.increment
@@ -14,7 +30,7 @@ entry:
br i1 %cmp, label %if.then, label %if.end
; USE: br i1 %cmp, label %if.then, label %if.end
; USE-SAME: !prof ![[BW_ENTRY:[0-9]+]]
-; USE: ![[BW_ENTRY]] = !{!"branch_weights", i32 2, i32 1}
+; USE-DAG: ![[BW_ENTRY]] = !{!"branch_weights", i32 2, i32 1}
if.then:
; GEN: if.then:
@@ -28,3 +44,6 @@ if.end:
%retv = phi i32 [ %add, %if.then ], [ %i, %entry ]
ret i32 %retv
}
+; USE-DAG: {{![0-9]+}} = !{i32 1, !"ProfileSummary", {{![0-9]+}}}
+; USE-DAG: {{![0-9]+}} = !{!"DetailedSummary", {{![0-9]+}}}
+; USE-DAG: ![[FUNC_ENTRY_COUNT]] = !{!"function_entry_count", i64 3}
diff --git a/test/Transforms/PGOProfile/branch2.ll b/test/Transforms/PGOProfile/branch2.ll
index 1e8bc5ec2a38..f8df54b94d40 100644
--- a/test/Transforms/PGOProfile/branch2.ll
+++ b/test/Transforms/PGOProfile/branch2.ll
@@ -1,9 +1,13 @@
; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
+; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
; RUN: llvm-profdata merge %S/Inputs/branch2.proftext -o %t.profdata
; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
+; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
+; GEN: $__llvm_profile_raw_version = comdat any
+; GEN: @__llvm_profile_raw_version = constant i64 {{[0-9]+}}, comdat
; GEN: @__profn_test_br_2 = private constant [9 x i8] c"test_br_2"
define i32 @test_br_2(i32 %i) {
diff --git a/test/Transforms/PGOProfile/comdat_internal.ll b/test/Transforms/PGOProfile/comdat_internal.ll
new file mode 100644
index 000000000000..8cc41bf50068
--- /dev/null
+++ b/test/Transforms/PGOProfile/comdat_internal.ll
@@ -0,0 +1,26 @@
+; RUN: opt < %s -pgo-instr-gen -instrprof -S | FileCheck %s
+; RUN: opt < %s -passes=pgo-instr-gen,instrprof -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+$foo = comdat any
+
+; CHECK: $__llvm_profile_raw_version = comdat any
+; CHECK: $__profv__stdin__foo = comdat any
+
+@bar = global i32 ()* @foo, align 8
+
+; CHECK: @__llvm_profile_raw_version = constant i64 {{[0-9]+}}, comdat
+; CHECK: @__profn__stdin__foo = private constant [11 x i8] c"<stdin>:foo"
+; CHECK: @__profc__stdin__foo = private global [1 x i64] zeroinitializer, section "__llvm_prf_cnts", comdat($__profv__stdin__foo), align 8
+; CHECK: @__profd__stdin__foo = private global { i64, i64, i64*, i8*, i8*, i32, [1 x i16] } { i64 -5640069336071256030, i64 12884901887, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__stdin__foo, i32 0, i32 0), i8*
+; CHECK-NOT: bitcast (i32 ()* @foo to i8*)
+; CHECK-SAME: null
+; CHECK-SAME: , i8* null, i32 1, [1 x i16] zeroinitializer }, section "__llvm_prf_data", comdat($__profv__stdin__foo), align 8
+; CHECK: @__llvm_prf_nm
+; CHECK: @llvm.used
+
+define internal i32 @foo() comdat {
+entry:
+ ret i32 1
+}
diff --git a/test/Transforms/PGOProfile/criticaledge.ll b/test/Transforms/PGOProfile/criticaledge.ll
index 0089bbea1558..4b2ea6becfec 100644
--- a/test/Transforms/PGOProfile/criticaledge.ll
+++ b/test/Transforms/PGOProfile/criticaledge.ll
@@ -1,9 +1,13 @@
; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
+; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
; RUN: llvm-profdata merge %S/Inputs/criticaledge.proftext -o %t.profdata
; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
+; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
+; GEN: $__llvm_profile_raw_version = comdat any
+; GEN: @__llvm_profile_raw_version = constant i64 {{[0-9]+}}, comdat
; GEN: @__profn_test_criticalEdge = private constant [17 x i8] c"test_criticalEdge"
; GEN: @__profn__stdin__bar = private constant [11 x i8] c"<stdin>:bar"
diff --git a/test/Transforms/PGOProfile/diag_FE_profile.ll b/test/Transforms/PGOProfile/diag_FE_profile.ll
new file mode 100644
index 000000000000..cd33954284f8
--- /dev/null
+++ b/test/Transforms/PGOProfile/diag_FE_profile.ll
@@ -0,0 +1,13 @@
+; RUN: llvm-profdata merge %S/Inputs/diag_FE.proftext -o %t.profdata
+; RUN: not opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S 2>&1 | FileCheck %s
+; RUN: not opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S 2>&1 | FileCheck %s
+
+; CHECK: Not an IR level instrumentation profile
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @foo() {
+entry:
+ ret i32 0
+}
diff --git a/test/Transforms/PGOProfile/diag_mismatch.ll b/test/Transforms/PGOProfile/diag_mismatch.ll
index a2d0b20620f0..e2b7f8cdcc58 100644
--- a/test/Transforms/PGOProfile/diag_mismatch.ll
+++ b/test/Transforms/PGOProfile/diag_mismatch.ll
@@ -1,5 +1,6 @@
; RUN: llvm-profdata merge %S/Inputs/diag.proftext -o %t.profdata
; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S 2>&1 | FileCheck %s
; CHECK: Function control flow change detected (hash mismatch) foo
diff --git a/test/Transforms/PGOProfile/diag_no_funcprofdata.ll b/test/Transforms/PGOProfile/diag_no_funcprofdata.ll
index 2e5ec0444b42..d49751a62b9b 100644
--- a/test/Transforms/PGOProfile/diag_no_funcprofdata.ll
+++ b/test/Transforms/PGOProfile/diag_no_funcprofdata.ll
@@ -1,5 +1,6 @@
; RUN: llvm-profdata merge %S/Inputs/diag.proftext -o %t.profdata
; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S 2>&1 | FileCheck %s
; CHECK: No profile data available for function bar
diff --git a/test/Transforms/PGOProfile/diag_no_profile.ll b/test/Transforms/PGOProfile/diag_no_profile.ll
index ce7b59b8f69d..222d9bd09861 100644
--- a/test/Transforms/PGOProfile/diag_no_profile.ll
+++ b/test/Transforms/PGOProfile/diag_no_profile.ll
@@ -1,4 +1,5 @@
; RUN: not opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S 2>&1
+; RUN: not opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S 2>&1
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/PGOProfile/do-not-instrument.ll b/test/Transforms/PGOProfile/do-not-instrument.ll
new file mode 100644
index 000000000000..616e94273756
--- /dev/null
+++ b/test/Transforms/PGOProfile/do-not-instrument.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s
+; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.12.0"
+
+define i32 @f1() {
+; CHECK-LABEL: @f1
+entry:
+; CHECK: call void @llvm.instrprof.increment
+; CHECK-NOT: ptrtoint void (i8*)* asm sideeffect
+; CHECK-NOT: call void @llvm.instrprof.value.profile
+; CHECK: tail call void asm sideeffect
+ tail call void asm sideeffect "", "imr,~{memory},~{dirflag},~{fpsr},~{flags}"(i8* undef) #0
+ ret i32 0
+}
+
+define i32 @f2() {
+entry:
+; CHECK: call void @llvm.instrprof.increment
+; CHECK-NOT: call void @llvm.instrprof.value.profile
+ call void (i32, ...) bitcast (void (...)* @foo to void (i32, ...)*)(i32 21)
+ ret i32 0
+}
+
+declare void @foo(...) #0
+
+attributes #0 = { nounwind }
diff --git a/test/Transforms/PGOProfile/icp_covariant_call_return.ll b/test/Transforms/PGOProfile/icp_covariant_call_return.ll
new file mode 100644
index 000000000000..64f2025b924e
--- /dev/null
+++ b/test/Transforms/PGOProfile/icp_covariant_call_return.ll
@@ -0,0 +1,45 @@
+; RUN: opt < %s -pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-PROM
+; RUN: opt < %s -passes=pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-PROM
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+%struct.D = type { %struct.B }
+%struct.B = type { i32 (...)** }
+%struct.Base = type { i8 }
+%struct.Derived = type { i8 }
+
+declare noalias i8* @_Znwm(i64)
+declare void @_ZN1DC2Ev(%struct.D*);
+declare %struct.Derived* @_ZN1D4funcEv(%struct.D*);
+
+define i32 @bar() {
+entry:
+ %call = call noalias i8* @_Znwm(i64 8)
+ %tmp = bitcast i8* %call to %struct.D*
+ call void @_ZN1DC2Ev(%struct.D* %tmp)
+ %tmp1 = bitcast %struct.D* %tmp to %struct.B*
+ %tmp2 = bitcast %struct.B* %tmp1 to %struct.Base* (%struct.B*)***
+ %vtable = load %struct.Base* (%struct.B*)**, %struct.Base* (%struct.B*)*** %tmp2, align 8
+ %vfn = getelementptr inbounds %struct.Base* (%struct.B*)*, %struct.Base* (%struct.B*)** %vtable, i64 0
+ %tmp3 = load %struct.Base* (%struct.B*)*, %struct.Base* (%struct.B*)** %vfn, align 8
+; ICALL-PROM: [[BITCAST:%[0-9]+]] = bitcast %struct.Base* (%struct.B*)* %tmp3 to i8*
+; ICALL-PROM: [[CMP:%[0-9]+]] = icmp eq i8* [[BITCAST]], bitcast (%struct.Derived* (%struct.D*)* @_ZN1D4funcEv to i8*)
+; ICALL-PROM: br i1 [[CMP]], label %if.true.direct_targ, label %if.false.orig_indirect, !prof [[BRANCH_WEIGHT:![0-9]+]]
+; ICALL-PROM:if.true.direct_targ:
+; ICALL-PROM: [[ARG_BITCAST:%[0-9]+]] = bitcast %struct.B* %tmp1 to %struct.D*
+; ICALL-PROM: [[DIRCALL_RET:%[0-9]+]] = call %struct.Derived* @_ZN1D4funcEv(%struct.D* [[ARG_BITCAST]])
+; ICALL-PROM: [[DIRCALL_RET_CAST:%[0-9]+]] = bitcast %struct.Derived* [[DIRCALL_RET]] to %struct.Base*
+; ICALL-PROM: br label %if.end.icp
+; ICALL-PROM:if.false.orig_indirect:
+; ICALL-PROM: %call1 = call %struct.Base* %tmp3(%struct.B* %tmp1)
+; ICALL-PROM: br label %if.end.icp
+; ICALL-PROM:if.end.icp:
+; ICALL-PROM: [[PHI_RET:%[0-9]+]] = phi %struct.Base* [ %call1, %if.false.orig_indirect ], [ [[DIRCALL_RET_CAST]], %if.true.direct_targ ]
+ %call1 = call %struct.Base* %tmp3(%struct.B* %tmp1), !prof !1
+ ret i32 0
+}
+
+!1 = !{!"VP", i32 0, i64 12345, i64 -3913987384944532146, i64 12345}
+; ICALL-PROM-NOT: !1 = !{!"VP", i32 0, i64 12345, i64 -3913987384944532146, i64 12345}
+; ICALL-PROM: [[BRANCH_WEIGHT]] = !{!"branch_weights", i32 12345, i32 0}
+; ICALL-PROM-NOT: !1 = !{!"VP", i32 0, i64 12345, i64 -3913987384944532146, i64 12345}
diff --git a/test/Transforms/PGOProfile/icp_covariant_invoke_return.ll b/test/Transforms/PGOProfile/icp_covariant_invoke_return.ll
new file mode 100644
index 000000000000..d2ff47dda0e6
--- /dev/null
+++ b/test/Transforms/PGOProfile/icp_covariant_invoke_return.ll
@@ -0,0 +1,111 @@
+; RUN: opt < %s -pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-PROM
+; RUN: opt < %s -passes=pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-PROM
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+%struct.D = type { %struct.B }
+%struct.B = type { i32 (...)** }
+%struct.Derived = type { %struct.Base, i32 }
+%struct.Base = type { i32 }
+
+@_ZTIi = external constant i8*
+declare i8* @_Znwm(i64)
+declare void @_ZN1DC2Ev(%struct.D*)
+declare %struct.Derived* @_ZN1D4funcEv(%struct.D*)
+declare void @_ZN1DD0Ev(%struct.D*)
+declare void @_ZdlPv(i8*)
+declare i32 @__gxx_personality_v0(...)
+declare i32 @llvm.eh.typeid.for(i8*)
+declare i8* @__cxa_begin_catch(i8*)
+declare void @__cxa_end_catch()
+
+
+define i32 @foo() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ %call = invoke i8* @_Znwm(i64 8)
+ to label %invoke.cont unwind label %lpad
+
+invoke.cont:
+ %tmp = bitcast i8* %call to %struct.D*
+ call void @_ZN1DC2Ev(%struct.D* %tmp)
+ %tmp1 = bitcast %struct.D* %tmp to %struct.B*
+ %tmp2 = bitcast %struct.B* %tmp1 to %struct.Base* (%struct.B*)***
+ %vtable = load %struct.Base* (%struct.B*)**, %struct.Base* (%struct.B*)*** %tmp2, align 8
+ %vfn = getelementptr inbounds %struct.Base* (%struct.B*)*, %struct.Base* (%struct.B*)** %vtable, i64 0
+ %tmp3 = load %struct.Base* (%struct.B*)*, %struct.Base* (%struct.B*)** %vfn, align 8
+; ICALL-PROM: [[BITCAST:%[0-9]+]] = bitcast %struct.Base* (%struct.B*)* %tmp3 to i8*
+; ICALL-PROM: [[CMP:%[0-9]+]] = icmp eq i8* [[BITCAST]], bitcast (%struct.Derived* (%struct.D*)* @_ZN1D4funcEv to i8*)
+; ICALL-PROM: br i1 [[CMP]], label %if.true.direct_targ, label %if.false.orig_indirect, !prof [[BRANCH_WEIGHT:![0-9]+]]
+; ICALL-PROM:if.true.direct_targ:
+; ICALL-PROM: [[ARG_BITCAST:%[0-9]+]] = bitcast %struct.B* %tmp1 to %struct.D*
+; ICALL-PROM: [[DIRCALL_RET:%[0-9]+]] = invoke %struct.Derived* @_ZN1D4funcEv(%struct.D* [[ARG_BITCAST]])
+; ICALL-PROM: to label %if.end.icp unwind label %lpad
+; ICALL-PROM:if.false.orig_indirect:
+; ICALL-PROM: %call2 = invoke %struct.Base* %tmp3(%struct.B* %tmp1)
+; ICALL-PROM: to label %invoke.cont1 unwind label %lpad
+; ICALL-PROM:if.end.icp:
+; ICALL-PROM: [[DIRCALL_RET_CAST:%[0-9]+]] = bitcast %struct.Derived* [[DIRCALL_RET]] to %struct.Base*
+; ICALL-PROM: br label %invoke.cont1
+ %call2 = invoke %struct.Base* %tmp3(%struct.B* %tmp1)
+ to label %invoke.cont1 unwind label %lpad, !prof !1
+
+invoke.cont1:
+; ICALL-PROM: [[PHI_RET:%[0-9]+]] = phi %struct.Base* [ %call2, %if.false.orig_indirect ], [ [[DIRCALL_RET_CAST]], %if.end.icp ]
+; ICALL-PROM: %isnull = icmp eq %struct.Base* [[PHI_RET]], null
+ %isnull = icmp eq %struct.Base* %call2, null
+ br i1 %isnull, label %delete.end, label %delete.notnull
+
+delete.notnull:
+ %tmp4 = bitcast %struct.Base* %call2 to i8*
+ call void @_ZdlPv(i8* %tmp4)
+ br label %delete.end
+
+delete.end:
+ %isnull3 = icmp eq %struct.B* %tmp1, null
+ br i1 %isnull3, label %delete.end8, label %delete.notnull4
+
+delete.notnull4:
+ %tmp5 = bitcast %struct.B* %tmp1 to void (%struct.B*)***
+ %vtable5 = load void (%struct.B*)**, void (%struct.B*)*** %tmp5, align 8
+ %vfn6 = getelementptr inbounds void (%struct.B*)*, void (%struct.B*)** %vtable5, i64 2
+ %tmp6 = load void (%struct.B*)*, void (%struct.B*)** %vfn6, align 8
+ invoke void %tmp6(%struct.B* %tmp1)
+ to label %invoke.cont7 unwind label %lpad
+
+invoke.cont7:
+ br label %delete.end8
+
+delete.end8:
+ br label %try.cont
+
+lpad:
+ %tmp7 = landingpad { i8*, i32 }
+ catch i8* bitcast (i8** @_ZTIi to i8*)
+ %tmp8 = extractvalue { i8*, i32 } %tmp7, 0
+ %tmp9 = extractvalue { i8*, i32 } %tmp7, 1
+ br label %catch.dispatch
+
+catch.dispatch:
+ %tmp10 = call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*))
+ %matches = icmp eq i32 %tmp9, %tmp10
+ br i1 %matches, label %catch, label %eh.resume
+
+catch:
+ %tmp11 = call i8* @__cxa_begin_catch(i8* %tmp8)
+ %tmp12 = bitcast i8* %tmp11 to i32*
+ %tmp13 = load i32, i32* %tmp12, align 4
+ call void @__cxa_end_catch()
+ br label %try.cont
+
+try.cont:
+ ret i32 0
+
+eh.resume:
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %tmp8, 0
+ %lpad.val11 = insertvalue { i8*, i32 } %lpad.val, i32 %tmp9, 1
+ resume { i8*, i32 } %lpad.val11
+}
+
+!1 = !{!"VP", i32 0, i64 12345, i64 -3913987384944532146, i64 12345}
+; ICALL-PROM-NOT: !1 = !{!"VP", i32 0, i64 12345, i64 -3913987384944532146, i64 12345}
+; ICALL-PROM: [[BRANCH_WEIGHT]] = !{!"branch_weights", i32 12345, i32 0}
+; ICALL-PROM-NOT: !1 = !{!"VP", i32 0, i64 12345, i64 -3913987384944532146, i64 12345}
diff --git a/test/Transforms/PGOProfile/icp_invoke.ll b/test/Transforms/PGOProfile/icp_invoke.ll
new file mode 100644
index 000000000000..a6bf5a870955
--- /dev/null
+++ b/test/Transforms/PGOProfile/icp_invoke.ll
@@ -0,0 +1,105 @@
+; RUN: opt < %s -icp-lto -pgo-icall-prom -S -icp-count-threshold=0 | FileCheck %s --check-prefix=ICP
+; RUN: opt < %s -icp-lto -passes=pgo-icall-prom -S -icp-count-threshold=0 | FileCheck %s --check-prefix=ICP
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@foo1 = global void ()* null, align 8
+@foo2 = global i32 ()* null, align 8
+@_ZTIi = external constant i8*
+
+define internal void @_ZL4bar1v() !PGOFuncName !0 {
+entry:
+ ret void
+}
+
+define internal i32 @_ZL4bar2v() !PGOFuncName !1 {
+entry:
+ ret i32 100
+}
+
+define i32 @_Z3goov() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+entry:
+ %tmp = load void ()*, void ()** @foo1, align 8
+; ICP: [[BITCAST_IC1:%[0-9]+]] = bitcast void ()* %tmp to i8*
+; ICP: [[CMP_IC1:%[0-9]+]] = icmp eq i8* [[BITCAST_IC1]], bitcast (void ()* @_ZL4bar1v to i8*)
+; ICP: br i1 [[CMP_IC1]], label %[[TRUE_LABEL_IC1:.*]], label %[[FALSE_LABEL_IC1:.*]], !prof [[BRANCH_WEIGHT:![0-9]+]]
+; ICP:[[TRUE_LABEL_IC1]]:
+; ICP: invoke void @_ZL4bar1v()
+; ICP: to label %[[DCALL_NORMAL_DEST_IC1:.*]] unwind label %lpad
+; ICP:[[FALSE_LABEL_IC1]]:
+ invoke void %tmp()
+ to label %try.cont unwind label %lpad, !prof !2
+
+; ICP:[[DCALL_NORMAL_DEST_IC1]]:
+; ICP: br label %try.cont
+
+lpad:
+ %tmp1 = landingpad { i8*, i32 }
+ catch i8* bitcast (i8** @_ZTIi to i8*)
+ %tmp2 = extractvalue { i8*, i32 } %tmp1, 0
+ %tmp3 = extractvalue { i8*, i32 } %tmp1, 1
+ %tmp4 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*))
+ %matches = icmp eq i32 %tmp3, %tmp4
+ br i1 %matches, label %catch, label %eh.resume
+
+catch:
+ %tmp5 = tail call i8* @__cxa_begin_catch(i8* %tmp2)
+ tail call void @__cxa_end_catch()
+ br label %try.cont
+
+try.cont:
+ %tmp6 = load i32 ()*, i32 ()** @foo2, align 8
+; ICP: [[BITCAST_IC2:%[0-9]+]] = bitcast i32 ()* %tmp6 to i8*
+; ICP: [[CMP_IC2:%[0-9]+]] = icmp eq i8* [[BITCAST_IC2]], bitcast (i32 ()* @_ZL4bar2v to i8*)
+; ICP: br i1 [[CMP_IC2]], label %[[TRUE_LABEL_IC2:.*]], label %[[FALSE_LABEL_IC2:.*]], !prof [[BRANCH_WEIGHT:![0-9]+]]
+; ICP:[[TRUE_LABEL_IC2]]:
+; ICP: [[RESULT_IC2:%[0-9]+]] = invoke i32 @_ZL4bar2v()
+; ICP: to label %[[DCALL_NORMAL_DEST_IC2:.*]] unwind label %lpad1
+; ICP:[[FALSE_LABEL_IC2]]:
+ %call = invoke i32 %tmp6()
+ to label %try.cont8 unwind label %lpad1, !prof !3
+
+; ICP:[[DCALL_NORMAL_DEST_IC2]]:
+; ICP: br label %try.cont8
+lpad1:
+ %tmp7 = landingpad { i8*, i32 }
+ catch i8* bitcast (i8** @_ZTIi to i8*)
+ %tmp8 = extractvalue { i8*, i32 } %tmp7, 0
+ %tmp9 = extractvalue { i8*, i32 } %tmp7, 1
+ %tmp10 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*))
+ %matches5 = icmp eq i32 %tmp9, %tmp10
+ br i1 %matches5, label %catch6, label %eh.resume
+
+catch6:
+ %tmp11 = tail call i8* @__cxa_begin_catch(i8* %tmp8)
+ tail call void @__cxa_end_catch()
+ br label %try.cont8
+
+try.cont8:
+ %i.0 = phi i32 [ undef, %catch6 ], [ %call, %try.cont ]
+; ICP: %i.0 = phi i32 [ undef, %catch6 ], [ %call, %[[FALSE_LABEL_IC2]] ], [ [[RESULT_IC2]], %[[DCALL_NORMAL_DEST_IC2]] ]
+ ret i32 %i.0
+
+eh.resume:
+ %ehselector.slot.0 = phi i32 [ %tmp9, %lpad1 ], [ %tmp3, %lpad ]
+ %exn.slot.0 = phi i8* [ %tmp8, %lpad1 ], [ %tmp2, %lpad ]
+ %lpad.val = insertvalue { i8*, i32 } undef, i8* %exn.slot.0, 0
+ %lpad.val11 = insertvalue { i8*, i32 } %lpad.val, i32 %ehselector.slot.0, 1
+ resume { i8*, i32 } %lpad.val11
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+declare i32 @llvm.eh.typeid.for(i8*)
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
+
+!0 = !{!"invoke.ll:_ZL4bar1v"}
+!1 = !{!"invoke.ll:_ZL4bar2v"}
+!2 = !{!"VP", i32 0, i64 1, i64 -2732222848796217051, i64 1}
+!3 = !{!"VP", i32 0, i64 1, i64 -6116256810522035449, i64 1}
+; ICP-NOT: !3 = !{!"VP", i32 0, i64 1, i64 -2732222848796217051, i64 1}
+; ICP-NOT: !4 = !{!"VP", i32 0, i64 1, i64 -6116256810522035449, i64 1}
+; ICP: [[BRANCH_WEIGHT]] = !{!"branch_weights", i32 1, i32 0}
diff --git a/test/Transforms/PGOProfile/icp_mismatch_msg.ll b/test/Transforms/PGOProfile/icp_mismatch_msg.ll
new file mode 100644
index 000000000000..408996a5f09c
--- /dev/null
+++ b/test/Transforms/PGOProfile/icp_mismatch_msg.ll
@@ -0,0 +1,40 @@
+; RUN: opt < %s -pgo-icall-prom -pass-remarks-missed=pgo-icall-prom -S 2>& 1 | FileCheck %s
+; RUN: opt < %s -passes=pgo-icall-prom -pass-remarks-missed=pgo-icall-prom -S 2>& 1 | FileCheck %s
+
+; CHECK: remark: <unknown>:0:0: Cannot promote indirect call to func4 with count of 1234: The number of arguments mismatch
+; CHECK: remark: <unknown>:0:0: Cannot promote indirect call to 11517462787082255043 with count of 2345: Cannot find the target
+; CHECK: remark: <unknown>:0:0: Cannot promote indirect call to func2 with count of 7890: Return type mismatch
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@foo = common global i32 ()* null, align 8
+@foo2 = common global i32 ()* null, align 8
+@foo3 = common global i32 ()* null, align 8
+
+define i32 @func4(i32 %i) {
+entry:
+ ret i32 %i
+}
+
+define void @func2() {
+entry:
+ ret void
+}
+
+define i32 @bar() {
+entry:
+ %tmp = load i32 ()*, i32 ()** @foo, align 8
+ %call = call i32 %tmp(), !prof !1
+ %tmp2 = load i32 ()*, i32 ()** @foo2, align 8
+ %call1 = call i32 %tmp2(), !prof !2
+ %add = add nsw i32 %call1, %call
+ %tmp3 = load i32 ()*, i32 ()** @foo3, align 8
+ %call2 = call i32 %tmp3(), !prof !3
+ %add2 = add nsw i32 %add, %call2
+ ret i32 %add2
+}
+
+!1 = !{!"VP", i32 0, i64 1801, i64 7651369219802541373, i64 1234, i64 -4377547752858689819, i64 567}
+!2 = !{!"VP", i32 0, i64 3023, i64 -6929281286627296573, i64 2345, i64 -4377547752858689819, i64 678}
+!3 = !{!"VP", i32 0, i64 7890, i64 -4377547752858689819, i64 7890}
diff --git a/test/Transforms/PGOProfile/icp_vararg.ll b/test/Transforms/PGOProfile/icp_vararg.ll
new file mode 100644
index 000000000000..400aab3aead7
--- /dev/null
+++ b/test/Transforms/PGOProfile/icp_vararg.ll
@@ -0,0 +1,34 @@
+; RUN: opt < %s -pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-PROM
+; RUN: opt < %s -passes=pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-PROM
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@foo = common global i32 (i32, ...)* null, align 8
+
+define i32 @va_func(i32 %num, ...) {
+entry:
+ ret i32 0
+}
+
+define i32 @bar() #1 {
+entry:
+ %tmp = load i32 (i32, ...)*, i32 (i32, ...)** @foo, align 8
+; ICALL-PROM: [[BITCAST:%[0-9]+]] = bitcast i32 (i32, ...)* %tmp to i8*
+; ICALL-PROM: [[CMP:%[0-9]+]] = icmp eq i8* [[BITCAST]], bitcast (i32 (i32, ...)* @va_func to i8*)
+; ICALL-PROM: br i1 [[CMP]], label %if.true.direct_targ, label %if.false.orig_indirect, !prof [[BRANCH_WEIGHT:![0-9]+]]
+; ICALL-PROM:if.true.direct_targ:
+; ICALL-PROM: [[DIRCALL_RET:%[0-9]+]] = call i32 (i32, ...) @va_func(i32 3, i32 12, i32 22, i32 4)
+; ICALL-PROM: br label %if.end.icp
+ %call = call i32 (i32, ...) %tmp(i32 3, i32 12, i32 22, i32 4), !prof !1
+; ICALL-PROM:if.false.orig_indirect:
+; ICALL-PROM: %call = call i32 (i32, ...) %tmp(i32 3, i32 12, i32 22, i32 4)
+; ICALL-PROM: br label %if.end.icp
+ ret i32 %call
+; ICALL-PROM:if.end.icp:
+; ICALL-PROM: [[PHI_RET:%[0-9]+]] = phi i32 [ %call, %if.false.orig_indirect ], [ [[DIRCALL_RET]], %if.true.direct_targ ]
+; ICALL-PROM: ret i32 [[PHI_RET]]
+
+}
+
+!1 = !{!"VP", i32 0, i64 12345, i64 989055279648259519, i64 12345}
+; ICALL-PROM: [[BRANCH_WEIGHT]] = !{!"branch_weights", i32 12345, i32 0}
diff --git a/test/Transforms/PGOProfile/indirect_call_annotation.ll b/test/Transforms/PGOProfile/indirect_call_annotation.ll
new file mode 100644
index 000000000000..6f72a998784b
--- /dev/null
+++ b/test/Transforms/PGOProfile/indirect_call_annotation.ll
@@ -0,0 +1,36 @@
+; RUN: llvm-profdata merge %S/Inputs/indirect_call.proftext -o %t.profdata
+; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=VP-ANNOTATION
+; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=VP-ANNOTATION
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@foo = common global i32 (i32)* null, align 8
+
+define i32 @func1(i32 %x) {
+entry:
+ ret i32 %x
+}
+
+define i32 @func2(i32 %x) {
+entry:
+ %add = add nsw i32 %x, 1
+ ret i32 %add
+}
+
+define i32 @func3(i32 %x) {
+entry:
+ %add = add nsw i32 %x, 3
+ ret i32 %add
+}
+
+define i32 @bar(i32 %i) {
+entry:
+ %tmp = load i32 (i32)*, i32 (i32)** @foo, align 8
+ %call = call i32 %tmp(i32 %i)
+; VP-ANNOTATION: %call = call i32 %tmp(i32 %i)
+; VP-ANNOTATION-SAME: !prof ![[VP:[0-9]+]]
+; VP-ANNOTATION: ![[VP]] = !{!"VP", i32 0, i64 140, i64 -4377547752858689819, i64 80, i64 -2545542355363006406, i64 40, i64 -6929281286627296573, i64 20}
+ ret i32 %call
+}
+
+
diff --git a/test/Transforms/PGOProfile/indirect_call_profile.ll b/test/Transforms/PGOProfile/indirect_call_profile.ll
new file mode 100644
index 000000000000..e1753acb7c74
--- /dev/null
+++ b/test/Transforms/PGOProfile/indirect_call_profile.ll
@@ -0,0 +1,71 @@
+; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
+; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
+; RUN: opt < %s -passes=pgo-instr-gen,instrprof -S | FileCheck %s --check-prefix=LOWER
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+$foo3 = comdat any
+
+@bar = external global void ()*, align 8
+; GEN: @__profn_foo = private constant [3 x i8] c"foo"
+
+define void @foo() {
+entry:
+; GEN: entry:
+; GEN-NEXT: call void @llvm.instrprof.increment(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12884901887, i32 1, i32 0)
+ %tmp = load void ()*, void ()** @bar, align 8
+; GEN: [[ICALL_TARGET:%[0-9]+]] = ptrtoint void ()* %tmp to i64
+; GEN-NEXT: call void @llvm.instrprof.value.profile(i8* getelementptr inbounds ([3 x i8], [3 x i8]* @__profn_foo, i32 0, i32 0), i64 12884901887, i64 [[ICALL_TARGET]], i32 0, i32 0)
+ call void %tmp()
+ ret void
+}
+
+@bar2 = global void ()* null, align 8
+@_ZTIi = external constant i8*
+
+define i32 @foo2(i32 %arg, i8** nocapture readnone %arg1) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+bb:
+ %tmp2 = load void ()*, void ()** @bar2, align 8
+ invoke void %tmp2()
+ to label %bb10 unwind label %bb2
+; GEN: [[ICALL_TARGET2:%[0-9]+]] = ptrtoint void ()* %tmp2 to i64
+; GEN-NEXT: call void @llvm.instrprof.value.profile(i8* getelementptr inbounds ([4 x i8], [4 x i8]* @__profn_foo2, i32 0, i32 0), i64 38432627612, i64 [[ICALL_TARGET2]], i32 0, i32 0)
+
+bb2: ; preds = %bb
+ %tmp3 = landingpad { i8*, i32 }
+ catch i8* bitcast (i8** @_ZTIi to i8*)
+ %tmp4 = extractvalue { i8*, i32 } %tmp3, 1
+ %tmp5 = tail call i32 @llvm.eh.typeid.for(i8* bitcast (i8** @_ZTIi to i8*))
+ %tmp6 = icmp eq i32 %tmp4, %tmp5
+ br i1 %tmp6, label %bb7, label %bb11
+
+bb7: ; preds = %bb2
+ %tmp8 = extractvalue { i8*, i32 } %tmp3, 0
+ %tmp9 = tail call i8* @__cxa_begin_catch(i8* %tmp8)
+ tail call void @__cxa_end_catch()
+ br label %bb10
+
+bb10: ; preds = %bb7, %bb
+ ret i32 0
+
+bb11: ; preds = %bb2
+ resume { i8*, i32 } %tmp3
+}
+
+; Test that comdat function's address is recorded.
+; LOWER: @__profd_foo3 = linkonce_odr{{.*}}@foo3
+; Function Attrs: nounwind uwtable
+define linkonce_odr i32 @foo3() comdat {
+ ret i32 1
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+; Function Attrs: nounwind readnone
+declare i32 @llvm.eh.typeid.for(i8*) #0
+
+declare i8* @__cxa_begin_catch(i8*)
+
+declare void @__cxa_end_catch()
+
diff --git a/test/Transforms/PGOProfile/indirect_call_promotion.ll b/test/Transforms/PGOProfile/indirect_call_promotion.ll
new file mode 100644
index 000000000000..c35166505eb9
--- /dev/null
+++ b/test/Transforms/PGOProfile/indirect_call_promotion.ll
@@ -0,0 +1,56 @@
+; RUN: opt < %s -pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-PROM
+; RUN: opt < %s -passes=pgo-icall-prom -S | FileCheck %s --check-prefix=ICALL-PROM
+; RUN: opt < %s -pgo-icall-prom -S -pass-remarks=pgo-icall-prom -icp-count-threshold=0 -icp-percent-threshold=0 -icp-max-prom=4 2>&1 | FileCheck %s --check-prefix=PASS-REMARK
+; RUN: opt < %s -passes=pgo-icall-prom -S -pass-remarks=pgo-icall-prom -icp-count-threshold=0 -icp-percent-threshold=0 -icp-max-prom=4 2>&1 | FileCheck %s --check-prefix=PASS-REMARK
+; PASS-REMARK: remark: <unknown>:0:0: Promote indirect call to func4 with count 1030 out of 1600
+; PASS-REMARK: remark: <unknown>:0:0: Promote indirect call to func2 with count 410 out of 570
+; PASS-REMARK: remark: <unknown>:0:0: Promote indirect call to func3 with count 150 out of 160
+; PASS-REMARK: remark: <unknown>:0:0: Promote indirect call to func1 with count 10 out of 10
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@foo = common global i32 ()* null, align 8
+
+define i32 @func1() {
+entry:
+ ret i32 0
+}
+
+define i32 @func2() {
+entry:
+ ret i32 1
+}
+
+define i32 @func3() {
+entry:
+ ret i32 2
+}
+
+define i32 @func4() {
+entry:
+ ret i32 3
+}
+
+define i32 @bar() {
+entry:
+ %tmp = load i32 ()*, i32 ()** @foo, align 8
+; ICALL-PROM: [[BITCAST:%[0-9]+]] = bitcast i32 ()* %tmp to i8*
+; ICALL-PROM: [[CMP:%[0-9]+]] = icmp eq i8* [[BITCAST]], bitcast (i32 ()* @func4 to i8*)
+; ICALL-PROM: br i1 [[CMP]], label %if.true.direct_targ, label %if.false.orig_indirect, !prof [[BRANCH_WEIGHT:![0-9]+]]
+; ICALL-PROM: if.true.direct_targ:
+; ICALL-PROM: [[DIRCALL_RET:%[0-9]+]] = call i32 @func4()
+; ICALL-PROM: br label %if.end.icp
+ %call = call i32 %tmp(), !prof !1
+; ICALL-PROM: if.false.orig_indirect:
+; ICALL-PROM: %call = call i32 %tmp(), !prof [[NEW_VP_METADATA:![0-9]+]]
+ ret i32 %call
+; ICALL-PROM: if.end.icp:
+; ICALL-PROM: [[PHI_RET:%[0-9]+]] = phi i32 [ %call, %if.false.orig_indirect ], [ [[DIRCALL_RET]], %if.true.direct_targ ]
+; ICALL-PROM: ret i32 [[PHI_RET]]
+}
+
+!1 = !{!"VP", i32 0, i64 1600, i64 7651369219802541373, i64 1030, i64 -4377547752858689819, i64 410, i64 -6929281286627296573, i64 150, i64 -2545542355363006406, i64 10}
+
+; ICALL-PROM: [[BRANCH_WEIGHT]] = !{!"branch_weights", i32 1030, i32 570}
+; ICALL-PROM: [[NEW_VP_METADATA]] = !{!"VP", i32 0, i64 570, i64 -4377547752858689819, i64 410}
diff --git a/test/Transforms/PGOProfile/landingpad.ll b/test/Transforms/PGOProfile/landingpad.ll
index 33fe62fbae03..9452cd41b008 100644
--- a/test/Transforms/PGOProfile/landingpad.ll
+++ b/test/Transforms/PGOProfile/landingpad.ll
@@ -1,11 +1,15 @@
; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
+; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
; RUN: llvm-profdata merge %S/Inputs/landingpad.proftext -o %t.profdata
; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
+; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@val = global i32 0, align 4
@_ZTIi = external constant i8*
+; GEN: $__llvm_profile_raw_version = comdat any
+; GEN: @__llvm_profile_raw_version = constant i64 {{[0-9]+}}, comdat
; GEN: @__profn_bar = private constant [3 x i8] c"bar"
; GEN: @__profn_foo = private constant [3 x i8] c"foo"
diff --git a/test/Transforms/PGOProfile/loop1.ll b/test/Transforms/PGOProfile/loop1.ll
index aa5aa86b1e54..5d3be183694d 100644
--- a/test/Transforms/PGOProfile/loop1.ll
+++ b/test/Transforms/PGOProfile/loop1.ll
@@ -1,9 +1,13 @@
; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
+; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
; RUN: llvm-profdata merge %S/Inputs/loop1.proftext -o %t.profdata
; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
+; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
+; GEN: $__llvm_profile_raw_version = comdat any
+; GEN: @__llvm_profile_raw_version = constant i64 {{[0-9]+}}, comdat
; GEN: @__profn_test_simple_for = private constant [15 x i8] c"test_simple_for"
define i32 @test_simple_for(i32 %n) {
diff --git a/test/Transforms/PGOProfile/loop2.ll b/test/Transforms/PGOProfile/loop2.ll
index ec3e16d461bc..1fad53a90dca 100644
--- a/test/Transforms/PGOProfile/loop2.ll
+++ b/test/Transforms/PGOProfile/loop2.ll
@@ -1,9 +1,13 @@
; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
+; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
; RUN: llvm-profdata merge %S/Inputs/loop2.proftext -o %t.profdata
; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
+; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
+; GEN: $__llvm_profile_raw_version = comdat any
+; GEN: @__llvm_profile_raw_version = constant i64 {{[0-9]+}}, comdat
; GEN: @__profn_test_nested_for = private constant [15 x i8] c"test_nested_for"
define i32 @test_nested_for(i32 %r, i32 %s) {
diff --git a/test/Transforms/PGOProfile/preinline.ll b/test/Transforms/PGOProfile/preinline.ll
new file mode 100644
index 000000000000..2618666ede2b
--- /dev/null
+++ b/test/Transforms/PGOProfile/preinline.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -O2 -profile-generate=default.profraw -S | FileCheck %s --check-prefix=GEN
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @foo(i32 %i) {
+entry:
+; GEN: %pgocount = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc_foo
+; GEN-NOT: %pgocount.i = load i64, i64* getelementptr inbounds ([1 x i64], [1 x i64]* @__profc__stdin__bar
+ %call = call i32 @bar()
+ %add = add nsw i32 %i, %call
+ ret i32 %add
+}
+
+define internal i32 @bar() {
+; check that bar is inlined into foo and eliminated from IR.
+; GEN-NOT: define internal i32 @bar
+entry:
+ %call = call i32 (...) @bar1()
+ ret i32 %call
+}
+
+declare i32 @bar1(...)
diff --git a/test/Transforms/PGOProfile/single_bb.ll b/test/Transforms/PGOProfile/single_bb.ll
index f904d09b8e7a..874d8e4d22d6 100644
--- a/test/Transforms/PGOProfile/single_bb.ll
+++ b/test/Transforms/PGOProfile/single_bb.ll
@@ -1,7 +1,10 @@
; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
+; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
+; GEN: $__llvm_profile_raw_version = comdat any
+; GEN: @__llvm_profile_raw_version = constant i64 {{[0-9]+}}, comdat
; GEN: @__profn_single_bb = private constant [9 x i8] c"single_bb"
define i32 @single_bb() {
diff --git a/test/Transforms/PGOProfile/statics_counter_naming.ll b/test/Transforms/PGOProfile/statics_counter_naming.ll
new file mode 100644
index 000000000000..c882406ffe54
--- /dev/null
+++ b/test/Transforms/PGOProfile/statics_counter_naming.ll
@@ -0,0 +1,11 @@
+; RUN: opt %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
+; RUN: opt %s -passes=pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; GEN: @__profn_statics_counter_naming.ll_func = private constant [30 x i8] c"statics_counter_naming.ll:func"
+
+define internal i32 @func() {
+entry:
+ ret i32 0
+}
diff --git a/test/Transforms/PGOProfile/switch.ll b/test/Transforms/PGOProfile/switch.ll
index 3177dc0bd040..e590e217013a 100644
--- a/test/Transforms/PGOProfile/switch.ll
+++ b/test/Transforms/PGOProfile/switch.ll
@@ -1,9 +1,13 @@
; RUN: opt < %s -pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
+; RUN: opt < %s -passes=pgo-instr-gen -S | FileCheck %s --check-prefix=GEN
; RUN: llvm-profdata merge %S/Inputs/switch.proftext -o %t.profdata
; RUN: opt < %s -pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
+; RUN: opt < %s -passes=pgo-instr-use -pgo-test-profile-file=%t.profdata -S | FileCheck %s --check-prefix=USE
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
+; GEN: $__llvm_profile_raw_version = comdat any
+; GEN: @__llvm_profile_raw_version = constant i64 {{[0-9]+}}, comdat
; GEN: @__profn_test_switch = private constant [11 x i8] c"test_switch"
define void @test_switch(i32 %i) {
diff --git a/test/Transforms/PGOProfile/thinlto_indirect_call_promotion.ll b/test/Transforms/PGOProfile/thinlto_indirect_call_promotion.ll
new file mode 100644
index 000000000000..49dc7fa39e04
--- /dev/null
+++ b/test/Transforms/PGOProfile/thinlto_indirect_call_promotion.ll
@@ -0,0 +1,32 @@
+; Do setup work for all below tests: generate bitcode and combined index
+; RUN: opt -module-summary %s -o %t.bc
+; RUN: opt -module-summary %p/Inputs/thinlto_indirect_call_promotion.ll -o %t2.bc
+; RUN: llvm-lto -thinlto -o %t3 %t.bc %t2.bc
+
+; RUN: opt -function-import -summary-file %t3.thinlto.bc %t.bc -o %t4.bc -print-imports 2>&1 | FileCheck %s --check-prefix=IMPORTS
+; IMPORTS: Import a
+
+; RUN: opt %t4.bc -pgo-icall-prom -S -icp-count-threshold=1 | FileCheck %s --check-prefix=ICALL-PROM
+; RUN: opt %t4.bc -pgo-icall-prom -S -pass-remarks=pgo-icall-prom -icp-count-threshold=1 2>&1 | FileCheck %s --check-prefix=PASS-REMARK
+; PASS-REMARK: Promote indirect call to a with count 1 out of 1
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@foo = external local_unnamed_addr global void ()*, align 8
+
+define i32 @main() local_unnamed_addr {
+entry:
+ %0 = load void ()*, void ()** @foo, align 8
+; ICALL-PROM: br i1 %{{[0-9]+}}, label %if.true.direct_targ, label %if.false.orig_indirect, !prof [[BRANCH_WEIGHT:![0-9]+]]
+ tail call void %0(), !prof !1
+ ret i32 0
+}
+
+!1 = !{!"VP", i32 0, i64 1, i64 -6289574019528802036, i64 1}
+
+; Should not have a VP annotation on new indirect call (check before and after
+; branch_weights annotation).
+; ICALL-PROM-NOT: !"VP"
+; ICALL-PROM: [[BRANCH_WEIGHT]] = !{!"branch_weights", i32 1, i32 0}
+; ICALL-PROM-NOT: !"VP"
diff --git a/test/Transforms/PartiallyInlineLibCalls/bad-prototype.ll b/test/Transforms/PartiallyInlineLibCalls/bad-prototype.ll
index 34cd672ed267..e4d4cf7827aa 100644
--- a/test/Transforms/PartiallyInlineLibCalls/bad-prototype.ll
+++ b/test/Transforms/PartiallyInlineLibCalls/bad-prototype.ll
@@ -1,4 +1,5 @@
; RUN: opt -S -partially-inline-libcalls < %s | FileCheck %s
+; RUN: opt -S -passes=partially-inline-libcalls < %s | FileCheck %s
target triple = "x86_64-unknown-linux-gnu"
diff --git a/test/Transforms/PhaseOrdering/globalaa-retained.ll b/test/Transforms/PhaseOrdering/globalaa-retained.ll
new file mode 100644
index 000000000000..bce193b5e851
--- /dev/null
+++ b/test/Transforms/PhaseOrdering/globalaa-retained.ll
@@ -0,0 +1,26 @@
+; RUN: opt -O3 -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64"
+
+@v = internal unnamed_addr global i32 0, align 4
+@p = common global i32* null, align 8
+
+; Function Attrs: norecurse nounwind
+define void @f(i32 %n) {
+entry:
+ %0 = load i32, i32* @v, align 4
+ %inc = add nsw i32 %0, 1
+ store i32 %inc, i32* @v, align 4
+ %1 = load i32*, i32** @p, align 8
+ store i32 %n, i32* %1, align 4
+ %2 = load i32, i32* @v, align 4
+ %inc1 = add nsw i32 %2, 1
+ store i32 %inc1, i32* @v, align 4
+ ret void
+}
+
+; check that variable v is loaded only once after optimization, which should
+; prove that globalsAA survives until the optimization that can use it to
+; optimize away the duplicate load/stores on variable v.
+; CHECK: load i32, i32* @v, align 4
+; CHECK-NOT: load i32, i32* @v, align 4
diff --git a/test/Transforms/PlaceSafepoints/basic.ll b/test/Transforms/PlaceSafepoints/basic.ll
index 8cdbc217b849..5cbf2798f4ae 100644
--- a/test/Transforms/PlaceSafepoints/basic.ll
+++ b/test/Transforms/PlaceSafepoints/basic.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -S -place-safepoints | FileCheck %s
+; RUN: opt < %s -S -place-safepoints | FileCheck %s
; Do we insert a simple entry safepoint?
@@ -6,7 +6,7 @@ define void @test_entry() gc "statepoint-example" {
; CHECK-LABEL: @test_entry
entry:
; CHECK-LABEL: entry
-; CHECK: statepoint
+; CHECK: call void @do_safepoint
ret void
}
@@ -14,7 +14,7 @@ entry:
define void @test_negative() {
; CHECK-LABEL: @test_negative
entry:
-; CHECK-NOT: statepoint
+; CHECK-NOT: do_safepoint
ret void
}
@@ -25,13 +25,12 @@ define void @test_backedge() gc "statepoint-example" {
entry:
; CHECK-LABEL: entry
; This statepoint is technically not required, but we don't exploit that yet.
-; CHECK: statepoint
+; CHECK: call void @do_safepoint
br label %other
; CHECK-LABEL: other
-; CHECK: statepoint
+; CHECK: call void @do_safepoint
other:
- call void undef()
br label %other
}
@@ -41,40 +40,24 @@ define void @test_unreachable() gc "statepoint-example" {
; CHECK-LABEL: test_unreachable
entry:
; CHECK-LABEL: entry
-; CHECK: statepoint
+; CHECK: call void @do_safepoint
ret void
; CHECK-NOT: other
-; CHECK-NOT: statepoint
+; CHECK-NOT: do_safepoint
other:
br label %other
}
declare void @foo()
-; Do we turn a call into it's own statepoint
-define void @test_simple_call() gc "statepoint-example" {
-; CHECK-LABEL: test_simple_call
-entry:
- br label %other
-other:
-; CHECK-LABEL: other
-; CHECK: statepoint
-; CHECK-NOT: gc.result
- call void @foo()
- ret void
-}
-
declare zeroext i1 @i1_return_i1(i1)
define i1 @test_call_with_result() gc "statepoint-example" {
; CHECK-LABEL: test_call_with_result
-; This is checking that a statepoint_poll + statepoint + result is
-; inserted for a function that takes 1 argument.
-; CHECK: gc.statepoint.p0f_isVoidf
-; CHECK: gc.statepoint.p0f_i1i1f
-; CHECK: (i64 2882400000, i32 0, i1 (i1)* @i1_return_i1, i32 1, i32 0, i1 false, i32 0, i32 0)
-; CHECK: %call12 = call i1 @llvm.experimental.gc.result.i1
+; This is checking that a statepoint_poll is inserted for a function
+; that takes 1 argument.
+; CHECK: call void @do_safepoint
entry:
%call1 = tail call i1 (i1) @i1_return_i1(i1 false)
ret i1 %call1
diff --git a/test/Transforms/PlaceSafepoints/call-in-loop.ll b/test/Transforms/PlaceSafepoints/call-in-loop.ll
index 9edfeb75e0c4..7601b6c81fe9 100644
--- a/test/Transforms/PlaceSafepoints/call-in-loop.ll
+++ b/test/Transforms/PlaceSafepoints/call-in-loop.ll
@@ -1,7 +1,7 @@
; If there's a call in the loop which dominates the backedge, we
; don't need a safepoint poll (since the callee must contain a
; poll test).
-;; RUN: opt %s -place-safepoints -S | FileCheck %s
+;; RUN: opt < %s -place-safepoints -S | FileCheck %s
declare void @foo()
@@ -10,13 +10,12 @@ define void @test1() gc "statepoint-example" {
entry:
; CHECK-LABEL: entry
-; CHECK: statepoint
+; CHECK: call void @do_safepoint
br label %loop
loop:
; CHECK-LABEL: loop
-; CHECK: @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @foo
-; CHECK-NOT: statepoint
+; CHECK-NOT: call void @do_safepoint
call void @foo()
br label %loop
}
diff --git a/test/Transforms/PlaceSafepoints/finite-loops.ll b/test/Transforms/PlaceSafepoints/finite-loops.ll
index b98073d6a6e6..e7d518781969 100644
--- a/test/Transforms/PlaceSafepoints/finite-loops.ll
+++ b/test/Transforms/PlaceSafepoints/finite-loops.ll
@@ -1,16 +1,16 @@
; Tests to ensure that we are not placing backedge safepoints in
; loops which are clearly finite.
-;; RUN: opt %s -place-safepoints -spp-counted-loop-trip-width=32 -S | FileCheck %s
-;; RUN: opt %s -place-safepoints -spp-counted-loop-trip-width=64 -S | FileCheck %s -check-prefix=COUNTED-64
+;; RUN: opt < %s -place-safepoints -spp-counted-loop-trip-width=32 -S | FileCheck %s
+;; RUN: opt < %s -place-safepoints -spp-counted-loop-trip-width=64 -S | FileCheck %s -check-prefix=COUNTED-64
; A simple counted loop with trivially known range
define void @test1(i32) gc "statepoint-example" {
; CHECK-LABEL: test1
; CHECK-LABEL: entry
-; CHECK: statepoint
+; CHECK: call void @do_safepoint
; CHECK-LABEL: loop
-; CHECK-NOT: statepoint
+; CHECK-NOT: call void @do_safepoint
; CHECK-LABEL: exit
entry:
@@ -30,9 +30,9 @@ exit:
define void @test2(i32) gc "statepoint-example" {
; CHECK-LABEL: test2
; CHECK-LABEL: entry
-; CHECK: statepoint
+; CHECK: call void @do_safepoint
; CHECK-LABEL: loop
-; CHECK-NOT: statepoint
+; CHECK-NOT: call void @do_safepoint
; CHECK-LABEL: exit
entry:
@@ -55,9 +55,9 @@ exit:
define void @test3(i8 %upper) gc "statepoint-example" {
; CHECK-LABEL: test3
; CHECK-LABEL: entry
-; CHECK: statepoint
+; CHECK: call void @do_safepoint
; CHECK-LABEL: loop
-; CHECK-NOT: statepoint
+; CHECK-NOT: call void @do_safepoint
; CHECK-LABEL: exit
entry:
@@ -77,16 +77,16 @@ exit:
define void @test4(i64 %upper) gc "statepoint-example" {
; CHECK-LABEL: test4
; CHECK-LABEL: entry
-; CHECK: statepoint
+; CHECK: call void @do_safepoint
; CHECK-LABEL: loop
-; CHECK: statepoint
+; CHECK: call void @do_safepoint
; CHECK-LABEL: exit
; COUNTED-64-LABEL: test4
; COUNTED-64-LABEL: entry
-; COUNTED-64: statepoint
+; COUNTED-64: call void @do_safepoint
; COUNTED-64-LABEL: loop
-; COUNTED-64-NOT: statepoint
+; COUNTED-64-NOT: call void @do_safepoint
; COUNTED-64-LABEL: exit
entry:
@@ -107,16 +107,16 @@ exit:
define void @test5(i64 %upper) gc "statepoint-example" {
; CHECK-LABEL: test5
; CHECK-LABEL: entry
-; CHECK: statepoint
+; CHECK: call void @do_safepoint
; CHECK-LABEL: loop
-; CHECK: statepoint
+; CHECK: call void @do_safepoint
; CHECK-LABEL: exit
; COUNTED-64-LABEL: test5
; COUNTED-64-LABEL: entry
-; COUNTED-64: statepoint
+; COUNTED-64: call void @do_safepoint
; COUNTED-64-LABEL: loop
-; COUNTED-64: statepoint
+; COUNTED-64: call void @do_safepoint
; COUNTED-64-LABEL: exit
entry:
diff --git a/test/Transforms/PlaceSafepoints/memset.ll b/test/Transforms/PlaceSafepoints/memset.ll
index 534b2f120581..7edca282292b 100644
--- a/test/Transforms/PlaceSafepoints/memset.ll
+++ b/test/Transforms/PlaceSafepoints/memset.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -place-safepoints %s | FileCheck %s
+; RUN: opt < %s -S -place-safepoints | FileCheck %s
define void @test(i32, i8 addrspace(1)* %ptr) gc "statepoint-example" {
; CHECK-LABEL: @test
diff --git a/test/Transforms/PlaceSafepoints/no-statepoints.ll b/test/Transforms/PlaceSafepoints/no-statepoints.ll
new file mode 100644
index 000000000000..ad2442369b88
--- /dev/null
+++ b/test/Transforms/PlaceSafepoints/no-statepoints.ll
@@ -0,0 +1,23 @@
+; RUN: opt -S -place-safepoints < %s | FileCheck %s
+
+declare void @callee()
+
+define void @test() gc "statepoint-example" {
+; CHECK-LABEL: test(
+entry:
+; CHECK: entry:
+; CHECK: call void @do_safepoint()
+ br label %other
+
+other:
+; CHECK: other:
+ call void @callee() "gc-leaf-function"
+; CHECK: call void @do_safepoint()
+ br label %other
+}
+
+declare void @do_safepoint()
+define void @gc.safepoint_poll() {
+ call void @do_safepoint()
+ ret void
+}
diff --git a/test/Transforms/PlaceSafepoints/split-backedge.ll b/test/Transforms/PlaceSafepoints/split-backedge.ll
index b9fad45709d0..82dc52771358 100644
--- a/test/Transforms/PlaceSafepoints/split-backedge.ll
+++ b/test/Transforms/PlaceSafepoints/split-backedge.ll
@@ -1,10 +1,10 @@
;; A very basic test to make sure that splitting the backedge keeps working
-;; RUN: opt -place-safepoints -spp-split-backedge=1 -S %s | FileCheck %s
+;; RUN: opt < %s -place-safepoints -spp-split-backedge=1 -S | FileCheck %s
define void @test(i32, i1 %cond) gc "statepoint-example" {
; CHECK-LABEL: @test
; CHECK-LABEL: loop.loop_crit_edge
-; CHECK: gc.statepoint
+; CHECK: call void @do_safepoint
; CHECK-NEXT: br label %loop
entry:
br label %loop
@@ -23,10 +23,10 @@ exit:
define void @test2(i32, i1 %cond) gc "statepoint-example" {
; CHECK-LABEL: @test2
; CHECK-LABEL: loop2.loop2_crit_edge:
-; CHECK: gc.statepoint
+; CHECK: call void @do_safepoint
; CHECK-NEXT: br label %loop2
; CHECK-LABEL: loop2.loop_crit_edge:
-; CHECK: gc.statepoint
+; CHECK: call void @do_safepoint
; CHECK-NEXT: br label %loop
entry:
br label %loop
diff --git a/test/Transforms/PlaceSafepoints/statepoint-coreclr.ll b/test/Transforms/PlaceSafepoints/statepoint-coreclr.ll
index 0228549025ef..5914b2c51411 100644
--- a/test/Transforms/PlaceSafepoints/statepoint-coreclr.ll
+++ b/test/Transforms/PlaceSafepoints/statepoint-coreclr.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -S -place-safepoints | FileCheck %s
+; RUN: opt < %s -S -place-safepoints | FileCheck %s
; Basic test to make sure that safepoints are placed
; for CoreCLR GC
@@ -8,11 +8,9 @@ declare void @foo()
define void @test_simple_call() gc "coreclr" {
; CHECK-LABEL: test_simple_call
entry:
+; CHECK: call void @do_safepoint
br label %other
other:
-; CHECK-LABEL: other
-; CHECK: statepoint
-; CHECK-NOT: gc.result
call void @foo()
ret void
}
diff --git a/test/Transforms/PlaceSafepoints/statepoint-frameescape.ll b/test/Transforms/PlaceSafepoints/statepoint-frameescape.ll
index c4e250957a8f..bd646bd2f549 100644
--- a/test/Transforms/PlaceSafepoints/statepoint-frameescape.ll
+++ b/test/Transforms/PlaceSafepoints/statepoint-frameescape.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -S -place-safepoints | FileCheck %s
+; RUN: opt < %s -S -place-safepoints | FileCheck %s
declare void @llvm.localescape(...)
@@ -9,7 +9,7 @@ entry:
; CHECK-LABEL: entry
; CHECK-NEXT: alloca
; CHECK-NEXT: localescape
-; CHECK-NEXT: statepoint
+; CHECK-NEXT: call void @do_safepoint
%ptr = alloca i32
call void (...) @llvm.localescape(i32* %ptr)
ret void
diff --git a/test/Transforms/PreISelIntrinsicLowering/load-relative.ll b/test/Transforms/PreISelIntrinsicLowering/load-relative.ll
new file mode 100644
index 000000000000..43cb0cc5a1d7
--- /dev/null
+++ b/test/Transforms/PreISelIntrinsicLowering/load-relative.ll
@@ -0,0 +1,27 @@
+; RUN: opt -pre-isel-intrinsic-lowering -S -o - %s | FileCheck %s
+; RUN: opt -passes='pre-isel-intrinsic-lowering' -S -o - %s | FileCheck %s
+
+; CHECK: define i8* @foo32(i8* [[P:%.*]], i32 [[O:%.*]])
+define i8* @foo32(i8* %p, i32 %o) {
+ ; CHECK: [[OP:%.*]] = getelementptr i8, i8* [[P]], i32 [[O]]
+ ; CHECK: [[OPI32:%.*]] = bitcast i8* [[OP]] to i32*
+ ; CHECK: [[OI32:%.*]] = load i32, i32* [[OPI32]], align 4
+ ; CHECK: [[R:%.*]] = getelementptr i8, i8* [[P]], i32 [[OI32]]
+ ; CHECK: ret i8* [[R]]
+ %l = call i8* @llvm.load.relative.i32(i8* %p, i32 %o)
+ ret i8* %l
+}
+
+; CHECK: define i8* @foo64(i8* [[P:%.*]], i64 [[O:%.*]])
+define i8* @foo64(i8* %p, i64 %o) {
+ ; CHECK: [[OP:%.*]] = getelementptr i8, i8* [[P]], i64 [[O]]
+ ; CHECK: [[OPI32:%.*]] = bitcast i8* [[OP]] to i32*
+ ; CHECK: [[OI32:%.*]] = load i32, i32* [[OPI32]], align 4
+ ; CHECK: [[R:%.*]] = getelementptr i8, i8* [[P]], i32 [[OI32]]
+ ; CHECK: ret i8* [[R]]
+ %l = call i8* @llvm.load.relative.i64(i8* %p, i64 %o)
+ ret i8* %l
+}
+
+declare i8* @llvm.load.relative.i32(i8*, i32)
+declare i8* @llvm.load.relative.i64(i8*, i64)
diff --git a/test/Transforms/Reassociate/basictest.ll b/test/Transforms/Reassociate/basictest.ll
index c557017b4c6b..11c67bea2cb0 100644
--- a/test/Transforms/Reassociate/basictest.ll
+++ b/test/Transforms/Reassociate/basictest.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -reassociate -gvn -instcombine -S | FileCheck %s
+; RUN: opt < %s -passes='reassociate,gvn,instcombine' -S | FileCheck %s
define i32 @test1(i32 %arg) {
%tmp1 = sub i32 -12, %arg
diff --git a/test/Transforms/Reassociate/prev_insts_canonicalized.ll b/test/Transforms/Reassociate/prev_insts_canonicalized.ll
new file mode 100644
index 000000000000..649761e57c9a
--- /dev/null
+++ b/test/Transforms/Reassociate/prev_insts_canonicalized.ll
@@ -0,0 +1,57 @@
+; RUN: opt < %s -reassociate -S | FileCheck %s
+
+; These tests make sure that before processing insts
+; any previous instructions are already canonicalized.
+define i32 @foo(i32 %in) {
+; CHECK-LABEL: @foo
+; CHECK-NEXT: %factor = mul i32 %in, -4
+; CHECK-NEXT: %factor1 = mul i32 %in, 2
+; CHECK-NEXT: %_3 = add i32 %factor, 1
+; CHECK-NEXT: %_5 = add i32 %_3, %factor1
+; CHECK-NEXT: ret i32 %_5
+ %_0 = add i32 %in, 1
+ %_1 = mul i32 %in, -2
+ %_2 = add i32 %_0, %_1
+ %_3 = add i32 %_1, %_2
+ %_4 = add i32 %_3, 1
+ %_5 = add i32 %in, %_3
+ ret i32 %_5
+}
+
+; CHECK-LABEL: @foo1
+define void @foo1(float %in, i1 %cmp) {
+wrapper_entry:
+ br label %foo1
+
+for.body:
+ %0 = fadd float %in1, %in1
+ br label %foo1
+
+foo1:
+ %_0 = fmul fast float %in, -3.000000e+00
+ %_1 = fmul fast float %_0, 3.000000e+00
+ %in1 = fadd fast float -3.000000e+00, %_1
+ %in1use = fadd fast float %in1, %in1
+ br label %for.body
+
+
+}
+
+; CHECK-LABEL: @foo2
+define void @foo2(float %in, i1 %cmp) {
+wrapper_entry:
+ br label %for.body
+
+for.body:
+; If the operands of the phi are scheduled for processing before
+; foo1 is processed, the invariants of reassociate are not preserved
+ %unused = phi float [%in1, %foo1], [undef, %wrapper_entry]
+ br label %foo1
+
+foo1:
+ %_0 = fmul fast float %in, -3.000000e+00
+ %_1 = fmul fast float %_0, 3.000000e+00
+ %in1 = fadd fast float -3.000000e+00, %_1
+ %in1use = fadd fast float %in1, %in1
+ br label %for.body
+}
diff --git a/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll b/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll
index c2cdffce61e4..7d82ef7e7a2f 100644
--- a/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll
+++ b/test/Transforms/Reassociate/reassoc-intermediate-fnegs.ll
@@ -1,8 +1,8 @@
; RUN: opt < %s -reassociate -S | FileCheck %s
; CHECK-LABEL: faddsubAssoc1
-; CHECK: [[TMP1:%tmp.*]] = fmul fast half %a, 0xH4500
-; CHECK: [[TMP2:%tmp.*]] = fmul fast half %b, 0xH4500
-; CHECK: fsub fast half [[TMP2]], [[TMP1]]
+; CHECK: [[TMP1:%.*]] = fsub fast half 0xH8000, %a
+; CHECK: [[TMP2:%.*]] = fadd fast half %b, [[TMP1]]
+; CHECK: fmul fast half [[TMP2]], 0xH4500
; CHECK: ret
; Input is A op (B op C)
define half @faddsubAssoc1(half %a, half %b) {
diff --git a/test/Transforms/Reassociate/xor_reassoc.ll b/test/Transforms/Reassociate/xor_reassoc.ll
index 0bed6f358808..a22689805fb5 100644
--- a/test/Transforms/Reassociate/xor_reassoc.ll
+++ b/test/Transforms/Reassociate/xor_reassoc.ll
@@ -88,8 +88,8 @@ define i32 @xor_special2(i32 %x, i32 %y) {
%xor1 = xor i32 %xor, %and
ret i32 %xor1
; CHECK-LABEL: @xor_special2(
-; CHECK: %xor = xor i32 %x, 123
-; CHECK: %xor1 = xor i32 %xor, %y
+; CHECK: %xor = xor i32 %y, 123
+; CHECK: %xor1 = xor i32 %xor, %x
; CHECK: ret i32 %xor1
}
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-1.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-1.ll
index 3fd7fd9282f1..54e9f41c99be 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-1.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-1.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
+; RUN: opt < %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
; CHECK: derived %merged_value base %merged_value.base
@@ -8,21 +8,18 @@ define i64 addrspace(1)* @test(i64 addrspace(1)* %base_obj_x, i64 addrspace(1)*
entry:
br i1 %runtime_condition, label %here, label %there
-here:
+here: ; preds = %entry
%x = getelementptr i64, i64 addrspace(1)* %base_obj_x, i32 1
br label %merge
-there:
+there: ; preds = %entry
%y = getelementptr i64, i64 addrspace(1)* %base_obj_y, i32 1
br label %merge
-merge:
+merge: ; preds = %there, %here
; CHECK-LABEL: merge:
; CHECK: %merged_value.base = phi i64 addrspace(1)* [ %base_obj_x, %here ], [ %base_obj_y, %there ]
%merged_value = phi i64 addrspace(1)* [ %x, %here ], [ %y, %there ]
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @site_for_call_safpeoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call void @site_for_call_safpeoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
ret i64 addrspace(1)* %merged_value
}
-
-declare void @foo()
-declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-10.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-10.ll
index 19f1423eea03..04795741ead6 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-10.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-10.ll
@@ -1,38 +1,35 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
+; RUN: opt < %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
-; CHECK: Base Pairs (w/o Relocation):
-; CHECK-DAG: derived %next base %next.base
-; CHECK-DAG: derived %next_x base %base_obj_x
-; CHECK-DAG: derived %next_y base %base_obj_y
-declare i1 @runtime_value()
+declare i1 @runtime_value() "gc-leaf-function"
+
declare void @do_safepoint()
define void @select_of_phi(i64 addrspace(1)* %base_obj_x, i64 addrspace(1)* %base_obj_y) gc "statepoint-example" {
entry:
br label %loop
-loop:
- %current_x = phi i64 addrspace(1)* [ %base_obj_x , %entry ], [ %next_x, %merge ]
- %current_y = phi i64 addrspace(1)* [ %base_obj_y , %entry ], [ %next_y, %merge ]
- %current = phi i64 addrspace(1)* [ null , %entry ], [ %next , %merge ]
-
+loop: ; preds = %merge, %entry
+ %current_x = phi i64 addrspace(1)* [ %base_obj_x, %entry ], [ %next_x, %merge ]
+ %current_y = phi i64 addrspace(1)* [ %base_obj_y, %entry ], [ %next_y, %merge ]
+ %current = phi i64 addrspace(1)* [ null, %entry ], [ %next, %merge ]
%condition = call i1 @runtime_value()
%next_x = getelementptr i64, i64 addrspace(1)* %current_x, i32 1
%next_y = getelementptr i64, i64 addrspace(1)* %current_y, i32 1
-
br i1 %condition, label %true, label %false
-true:
+true: ; preds = %loop
br label %merge
-false:
+false: ; preds = %loop
br label %merge
-merge:
+merge: ; preds = %false, %true
%next = phi i64 addrspace(1)* [ %next_x, %true ], [ %next_y, %false ]
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
br label %loop
}
-
-declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
+; CHECK: Base Pairs (w/o Relocation):
+; CHECK-DAG: derived %next base %next.base
+; CHECK-DAG: derived %next_x base %base_obj_x
+; CHECK-DAG: derived %next_y base %base_obj_y
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-11.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-11.ll
index a28c925f7828..5149a2918152 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-11.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-11.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
+; RUN: opt < %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
; CHECK: derived %next base %base_obj
@@ -19,8 +19,6 @@ loop: ; preds = %loop, %entry
; CHECK-DAG: [ %next.relocated.casted, %loop ]
%current = phi i64 addrspace(1)* [ %obj, %entry ], [ %next, %loop ]
%next = getelementptr i64, i64 addrspace(1)* %current, i32 1
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
br label %loop
}
-
-declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-12.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-12.ll
index 5ebff642347d..4706ce70df18 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-12.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-12.ll
@@ -1,6 +1,6 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
+; RUN: opt < %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
-; CHECK: derived %select base @global
+; CHECK: derived %select base null
@global = external addrspace(1) global i8
@@ -8,7 +8,7 @@ define i8 @test(i1 %cond) gc "statepoint-example" {
%derived1 = getelementptr i8, i8 addrspace(1)* @global, i64 1
%derived2 = getelementptr i8, i8 addrspace(1)* @global, i64 2
%select = select i1 %cond, i8 addrspace(1)* %derived1, i8 addrspace(1)* %derived2
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @extern, i32 0, i32 0, i32 0, i32 0)
+ call void @extern()
; CHECK-NOT: relocate
; CHECK: %load = load i8, i8 addrspace(1)* %select
%load = load i8, i8 addrspace(1)* %select
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-13.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-13.ll
index 8e43e638f989..d01c771349e1 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-13.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-13.ll
@@ -1,12 +1,12 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
+; RUN: opt < %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
-; CHECK: derived %derived base @global
+; CHECK: derived %derived base null
@global = external addrspace(1) global i8
define i8 @test(i64 %offset) gc "statepoint-example" {
%derived = getelementptr i8, i8 addrspace(1)* @global, i64 %offset
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @extern, i32 0, i32 0, i32 0, i32 0)
+ call void @extern()
; CHECK-NOT: relocate
; CHECK-NOT: remat
; CHECK: %load = load i8, i8 addrspace(1)* %derived
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-2.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-2.ll
index 802ce5d79a33..f7676d272f58 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-2.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-2.ll
@@ -1,21 +1,19 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
+; RUN: opt < %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
; CHECK: derived %merged_value base %base_obj
-
define i64 addrspace(1)* @test(i64 addrspace(1)* %base_obj, i1 %runtime_condition) gc "statepoint-example" {
entry:
br i1 %runtime_condition, label %merge, label %there
-there:
+there: ; preds = %entry
%derived_obj = getelementptr i64, i64 addrspace(1)* %base_obj, i32 1
br label %merge
-merge:
+merge: ; preds = %there, %entry
%merged_value = phi i64 addrspace(1)* [ %base_obj, %entry ], [ %derived_obj, %there ]
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
ret i64 addrspace(1)* %merged_value
}
declare void @foo()
-declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-3.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-3.ll
index e0035d353887..6f54f8929869 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-3.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-3.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
+; RUN: opt < %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
; CHECK: derived %next.i64 base %base_obj
@@ -7,14 +7,13 @@ entry:
%obj = getelementptr i64, i64 addrspace(1)* %base_obj, i32 1
br label %loop
-loop:
+loop: ; preds = %loop, %entry
%current = phi i64 addrspace(1)* [ %obj, %entry ], [ %next.i64, %loop ]
%current.i32 = bitcast i64 addrspace(1)* %current to i32 addrspace(1)*
%next.i32 = getelementptr i32, i32 addrspace(1)* %current.i32, i32 1
%next.i64 = bitcast i32 addrspace(1)* %next.i32 to i64 addrspace(1)*
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
br label %loop
}
declare void @do_safepoint()
-declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-4.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-4.ll
index 4e0bb14cb453..5694cfd5ecb0 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-4.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-4.ll
@@ -1,52 +1,44 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
+; RUN: opt < %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
-
-; CHECK: derived %obj_to_consume base %obj_to_consume
+; CHECK: derived %obj_to_consume base %obj_to_consume.base
declare void @foo()
+
declare i64 addrspace(1)* @generate_obj()
+
declare void @consume_obj(i64 addrspace(1)*)
define void @test(i32 %condition) gc "statepoint-example" {
entry:
br label %loop
-loop:
+loop: ; preds = %merge.split, %entry
; CHECK: loop:
-; CHECK: %safepoint_token1 = call token (i64, i32, i64 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64f(i64 0, i32 0, i64 addrspace(1)* ()* @generate_obj, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i3
-; CHECK-NEXT: %obj2 = call i64 addrspace(1)* @llvm.experimental.gc.result
- %safepoint_token1 = call token (i64, i32, i64 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64f(i64 0, i32 0, i64 addrspace(1)* ()* @generate_obj, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
- %obj2 = call i64 addrspace(1)* @llvm.experimental.gc.result.p1i64(token %safepoint_token1)
+; CHECK: [[TOKEN_0:%[^ ]+]] = call token (i64, i32, i64 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64f(i64 2882400000, i32 0, i64 addrspace(1)* ()* @generate_obj, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i3
+; CHECK-NEXT: [[RESULT_0:%[^ ]+]] = call i64 addrspace(1)* @llvm.experimental.gc.result
+ %0 = call i64 addrspace(1)* @generate_obj() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
switch i32 %condition, label %dest_a [
i32 0, label %dest_b
i32 1, label %dest_c
]
-dest_a:
+dest_a: ; preds = %loop
br label %merge
-dest_b:
+dest_b: ; preds = %loop
br label %merge
-dest_c:
+dest_c: ; preds = %loop
br label %merge
-merge:
+merge: ; preds = %dest_c, %dest_b, %dest_a
; CHECK: merge:
-; CHECK: %obj_to_consume = phi i64 addrspace(1)* [ %obj2, %dest_a ], [ null, %dest_b ], [ null, %dest_c ]
-
- %obj_to_consume = phi i64 addrspace(1)* [ %obj2, %dest_a ], [ null, %dest_b ], [ null, %dest_c ]
- %safepoint_token3 = call token (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @consume_obj, i32 1, i32 0, i64 addrspace(1)* %obj_to_consume, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+; CHECK: %obj_to_consume = phi i64 addrspace(1)* [ [[RESULT_0]], %dest_a ], [ null, %dest_b ], [ null, %dest_c ]
+ %obj_to_consume = phi i64 addrspace(1)* [ %0, %dest_a ], [ null, %dest_b ], [ null, %dest_c ]
+ call void @consume_obj(i64 addrspace(1)* %obj_to_consume) [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
br label %merge.split
merge.split: ; preds = %merge
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
br label %loop
}
-
-
-; Function Attrs: nounwind
-declare i64 addrspace(1)* @llvm.experimental.gc.result.p1i64(token) #0
-declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
-declare token @llvm.experimental.gc.statepoint.p0f_p1i64f(i64, i32, i64 addrspace(1)* ()*, i32, i32, ...)
-declare token @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-5.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-5.ll
index c5acd2962f9e..c1e3a368de00 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-5.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-5.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
+; RUN: opt < %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
; CHECK: derived %merged_value base %merged_value.base
@@ -8,23 +8,21 @@ define i64 addrspace(1)* @test(i64 addrspace(1)* %base_obj_x, i64 addrspace(1)*
entry:
br i1 %runtime_condition, label %here, label %there
-here:
+here: ; preds = %entry
br label %bump
-bump:
+bump: ; preds = %here
br label %merge
-there:
+there: ; preds = %entry
%y = getelementptr i64, i64 addrspace(1)* %base_obj_y, i32 1
br label %merge
-merge:
+merge: ; preds = %there, %bump
; CHECK: merge:
; CHECK: %merged_value.base = phi i64 addrspace(1)* [ %base_obj_x, %bump ], [ %base_obj_y, %there ]
; CHECK-NEXT: %merged_value = phi i64 addrspace(1)* [ %base_obj_x, %bump ], [ %y, %there ]
%merged_value = phi i64 addrspace(1)* [ %base_obj_x, %bump ], [ %y, %there ]
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
ret i64 addrspace(1)* %merged_value
}
-
-declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-6.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-6.ll
index 95a42846a2fe..5db6d7ad6aed 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-6.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-6.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
+; RUN: opt < %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
; CHECK: derived %merged_value base %merged_value.base
@@ -8,34 +8,30 @@ define i64 addrspace(1)* @test(i64 addrspace(1)* %base_obj_x, i64 addrspace(1)*
entry:
br i1 %runtime_condition_x, label %here, label %there
-here:
- br i1 %runtime_condition_y, label %bump_here_a, label %bump_here_b
+here: ; preds = %entry
+ br i1 %runtime_condition_y, label %bump_here_a, label %bump_here_b
-bump_here_a:
+bump_here_a: ; preds = %here
%x_a = getelementptr i64, i64 addrspace(1)* %base_obj_x, i32 1
br label %merge_here
-bump_here_b:
+bump_here_b: ; preds = %here
%x_b = getelementptr i64, i64 addrspace(1)* %base_obj_x, i32 2
br label %merge_here
-
-merge_here:
- %x = phi i64 addrspace(1)* [ %x_a , %bump_here_a ], [ %x_b , %bump_here_b ]
+merge_here: ; preds = %bump_here_b, %bump_here_a
+ %x = phi i64 addrspace(1)* [ %x_a, %bump_here_a ], [ %x_b, %bump_here_b ]
br label %merge
-there:
+there: ; preds = %entry
%y = getelementptr i64, i64 addrspace(1)* %base_obj_y, i32 1
br label %merge
-merge:
+merge: ; preds = %there, %merge_here
; CHECK: merge:
; CHECK: %merged_value.base = phi i64 addrspace(1)* [ %base_obj_x, %merge_here ], [ %base_obj_y, %there ]
; CHECK-NEXT: %merged_value = phi i64 addrspace(1)* [ %x, %merge_here ], [ %y, %there ]
%merged_value = phi i64 addrspace(1)* [ %x, %merge_here ], [ %y, %there ]
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @site_for_call_safpeoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call void @site_for_call_safpeoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
ret i64 addrspace(1)* %merged_value
}
-
-declare void @do_safepoint()
-declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-7.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-7.ll
index 49cf20eab191..930a8380df80 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-7.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-7.ll
@@ -1,41 +1,38 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
+; RUN: opt < %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
; CHECK: derived %merged_value base %merged_value.base
declare void @site_for_call_safpeoint()
-define i64 addrspace(1)* @test(i64 addrspace(1)* %base_obj_x,
- i64 addrspace(1)* %base_obj_y, i1 %runtime_condition_x,
- i1 %runtime_condition_y) gc "statepoint-example" {
+define i64 addrspace(1)* @test(i64 addrspace(1)* %base_obj_x, i64 addrspace(1)* %base_obj_y, i1 %runtime_condition_x, i1 %runtime_condition_y) gc "statepoint-example" {
entry:
br i1 %runtime_condition_x, label %here, label %there
-here:
- br i1 %runtime_condition_y, label %bump_here_a, label %bump_here_b
+here: ; preds = %entry
+ br i1 %runtime_condition_y, label %bump_here_a, label %bump_here_b
-bump_here_a:
+bump_here_a: ; preds = %here
%x_a = getelementptr i64, i64 addrspace(1)* %base_obj_x, i32 1
br label %merge_here
-bump_here_b:
+bump_here_b: ; preds = %here
%x_b = getelementptr i64, i64 addrspace(1)* %base_obj_y, i32 2
br label %merge_here
-
-merge_here:
+merge_here: ; preds = %bump_here_b, %bump_here_a
; CHECK: merge_here:
; CHECK-DAG: %x.base
; CHECK-DAG: phi i64 addrspace(1)*
; CHECK-DAG: [ %base_obj_x, %bump_here_a ]
; CHECK-DAG: [ %base_obj_y, %bump_here_b ]
- %x = phi i64 addrspace(1)* [ %x_a , %bump_here_a ], [ %x_b , %bump_here_b ]
+ %x = phi i64 addrspace(1)* [ %x_a, %bump_here_a ], [ %x_b, %bump_here_b ]
br label %merge
-there:
+there: ; preds = %entry
%y = getelementptr i64, i64 addrspace(1)* %base_obj_y, i32 1
br label %merge
-merge:
+merge: ; preds = %there, %merge_here
; CHECK: merge:
; CHECK-DAG: %merged_value.base
; CHECK-DAG: phi i64 addrspace(1)*
@@ -43,10 +40,6 @@ merge:
; CHECK-DAG: [ %base_obj_y, %there ]
; CHECK: %merged_value = phi i64 addrspace(1)* [ %x, %merge_here ], [ %y, %there ]
%merged_value = phi i64 addrspace(1)* [ %x, %merge_here ], [ %y, %there ]
-
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @site_for_call_safpeoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call void @site_for_call_safpeoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
ret i64 addrspace(1)* %merged_value
}
-
-declare void @do_safepoint()
-declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-8.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-8.ll
index e5ef42dda24b..2f7fcd9974be 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-8.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-8.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
+; RUN: opt < %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
; CHECK: derived %next_element_ptr base %array_obj
@@ -10,29 +10,28 @@ entry:
%array_elems = bitcast i32 addrspace(1)* %array_len_pointer.i32 to i64 addrspace(1)* addrspace(1)*
br label %loop_check
-loop_check:
+loop_check: ; preds = %loop_back, %entry
%index = phi i32 [ 0, %entry ], [ %next_index, %loop_back ]
%current_element_ptr = phi i64 addrspace(1)* addrspace(1)* [ %array_elems, %entry ], [ %next_element_ptr, %loop_back ]
%index_lt = icmp ult i32 %index, %array_len
br i1 %index_lt, label %check_for_null, label %not_found
-check_for_null:
+check_for_null: ; preds = %loop_check
%current_element = load i64 addrspace(1)*, i64 addrspace(1)* addrspace(1)* %current_element_ptr
%is_null = icmp eq i64 addrspace(1)* %current_element, null
br i1 %is_null, label %found, label %loop_back
-loop_back:
+loop_back: ; preds = %check_for_null
%next_element_ptr = getelementptr i64 addrspace(1)*, i64 addrspace(1)* addrspace(1)* %current_element_ptr, i32 1
%next_index = add i32 %index, 1
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
br label %loop_check
-not_found:
+not_found: ; preds = %loop_check
ret i32 -1
-found:
+found: ; preds = %check_for_null
ret i32 %index
}
declare void @do_safepoint()
-declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers-9.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers-9.ll
index 946d89a08e27..bf49f69515cf 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers-9.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers-9.ll
@@ -1,21 +1,20 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
+; RUN: opt < %s -rewrite-statepoints-for-gc -spp-print-base-pointers -S 2>&1 | FileCheck %s
; CHECK: derived %next base %base_obj
-declare i1 @runtime_value()
+declare i1 @runtime_value() "gc-leaf-function"
define void @maybe_GEP(i64 addrspace(1)* %base_obj) gc "statepoint-example" {
entry:
br label %loop
-loop:
+loop: ; preds = %loop, %entry
%current = phi i64 addrspace(1)* [ %base_obj, %entry ], [ %next, %loop ]
%condition = call i1 @runtime_value()
%maybe_next = getelementptr i64, i64 addrspace(1)* %current, i32 1
%next = select i1 %condition, i64 addrspace(1)* %maybe_next, i64 addrspace(1)* %current
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
br label %loop
}
declare void @do_safepoint()
-declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
diff --git a/test/Transforms/RewriteStatepointsForGC/base-pointers.ll b/test/Transforms/RewriteStatepointsForGC/base-pointers.ll
index cd0473a67678..e65897e7a899 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-pointers.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-pointers.ll
@@ -1,22 +1,24 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -S 2>&1 | FileCheck %s
-
-declare i64 addrspace(1)* @generate_obj()
-declare void @use_obj(i64 addrspace(1)*)
+; RUN: opt < %s -rewrite-statepoints-for-gc -S 2>&1 | FileCheck %s
; The rewriting needs to make %obj loop variant by inserting a phi
; of the original value and it's relocation.
+
+declare i64 addrspace(1)* @generate_obj() "gc-leaf-function"
+
+declare void @use_obj(i64 addrspace(1)*) "gc-leaf-function"
+
define void @def_use_safepoint() gc "statepoint-example" {
; CHECK-LABEL: def_use_safepoint
+; CHECK: phi i64 addrspace(1)*
+; CHECK-DAG: [ %obj.relocated.casted, %loop ]
+; CHECK-DAG: [ %obj, %entry ]
entry:
%obj = call i64 addrspace(1)* @generate_obj()
br label %loop
-loop:
-; CHECK: phi i64 addrspace(1)*
-; CHECK-DAG: [ %obj.relocated.casted, %loop ]
-; CHECK-DAG: [ %obj, %entry ]
+loop: ; preds = %loop, %entry
call void @use_obj(i64 addrspace(1)* %obj)
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
br label %loop
}
@@ -26,39 +28,39 @@ declare void @parse_point(i64 addrspace(1)*)
define i64 addrspace(1)* @test1(i32 %caller, i8 addrspace(1)* %a, i8 addrspace(1)* %b, i32 %unknown) gc "statepoint-example" {
; CHECK-LABEL: test1
- entry:
+entry:
br i1 undef, label %left, label %right
- left:
- %a.cast = bitcast i8 addrspace(1)* %a to i64 addrspace(1)*
+left: ; preds = %entry
; CHECK: left:
; CHECK-NEXT: %a.cast = bitcast i8 addrspace(1)* %a to i64 addrspace(1)*
; CHECK-NEXT: [[CAST_L:%.*]] = bitcast i8 addrspace(1)* %a to i64 addrspace(1)*
-
; Our safepoint placement pass calls removeUnreachableBlocks, which does a bunch
; of simplifications to branch instructions. This bug is visible only when
; there are multiple branches into the same block from the same predecessor, and
; the following ceremony is to make that artefact survive a call to
; removeUnreachableBlocks. As an example, "br i1 undef, label %merge, label %merge"
; will get simplified to "br label %merge" by removeUnreachableBlocks.
- switch i32 %unknown, label %right [ i32 0, label %merge
- i32 1, label %merge
- i32 5, label %merge
- i32 3, label %right ]
-
- right:
- %b.cast = bitcast i8 addrspace(1)* %b to i64 addrspace(1)*
- br label %merge
+ %a.cast = bitcast i8 addrspace(1)* %a to i64 addrspace(1)*
+ switch i32 %unknown, label %right [
+ i32 0, label %merge
+ i32 1, label %merge
+ i32 5, label %merge
+ i32 3, label %right
+ ]
+
+right: ; preds = %left, %left, %entry
; CHECK: right:
; CHECK-NEXT: %b.cast = bitcast i8 addrspace(1)* %b to i64 addrspace(1)*
; CHECK-NEXT: [[CAST_R:%.*]] = bitcast i8 addrspace(1)* %b to i64 addrspace(1)*
+ %b.cast = bitcast i8 addrspace(1)* %b to i64 addrspace(1)*
+ br label %merge
- merge:
+merge: ; preds = %right, %left, %left, %left
; CHECK: merge:
; CHECK-NEXT: %value.base = phi i64 addrspace(1)* [ [[CAST_L]], %left ], [ [[CAST_L]], %left ], [ [[CAST_L]], %left ], [ [[CAST_R]], %right ], !is_base_value !0
- %value = phi i64 addrspace(1)* [ %a.cast, %left], [ %a.cast, %left], [ %a.cast, %left], [ %b.cast, %right]
- %safepoint_token = call token (i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64 0, i32 0, void (i64 addrspace(1)*)* @parse_point, i32 1, i32 0, i64 addrspace(1)* %value, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0)
-
+ %value = phi i64 addrspace(1)* [ %a.cast, %left ], [ %a.cast, %left ], [ %a.cast, %left ], [ %b.cast, %right ]
+ call void @parse_point(i64 addrspace(1)* %value) [ "deopt"(i32 0, i32 0, i32 0, i32 0, i32 0) ]
ret i64 addrspace(1)* %value
}
@@ -66,86 +68,87 @@ define i64 addrspace(1)* @test1(i32 %caller, i8 addrspace(1)* %a, i8 addrspace(1
;; base defining value with inherent conflicts, we end up with a *single*
;; base phi/select per such node. This is testing an optimization, not a
;; fundemental correctness criteria
-define void @test2(i1 %cnd, i64 addrspace(1)* %base_obj, i64 addrspace(1)* %base_arg2) gc "statepoint-example" {
+define void @test2(i1 %cnd, i64 addrspace(1)* %base_obj, i64 addrspace(1)* %base_arg2) gc "statepoint-example" {
; CHECK-LABEL: @test2
entry:
%obj = getelementptr i64, i64 addrspace(1)* %base_obj, i32 1
br label %loop
-
-loop: ; preds = %loop, %entry
; CHECK-LABEL: loop
; CHECK: %current.base = phi i64 addrspace(1)*
; CHECK-DAG: [ %base_obj, %entry ]
+
; Given the two selects are equivelent, so are their base phis - ideally,
; we'd have commoned these, but that's a missed optimization, not correctness.
; CHECK-DAG: [ [[DISCARD:%.*.base.relocated.casted]], %loop ]
; CHECK-NOT: extra.base
+; CHECK: next.base = select
; CHECK: next = select
; CHECK: extra2.base = select
; CHECK: extra2 = select
; CHECK: statepoint
;; Both 'next' and 'extra2' are live across the backedge safepoint...
+
+loop: ; preds = %loop, %entry
%current = phi i64 addrspace(1)* [ %obj, %entry ], [ %next, %loop ]
%extra = phi i64 addrspace(1)* [ %obj, %entry ], [ %extra2, %loop ]
%nexta = getelementptr i64, i64 addrspace(1)* %current, i32 1
%next = select i1 %cnd, i64 addrspace(1)* %nexta, i64 addrspace(1)* %base_arg2
%extra2 = select i1 %cnd, i64 addrspace(1)* %nexta, i64 addrspace(1)* %base_arg2
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
br label %loop
}
-define i64 addrspace(1)* @test3(i1 %cnd, i64 addrspace(1)* %obj,
- i64 addrspace(1)* %obj2)
- gc "statepoint-example" {
+define i64 addrspace(1)* @test3(i1 %cnd, i64 addrspace(1)* %obj, i64 addrspace(1)* %obj2) gc "statepoint-example" {
; CHECK-LABEL: @test3
entry:
br i1 %cnd, label %merge, label %taken
-taken:
+
+taken: ; preds = %entry
br label %merge
-merge:
+
+merge: ; preds = %taken, %entry
; CHECK-LABEL: merge:
-; CHECK-NEXT: %bdv = phi
+; CHECK-NEXT: phi
+; CHECK-NEXT: phi
; CHECK-NEXT: gc.statepoint
%bdv = phi i64 addrspace(1)* [ %obj, %entry ], [ %obj2, %taken ]
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
ret i64 addrspace(1)* %bdv
}
-define i64 addrspace(1)* @test4(i1 %cnd, i64 addrspace(1)* %obj,
- i64 addrspace(1)* %obj2)
- gc "statepoint-example" {
+define i64 addrspace(1)* @test4(i1 %cnd, i64 addrspace(1)* %obj, i64 addrspace(1)* %obj2) gc "statepoint-example" {
; CHECK-LABEL: @test4
entry:
br i1 %cnd, label %merge, label %taken
-taken:
+
+taken: ; preds = %entry
br label %merge
-merge:
+
+merge: ; preds = %taken, %entry
; CHECK-LABEL: merge:
-; CHECK-NEXT: %bdv = phi
+; CHECK-NEXT: phi
; CHECK-NEXT: gc.statepoint
%bdv = phi i64 addrspace(1)* [ %obj, %entry ], [ %obj, %taken ]
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
ret i64 addrspace(1)* %bdv
}
-define i64 addrspace(1)* @test5(i1 %cnd, i64 addrspace(1)* %obj,
- i64 addrspace(1)* %obj2)
- gc "statepoint-example" {
+define i64 addrspace(1)* @test5(i1 %cnd, i64 addrspace(1)* %obj, i64 addrspace(1)* %obj2) gc "statepoint-example" {
; CHECK-LABEL: @test5
entry:
br label %merge
-merge:
+
+merge: ; preds = %merge, %entry
; CHECK-LABEL: merge:
-; CHECK-NEXT: %bdv = phi
+; CHECK-NEXT: phi
+; CHECK-NEXT: phi
; CHECK-NEXT: br i1
%bdv = phi i64 addrspace(1)* [ %obj, %entry ], [ %obj2, %merge ]
br i1 %cnd, label %merge, label %next
-next:
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+
+next: ; preds = %merge
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
ret i64 addrspace(1)* %bdv
}
-
declare void @foo()
-declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
-declare token @llvm.experimental.gc.statepoint.p0f_isVoidp1i64f(i64, i32, void (i64 addrspace(1)*)*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/base-vector.ll b/test/Transforms/RewriteStatepointsForGC/base-vector.ll
index 6084efeb0509..39a78d725977 100644
--- a/test/Transforms/RewriteStatepointsForGC/base-vector.ll
+++ b/test/Transforms/RewriteStatepointsForGC/base-vector.ll
@@ -1,4 +1,5 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -S | FileCheck %s
+; RUN: opt < %s -rewrite-statepoints-for-gc -S | FileCheck %s
+
define i64 addrspace(1)* @test(<2 x i64 addrspace(1)*> %vec, i32 %idx) gc "statepoint-example" {
; CHECK-LABEL: @test
@@ -6,130 +7,131 @@ define i64 addrspace(1)* @test(<2 x i64 addrspace(1)*> %vec, i32 %idx) gc "state
; CHECK: extractelement
; CHECK: statepoint
; CHECK: gc.relocate
-; CHECK-DAG: ; (%base_ee, %base_ee)
-; CHECK: gc.relocate
; CHECK-DAG: ; (%base_ee, %obj)
+; CHECK: gc.relocate
+; CHECK-DAG: ; (%base_ee, %base_ee)
; Note that the second extractelement is actually redundant here. A correct output would
; be to reuse the existing obj as a base since it is actually a base pointer.
entry:
%obj = extractelement <2 x i64 addrspace(1)*> %vec, i32 %idx
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
-
+ call void @do_safepoint() [ "deopt"() ]
ret i64 addrspace(1)* %obj
}
-define i64 addrspace(1)* @test2(<2 x i64 addrspace(1)*>* %ptr, i1 %cnd, i32 %idx1, i32 %idx2)
- gc "statepoint-example" {
+define i64 addrspace(1)* @test2(<2 x i64 addrspace(1)*>* %ptr, i1 %cnd, i32 %idx1, i32 %idx2) gc "statepoint-example" {
; CHECK-LABEL: test2
entry:
br i1 %cnd, label %taken, label %untaken
-taken:
+
+taken: ; preds = %entry
%obja = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
br label %merge
-untaken:
+
+untaken: ; preds = %entry
%objb = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
br label %merge
-merge:
- %vec = phi <2 x i64 addrspace(1)*> [%obja, %taken], [%objb, %untaken]
+
+merge: ; preds = %untaken, %taken
+ %vec = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ]
br i1 %cnd, label %taken2, label %untaken2
-taken2:
+
+taken2: ; preds = %merge
%obj0 = extractelement <2 x i64 addrspace(1)*> %vec, i32 %idx1
br label %merge2
-untaken2:
+
+untaken2: ; preds = %merge
%obj1 = extractelement <2 x i64 addrspace(1)*> %vec, i32 %idx2
br label %merge2
-merge2:
+
+merge2: ; preds = %untaken2, %taken2
; CHECK-LABEL: merge2:
-; CHECK-NEXT: %obj = phi i64 addrspace(1)*
-; CHECK-NEXT: statepoint
+; CHECK: %obj.base = phi i64 addrspace(1)*
+; CHECK: %obj = phi i64 addrspace(1)*
+; CHECK: statepoint
+; CHECK: gc.relocate
+; CHECK-DAG: ; (%obj.base, %obj)
; CHECK: gc.relocate
-; CHECK-DAG: ; (%obj, %obj)
- %obj = phi i64 addrspace(1)* [%obj0, %taken2], [%obj1, %untaken2]
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+; CHECK-DAG: ; (%obj.base, %obj.base)
+ %obj = phi i64 addrspace(1)* [ %obj0, %taken2 ], [ %obj1, %untaken2 ]
+ call void @do_safepoint() [ "deopt"() ]
ret i64 addrspace(1)* %obj
}
-define i64 addrspace(1)* @test3(i64 addrspace(1)* %ptr)
- gc "statepoint-example" {
+define i64 addrspace(1)* @test3(i64 addrspace(1)* %ptr) gc "statepoint-example" {
; CHECK-LABEL: test3
-entry:
- %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %ptr, i32 0
- %obj = extractelement <2 x i64 addrspace(1)*> %vec, i32 0
; CHECK: insertelement
; CHECK: extractelement
; CHECK: statepoint
; CHECK: gc.relocate
-; CHECK-DAG: (%obj, %obj)
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+; CHECK-DAG: (%obj.base, %obj)
+entry:
+ %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %ptr, i32 0
+ %obj = extractelement <2 x i64 addrspace(1)*> %vec, i32 0
+ call void @do_safepoint() [ "deopt"() ]
ret i64 addrspace(1)* %obj
}
-define i64 addrspace(1)* @test4(i64 addrspace(1)* %ptr)
- gc "statepoint-example" {
+
+define i64 addrspace(1)* @test4(i64 addrspace(1)* %ptr) gc "statepoint-example" {
; CHECK-LABEL: test4
+; CHECK: statepoint
+; CHECK: gc.relocate
+; CHECK-DAG: ; (%obj.base, %obj)
+; When we can optimize an extractelement from a known
+; index and avoid introducing new base pointer instructions
entry:
%derived = getelementptr i64, i64 addrspace(1)* %ptr, i64 16
%veca = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %derived, i32 0
%vec = insertelement <2 x i64 addrspace(1)*> %veca, i64 addrspace(1)* %ptr, i32 1
%obj = extractelement <2 x i64 addrspace(1)*> %vec, i32 0
-; CHECK: statepoint
-; CHECK: gc.relocate
-; CHECK-DAG: ; (%ptr, %obj)
-; CHECK: gc.relocate
-; CHECK-DAG: ; (%ptr, %ptr)
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+ call void @do_safepoint() [ "deopt"() ]
ret i64 addrspace(1)* %obj
}
-declare void @use(i64 addrspace(1)*)
+declare void @use(i64 addrspace(1)*) "gc-leaf-function"
-; When we can optimize an extractelement from a known
-; index and avoid introducing new base pointer instructions
-define void @test5(i1 %cnd, i64 addrspace(1)* %obj)
- gc "statepoint-example" {
+define void @test5(i1 %cnd, i64 addrspace(1)* %obj) gc "statepoint-example" {
; CHECK-LABEL: @test5
; CHECK: gc.relocate
-; CHECK-DAG: (%obj, %bdv)
+; CHECK-DAG: (%bdv.base, %bdv)
+; When we fundementally have to duplicate
entry:
%gep = getelementptr i64, i64 addrspace(1)* %obj, i64 1
%vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %gep, i32 0
%bdv = extractelement <2 x i64 addrspace(1)*> %vec, i32 0
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
call void @use(i64 addrspace(1)* %bdv)
ret void
}
-; When we fundementally have to duplicate
-define void @test6(i1 %cnd, i64 addrspace(1)* %obj, i64 %idx)
- gc "statepoint-example" {
+define void @test6(i1 %cnd, i64 addrspace(1)* %obj, i64 %idx) gc "statepoint-example" {
; CHECK-LABEL: @test6
; CHECK: %gep = getelementptr i64, i64 addrspace(1)* %obj, i64 1
-; CHECK: %vec.base = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %obj, i32 0, !is_base_value !0
+; CHECK: %vec.base = insertelement <2 x i64 addrspace(1)*> zeroinitializer, i64 addrspace(1)* %obj, i32 0, !is_base_value !0
; CHECK: %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %gep, i32 0
; CHECK: %bdv.base = extractelement <2 x i64 addrspace(1)*> %vec.base, i64 %idx, !is_base_value !0
; CHECK: %bdv = extractelement <2 x i64 addrspace(1)*> %vec, i64 %idx
; CHECK: gc.statepoint
; CHECK: gc.relocate
; CHECK-DAG: (%bdv.base, %bdv)
+; A more complicated example involving vector and scalar bases.
+; This is derived from a failing test case when we didn't have correct
+; insertelement handling.
entry:
%gep = getelementptr i64, i64 addrspace(1)* %obj, i64 1
%vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %gep, i32 0
%bdv = extractelement <2 x i64 addrspace(1)*> %vec, i64 %idx
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
call void @use(i64 addrspace(1)* %bdv)
ret void
}
-; A more complicated example involving vector and scalar bases.
-; This is derived from a failing test case when we didn't have correct
-; insertelement handling.
-define i64 addrspace(1)* @test7(i1 %cnd, i64 addrspace(1)* %obj,
- i64 addrspace(1)* %obj2)
- gc "statepoint-example" {
+define i64 addrspace(1)* @test7(i1 %cnd, i64 addrspace(1)* %obj, i64 addrspace(1)* %obj2) gc "statepoint-example" {
; CHECK-LABEL: @test7
entry:
%vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %obj2, i32 0
br label %merge1
-merge1:
+
+merge1: ; preds = %merge1, %entry
; CHECK-LABEL: merge1:
; CHECK: vec2.base
; CHECK: vec2
@@ -140,28 +142,27 @@ merge1:
%gep = getelementptr i64, i64 addrspace(1)* %obj2, i64 1
%vec3 = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %gep, i32 0
br i1 %cnd, label %merge1, label %next1
-next1:
+
+next1: ; preds = %merge1
; CHECK-LABEL: next1:
; CHECK: bdv.base =
; CHECK: bdv =
%bdv = extractelement <2 x i64 addrspace(1)*> %vec2, i32 0
br label %merge
-merge:
+
+merge: ; preds = %merge, %next1
; CHECK-LABEL: merge:
; CHECK: %objb.base
; CHECK: %objb
; CHECK: gc.statepoint
; CHECK: gc.relocate
; CHECK-DAG: (%objb.base, %objb)
-
%objb = phi i64 addrspace(1)* [ %obj, %next1 ], [ %bdv, %merge ]
br i1 %cnd, label %merge, label %next
-next:
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+
+next: ; preds = %merge
+ call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
ret i64 addrspace(1)* %objb
}
-
declare void @do_safepoint()
-
-declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/basic.ll b/test/Transforms/RewriteStatepointsForGC/basic.ll
index c0dc6940e5db..bb2210c7849a 100644
--- a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/basic.ll
+++ b/test/Transforms/RewriteStatepointsForGC/basic.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles < %s | FileCheck %s
+; RUN: opt -S -rewrite-statepoints-for-gc < %s | FileCheck %s
declare void @g()
declare i32 @h()
@@ -63,3 +63,11 @@ define i32 addrspace(1)* @f3(i32 addrspace(1)* %arg) gc "statepoint-example" pe
%lpad = landingpad token cleanup
resume token undef
}
+
+define i32 addrspace(1)* @f4(i32 addrspace(1)* %arg) gc "statepoint-example" {
+; CHECK-LABEL: @f4(
+ entry:
+; CHECK: @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @g, i32 0, i32 1, i32 2, i32 400, i8 90,
+ call void @g() [ "gc-transition"(i32 400, i8 90) ]
+ ret i32 addrspace(1)* %arg
+}
diff --git a/test/Transforms/RewriteStatepointsForGC/basics.ll b/test/Transforms/RewriteStatepointsForGC/basics.ll
index 48f464356865..967a804f7a18 100644
--- a/test/Transforms/RewriteStatepointsForGC/basics.ll
+++ b/test/Transforms/RewriteStatepointsForGC/basics.ll
@@ -1,88 +1,88 @@
; This is a collection of really basic tests for gc.statepoint rewriting.
-; RUN: opt %s -rewrite-statepoints-for-gc -spp-rematerialization-threshold=0 -S | FileCheck %s
+; RUN: opt < %s -rewrite-statepoints-for-gc -spp-rematerialization-threshold=0 -S | FileCheck %s
+
+; Trivial relocation over a single call
declare void @foo()
-; Trivial relocation over a single call
define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj) gc "statepoint-example" {
; CHECK-LABEL: @test1
+entry:
; CHECK-LABEL: entry:
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
-entry:
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+; Two safepoints in a row (i.e. consistent liveness)
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
ret i8 addrspace(1)* %obj
}
-; Two safepoints in a row (i.e. consistent liveness)
define i8 addrspace(1)* @test2(i8 addrspace(1)* %obj) gc "statepoint-example" {
; CHECK-LABEL: @test2
+entry:
; CHECK-LABEL: entry:
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
; CHECK-NEXT: gc.statepoint
-; CHECK-NEXT: %obj.relocated1 = call coldcc i8 addrspace(1)*
-entry:
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+; CHECK-NEXT: %obj.relocated2 = call coldcc i8 addrspace(1)*
+; A simple derived pointer
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
ret i8 addrspace(1)* %obj
}
-; A simple derived pointer
define i8 @test3(i8 addrspace(1)* %obj) gc "statepoint-example" {
+entry:
; CHECK-LABEL: entry:
; CHECK-NEXT: getelementptr
; CHECK-NEXT: gc.statepoint
-; CHECK-NEXT: %derived.relocated = call coldcc i8 addrspace(1)*
; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
+; CHECK-NEXT: %derived.relocated = call coldcc i8 addrspace(1)*
; CHECK-NEXT: load i8, i8 addrspace(1)* %derived.relocated
; CHECK-NEXT: load i8, i8 addrspace(1)* %obj.relocated
-entry:
+; Tests to make sure we visit both the taken and untaken predeccessor
+; of merge. This was a bug in the dataflow liveness at one point.
%derived = getelementptr i8, i8 addrspace(1)* %obj, i64 10
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
-
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
%a = load i8, i8 addrspace(1)* %derived
%b = load i8, i8 addrspace(1)* %obj
%c = sub i8 %a, %b
ret i8 %c
}
-; Tests to make sure we visit both the taken and untaken predeccessor
-; of merge. This was a bug in the dataflow liveness at one point.
define i8 addrspace(1)* @test4(i1 %cmp, i8 addrspace(1)* %obj) gc "statepoint-example" {
entry:
br i1 %cmp, label %taken, label %untaken
-taken:
+taken: ; preds = %entry
; CHECK-LABEL: taken:
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
br label %merge
-untaken:
+untaken: ; preds = %entry
; CHECK-LABEL: untaken:
; CHECK-NEXT: gc.statepoint
-; CHECK-NEXT: %obj.relocated1 = call coldcc i8 addrspace(1)*
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+; CHECK-NEXT: %obj.relocated2 = call coldcc i8 addrspace(1)*
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
br label %merge
-merge:
+merge: ; preds = %untaken, %taken
; CHECK-LABEL: merge:
-; CHECK-NEXT: %.0 = phi i8 addrspace(1)* [ %obj.relocated, %taken ], [ %obj.relocated1, %untaken ]
+; CHECK-NEXT: %.0 = phi i8 addrspace(1)* [ %obj.relocated, %taken ], [ %obj.relocated2, %untaken ]
; CHECK-NEXT: ret i8 addrspace(1)* %.0
+; When run over a function which doesn't opt in, should do nothing!
ret i8 addrspace(1)* %obj
}
-; When run over a function which doesn't opt in, should do nothing!
define i8 addrspace(1)* @test5(i8 addrspace(1)* %obj) gc "ocaml" {
; CHECK-LABEL: @test5
+entry:
; CHECK-LABEL: entry:
; CHECK-NEXT: gc.statepoint
; CHECK-NOT: %obj.relocated = call coldcc i8 addrspace(1)*
-entry:
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %0 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
ret i8 addrspace(1)* %obj
}
-declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...) \ No newline at end of file
+declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/PlaceSafepoints/call_gc_result.ll b/test/Transforms/RewriteStatepointsForGC/call-gc-result.ll
index f2929bfd58ab..6fcd9b5644ad 100644
--- a/test/Transforms/PlaceSafepoints/call_gc_result.ll
+++ b/test/Transforms/RewriteStatepointsForGC/call-gc-result.ll
@@ -1,4 +1,4 @@
-;; RUN: opt %s -place-safepoints -S | FileCheck %s
+;; RUN: opt < %s -rewrite-statepoints-for-gc -S | FileCheck %s
;; This test is to verify that gc_result from a call statepoint
;; can have preceding phis in its parent basic block. Unlike
@@ -21,8 +21,8 @@ branch2:
merge:
;; CHECK: %phi = phi i32 [ %a, %branch2 ], [ %b, %branch1 ]
-;; CHECK-NEXT: %safepoint_token1 = call token (i64, i32, i32 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32f(i64 2882400000, i32 0, i32 ()* @foo, i32 0, i32 0, i32 0, i32 0)
-;; CHECK-NEXT: %ret2 = call i32 @llvm.experimental.gc.result.i32(token %safepoint_token1)
+;; CHECK-NEXT: [[TOKEN:%[^ ]+]] = call token (i64, i32, i32 ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32f(i64 2882400000, i32 0, i32 ()* @foo, i32 0, i32 0, i32 0, i32 0
+;; CHECK-NEXT: call i32 @llvm.experimental.gc.result.i32(token [[TOKEN]])
%phi = phi i32 [ %a, %branch2 ], [ %b, %branch1 ]
%ret = call i32 @foo()
ret i32 %ret
diff --git a/test/Transforms/RewriteStatepointsForGC/codegen-cond.ll b/test/Transforms/RewriteStatepointsForGC/codegen-cond.ll
index 8221cd0e0f82..9e8cbaf0260d 100644
--- a/test/Transforms/RewriteStatepointsForGC/codegen-cond.ll
+++ b/test/Transforms/RewriteStatepointsForGC/codegen-cond.ll
@@ -1,39 +1,45 @@
; RUN: opt -rewrite-statepoints-for-gc -S < %s | FileCheck %s
; A null test of a single value
+
define i1 @test(i8 addrspace(1)* %p, i1 %rare) gc "statepoint-example" {
; CHECK-LABEL: @test
entry:
- %cond = icmp eq i8 addrspace(1)* %p, null
- br i1 %rare, label %safepoint, label %continue, !prof !0
-safepoint:
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @safepoint, i32 0, i32 0, i32 0, i32 0)
- br label %continue
-continue:
+ %cond = icmp eq i8 addrspace(1)* %p, null
+ br i1 %rare, label %safepoint, label %continue, !prof !0
+
+safepoint: ; preds = %entry
+ call void @safepoint() [ "deopt"() ]
+ br label %continue
+
+continue: ; preds = %safepoint, %entry
; CHECK-LABEL: continue:
; CHECK: phi
; CHECK-DAG: [ %p.relocated, %safepoint ]
; CHECK-DAG: [ %p, %entry ]
; CHECK: %cond = icmp
; CHECK: br i1 %cond
- br i1 %cond, label %taken, label %untaken
-taken:
- ret i1 true
-untaken:
- ret i1 false
+; Comparing two pointers
+ br i1 %cond, label %taken, label %untaken
+
+taken: ; preds = %continue
+ ret i1 true
+
+untaken: ; preds = %continue
+ ret i1 false
}
-; Comparing two pointers
-define i1 @test2(i8 addrspace(1)* %p, i8 addrspace(1)* %q, i1 %rare)
- gc "statepoint-example" {
+define i1 @test2(i8 addrspace(1)* %p, i8 addrspace(1)* %q, i1 %rare) gc "statepoint-example" {
; CHECK-LABEL: @test2
-entry:
- %cond = icmp eq i8 addrspace(1)* %p, %q
- br i1 %rare, label %safepoint, label %continue, !prof !0
-safepoint:
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @safepoint, i32 0, i32 0, i32 0, i32 0)
- br label %continue
-continue:
+entry:
+ %cond = icmp eq i8 addrspace(1)* %p, %q
+ br i1 %rare, label %safepoint, label %continue, !prof !0
+
+safepoint: ; preds = %entry
+ call void @safepoint() [ "deopt"() ]
+ br label %continue
+
+continue: ; preds = %safepoint, %entry
; CHECK-LABEL: continue:
; CHECK: phi
; CHECK-DAG: [ %q.relocated, %safepoint ]
@@ -43,32 +49,33 @@ continue:
; CHECK-DAG: [ %p, %entry ]
; CHECK: %cond = icmp
; CHECK: br i1 %cond
- br i1 %cond, label %taken, label %untaken
-taken:
- ret i1 true
-untaken:
- ret i1 false
-}
-
; Sanity check that nothing bad happens if already last instruction
; before terminator
-define i1 @test3(i8 addrspace(1)* %p, i8 addrspace(1)* %q, i1 %rare)
- gc "statepoint-example" {
+ br i1 %cond, label %taken, label %untaken
+
+taken: ; preds = %continue
+ ret i1 true
+
+untaken: ; preds = %continue
+ ret i1 false
+}
+
+define i1 @test3(i8 addrspace(1)* %p, i8 addrspace(1)* %q, i1 %rare) gc "statepoint-example" {
; CHECK-LABEL: @test3
-entry:
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @safepoint, i32 0, i32 0, i32 0, i32 0)
; CHECK: gc.statepoint
; CHECK: %cond = icmp
; CHECK: br i1 %cond
- %cond = icmp eq i8 addrspace(1)* %p, %q
- br i1 %cond, label %taken, label %untaken
-taken:
- ret i1 true
-untaken:
- ret i1 false
+entry:
+ call void @safepoint() [ "deopt"() ]
+ %cond = icmp eq i8 addrspace(1)* %p, %q
+ br i1 %cond, label %taken, label %untaken
+
+taken: ; preds = %entry
+ ret i1 true
+
+untaken: ; preds = %entry
+ ret i1 false
}
declare void @safepoint()
-declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
-
!0 = !{!"branch_weights", i32 1, i32 10000}
diff --git a/test/Transforms/RewriteStatepointsForGC/constants.ll b/test/Transforms/RewriteStatepointsForGC/constants.ll
index 0f600f215718..0a16f38f1369 100644
--- a/test/Transforms/RewriteStatepointsForGC/constants.ll
+++ b/test/Transforms/RewriteStatepointsForGC/constants.ll
@@ -1,58 +1,51 @@
-; RUN: opt -S -rewrite-statepoints-for-gc %s | FileCheck %s
+; RUN: opt -S -rewrite-statepoints-for-gc < %s | FileCheck %s
+
+; constants don't get relocated.
+@G = addrspace(1) global i8 5
declare void @foo()
-declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
-; constants don't get relocated.
define i8 @test() gc "statepoint-example" {
; CHECK-LABEL: @test
; CHECK: gc.statepoint
; CHECK-NEXT: load i8, i8 addrspace(1)* inttoptr (i64 15 to i8 addrspace(1)*)
+; Mostly just here to show reasonable code test can come from.
entry:
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call void @foo() [ "deopt"() ]
%res = load i8, i8 addrspace(1)* inttoptr (i64 15 to i8 addrspace(1)*)
ret i8 %res
}
-
-; Mostly just here to show reasonable code test can come from.
define i8 @test2(i8 addrspace(1)* %p) gc "statepoint-example" {
; CHECK-LABEL: @test2
; CHECK: gc.statepoint
; CHECK-NEXT: gc.relocate
; CHECK-NEXT: icmp
+; Globals don't move and thus don't get relocated
entry:
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call void @foo() [ "deopt"() ]
%cmp = icmp eq i8 addrspace(1)* %p, null
br i1 %cmp, label %taken, label %not_taken
-taken:
+taken: ; preds = %not_taken, %entry
ret i8 0
-not_taken:
+not_taken: ; preds = %entry
%cmp2 = icmp ne i8 addrspace(1)* %p, null
br i1 %cmp2, label %taken, label %dead
-dead:
- ; We see that dead can't be reached, but the optimizer might not. It's
- ; completely legal for it to exploit the fact that if dead executed, %p
- ; would have to equal null. This can produce intermediate states which
- ; look like that of test above, even if arbitrary constant addresses aren't
- ; legal in the source language
+dead: ; preds = %not_taken
%addr = getelementptr i8, i8 addrspace(1)* %p, i32 15
- %res = load i8, i8addrspace(1)* %addr
+ %res = load i8, i8 addrspace(1)* %addr
ret i8 %res
}
-@G = addrspace(1) global i8 5
-
-; Globals don't move and thus don't get relocated
define i8 @test3(i1 %always_true) gc "statepoint-example" {
; CHECK-LABEL: @test3
; CHECK: gc.statepoint
; CHECK-NEXT: load i8, i8 addrspace(1)* @G
entry:
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call void @foo() [ "deopt"() ]
%res = load i8, i8 addrspace(1)* @G, align 1
ret i8 %res
}
@@ -67,7 +60,7 @@ entry:
br i1 %is_null, label %split, label %join
split:
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call void @foo()
%arg_value_addr.i = getelementptr inbounds i8, i8 addrspace(1)* %p, i64 8
%arg_value_addr_casted.i = bitcast i8 addrspace(1)* %arg_value_addr.i to i8 addrspace(1)* addrspace(1)*
br label %join
@@ -87,7 +80,7 @@ use:
; CHECK-LABEL: use:
; CHECK: gc.statepoint
; CHECK: gc.relocate
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call void @foo()
%res = load i8 addrspace(1)*, i8 addrspace(1)* addrspace(1)* %addr2, align 1
ret i8 addrspace(1)* %res
}
@@ -98,7 +91,173 @@ define i8 addrspace(1)* @test5(i1 %always_true) gc "statepoint-example" {
; CHECK: gc.statepoint
; CHECK-NEXT: %res = extractelement <2 x i8 addrspace(1)*> <i8 addrspace(1)* @G, i8 addrspace(1)* @G>, i32 0
entry:
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call void @foo()
%res = extractelement <2 x i8 addrspace(1)*> <i8 addrspace(1)* @G, i8 addrspace(1)* @G>, i32 0
ret i8 addrspace(1)* %res
}
+
+define i8 addrspace(1)* @test6(i64 %arg) gc "statepoint-example" {
+entry:
+ ; Don't fail any assertions and don't record null as a live value
+ ; CHECK-LABEL: test6
+ ; CHECK: gc.statepoint
+ ; CHECK-NOT: call {{.*}}gc.relocate
+ %load_addr = getelementptr i8, i8 addrspace(1)* null, i64 %arg
+ call void @foo() [ "deopt"() ]
+ ret i8 addrspace(1)* %load_addr
+}
+
+define i8 addrspace(1)* @test7(i64 %arg) gc "statepoint-example" {
+entry:
+ ; Same as test7 but use regular constant instead of a null
+ ; CHECK-LABEL: test7
+ ; CHECK: gc.statepoint
+ ; CHECK-NOT: call {{.*}}gc.relocate
+ %load_addr = getelementptr i8, i8 addrspace(1)* inttoptr (i64 15 to i8 addrspace(1)*), i64 %arg
+ call void @foo() [ "deopt"() ]
+ ret i8 addrspace(1)* %load_addr
+}
+
+define i8 @test8(i8 addrspace(1)* %p) gc "statepoint-example" {
+; Checks that base( phi(gep null, oop) ) = phi(null, base(oop)) and that we
+; correctly relocate this value
+; CHECK-LABEL: @test8
+entry:
+ %is_null = icmp eq i8 addrspace(1)* %p, null
+ br i1 %is_null, label %null.crit-edge, label %not-null
+
+not-null:
+ %load_addr = getelementptr inbounds i8, i8 addrspace(1)* %p, i64 8
+ br label %join
+
+null.crit-edge:
+ %load_addr.const = getelementptr inbounds i8, i8 addrspace(1)* null, i64 8
+ br label %join
+
+join:
+ %addr = phi i8 addrspace(1)* [ %load_addr, %not-null ], [%load_addr.const, %null.crit-edge]
+ ; CHECK: %addr.base = phi i8 addrspace(1)*
+ ; CHECK-DAG: [ %p, %not-null ]
+ ; CHECK-DAG: [ null, %null.crit-edge ]
+ ; CHECK: gc.statepoint
+ call void @foo() [ "deopt"() ]
+ ; CHECK-DAG: call {{.*}}gc.relocate{{.*}}(%addr.base, %addr.base)
+ ; CHECK-DAG: call {{.*}}gc.relocate{{.*}}(%addr.base, %addr)
+ br i1 %is_null, label %early-exit, label %use
+
+early-exit:
+ ret i8 0
+
+use:
+ %res = load i8, i8 addrspace(1)* %addr, align 1
+ ret i8 %res
+}
+
+define i8 @test9(i8 addrspace(1)* %p) gc "statepoint-example" {
+; Checks that base( phi(inttoptr, oop) ) = phi(null, base(oop)) and that we
+; correctly relocate this value
+; CHECK-LABEL: @test9
+entry:
+ %is_null = icmp eq i8 addrspace(1)* %p, null
+ br i1 %is_null, label %null.crit-edge, label %not-null
+
+not-null:
+ %load_addr = getelementptr inbounds i8, i8 addrspace(1)* %p, i64 8
+ br label %join
+
+null.crit-edge:
+ br label %join
+
+join:
+ %addr = phi i8 addrspace(1)* [ %load_addr, %not-null ], [inttoptr (i64 8 to i8 addrspace(1)*), %null.crit-edge]
+ ; CHECK: %addr.base = phi i8 addrspace(1)*
+ ; CHECK-DAG: [ %p, %not-null ]
+ ; CHECK-DAG: [ null, %null.crit-edge ]
+ ; CHECK: gc.statepoint
+ call void @foo() [ "deopt"() ]
+ ; CHECK-DAG: call {{.*}}gc.relocate{{.*}}(%addr.base, %addr.base)
+ ; CHECK-DAG: call {{.*}}gc.relocate{{.*}}(%addr.base, %addr)
+ br i1 %is_null, label %early-exit, label %use
+
+early-exit:
+ ret i8 0
+
+use:
+ %res = load i8, i8 addrspace(1)* %addr, align 1
+ ret i8 %res
+}
+
+define i8 @test10(i8 addrspace(1)* %p) gc "statepoint-example" {
+; Checks that base( phi(const gep, oop) ) = phi(null, base(oop)) and that we
+; correctly relocate this value
+; CHECK-LABEL: @test10
+entry:
+ %is_null = icmp eq i8 addrspace(1)* %p, null
+ br i1 %is_null, label %null.crit-edge, label %not-null
+
+not-null:
+ %load_addr = getelementptr inbounds i8, i8 addrspace(1)* %p, i64 8
+ br label %join
+
+null.crit-edge:
+ br label %join
+
+join:
+ %addr = phi i8 addrspace(1)* [ %load_addr, %not-null ], [getelementptr (i8, i8 addrspace(1)* null, i64 8), %null.crit-edge]
+ ; CHECK: %addr.base = phi i8 addrspace(1)*
+ ; CHECK-DAG: [ %p, %not-null ]
+ ; CHECK-DAG: [ null, %null.crit-edge ]
+ ; CHECK: gc.statepoint
+ call void @foo() [ "deopt"() ]
+ ; CHECK-DAG: call {{.*}}gc.relocate{{.*}}(%addr.base, %addr.base)
+ ; CHECK-DAG: call {{.*}}gc.relocate{{.*}}(%addr.base, %addr)
+ br i1 %is_null, label %early-exit, label %use
+
+early-exit:
+ ret i8 0
+
+use:
+ %res = load i8, i8 addrspace(1)* %addr, align 1
+ ret i8 %res
+}
+
+define i32 addrspace(1)* @test11(i1 %c) gc "statepoint-example" {
+; CHECK-LABEL: @test11
+; Checks that base( select(const1, const2) ) == null and that we don't record
+; such value in the oop map
+entry:
+ %val = select i1 %c, i32 addrspace(1)* inttoptr (i64 8 to i32 addrspace(1)*), i32 addrspace(1)* inttoptr (i64 15 to i32 addrspace(1)*)
+ ; CHECK: gc.statepoint
+ ; CHECK-NOT: call {{.*}}gc.relocate
+ call void @foo() [ "deopt"() ]
+ ret i32 addrspace(1)* %val
+}
+
+
+define <2 x i32 addrspace(1)*> @test12(i1 %c) gc "statepoint-example" {
+; CHECK-LABEL: @test12
+; Same as test11 but with vectors
+entry:
+ %val = select i1 %c, <2 x i32 addrspace(1)*> <i32 addrspace(1)* inttoptr (i64 5 to i32 addrspace(1)*),
+ i32 addrspace(1)* inttoptr (i64 15 to i32 addrspace(1)*)>,
+ <2 x i32 addrspace(1)*> <i32 addrspace(1)* inttoptr (i64 30 to i32 addrspace(1)*),
+ i32 addrspace(1)* inttoptr (i64 60 to i32 addrspace(1)*)>
+ ; CHECK: gc.statepoint
+ ; CHECK-NOT: call {{.*}}gc.relocate
+ call void @foo() [ "deopt"() ]
+ ret <2 x i32 addrspace(1)*> %val
+}
+
+define <2 x i32 addrspace(1)*> @test13(i1 %c, <2 x i32 addrspace(1)*> %ptr) gc "statepoint-example" {
+; CHECK-LABEL: @test13
+; Similar to test8, test9 and test10 but with vectors
+entry:
+ %val = select i1 %c, <2 x i32 addrspace(1)*> %ptr,
+ <2 x i32 addrspace(1)*> <i32 addrspace(1)* inttoptr (i64 30 to i32 addrspace(1)*), i32 addrspace(1)* inttoptr (i64 60 to i32 addrspace(1)*)>
+ ; CHECK: %val.base = select i1 %c, <2 x i32 addrspace(1)*> %ptr, <2 x i32 addrspace(1)*> zeroinitializer, !is_base_value !0
+ ; CHECK: gc.statepoint
+ call void @foo() [ "deopt"() ]
+ ; CHECK-DAG: call {{.*}}gc.relocate{{.*}}(%val.base, %val.base)
+ ; CHECK-DAG: call {{.*}}gc.relocate{{.*}}(%val.base, %val)
+ ret <2 x i32 addrspace(1)*> %val
+}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-1.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-1.ll
deleted file mode 100644
index 6af2a3012b5c..000000000000
--- a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-1.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-print-base-pointers -S 2>&1 | FileCheck %s
-
-; CHECK: derived %merged_value base %merged_value.base
-
-declare void @site_for_call_safpeoint()
-
-define i64 addrspace(1)* @test(i64 addrspace(1)* %base_obj_x, i64 addrspace(1)* %base_obj_y, i1 %runtime_condition) gc "statepoint-example" {
-entry:
- br i1 %runtime_condition, label %here, label %there
-
-here: ; preds = %entry
- %x = getelementptr i64, i64 addrspace(1)* %base_obj_x, i32 1
- br label %merge
-
-there: ; preds = %entry
- %y = getelementptr i64, i64 addrspace(1)* %base_obj_y, i32 1
- br label %merge
-
-merge: ; preds = %there, %here
-; CHECK-LABEL: merge:
-; CHECK: %merged_value.base = phi i64 addrspace(1)* [ %base_obj_x, %here ], [ %base_obj_y, %there ]
- %merged_value = phi i64 addrspace(1)* [ %x, %here ], [ %y, %there ]
- call void @site_for_call_safpeoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- ret i64 addrspace(1)* %merged_value
-}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-10.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-10.ll
deleted file mode 100644
index 8c486d6b3896..000000000000
--- a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-10.ll
+++ /dev/null
@@ -1,35 +0,0 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-print-base-pointers -S 2>&1 | FileCheck %s
-
-
-declare i1 @runtime_value() "gc-leaf-function"
-
-declare void @do_safepoint()
-
-define void @select_of_phi(i64 addrspace(1)* %base_obj_x, i64 addrspace(1)* %base_obj_y) gc "statepoint-example" {
-entry:
- br label %loop
-
-loop: ; preds = %merge, %entry
- %current_x = phi i64 addrspace(1)* [ %base_obj_x, %entry ], [ %next_x, %merge ]
- %current_y = phi i64 addrspace(1)* [ %base_obj_y, %entry ], [ %next_y, %merge ]
- %current = phi i64 addrspace(1)* [ null, %entry ], [ %next, %merge ]
- %condition = call i1 @runtime_value()
- %next_x = getelementptr i64, i64 addrspace(1)* %current_x, i32 1
- %next_y = getelementptr i64, i64 addrspace(1)* %current_y, i32 1
- br i1 %condition, label %true, label %false
-
-true: ; preds = %loop
- br label %merge
-
-false: ; preds = %loop
- br label %merge
-
-merge: ; preds = %false, %true
- %next = phi i64 addrspace(1)* [ %next_x, %true ], [ %next_y, %false ]
- call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- br label %loop
-}
-; CHECK: Base Pairs (w/o Relocation):
-; CHECK-DAG: derived %next base %next.base
-; CHECK-DAG: derived %next_x base %base_obj_x
-; CHECK-DAG: derived %next_y base %base_obj_y
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-11.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-11.ll
deleted file mode 100644
index ae793b2cb630..000000000000
--- a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-11.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-print-base-pointers -S 2>&1 | FileCheck %s
-
-; CHECK: derived %next base %base_obj
-
-declare void @do_safepoint()
-
-define void @test(i64 addrspace(1)* %base_obj) gc "statepoint-example" {
-entry:
- %obj = getelementptr i64, i64 addrspace(1)* %base_obj, i32 1
- br label %loop
-
-loop: ; preds = %loop, %entry
-; CHECK-LABEL: loop:
-; CHECK: phi i64 addrspace(1)*
-; CHECK-DAG: [ %base_obj.relocated.casted, %loop ]
-; CHECK-DAG: [ %base_obj, %entry ]
-; CHECK: %current = phi i64 addrspace(1)*
-; CHECK-DAG: [ %obj, %entry ]
-; CHECK-DAG: [ %next.relocated.casted, %loop ]
- %current = phi i64 addrspace(1)* [ %obj, %entry ], [ %next, %loop ]
- %next = getelementptr i64, i64 addrspace(1)* %current, i32 1
- call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- br label %loop
-}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-2.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-2.ll
deleted file mode 100644
index 2b9485388f80..000000000000
--- a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-2.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-print-base-pointers -S 2>&1 | FileCheck %s
-
-; CHECK: derived %merged_value base %base_obj
-
-define i64 addrspace(1)* @test(i64 addrspace(1)* %base_obj, i1 %runtime_condition) gc "statepoint-example" {
-entry:
- br i1 %runtime_condition, label %merge, label %there
-
-there: ; preds = %entry
- %derived_obj = getelementptr i64, i64 addrspace(1)* %base_obj, i32 1
- br label %merge
-
-merge: ; preds = %there, %entry
- %merged_value = phi i64 addrspace(1)* [ %base_obj, %entry ], [ %derived_obj, %there ]
- call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- ret i64 addrspace(1)* %merged_value
-}
-
-declare void @foo()
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-3.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-3.ll
deleted file mode 100644
index 71bb309d1301..000000000000
--- a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-3.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-print-base-pointers -S 2>&1 | FileCheck %s
-
-; CHECK: derived %next.i64 base %base_obj
-
-define void @test(i64 addrspace(1)* %base_obj) gc "statepoint-example" {
-entry:
- %obj = getelementptr i64, i64 addrspace(1)* %base_obj, i32 1
- br label %loop
-
-loop: ; preds = %loop, %entry
- %current = phi i64 addrspace(1)* [ %obj, %entry ], [ %next.i64, %loop ]
- %current.i32 = bitcast i64 addrspace(1)* %current to i32 addrspace(1)*
- %next.i32 = getelementptr i32, i32 addrspace(1)* %current.i32, i32 1
- %next.i64 = bitcast i32 addrspace(1)* %next.i32 to i64 addrspace(1)*
- call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- br label %loop
-}
-
-declare void @do_safepoint()
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-4.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-4.ll
deleted file mode 100644
index 3fcbf26a6fc0..000000000000
--- a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-4.ll
+++ /dev/null
@@ -1,44 +0,0 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-print-base-pointers -S 2>&1 | FileCheck %s
-
-; CHECK: derived %obj_to_consume base %obj_to_consume
-
-declare void @foo()
-
-declare i64 addrspace(1)* @generate_obj()
-
-declare void @consume_obj(i64 addrspace(1)*)
-
-define void @test(i32 %condition) gc "statepoint-example" {
-entry:
- br label %loop
-
-loop: ; preds = %merge.split, %entry
-; CHECK: loop:
-; CHECK: [[TOKEN_0:%[^ ]+]] = call token (i64, i32, i64 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64f(i64 2882400000, i32 0, i64 addrspace(1)* ()* @generate_obj, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i3
-; CHECK-NEXT: [[RESULT_0:%[^ ]+]] = call i64 addrspace(1)* @llvm.experimental.gc.result
- %0 = call i64 addrspace(1)* @generate_obj() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- switch i32 %condition, label %dest_a [
- i32 0, label %dest_b
- i32 1, label %dest_c
- ]
-
-dest_a: ; preds = %loop
- br label %merge
-
-dest_b: ; preds = %loop
- br label %merge
-
-dest_c: ; preds = %loop
- br label %merge
-
-merge: ; preds = %dest_c, %dest_b, %dest_a
-; CHECK: merge:
-; CHECK: %obj_to_consume = phi i64 addrspace(1)* [ [[RESULT_0]], %dest_a ], [ null, %dest_b ], [ null, %dest_c ]
- %obj_to_consume = phi i64 addrspace(1)* [ %0, %dest_a ], [ null, %dest_b ], [ null, %dest_c ]
- call void @consume_obj(i64 addrspace(1)* %obj_to_consume) [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- br label %merge.split
-
-merge.split: ; preds = %merge
- call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- br label %loop
-}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-5.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-5.ll
deleted file mode 100644
index 4d43d7f7307c..000000000000
--- a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-5.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-print-base-pointers -S 2>&1 | FileCheck %s
-
-; CHECK: derived %merged_value base %merged_value.base
-
-declare void @foo()
-
-define i64 addrspace(1)* @test(i64 addrspace(1)* %base_obj_x, i64 addrspace(1)* %base_obj_y, i1 %runtime_condition) gc "statepoint-example" {
-entry:
- br i1 %runtime_condition, label %here, label %there
-
-here: ; preds = %entry
- br label %bump
-
-bump: ; preds = %here
- br label %merge
-
-there: ; preds = %entry
- %y = getelementptr i64, i64 addrspace(1)* %base_obj_y, i32 1
- br label %merge
-
-merge: ; preds = %there, %bump
-; CHECK: merge:
-; CHECK: %merged_value.base = phi i64 addrspace(1)* [ %base_obj_x, %bump ], [ %base_obj_y, %there ]
-; CHECK-NEXT: %merged_value = phi i64 addrspace(1)* [ %base_obj_x, %bump ], [ %y, %there ]
- %merged_value = phi i64 addrspace(1)* [ %base_obj_x, %bump ], [ %y, %there ]
- call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- ret i64 addrspace(1)* %merged_value
-}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-6.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-6.ll
deleted file mode 100644
index 2d555d179c29..000000000000
--- a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-6.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-print-base-pointers -S 2>&1 | FileCheck %s
-
-; CHECK: derived %merged_value base %merged_value.base
-
-declare void @site_for_call_safpeoint()
-
-define i64 addrspace(1)* @test(i64 addrspace(1)* %base_obj_x, i64 addrspace(1)* %base_obj_y, i1 %runtime_condition_x, i1 %runtime_condition_y) gc "statepoint-example" {
-entry:
- br i1 %runtime_condition_x, label %here, label %there
-
-here: ; preds = %entry
- br i1 %runtime_condition_y, label %bump_here_a, label %bump_here_b
-
-bump_here_a: ; preds = %here
- %x_a = getelementptr i64, i64 addrspace(1)* %base_obj_x, i32 1
- br label %merge_here
-
-bump_here_b: ; preds = %here
- %x_b = getelementptr i64, i64 addrspace(1)* %base_obj_x, i32 2
- br label %merge_here
-
-merge_here: ; preds = %bump_here_b, %bump_here_a
- %x = phi i64 addrspace(1)* [ %x_a, %bump_here_a ], [ %x_b, %bump_here_b ]
- br label %merge
-
-there: ; preds = %entry
- %y = getelementptr i64, i64 addrspace(1)* %base_obj_y, i32 1
- br label %merge
-
-merge: ; preds = %there, %merge_here
-; CHECK: merge:
-; CHECK: %merged_value.base = phi i64 addrspace(1)* [ %base_obj_x, %merge_here ], [ %base_obj_y, %there ]
-; CHECK-NEXT: %merged_value = phi i64 addrspace(1)* [ %x, %merge_here ], [ %y, %there ]
- %merged_value = phi i64 addrspace(1)* [ %x, %merge_here ], [ %y, %there ]
- call void @site_for_call_safpeoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- ret i64 addrspace(1)* %merged_value
-}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-7.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-7.ll
deleted file mode 100644
index e90ef63184ee..000000000000
--- a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-7.ll
+++ /dev/null
@@ -1,45 +0,0 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-print-base-pointers -S 2>&1 | FileCheck %s
-
-; CHECK: derived %merged_value base %merged_value.base
-
-declare void @site_for_call_safpeoint()
-
-define i64 addrspace(1)* @test(i64 addrspace(1)* %base_obj_x, i64 addrspace(1)* %base_obj_y, i1 %runtime_condition_x, i1 %runtime_condition_y) gc "statepoint-example" {
-entry:
- br i1 %runtime_condition_x, label %here, label %there
-
-here: ; preds = %entry
- br i1 %runtime_condition_y, label %bump_here_a, label %bump_here_b
-
-bump_here_a: ; preds = %here
- %x_a = getelementptr i64, i64 addrspace(1)* %base_obj_x, i32 1
- br label %merge_here
-
-bump_here_b: ; preds = %here
- %x_b = getelementptr i64, i64 addrspace(1)* %base_obj_y, i32 2
- br label %merge_here
-
-merge_here: ; preds = %bump_here_b, %bump_here_a
-; CHECK: merge_here:
-; CHECK-DAG: %x.base
-; CHECK-DAG: phi i64 addrspace(1)*
-; CHECK-DAG: [ %base_obj_x, %bump_here_a ]
-; CHECK-DAG: [ %base_obj_y, %bump_here_b ]
- %x = phi i64 addrspace(1)* [ %x_a, %bump_here_a ], [ %x_b, %bump_here_b ]
- br label %merge
-
-there: ; preds = %entry
- %y = getelementptr i64, i64 addrspace(1)* %base_obj_y, i32 1
- br label %merge
-
-merge: ; preds = %there, %merge_here
-; CHECK: merge:
-; CHECK-DAG: %merged_value.base
-; CHECK-DAG: phi i64 addrspace(1)*
-; CHECK-DAG: %merge_here
-; CHECK-DAG: [ %base_obj_y, %there ]
-; CHECK: %merged_value = phi i64 addrspace(1)* [ %x, %merge_here ], [ %y, %there ]
- %merged_value = phi i64 addrspace(1)* [ %x, %merge_here ], [ %y, %there ]
- call void @site_for_call_safpeoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- ret i64 addrspace(1)* %merged_value
-}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-8.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-8.ll
deleted file mode 100644
index 628696ba2c2f..000000000000
--- a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-8.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-print-base-pointers -S 2>&1 | FileCheck %s
-
-; CHECK: derived %next_element_ptr base %array_obj
-
-define i32 @null_in_array(i64 addrspace(1)* %array_obj) gc "statepoint-example" {
-entry:
- %array_len_pointer.i64 = getelementptr i64, i64 addrspace(1)* %array_obj, i32 1
- %array_len_pointer.i32 = bitcast i64 addrspace(1)* %array_len_pointer.i64 to i32 addrspace(1)*
- %array_len = load i32, i32 addrspace(1)* %array_len_pointer.i32
- %array_elems = bitcast i32 addrspace(1)* %array_len_pointer.i32 to i64 addrspace(1)* addrspace(1)*
- br label %loop_check
-
-loop_check: ; preds = %loop_back, %entry
- %index = phi i32 [ 0, %entry ], [ %next_index, %loop_back ]
- %current_element_ptr = phi i64 addrspace(1)* addrspace(1)* [ %array_elems, %entry ], [ %next_element_ptr, %loop_back ]
- %index_lt = icmp ult i32 %index, %array_len
- br i1 %index_lt, label %check_for_null, label %not_found
-
-check_for_null: ; preds = %loop_check
- %current_element = load i64 addrspace(1)*, i64 addrspace(1)* addrspace(1)* %current_element_ptr
- %is_null = icmp eq i64 addrspace(1)* %current_element, null
- br i1 %is_null, label %found, label %loop_back
-
-loop_back: ; preds = %check_for_null
- %next_element_ptr = getelementptr i64 addrspace(1)*, i64 addrspace(1)* addrspace(1)* %current_element_ptr, i32 1
- %next_index = add i32 %index, 1
- call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- br label %loop_check
-
-not_found: ; preds = %loop_check
- ret i32 -1
-
-found: ; preds = %check_for_null
- ret i32 %index
-}
-
-declare void @do_safepoint()
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-9.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-9.ll
deleted file mode 100644
index a82af3b96892..000000000000
--- a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers-9.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-print-base-pointers -S 2>&1 | FileCheck %s
-
-; CHECK: derived %next base %base_obj
-
-declare i1 @runtime_value() "gc-leaf-function"
-
-define void @maybe_GEP(i64 addrspace(1)* %base_obj) gc "statepoint-example" {
-entry:
- br label %loop
-
-loop: ; preds = %loop, %entry
- %current = phi i64 addrspace(1)* [ %base_obj, %entry ], [ %next, %loop ]
- %condition = call i1 @runtime_value()
- %maybe_next = getelementptr i64, i64 addrspace(1)* %current, i32 1
- %next = select i1 %condition, i64 addrspace(1)* %maybe_next, i64 addrspace(1)* %current
- call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- br label %loop
-}
-
-declare void @do_safepoint()
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers.ll
deleted file mode 100644
index a378d1502add..000000000000
--- a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-pointers.ll
+++ /dev/null
@@ -1,151 +0,0 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -S 2>&1 | FileCheck %s
-
-; The rewriting needs to make %obj loop variant by inserting a phi
-; of the original value and it's relocation.
-
-declare i64 addrspace(1)* @generate_obj() "gc-leaf-function"
-
-declare void @use_obj(i64 addrspace(1)*) "gc-leaf-function"
-
-define void @def_use_safepoint() gc "statepoint-example" {
-; CHECK-LABEL: def_use_safepoint
-; CHECK: phi i64 addrspace(1)*
-; CHECK-DAG: [ %obj.relocated.casted, %loop ]
-; CHECK-DAG: [ %obj, %entry ]
-entry:
- %obj = call i64 addrspace(1)* @generate_obj()
- br label %loop
-
-loop: ; preds = %loop, %entry
- call void @use_obj(i64 addrspace(1)* %obj)
- call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- br label %loop
-}
-
-declare void @do_safepoint()
-
-declare void @parse_point(i64 addrspace(1)*)
-
-define i64 addrspace(1)* @test1(i32 %caller, i8 addrspace(1)* %a, i8 addrspace(1)* %b, i32 %unknown) gc "statepoint-example" {
-; CHECK-LABEL: test1
-entry:
- br i1 undef, label %left, label %right
-
-left: ; preds = %entry
-; CHECK: left:
-; CHECK-NEXT: %a.cast = bitcast i8 addrspace(1)* %a to i64 addrspace(1)*
-; CHECK-NEXT: [[CAST_L:%.*]] = bitcast i8 addrspace(1)* %a to i64 addrspace(1)*
-; Our safepoint placement pass calls removeUnreachableBlocks, which does a bunch
-; of simplifications to branch instructions. This bug is visible only when
-; there are multiple branches into the same block from the same predecessor, and
-; the following ceremony is to make that artefact survive a call to
-; removeUnreachableBlocks. As an example, "br i1 undef, label %merge, label %merge"
-; will get simplified to "br label %merge" by removeUnreachableBlocks.
- %a.cast = bitcast i8 addrspace(1)* %a to i64 addrspace(1)*
- switch i32 %unknown, label %right [
- i32 0, label %merge
- i32 1, label %merge
- i32 5, label %merge
- i32 3, label %right
- ]
-
-right: ; preds = %left, %left, %entry
-; CHECK: right:
-; CHECK-NEXT: %b.cast = bitcast i8 addrspace(1)* %b to i64 addrspace(1)*
-; CHECK-NEXT: [[CAST_R:%.*]] = bitcast i8 addrspace(1)* %b to i64 addrspace(1)*
- %b.cast = bitcast i8 addrspace(1)* %b to i64 addrspace(1)*
- br label %merge
-
-merge: ; preds = %right, %left, %left, %left
-; CHECK: merge:
-; CHECK-NEXT: %value.base = phi i64 addrspace(1)* [ [[CAST_L]], %left ], [ [[CAST_L]], %left ], [ [[CAST_L]], %left ], [ [[CAST_R]], %right ], !is_base_value !0
- %value = phi i64 addrspace(1)* [ %a.cast, %left ], [ %a.cast, %left ], [ %a.cast, %left ], [ %b.cast, %right ]
- call void @parse_point(i64 addrspace(1)* %value) [ "deopt"(i32 0, i32 0, i32 0, i32 0, i32 0) ]
- ret i64 addrspace(1)* %value
-}
-
-;; The purpose of this test is to ensure that when two live values share a
-;; base defining value with inherent conflicts, we end up with a *single*
-;; base phi/select per such node. This is testing an optimization, not a
-;; fundemental correctness criteria
-define void @test2(i1 %cnd, i64 addrspace(1)* %base_obj, i64 addrspace(1)* %base_arg2) gc "statepoint-example" {
-; CHECK-LABEL: @test2
-entry:
- %obj = getelementptr i64, i64 addrspace(1)* %base_obj, i32 1
- br label %loop
-; CHECK-LABEL: loop
-; CHECK: %current.base = phi i64 addrspace(1)*
-; CHECK-DAG: [ %base_obj, %entry ]
-
-; Given the two selects are equivelent, so are their base phis - ideally,
-; we'd have commoned these, but that's a missed optimization, not correctness.
-; CHECK-DAG: [ [[DISCARD:%.*.base.relocated.casted]], %loop ]
-; CHECK-NOT: extra.base
-; CHECK: next = select
-; CHECK: extra2.base = select
-; CHECK: extra2 = select
-; CHECK: statepoint
-;; Both 'next' and 'extra2' are live across the backedge safepoint...
-
-loop: ; preds = %loop, %entry
- %current = phi i64 addrspace(1)* [ %obj, %entry ], [ %next, %loop ]
- %extra = phi i64 addrspace(1)* [ %obj, %entry ], [ %extra2, %loop ]
- %nexta = getelementptr i64, i64 addrspace(1)* %current, i32 1
- %next = select i1 %cnd, i64 addrspace(1)* %nexta, i64 addrspace(1)* %base_arg2
- %extra2 = select i1 %cnd, i64 addrspace(1)* %nexta, i64 addrspace(1)* %base_arg2
- call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- br label %loop
-}
-
-define i64 addrspace(1)* @test3(i1 %cnd, i64 addrspace(1)* %obj, i64 addrspace(1)* %obj2) gc "statepoint-example" {
-; CHECK-LABEL: @test3
-entry:
- br i1 %cnd, label %merge, label %taken
-
-taken: ; preds = %entry
- br label %merge
-
-merge: ; preds = %taken, %entry
-; CHECK-LABEL: merge:
-; CHECK-NEXT: %bdv = phi
-; CHECK-NEXT: gc.statepoint
- %bdv = phi i64 addrspace(1)* [ %obj, %entry ], [ %obj2, %taken ]
- call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- ret i64 addrspace(1)* %bdv
-}
-
-define i64 addrspace(1)* @test4(i1 %cnd, i64 addrspace(1)* %obj, i64 addrspace(1)* %obj2) gc "statepoint-example" {
-; CHECK-LABEL: @test4
-entry:
- br i1 %cnd, label %merge, label %taken
-
-taken: ; preds = %entry
- br label %merge
-
-merge: ; preds = %taken, %entry
-; CHECK-LABEL: merge:
-; CHECK-NEXT: %bdv = phi
-; CHECK-NEXT: gc.statepoint
- %bdv = phi i64 addrspace(1)* [ %obj, %entry ], [ %obj, %taken ]
- call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- ret i64 addrspace(1)* %bdv
-}
-
-define i64 addrspace(1)* @test5(i1 %cnd, i64 addrspace(1)* %obj, i64 addrspace(1)* %obj2) gc "statepoint-example" {
-; CHECK-LABEL: @test5
-entry:
- br label %merge
-
-merge: ; preds = %merge, %entry
-; CHECK-LABEL: merge:
-; CHECK-NEXT: %bdv = phi
-; CHECK-NEXT: br i1
- %bdv = phi i64 addrspace(1)* [ %obj, %entry ], [ %obj2, %merge ]
- br i1 %cnd, label %merge, label %next
-
-next: ; preds = %merge
- call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- ret i64 addrspace(1)* %bdv
-}
-
-declare void @foo()
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-vector.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-vector.ll
deleted file mode 100644
index 96b7390b77bc..000000000000
--- a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/base-vector.ll
+++ /dev/null
@@ -1,167 +0,0 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -S | FileCheck %s
-
-
-define i64 addrspace(1)* @test(<2 x i64 addrspace(1)*> %vec, i32 %idx) gc "statepoint-example" {
-; CHECK-LABEL: @test
-; CHECK: extractelement
-; CHECK: extractelement
-; CHECK: statepoint
-; CHECK: gc.relocate
-; CHECK-DAG: ; (%base_ee, %base_ee)
-; CHECK: gc.relocate
-; CHECK-DAG: ; (%base_ee, %obj)
-; Note that the second extractelement is actually redundant here. A correct output would
-; be to reuse the existing obj as a base since it is actually a base pointer.
-entry:
- %obj = extractelement <2 x i64 addrspace(1)*> %vec, i32 %idx
- call void @do_safepoint() [ "deopt"() ]
- ret i64 addrspace(1)* %obj
-}
-
-define i64 addrspace(1)* @test2(<2 x i64 addrspace(1)*>* %ptr, i1 %cnd, i32 %idx1, i32 %idx2) gc "statepoint-example" {
-; CHECK-LABEL: test2
-entry:
- br i1 %cnd, label %taken, label %untaken
-
-taken: ; preds = %entry
- %obja = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
- br label %merge
-
-untaken: ; preds = %entry
- %objb = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
- br label %merge
-
-merge: ; preds = %untaken, %taken
- %vec = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ]
- br i1 %cnd, label %taken2, label %untaken2
-
-taken2: ; preds = %merge
- %obj0 = extractelement <2 x i64 addrspace(1)*> %vec, i32 %idx1
- br label %merge2
-
-untaken2: ; preds = %merge
- %obj1 = extractelement <2 x i64 addrspace(1)*> %vec, i32 %idx2
- br label %merge2
-
-merge2: ; preds = %untaken2, %taken2
-; CHECK-LABEL: merge2:
-; CHECK-NEXT: %obj = phi i64 addrspace(1)*
-; CHECK-NEXT: statepoint
-; CHECK: gc.relocate
-; CHECK-DAG: ; (%obj, %obj)
- %obj = phi i64 addrspace(1)* [ %obj0, %taken2 ], [ %obj1, %untaken2 ]
- call void @do_safepoint() [ "deopt"() ]
- ret i64 addrspace(1)* %obj
-}
-
-define i64 addrspace(1)* @test3(i64 addrspace(1)* %ptr) gc "statepoint-example" {
-; CHECK-LABEL: test3
-; CHECK: insertelement
-; CHECK: extractelement
-; CHECK: statepoint
-; CHECK: gc.relocate
-; CHECK-DAG: (%obj, %obj)
-entry:
- %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %ptr, i32 0
- %obj = extractelement <2 x i64 addrspace(1)*> %vec, i32 0
- call void @do_safepoint() [ "deopt"() ]
- ret i64 addrspace(1)* %obj
-}
-
-define i64 addrspace(1)* @test4(i64 addrspace(1)* %ptr) gc "statepoint-example" {
-; CHECK-LABEL: test4
-; CHECK: statepoint
-; CHECK: gc.relocate
-; CHECK-DAG: ; (%ptr, %obj)
-; CHECK: gc.relocate
-; CHECK-DAG: ; (%ptr, %ptr)
-; When we can optimize an extractelement from a known
-; index and avoid introducing new base pointer instructions
-entry:
- %derived = getelementptr i64, i64 addrspace(1)* %ptr, i64 16
- %veca = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %derived, i32 0
- %vec = insertelement <2 x i64 addrspace(1)*> %veca, i64 addrspace(1)* %ptr, i32 1
- %obj = extractelement <2 x i64 addrspace(1)*> %vec, i32 0
- call void @do_safepoint() [ "deopt"() ]
- ret i64 addrspace(1)* %obj
-}
-
-declare void @use(i64 addrspace(1)*) "gc-leaf-function"
-
-define void @test5(i1 %cnd, i64 addrspace(1)* %obj) gc "statepoint-example" {
-; CHECK-LABEL: @test5
-; CHECK: gc.relocate
-; CHECK-DAG: (%obj, %bdv)
-; When we fundementally have to duplicate
-entry:
- %gep = getelementptr i64, i64 addrspace(1)* %obj, i64 1
- %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %gep, i32 0
- %bdv = extractelement <2 x i64 addrspace(1)*> %vec, i32 0
- call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- call void @use(i64 addrspace(1)* %bdv)
- ret void
-}
-
-define void @test6(i1 %cnd, i64 addrspace(1)* %obj, i64 %idx) gc "statepoint-example" {
-; CHECK-LABEL: @test6
-; CHECK: %gep = getelementptr i64, i64 addrspace(1)* %obj, i64 1
-; CHECK: %vec.base = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %obj, i32 0, !is_base_value !0
-; CHECK: %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %gep, i32 0
-; CHECK: %bdv.base = extractelement <2 x i64 addrspace(1)*> %vec.base, i64 %idx, !is_base_value !0
-; CHECK: %bdv = extractelement <2 x i64 addrspace(1)*> %vec, i64 %idx
-; CHECK: gc.statepoint
-; CHECK: gc.relocate
-; CHECK-DAG: (%bdv.base, %bdv)
-; A more complicated example involving vector and scalar bases.
-; This is derived from a failing test case when we didn't have correct
-; insertelement handling.
-entry:
- %gep = getelementptr i64, i64 addrspace(1)* %obj, i64 1
- %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %gep, i32 0
- %bdv = extractelement <2 x i64 addrspace(1)*> %vec, i64 %idx
- call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- call void @use(i64 addrspace(1)* %bdv)
- ret void
-}
-
-define i64 addrspace(1)* @test7(i1 %cnd, i64 addrspace(1)* %obj, i64 addrspace(1)* %obj2) gc "statepoint-example" {
-; CHECK-LABEL: @test7
-entry:
- %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %obj2, i32 0
- br label %merge1
-
-merge1: ; preds = %merge1, %entry
-; CHECK-LABEL: merge1:
-; CHECK: vec2.base
-; CHECK: vec2
-; CHECK: gep
-; CHECK: vec3.base
-; CHECK: vec3
- %vec2 = phi <2 x i64 addrspace(1)*> [ %vec, %entry ], [ %vec3, %merge1 ]
- %gep = getelementptr i64, i64 addrspace(1)* %obj2, i64 1
- %vec3 = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %gep, i32 0
- br i1 %cnd, label %merge1, label %next1
-
-next1: ; preds = %merge1
-; CHECK-LABEL: next1:
-; CHECK: bdv.base =
-; CHECK: bdv =
- %bdv = extractelement <2 x i64 addrspace(1)*> %vec2, i32 0
- br label %merge
-
-merge: ; preds = %merge, %next1
-; CHECK-LABEL: merge:
-; CHECK: %objb.base
-; CHECK: %objb
-; CHECK: gc.statepoint
-; CHECK: gc.relocate
-; CHECK-DAG: (%objb.base, %objb)
- %objb = phi i64 addrspace(1)* [ %obj, %next1 ], [ %bdv, %merge ]
- br i1 %cnd, label %merge, label %next
-
-next: ; preds = %merge
- call void @do_safepoint() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- ret i64 addrspace(1)* %objb
-}
-
-declare void @do_safepoint()
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/basics.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/basics.ll
deleted file mode 100644
index 48c45eaa1b01..000000000000
--- a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/basics.ll
+++ /dev/null
@@ -1,88 +0,0 @@
-; This is a collection of really basic tests for gc.statepoint rewriting.
-; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-rematerialization-threshold=0 -S | FileCheck %s
-
-; Trivial relocation over a single call
-
-declare void @foo()
-
-define i8 addrspace(1)* @test1(i8 addrspace(1)* %obj) gc "statepoint-example" {
-; CHECK-LABEL: @test1
-entry:
-; CHECK-LABEL: entry:
-; CHECK-NEXT: gc.statepoint
-; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
-; Two safepoints in a row (i.e. consistent liveness)
- call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- ret i8 addrspace(1)* %obj
-}
-
-define i8 addrspace(1)* @test2(i8 addrspace(1)* %obj) gc "statepoint-example" {
-; CHECK-LABEL: @test2
-entry:
-; CHECK-LABEL: entry:
-; CHECK-NEXT: gc.statepoint
-; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
-; CHECK-NEXT: gc.statepoint
-; CHECK-NEXT: %obj.relocated2 = call coldcc i8 addrspace(1)*
-; A simple derived pointer
- call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- ret i8 addrspace(1)* %obj
-}
-
-define i8 @test3(i8 addrspace(1)* %obj) gc "statepoint-example" {
-entry:
-; CHECK-LABEL: entry:
-; CHECK-NEXT: getelementptr
-; CHECK-NEXT: gc.statepoint
-; CHECK-NEXT: %derived.relocated = call coldcc i8 addrspace(1)*
-; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
-; CHECK-NEXT: load i8, i8 addrspace(1)* %derived.relocated
-; CHECK-NEXT: load i8, i8 addrspace(1)* %obj.relocated
-; Tests to make sure we visit both the taken and untaken predeccessor
-; of merge. This was a bug in the dataflow liveness at one point.
- %derived = getelementptr i8, i8 addrspace(1)* %obj, i64 10
- call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- %a = load i8, i8 addrspace(1)* %derived
- %b = load i8, i8 addrspace(1)* %obj
- %c = sub i8 %a, %b
- ret i8 %c
-}
-
-define i8 addrspace(1)* @test4(i1 %cmp, i8 addrspace(1)* %obj) gc "statepoint-example" {
-entry:
- br i1 %cmp, label %taken, label %untaken
-
-taken: ; preds = %entry
-; CHECK-LABEL: taken:
-; CHECK-NEXT: gc.statepoint
-; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
- call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- br label %merge
-
-untaken: ; preds = %entry
-; CHECK-LABEL: untaken:
-; CHECK-NEXT: gc.statepoint
-; CHECK-NEXT: %obj.relocated2 = call coldcc i8 addrspace(1)*
- call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- br label %merge
-
-merge: ; preds = %untaken, %taken
-; CHECK-LABEL: merge:
-; CHECK-NEXT: %.0 = phi i8 addrspace(1)* [ %obj.relocated, %taken ], [ %obj.relocated2, %untaken ]
-; CHECK-NEXT: ret i8 addrspace(1)* %.0
-; When run over a function which doesn't opt in, should do nothing!
- ret i8 addrspace(1)* %obj
-}
-
-define i8 addrspace(1)* @test5(i8 addrspace(1)* %obj) gc "ocaml" {
-; CHECK-LABEL: @test5
-entry:
-; CHECK-LABEL: entry:
-; CHECK-NEXT: gc.statepoint
-; CHECK-NOT: %obj.relocated = call coldcc i8 addrspace(1)*
- %0 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
- ret i8 addrspace(1)* %obj
-}
-
-declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/codegen-cond.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/codegen-cond.ll
deleted file mode 100644
index f0da0c06db0a..000000000000
--- a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/codegen-cond.ll
+++ /dev/null
@@ -1,81 +0,0 @@
-; RUN: opt -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -S < %s | FileCheck %s
-
-; A null test of a single value
-
-define i1 @test(i8 addrspace(1)* %p, i1 %rare) gc "statepoint-example" {
-; CHECK-LABEL: @test
-entry:
- %cond = icmp eq i8 addrspace(1)* %p, null
- br i1 %rare, label %safepoint, label %continue, !prof !0
-
-safepoint: ; preds = %entry
- call void @safepoint() [ "deopt"() ]
- br label %continue
-
-continue: ; preds = %safepoint, %entry
-; CHECK-LABEL: continue:
-; CHECK: phi
-; CHECK-DAG: [ %p.relocated, %safepoint ]
-; CHECK-DAG: [ %p, %entry ]
-; CHECK: %cond = icmp
-; CHECK: br i1 %cond
-; Comparing two pointers
- br i1 %cond, label %taken, label %untaken
-
-taken: ; preds = %continue
- ret i1 true
-
-untaken: ; preds = %continue
- ret i1 false
-}
-
-define i1 @test2(i8 addrspace(1)* %p, i8 addrspace(1)* %q, i1 %rare) gc "statepoint-example" {
-; CHECK-LABEL: @test2
-entry:
- %cond = icmp eq i8 addrspace(1)* %p, %q
- br i1 %rare, label %safepoint, label %continue, !prof !0
-
-safepoint: ; preds = %entry
- call void @safepoint() [ "deopt"() ]
- br label %continue
-
-continue: ; preds = %safepoint, %entry
-; CHECK-LABEL: continue:
-; CHECK: phi
-; CHECK-DAG: [ %q.relocated, %safepoint ]
-; CHECK-DAG: [ %q, %entry ]
-; CHECK: phi
-; CHECK-DAG: [ %p.relocated, %safepoint ]
-; CHECK-DAG: [ %p, %entry ]
-; CHECK: %cond = icmp
-; CHECK: br i1 %cond
-; Sanity check that nothing bad happens if already last instruction
-; before terminator
- br i1 %cond, label %taken, label %untaken
-
-taken: ; preds = %continue
- ret i1 true
-
-untaken: ; preds = %continue
- ret i1 false
-}
-
-define i1 @test3(i8 addrspace(1)* %p, i8 addrspace(1)* %q, i1 %rare) gc "statepoint-example" {
-; CHECK-LABEL: @test3
-; CHECK: gc.statepoint
-; CHECK: %cond = icmp
-; CHECK: br i1 %cond
-entry:
- call void @safepoint() [ "deopt"() ]
- %cond = icmp eq i8 addrspace(1)* %p, %q
- br i1 %cond, label %taken, label %untaken
-
-taken: ; preds = %entry
- ret i1 true
-
-untaken: ; preds = %entry
- ret i1 false
-}
-
-declare void @safepoint()
-!0 = !{!"branch_weights", i32 1, i32 10000}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/constants.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/constants.ll
deleted file mode 100644
index eede1b09d161..000000000000
--- a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/constants.ll
+++ /dev/null
@@ -1,51 +0,0 @@
-; RUN: opt -S -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles %s | FileCheck %s
-
-; constants don't get relocated.
-@G = addrspace(1) global i8 5
-
-declare void @foo()
-
-define i8 @test() gc "statepoint-example" {
-; CHECK-LABEL: @test
-; CHECK: gc.statepoint
-; CHECK-NEXT: load i8, i8 addrspace(1)* inttoptr (i64 15 to i8 addrspace(1)*)
-; Mostly just here to show reasonable code test can come from.
-entry:
- call void @foo() [ "deopt"() ]
- %res = load i8, i8 addrspace(1)* inttoptr (i64 15 to i8 addrspace(1)*)
- ret i8 %res
-}
-
-define i8 @test2(i8 addrspace(1)* %p) gc "statepoint-example" {
-; CHECK-LABEL: @test2
-; CHECK: gc.statepoint
-; CHECK-NEXT: gc.relocate
-; CHECK-NEXT: icmp
-; Globals don't move and thus don't get relocated
-entry:
- call void @foo() [ "deopt"() ]
- %cmp = icmp eq i8 addrspace(1)* %p, null
- br i1 %cmp, label %taken, label %not_taken
-
-taken: ; preds = %not_taken, %entry
- ret i8 0
-
-not_taken: ; preds = %entry
- %cmp2 = icmp ne i8 addrspace(1)* %p, null
- br i1 %cmp2, label %taken, label %dead
-
-dead: ; preds = %not_taken
- %addr = getelementptr i8, i8 addrspace(1)* %p, i32 15
- %res = load i8, i8 addrspace(1)* %addr
- ret i8 %res
-}
-
-define i8 @test3(i1 %always_true) gc "statepoint-example" {
-; CHECK-LABEL: @test3
-; CHECK: gc.statepoint
-; CHECK-NEXT: load i8, i8 addrspace(1)* @G
-entry:
- call void @foo() [ "deopt"() ]
- %res = load i8, i8 addrspace(1)* @G, align 1
- ret i8 %res
-}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/deref-pointers.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/deref-pointers.ll
deleted file mode 100644
index f04c6784a878..000000000000
--- a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/deref-pointers.ll
+++ /dev/null
@@ -1,104 +0,0 @@
-; RUN: opt -S -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles < %s | FileCheck %s
-
-; CHECK: declare i8 addrspace(1)* @some_function_ret_deref()
-; CHECK: define i8 addrspace(1)* @test_deref_arg(i8 addrspace(1)* %a)
-; CHECK: define i8 addrspace(1)* @test_deref_or_null_arg(i8 addrspace(1)* %a)
-; CHECK: define i8 addrspace(1)* @test_noalias_arg(i8 addrspace(1)* %a)
-
-declare void @foo()
-
-declare i8 addrspace(1)* @some_function() "gc-leaf-function"
-
-declare void @some_function_consumer(i8 addrspace(1)*) "gc-leaf-function"
-
-declare dereferenceable(4) i8 addrspace(1)* @some_function_ret_deref() "gc-leaf-function"
-declare noalias i8 addrspace(1)* @some_function_ret_noalias() "gc-leaf-function"
-
-define i8 addrspace(1)* @test_deref_arg(i8 addrspace(1)* dereferenceable(4) %a) gc "statepoint-example" {
-entry:
- call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- ret i8 addrspace(1)* %a
-}
-
-define i8 addrspace(1)* @test_deref_or_null_arg(i8 addrspace(1)* dereferenceable_or_null(4) %a) gc "statepoint-example" {
-entry:
- call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- ret i8 addrspace(1)* %a
-}
-
-define i8 addrspace(1)* @test_noalias_arg(i8 addrspace(1)* noalias %a) gc "statepoint-example" {
-entry:
- call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- ret i8 addrspace(1)* %a
-}
-
-define i8 addrspace(1)* @test_deref_retval() gc "statepoint-example" {
-; CHECK-LABEL: @test_deref_retval(
-; CHECK: %a = call i8 addrspace(1)* @some_function()
-entry:
- %a = call dereferenceable(4) i8 addrspace(1)* @some_function()
- call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- ret i8 addrspace(1)* %a
-}
-
-define i8 addrspace(1)* @test_deref_or_null_retval() gc "statepoint-example" {
-; CHECK-LABEL: @test_deref_or_null_retval(
-; CHECK: %a = call i8 addrspace(1)* @some_function()
-entry:
- %a = call dereferenceable_or_null(4) i8 addrspace(1)* @some_function()
- call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- ret i8 addrspace(1)* %a
-}
-
-define i8 addrspace(1)* @test_noalias_retval() gc "statepoint-example" {
-; CHECK-LABEL: @test_noalias_retval(
-; CHECK: %a = call i8 addrspace(1)* @some_function()
-entry:
- %a = call noalias i8 addrspace(1)* @some_function()
- call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- ret i8 addrspace(1)* %a
-}
-
-define i8 @test_md(i8 addrspace(1)* %ptr) gc "statepoint-example" {
-; CHECK-LABEL: @test_md(
-; CHECK: %tmp = load i8, i8 addrspace(1)* %ptr, !tbaa !0
-entry:
- %tmp = load i8, i8 addrspace(1)* %ptr, !tbaa !0
- call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- ret i8 %tmp
-}
-
-define i8 addrspace(1)* @test_decl_only_attribute(i8 addrspace(1)* %ptr) gc "statepoint-example" {
-; CHECK-LABEL: @test_decl_only_attribute(
-; No change here, but the prototype of some_function_ret_deref should have changed.
-; CHECK: call i8 addrspace(1)* @some_function_ret_deref()
-entry:
- %a = call i8 addrspace(1)* @some_function_ret_deref()
- call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- ret i8 addrspace(1)* %a
-}
-
-define i8 addrspace(1)* @test_decl_only_noalias(i8 addrspace(1)* %ptr) gc "statepoint-example" {
-; CHECK-LABEL: @test_decl_only_noalias(
-; No change here, but the prototype of some_function_ret_noalias should have changed.
-; CHECK: call i8 addrspace(1)* @some_function_ret_noalias()
-entry:
- %a = call i8 addrspace(1)* @some_function_ret_noalias()
- call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- ret i8 addrspace(1)* %a
-}
-
-define i8 addrspace(1)* @test_callsite_arg_attribute(i8 addrspace(1)* %ptr) gc "statepoint-example" {
-; CHECK-LABEL: @test_callsite_arg_attribute(
-; CHECK: call void @some_function_consumer(i8 addrspace(1)* %ptr)
-; CHECK: !0 = !{!1, !1, i64 0}
-; CHECK: !1 = !{!"red", !2}
-; CHECK: !2 = !{!"blue"}
-entry:
- call void @some_function_consumer(i8 addrspace(1)* dereferenceable(4) noalias %ptr)
- call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- ret i8 addrspace(1)* %ptr
-}
-!0 = !{!1, !1, i64 0, i64 1}
-!1 = !{!"red", !2}
-!2 = !{!"blue"}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/live-vector.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/live-vector.ll
deleted file mode 100644
index 284a993bae29..000000000000
--- a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/live-vector.ll
+++ /dev/null
@@ -1,149 +0,0 @@
-; Test that we can correctly handle vectors of pointers in statepoint
-; rewriting. Currently, we scalarize, but that's an implementation detail.
-; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -rs4gc-split-vector-values -S | FileCheck %s
-
-; A non-vector relocation for comparison
-
-define i64 addrspace(1)* @test(i64 addrspace(1)* %obj) gc "statepoint-example" {
-; CHECK-LABEL: test
-; CHECK: gc.statepoint
-; CHECK-NEXT: gc.relocate
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: ret i64 addrspace(1)* %obj.relocated.casted
-; A base vector from a argument
-entry:
- call void @do_safepoint() [ "deopt"() ]
- ret i64 addrspace(1)* %obj
-}
-
-define <2 x i64 addrspace(1)*> @test2(<2 x i64 addrspace(1)*> %obj) gc "statepoint-example" {
-; CHECK-LABEL: test2
-; CHECK: extractelement
-; CHECK-NEXT: extractelement
-; CHECK-NEXT: gc.statepoint
-; CHECK-NEXT: gc.relocate
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: gc.relocate
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: insertelement
-; CHECK-NEXT: insertelement
-; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %7
-; A base vector from a load
-entry:
- call void @do_safepoint() [ "deopt"() ]
- ret <2 x i64 addrspace(1)*> %obj
-}
-
-define <2 x i64 addrspace(1)*> @test3(<2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" {
-; CHECK-LABEL: test3
-; CHECK: load
-; CHECK-NEXT: extractelement
-; CHECK-NEXT: extractelement
-; CHECK-NEXT: gc.statepoint
-; CHECK-NEXT: gc.relocate
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: gc.relocate
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: insertelement
-; CHECK-NEXT: insertelement
-; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %7
-; When a statepoint is an invoke rather than a call
-entry:
- %obj = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
- call void @do_safepoint() [ "deopt"() ]
- ret <2 x i64 addrspace(1)*> %obj
-}
-
-declare i32 @fake_personality_function()
-
-define <2 x i64 addrspace(1)*> @test4(<2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" personality i32 ()* @fake_personality_function {
-; CHECK-LABEL: test4
-; CHECK: load
-; CHECK-NEXT: extractelement
-; CHECK-NEXT: extractelement
-; CHECK-NEXT: gc.statepoint
-entry:
- %obj = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
- invoke void @do_safepoint() [ "deopt"() ]
- to label %normal_return unwind label %exceptional_return
-
-normal_return: ; preds = %entry
-; CHECK-LABEL: normal_return:
-; CHECK: gc.relocate
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: gc.relocate
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: insertelement
-; CHECK-NEXT: insertelement
-; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %7
- ret <2 x i64 addrspace(1)*> %obj
-
-exceptional_return: ; preds = %entry
-; CHECK-LABEL: exceptional_return:
-; CHECK: gc.relocate
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: gc.relocate
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: insertelement
-; CHECK-NEXT: insertelement
-; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %13
-; Can we handle an insert element with a constant offset? This effectively
-; tests both the equal and inequal case since we have to relocate both indices
-; in the vector.
- %landing_pad4 = landingpad token
- cleanup
- ret <2 x i64 addrspace(1)*> %obj
-}
-
-define <2 x i64 addrspace(1)*> @test5(i64 addrspace(1)* %p) gc "statepoint-example" {
-; CHECK-LABEL: test5
-; CHECK: insertelement
-; CHECK-NEXT: extractelement
-; CHECK-NEXT: extractelement
-; CHECK-NEXT: gc.statepoint
-; CHECK-NEXT: gc.relocate
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: gc.relocate
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: insertelement
-; CHECK-NEXT: insertelement
-; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %7
-; A base vector from a load
-entry:
- %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %p, i32 0
- call void @do_safepoint() [ "deopt"() ]
- ret <2 x i64 addrspace(1)*> %vec
-}
-
-define <2 x i64 addrspace(1)*> @test6(i1 %cnd, <2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" {
-; CHECK-LABEL: test6
-entry:
- br i1 %cnd, label %taken, label %untaken
-
-taken: ; preds = %entry
- %obja = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
- br label %merge
-
-untaken: ; preds = %entry
- %objb = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
- br label %merge
-
-merge: ; preds = %untaken, %taken
-; CHECK-LABEL: merge:
-; CHECK-NEXT: = phi
-; CHECK-NEXT: extractelement
-; CHECK-NEXT: extractelement
-; CHECK-NEXT: gc.statepoint
-; CHECK-NEXT: gc.relocate
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: gc.relocate
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: insertelement
-; CHECK-NEXT: insertelement
-; CHECK-NEXT: ret <2 x i64 addrspace(1)*>
- %obj = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ]
- call void @do_safepoint() [ "deopt"() ]
- ret <2 x i64 addrspace(1)*> %obj
-}
-
-declare void @do_safepoint()
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/liveness-basics.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/liveness-basics.ll
deleted file mode 100644
index c5b213f4c82d..000000000000
--- a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/liveness-basics.ll
+++ /dev/null
@@ -1,165 +0,0 @@
-; A collection of liveness test cases to ensure we're reporting the
-; correct live values at statepoints
-; RUN: opt -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-rematerialization-threshold=0 -S < %s | FileCheck %s
-
-; Tests to make sure we consider %obj live in both the taken and untaken
-; predeccessor of merge.
-
-define i64 addrspace(1)* @test1(i1 %cmp, i64 addrspace(1)* %obj) gc "statepoint-example" {
-; CHECK-LABEL: @test1
-entry:
- br i1 %cmp, label %taken, label %untaken
-
-taken: ; preds = %entry
-; CHECK-LABEL: taken:
-; CHECK-NEXT: gc.statepoint
-; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: br label %merge
- call void @foo() [ "deopt"() ]
- br label %merge
-
-untaken: ; preds = %entry
-; CHECK-LABEL: untaken:
-; CHECK-NEXT: gc.statepoint
-; CHECK-NEXT: %obj.relocated2 = call coldcc i8 addrspace(1)*
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: br label %merge
- call void @foo() [ "deopt"() ]
- br label %merge
-
-merge: ; preds = %untaken, %taken
-; CHECK-LABEL: merge:
-; CHECK-NEXT: %.0 = phi i64 addrspace(1)* [ %obj.relocated.casted, %taken ], [ %obj.relocated2.casted, %untaken ]
-; CHECK-NEXT: ret i64 addrspace(1)* %.0
-; A local kill should not effect liveness in predecessor block
- ret i64 addrspace(1)* %obj
-}
-
-define i64 addrspace(1)* @test2(i1 %cmp, i64 addrspace(1)** %loc) gc "statepoint-example" {
-; CHECK-LABEL: @test2
-entry:
-; CHECK-LABEL: entry:
-; CHECK-NEXT: gc.statepoint
-; CHECK-NEXT: br
- call void @foo() [ "deopt"() ]
- br i1 %cmp, label %taken, label %untaken
-
-taken: ; preds = %entry
-; CHECK-LABEL: taken:
-; CHECK-NEXT: %obj = load
-; CHECK-NEXT: gc.statepoint
-; CHECK-NEXT: gc.relocate
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: ret i64 addrspace(1)* %obj.relocated.casted
-; A local kill should effect values live from a successor phi. Also, we
-; should only propagate liveness from a phi to the appropriate predecessors.
- %obj = load i64 addrspace(1)*, i64 addrspace(1)** %loc
- call void @foo() [ "deopt"() ]
- ret i64 addrspace(1)* %obj
-
-untaken: ; preds = %entry
- ret i64 addrspace(1)* null
-}
-
-define i64 addrspace(1)* @test3(i1 %cmp, i64 addrspace(1)** %loc) gc "statepoint-example" {
-; CHECK-LABEL: @test3
-entry:
- br i1 %cmp, label %taken, label %untaken
-
-taken: ; preds = %entry
-; CHECK-LABEL: taken:
-; CHECK-NEXT: gc.statepoint
-; CHECK-NEXT: %obj = load
-; CHECK-NEXT: gc.statepoint
-; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: br label %merge
- call void @foo() [ "deopt"() ]
- %obj = load i64 addrspace(1)*, i64 addrspace(1)** %loc
- call void @foo() [ "deopt"() ]
- br label %merge
-
-untaken: ; preds = %entry
-; CHECK-LABEL: taken:
-; CHECK-NEXT: gc.statepoint
-; CHECK-NEXT: br label %merge
-; A base pointer must be live if it is needed at a later statepoint,
-; even if the base pointer is otherwise unused.
- call void @foo() [ "deopt"() ]
- br label %merge
-
-merge: ; preds = %untaken, %taken
- %phi = phi i64 addrspace(1)* [ %obj, %taken ], [ null, %untaken ]
- ret i64 addrspace(1)* %phi
-}
-
-define i64 addrspace(1)* @test4(i1 %cmp, i64 addrspace(1)* %obj) gc "statepoint-example" {
-; CHECK-LABEL: @test4
-entry:
-; CHECK-LABEL: entry:
-; CHECK-NEXT: %derived = getelementptr
-; CHECK-NEXT: gc.statepoint
-; CHECK-NEXT: %derived.relocated =
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: %obj.relocated =
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: gc.statepoint
-; CHECK-NEXT: %derived.relocated2 =
-; CHECK-NEXT: bitcast
-
-; Note: It's legal to relocate obj again, but not strictly needed
-; CHECK-NEXT: %obj.relocated3 =
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: ret i64 addrspace(1)* %derived.relocated2.casted
-;
-; Make sure that a phi def visited during iteration is considered a kill.
-; Also, liveness after base pointer analysis can change based on new uses,
-; not just new defs.
- %derived = getelementptr i64, i64 addrspace(1)* %obj, i64 8
- call void @foo() [ "deopt"() ]
- call void @foo() [ "deopt"() ]
- ret i64 addrspace(1)* %derived
-}
-
-declare void @consume(...) readonly "gc-leaf-function"
-
-define i64 addrspace(1)* @test5(i1 %cmp, i64 addrspace(1)* %obj) gc "statepoint-example" {
-; CHECK-LABEL: @test5
-entry:
- br i1 %cmp, label %taken, label %untaken
-
-taken: ; preds = %entry
-; CHECK-LABEL: taken:
-; CHECK-NEXT: gc.statepoint
-; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: br label %merge
- call void @foo() [ "deopt"() ]
- br label %merge
-
-untaken: ; preds = %entry
-; CHECK-LABEL: untaken:
-; CHECK-NEXT: br label %merge
- br label %merge
-
-merge: ; preds = %untaken, %taken
-; CHECK-LABEL: merge:
-; CHECK-NEXT: %.0 = phi i64 addrspace(1)*
-; CHECK-NEXT: %obj2a = phi
-; CHECK-NEXT: @consume
-; CHECK-NEXT: br label %final
- %obj2a = phi i64 addrspace(1)* [ %obj, %taken ], [ null, %untaken ]
- call void (...) @consume(i64 addrspace(1)* %obj2a)
- br label %final
-
-final: ; preds = %merge
-; CHECK-LABEL: final:
-; CHECK-NEXT: @consume
-; CHECK-NEXT: ret i64 addrspace(1)* %.0
- call void (...) @consume(i64 addrspace(1)* %obj2a)
- ret i64 addrspace(1)* %obj
-}
-
-declare void @foo()
-
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/patchable-statepoints.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/patchable-statepoints.ll
deleted file mode 100644
index 8f5c0ff4a710..000000000000
--- a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/patchable-statepoints.ll
+++ /dev/null
@@ -1,44 +0,0 @@
-; RUN: opt -S -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles < %s | FileCheck %s
-
-declare void @f()
-declare i32 @personality_function()
-
-define void @test_id() gc "statepoint-example" personality i32 ()* @personality_function {
-; CHECK-LABEL: @test_id(
-entry:
-; CHECK-LABEL: entry:
-; CHECK: invoke token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 100, i32 0, void ()* @f
- invoke void @f() "statepoint-id"="100" to label %normal_return unwind label %exceptional_return
-
-normal_return:
- ret void
-
-exceptional_return:
- %landing_pad4 = landingpad {i8*, i32} cleanup
- ret void
-}
-
-define void @test_num_patch_bytes() gc "statepoint-example" personality i32 ()* @personality_function {
-; CHECK-LABEL: @test_num_patch_bytes(
-entry:
-; CHECK-LABEL: entry:
-; CHECK: invoke token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 99, void ()* @f,
- invoke void @f() "statepoint-num-patch-bytes"="99" to label %normal_return unwind label %exceptional_return
-
-normal_return:
- ret void
-
-exceptional_return:
- %landing_pad4 = landingpad {i8*, i32} cleanup
- ret void
-}
-
-declare void @do_safepoint()
-define void @gc.safepoint_poll() {
-entry:
- call void @do_safepoint()
- ret void
-}
-
-; CHECK-NOT: statepoint-id
-; CHECK-NOT: statepoint-num-patch_bytes
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/preprocess.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/preprocess.ll
deleted file mode 100644
index e0bd542aa5d5..000000000000
--- a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/preprocess.ll
+++ /dev/null
@@ -1,62 +0,0 @@
-; RUN: opt -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -S < %s | FileCheck %s
-
-; Test to make sure we destroy LCSSA's single entry phi nodes before
-; running liveness
-
-declare void @consume(...) "gc-leaf-function"
-
-define void @test6(i64 addrspace(1)* %obj) gc "statepoint-example" {
-; CHECK-LABEL: @test6
-entry:
- br label %next
-
-next: ; preds = %entry
-; CHECK-LABEL: next:
-; CHECK-NEXT: gc.statepoint
-; CHECK-NEXT: gc.relocate
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: @consume(i64 addrspace(1)* %obj.relocated.casted)
-; CHECK-NEXT: @consume(i64 addrspace(1)* %obj.relocated.casted)
-; Need to delete unreachable gc.statepoint call
- %obj2 = phi i64 addrspace(1)* [ %obj, %entry ]
- call void @foo() [ "deopt"() ]
- call void (...) @consume(i64 addrspace(1)* %obj2)
- call void (...) @consume(i64 addrspace(1)* %obj)
- ret void
-}
-
-define void @test7() gc "statepoint-example" {
-; CHECK-LABEL: test7
-; CHECK-NOT: gc.statepoint
-; Need to delete unreachable gc.statepoint invoke - tested seperately given
-; a correct implementation could only remove the instructions, not the block
- ret void
-
-unreached: ; preds = %unreached
- %obj = phi i64 addrspace(1)* [ null, %unreached ]
- call void @foo() [ "deopt"() ]
- call void (...) @consume(i64 addrspace(1)* %obj)
- br label %unreached
-}
-
-define void @test8() gc "statepoint-example" personality i32 ()* undef {
-; CHECK-LABEL: test8
-; CHECK-NOT: gc.statepoint
-; Bound the last check-not
- ret void
-
-unreached: ; No predecessors!
- invoke void @foo() [ "deopt"() ]
-; CHECK-LABEL: @foo
- to label %normal_return unwind label %exceptional_return
-
-normal_return: ; preds = %unreached
- ret void
-
-exceptional_return: ; preds = %unreached
- %landing_pad4 = landingpad { i8*, i32 }
- cleanup
- ret void
-}
-
-declare void @foo()
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/relocation.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/relocation.ll
deleted file mode 100644
index 584dc32b7529..000000000000
--- a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/relocation.ll
+++ /dev/null
@@ -1,279 +0,0 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -spp-rematerialization-threshold=0 -S 2>&1 | FileCheck %s
-
-
-declare void @foo()
-
-declare void @use(...) "gc-leaf-function"
-
-define i64 addrspace(1)* @test1(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj2, i1 %condition) gc "statepoint-example" {
-; CHECK-LABEL: @test1
-; CHECK-DAG: %obj.relocated
-; CHECK-DAG: %obj2.relocated
-entry:
- call void @foo() [ "deopt"() ]
- br label %joint
-
-joint: ; preds = %joint2, %entry
-; CHECK-LABEL: joint:
-; CHECK: %phi1 = phi i64 addrspace(1)* [ %obj.relocated.casted, %entry ], [ %obj3, %joint2 ]
- %phi1 = phi i64 addrspace(1)* [ %obj, %entry ], [ %obj3, %joint2 ]
- br i1 %condition, label %use, label %joint2
-
-use: ; preds = %joint
- br label %joint2
-
-joint2: ; preds = %use, %joint
-; CHECK-LABEL: joint2:
-; CHECK: %phi2 = phi i64 addrspace(1)* [ %obj.relocated.casted, %use ], [ %obj2.relocated.casted, %joint ]
-; CHECK: %obj3 = getelementptr i64, i64 addrspace(1)* %obj2.relocated.casted, i32 1
- %phi2 = phi i64 addrspace(1)* [ %obj, %use ], [ %obj2, %joint ]
- %obj3 = getelementptr i64, i64 addrspace(1)* %obj2, i32 1
- br label %joint
-}
-
-declare i64 addrspace(1)* @generate_obj() "gc-leaf-function"
-
-declare void @consume_obj(i64 addrspace(1)*) "gc-leaf-function"
-
-declare i1 @rt() "gc-leaf-function"
-
-define void @test2() gc "statepoint-example" {
-; CHECK-LABEL: @test2
-entry:
- %obj_init = call i64 addrspace(1)* @generate_obj()
- %obj = getelementptr i64, i64 addrspace(1)* %obj_init, i32 42
- br label %loop
-
-loop: ; preds = %loop.backedge, %entry
-; CHECK: loop:
-; CHECK-DAG: [ %obj_init.relocated.casted, %loop.backedge ]
-; CHECK-DAG: [ %obj_init, %entry ]
-; CHECK-DAG: [ %obj.relocated.casted, %loop.backedge ]
-; CHECK-DAG: [ %obj, %entry ]
-; CHECK-NOT: %location = getelementptr i64, i64 addrspace(1)* %obj, i32 %index
- %index = phi i32 [ 0, %entry ], [ %index.inc, %loop.backedge ]
- %location = getelementptr i64, i64 addrspace(1)* %obj, i32 %index
- call void @consume_obj(i64 addrspace(1)* %location)
- %index.inc = add i32 %index, 1
- %condition = call i1 @rt()
- br i1 %condition, label %loop_x, label %loop_y
-
-loop_x: ; preds = %loop
- br label %loop.backedge
-
-loop.backedge: ; preds = %loop_y, %loop_x
- call void @do_safepoint() [ "deopt"() ]
- br label %loop
-
-loop_y: ; preds = %loop
- br label %loop.backedge
-}
-
-declare void @some_call(i8 addrspace(1)*) "gc-leaf-function"
-
-define void @relocate_merge(i1 %cnd, i8 addrspace(1)* %arg) gc "statepoint-example" {
-; CHECK-LABEL: @relocate_merge
-
-bci_0:
- br i1 %cnd, label %if_branch, label %else_branch
-
-if_branch: ; preds = %bci_0
-; CHECK-LABEL: if_branch:
-; CHECK: gc.statepoint
-; CHECK: gc.relocate
- call void @foo() [ "deopt"() ]
- br label %join
-
-else_branch: ; preds = %bci_0
-; CHECK-LABEL: else_branch:
-; CHECK: gc.statepoint
-; CHECK: gc.relocate
-; We need to end up with a single relocation phi updated from both paths
- call void @foo() [ "deopt"() ]
- br label %join
-
-join: ; preds = %else_branch, %if_branch
-; CHECK-LABEL: join:
-; CHECK: phi i8 addrspace(1)*
-; CHECK-DAG: [ %arg.relocated, %if_branch ]
-; CHECK-DAG: [ %arg.relocated2, %else_branch ]
-; CHECK-NOT: phi
- call void @some_call(i8 addrspace(1)* %arg)
- ret void
-}
-
-; Make sure a use in a statepoint gets properly relocated at a previous one.
-; This is basically just making sure that statepoints aren't accidentally
-; treated specially.
-define void @test3(i64 addrspace(1)* %obj) gc "statepoint-example" {
-; CHECK-LABEL: @test3
-; CHECK: gc.statepoint
-; CHECK-NEXT: gc.relocate
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: gc.statepoint
-entry:
- call void undef(i64 undef) [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- %0 = call i32 undef(i64 addrspace(1)* %obj) [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- ret void
-}
-
-; Check specifically for the case where the result of a statepoint needs to
-; be relocated itself
-define void @test4() gc "statepoint-example" {
-; CHECK-LABEL: @test4
-; CHECK: gc.statepoint
-; CHECK: gc.result
-; CHECK: gc.statepoint
-; CHECK: [[RELOCATED:%[^ ]+]] = call {{.*}}gc.relocate
-; CHECK: @use(i8 addrspace(1)* [[RELOCATED]])
- %1 = call i8 addrspace(1)* undef() [ "deopt"() ]
- %2 = call i8 addrspace(1)* undef() [ "deopt"() ]
- call void (...) @use(i8 addrspace(1)* %1)
- unreachable
-}
-
-; Test updating a phi where not all inputs are live to begin with
-define void @test5(i8 addrspace(1)* %arg) gc "statepoint-example" {
-; CHECK-LABEL: test5
-entry:
- %0 = call i8 addrspace(1)* undef() [ "deopt"() ]
- switch i32 undef, label %kill [
- i32 10, label %merge
- i32 13, label %merge
- ]
-
-kill: ; preds = %entry
- br label %merge
-
-merge: ; preds = %kill, %entry, %entry
-; CHECK: merge:
-; CHECK: %test = phi i8 addrspace(1)
-; CHECK-DAG: [ null, %kill ]
-; CHECK-DAG: [ %arg.relocated, %entry ]
-; CHECK-DAG: [ %arg.relocated, %entry ]
- %test = phi i8 addrspace(1)* [ null, %kill ], [ %arg, %entry ], [ %arg, %entry ]
- call void (...) @use(i8 addrspace(1)* %test)
- unreachable
-}
-
-; Check to make sure we handle values live over an entry statepoint
-define void @test6(i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2, i8 addrspace(1)* %arg3) gc "statepoint-example" {
-; CHECK-LABEL: @test6
-entry:
- br i1 undef, label %gc.safepoint_poll.exit2, label %do_safepoint
-
-do_safepoint: ; preds = %entry
-; CHECK-LABEL: do_safepoint:
-; CHECK: gc.statepoint
-; CHECK: arg1.relocated =
-; CHECK: arg2.relocated =
-; CHECK: arg3.relocated =
- call void @foo() [ "deopt"(i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2, i8 addrspace(1)* %arg3) ]
- br label %gc.safepoint_poll.exit2
-
-gc.safepoint_poll.exit2: ; preds = %do_safepoint, %entry
-; CHECK-LABEL: gc.safepoint_poll.exit2:
-; CHECK: phi i8 addrspace(1)*
-; CHECK-DAG: [ %arg3, %entry ]
-; CHECK-DAG: [ %arg3.relocated, %do_safepoint ]
-; CHECK: phi i8 addrspace(1)*
-; CHECK-DAG: [ %arg2, %entry ]
-; CHECK-DAG: [ %arg2.relocated, %do_safepoint ]
-; CHECK: phi i8 addrspace(1)*
-; CHECK-DAG: [ %arg1, %entry ]
-; CHECK-DAG: [ %arg1.relocated, %do_safepoint ]
- call void (...) @use(i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2, i8 addrspace(1)* %arg3)
- ret void
-}
-
-; Check relocation in a loop nest where a relocation happens in the outer
-; but not the inner loop
-define void @test_outer_loop(i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2, i1 %cmp) gc "statepoint-example" {
-; CHECK-LABEL: @test_outer_loop
-
-bci_0:
- br label %outer-loop
-
-outer-loop: ; preds = %outer-inc, %bci_0
-; CHECK-LABEL: outer-loop:
-; CHECK: phi i8 addrspace(1)* [ %arg2, %bci_0 ], [ %arg2.relocated, %outer-inc ]
-; CHECK: phi i8 addrspace(1)* [ %arg1, %bci_0 ], [ %arg1.relocated, %outer-inc ]
- br label %inner-loop
-
-inner-loop: ; preds = %inner-loop, %outer-loop
- br i1 %cmp, label %inner-loop, label %outer-inc
-
-outer-inc: ; preds = %inner-loop
-; CHECK-LABEL: outer-inc:
-; CHECK: %arg1.relocated
-; CHECK: %arg2.relocated
- call void @foo() [ "deopt"(i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2) ]
- br label %outer-loop
-}
-
-; Check that both inner and outer loops get phis when relocation is in
-; inner loop
-define void @test_inner_loop(i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2, i1 %cmp) gc "statepoint-example" {
-; CHECK-LABEL: @test_inner_loop
-
-bci_0:
- br label %outer-loop
-
-outer-loop: ; preds = %outer-inc, %bci_0
-; CHECK-LABEL: outer-loop:
-; CHECK: phi i8 addrspace(1)* [ %arg2, %bci_0 ], [ %arg2.relocated, %outer-inc ]
-; CHECK: phi i8 addrspace(1)* [ %arg1, %bci_0 ], [ %arg1.relocated, %outer-inc ]
- br label %inner-loop
-; CHECK-LABEL: inner-loop
-; CHECK: phi i8 addrspace(1)*
-; CHECK-DAG: %outer-loop ]
-; CHECK-DAG: [ %arg2.relocated, %inner-loop ]
-; CHECK: phi i8 addrspace(1)*
-; CHECK-DAG: %outer-loop ]
-; CHECK-DAG: [ %arg1.relocated, %inner-loop ]
-; CHECK: gc.statepoint
-; CHECK: %arg1.relocated
-; CHECK: %arg2.relocated
-
-inner-loop: ; preds = %inner-loop, %outer-loop
- call void @foo() [ "deopt"(i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2) ]
- br i1 %cmp, label %inner-loop, label %outer-inc
-
-outer-inc: ; preds = %inner-loop
-; CHECK-LABEL: outer-inc:
-; This test shows why updating just those uses of the original value being
-; relocated dominated by the inserted relocation is not always sufficient.
- br label %outer-loop
-}
-
-define i64 addrspace(1)* @test7(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj2, i1 %condition) gc "statepoint-example" {
-; CHECK-LABEL: @test7
-entry:
- br i1 %condition, label %branch2, label %join
-
-branch2: ; preds = %entry
- br i1 %condition, label %callbb, label %join2
-
-callbb: ; preds = %branch2
- call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
- br label %join
-
-join: ; preds = %callbb, %entry
-; CHECK-LABEL: join:
-; CHECK: phi i64 addrspace(1)* [ %obj.relocated.casted, %callbb ], [ %obj, %entry ]
-; CHECK: phi i64 addrspace(1)*
-; CHECK-DAG: [ %obj, %entry ]
-; CHECK-DAG: [ %obj2.relocated.casted, %callbb ]
- %phi1 = phi i64 addrspace(1)* [ %obj, %entry ], [ %obj2, %callbb ]
- br label %join2
-
-join2: ; preds = %join, %branch2
-; CHECK-LABEL: join2:
-; CHECK: phi2 = phi i64 addrspace(1)*
-; CHECK-DAG: %join ]
-; CHECK-DAG: [ %obj2, %branch2 ]
- %phi2 = phi i64 addrspace(1)* [ %obj, %join ], [ %obj2, %branch2 ]
- ret i64 addrspace(1)* %phi2
-}
-
-declare void @do_safepoint()
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/rematerialize-derived-pointers.ll b/test/Transforms/RewriteStatepointsForGC/deopt-bundles/rematerialize-derived-pointers.ll
deleted file mode 100644
index 0020c5116c13..000000000000
--- a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/rematerialize-derived-pointers.ll
+++ /dev/null
@@ -1,150 +0,0 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -S 2>&1 | FileCheck %s
-
-
-declare void @use_obj16(i16 addrspace(1)*) "gc-leaf-function"
-declare void @use_obj32(i32 addrspace(1)*) "gc-leaf-function"
-declare void @use_obj64(i64 addrspace(1)*) "gc-leaf-function"
-
-declare void @do_safepoint()
-
-define void @test_gep_const(i32 addrspace(1)* %base) gc "statepoint-example" {
-; CHECK-LABEL: test_gep_const
-entry:
- %ptr = getelementptr i32, i32 addrspace(1)* %base, i32 15
- call void @do_safepoint() [ "deopt"() ]
- call void @use_obj32(i32 addrspace(1)* %base)
- call void @use_obj32(i32 addrspace(1)* %ptr)
- ret void
-}
-
-define void @test_gep_idx(i32 addrspace(1)* %base, i32 %idx) gc "statepoint-example" {
-; CHECK-LABEL: test_gep_idx
-entry:
- %ptr = getelementptr i32, i32 addrspace(1)* %base, i32 %idx
- call void @do_safepoint() [ "deopt"() ]
- call void @use_obj32(i32 addrspace(1)* %base)
- call void @use_obj32(i32 addrspace(1)* %ptr)
- ret void
-}
-
-define void @test_bitcast(i32 addrspace(1)* %base) gc "statepoint-example" {
-; CHECK-LABEL: test_bitcast
-entry:
- %ptr = bitcast i32 addrspace(1)* %base to i64 addrspace(1)*
- call void @do_safepoint() [ "deopt"() ]
- call void @use_obj32(i32 addrspace(1)* %base)
- call void @use_obj64(i64 addrspace(1)* %ptr)
- ret void
-}
-
-define void @test_bitcast_gep(i32 addrspace(1)* %base) gc "statepoint-example" {
-; CHECK-LABEL: test_bitcast_gep
-entry:
- %ptr.gep = getelementptr i32, i32 addrspace(1)* %base, i32 15
- %ptr.cast = bitcast i32 addrspace(1)* %ptr.gep to i64 addrspace(1)*
- call void @do_safepoint() [ "deopt"() ]
- call void @use_obj32(i32 addrspace(1)* %base)
- call void @use_obj64(i64 addrspace(1)* %ptr.cast)
- ret void
-}
-
-define void @test_intersecting_chains(i32 addrspace(1)* %base, i32 %idx) gc "statepoint-example" {
-; CHECK-LABEL: test_intersecting_chains
-entry:
- %ptr.gep = getelementptr i32, i32 addrspace(1)* %base, i32 15
- %ptr.cast = bitcast i32 addrspace(1)* %ptr.gep to i64 addrspace(1)*
- %ptr.cast2 = bitcast i32 addrspace(1)* %ptr.gep to i16 addrspace(1)*
- call void @do_safepoint() [ "deopt"() ]
- call void @use_obj64(i64 addrspace(1)* %ptr.cast)
- call void @use_obj16(i16 addrspace(1)* %ptr.cast2)
- ret void
-}
-
-define void @test_cost_threshold(i32 addrspace(1)* %base, i32 %idx1, i32 %idx2, i32 %idx3) gc "statepoint-example" {
-; CHECK-LABEL: test_cost_threshold
-entry:
- %ptr.gep = getelementptr i32, i32 addrspace(1)* %base, i32 15
- %ptr.gep2 = getelementptr i32, i32 addrspace(1)* %ptr.gep, i32 %idx1
- %ptr.gep3 = getelementptr i32, i32 addrspace(1)* %ptr.gep2, i32 %idx2
- %ptr.gep4 = getelementptr i32, i32 addrspace(1)* %ptr.gep3, i32 %idx3
- %ptr.cast = bitcast i32 addrspace(1)* %ptr.gep4 to i64 addrspace(1)*
- call void @do_safepoint() [ "deopt"() ]
- call void @use_obj64(i64 addrspace(1)* %ptr.cast)
- ret void
-}
-
-define void @test_two_derived(i32 addrspace(1)* %base) gc "statepoint-example" {
-; CHECK-LABEL: test_two_derived
-entry:
- %ptr = getelementptr i32, i32 addrspace(1)* %base, i32 15
- %ptr2 = getelementptr i32, i32 addrspace(1)* %base, i32 12
- call void @do_safepoint() [ "deopt"() ]
- call void @use_obj32(i32 addrspace(1)* %ptr)
- call void @use_obj32(i32 addrspace(1)* %ptr2)
- ret void
-}
-
-define void @test_gep_smallint_array([3 x i32] addrspace(1)* %base) gc "statepoint-example" {
-; CHECK-LABEL: test_gep_smallint_array
-entry:
- %ptr = getelementptr [3 x i32], [3 x i32] addrspace(1)* %base, i32 0, i32 2
- call void @do_safepoint() [ "deopt"() ]
- call void @use_obj32(i32 addrspace(1)* %ptr)
- ret void
-}
-
-declare i32 @fake_personality_function()
-
-define void @test_invoke(i32 addrspace(1)* %base) gc "statepoint-example" personality i32 ()* @fake_personality_function {
-; CHECK-LABEL: test_invoke
-entry:
- %ptr.gep = getelementptr i32, i32 addrspace(1)* %base, i32 15
- %ptr.cast = bitcast i32 addrspace(1)* %ptr.gep to i64 addrspace(1)*
- %ptr.cast2 = bitcast i32 addrspace(1)* %ptr.gep to i16 addrspace(1)*
- invoke void @do_safepoint() [ "deopt"() ]
- to label %normal unwind label %exception
-
-normal: ; preds = %entry
- call void @use_obj64(i64 addrspace(1)* %ptr.cast)
- call void @use_obj16(i16 addrspace(1)* %ptr.cast2)
- ret void
-
-exception: ; preds = %entry
- %landing_pad4 = landingpad token
- cleanup
- call void @use_obj64(i64 addrspace(1)* %ptr.cast)
- call void @use_obj16(i16 addrspace(1)* %ptr.cast2)
- ret void
-}
-
-define void @test_loop(i32 addrspace(1)* %base) gc "statepoint-example" {
-; CHECK-LABEL: test_loop
-entry:
- %ptr.gep = getelementptr i32, i32 addrspace(1)* %base, i32 15
- br label %loop
-
-loop: ; preds = %loop, %entry
- call void @use_obj32(i32 addrspace(1)* %ptr.gep)
- call void @do_safepoint() [ "deopt"() ]
- br label %loop
-}
-
-define void @test_too_long(i32 addrspace(1)* %base) gc "statepoint-example" {
-; CHECK-LABEL: test_too_long
-entry:
- %ptr.gep = getelementptr i32, i32 addrspace(1)* %base, i32 15
- %ptr.gep1 = getelementptr i32, i32 addrspace(1)* %ptr.gep, i32 15
- %ptr.gep2 = getelementptr i32, i32 addrspace(1)* %ptr.gep1, i32 15
- %ptr.gep3 = getelementptr i32, i32 addrspace(1)* %ptr.gep2, i32 15
- %ptr.gep4 = getelementptr i32, i32 addrspace(1)* %ptr.gep3, i32 15
- %ptr.gep5 = getelementptr i32, i32 addrspace(1)* %ptr.gep4, i32 15
- %ptr.gep6 = getelementptr i32, i32 addrspace(1)* %ptr.gep5, i32 15
- %ptr.gep7 = getelementptr i32, i32 addrspace(1)* %ptr.gep6, i32 15
- %ptr.gep8 = getelementptr i32, i32 addrspace(1)* %ptr.gep7, i32 15
- %ptr.gep9 = getelementptr i32, i32 addrspace(1)* %ptr.gep8, i32 15
- %ptr.gep10 = getelementptr i32, i32 addrspace(1)* %ptr.gep9, i32 15
- %ptr.gep11 = getelementptr i32, i32 addrspace(1)* %ptr.gep10, i32 15
- call void @do_safepoint() [ "deopt"() ]
- call void @use_obj32(i32 addrspace(1)* %ptr.gep11)
- ret void
-}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-intrinsic-cconv.ll b/test/Transforms/RewriteStatepointsForGC/deopt-intrinsic-cconv.ll
new file mode 100644
index 000000000000..b74c1963ddfd
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-intrinsic-cconv.ll
@@ -0,0 +1,16 @@
+; RUN: opt -rewrite-statepoints-for-gc -S < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+declare cc42 double @llvm.experimental.deoptimize.f64(...)
+
+define double @caller_3() gc "statepoint-example" {
+; CHECK-LABEL: @caller_3(
+; CHECK: call cc42 token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint
+; CHECK: unreachable
+
+entry:
+ %val = call cc42 double(...) @llvm.experimental.deoptimize.f64() [ "deopt"() ]
+ ret double %val
+}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-intrinsic.ll b/test/Transforms/RewriteStatepointsForGC/deopt-intrinsic.ll
new file mode 100644
index 000000000000..ef0e2bd61afc
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/deopt-intrinsic.ll
@@ -0,0 +1,35 @@
+; RUN: opt -rewrite-statepoints-for-gc -S < %s | FileCheck %s
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.11.0"
+
+declare i32 @llvm.experimental.deoptimize.i32(...)
+declare void @llvm.experimental.deoptimize.isVoid(...)
+
+define i32 @caller_0(i32 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: @caller_0(
+; CHECK: @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @__llvm_deoptimize, i32 0
+; CHECK: unreachable
+entry:
+ %v = call i32(...) @llvm.experimental.deoptimize.i32() [ "deopt"(i32 0, i32 addrspace(1)* %ptr) ]
+ ret i32 %v
+}
+
+
+define i32 @caller_1(i32 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: @caller_1
+; CHECK: @llvm.experimental.gc.statepoint.p0f_isVoidi32p1i32f(i64 2882400000, i32 0, void (i32, i32 addrspace(1)*)* bitcast (void ()* @__llvm_deoptimize to void (i32, i32 addrspace(1)*)*), i32 2, i32 0, i32 50, i32 addrspace(1)* %ptr
+; CHECK: unreachable
+entry:
+ %v = call i32(...) @llvm.experimental.deoptimize.i32(i32 50, i32 addrspace(1)* %ptr) [ "deopt"(i32 0) ]
+ ret i32 %v
+}
+
+define void @caller_2(i32 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: @caller_2(
+; CHECK: @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 2882400000, i32 0, void ()* @__llvm_deoptimize, i32 0
+; CHECK: unreachable
+entry:
+ call void(...) @llvm.experimental.deoptimize.isVoid() [ "deopt"(i32 0, i32 addrspace(1)* %ptr) ]
+ ret void
+}
diff --git a/test/Transforms/RewriteStatepointsForGC/deref-pointers.ll b/test/Transforms/RewriteStatepointsForGC/deref-pointers.ll
index b4954f6a9b60..551da0843ad0 100644
--- a/test/Transforms/RewriteStatepointsForGC/deref-pointers.ll
+++ b/test/Transforms/RewriteStatepointsForGC/deref-pointers.ll
@@ -1,114 +1,104 @@
; RUN: opt -S -rewrite-statepoints-for-gc < %s | FileCheck %s
-declare void @foo()
-declare i8 addrspace(1)* @some_function()
-declare void @some_function_consumer(i8 addrspace(1)*)
-declare dereferenceable(4) i8 addrspace(1)* @some_function_ret_deref()
; CHECK: declare i8 addrspace(1)* @some_function_ret_deref()
-declare noalias i8 addrspace(1)* @some_function_ret_noalias()
-; CHECK: declare i8 addrspace(1)* @some_function_ret_noalias()
+; CHECK: define i8 addrspace(1)* @test_deref_arg(i8 addrspace(1)* %a)
+; CHECK: define i8 addrspace(1)* @test_deref_or_null_arg(i8 addrspace(1)* %a)
+; CHECK: define i8 addrspace(1)* @test_noalias_arg(i8 addrspace(1)* %a)
+
+declare void @foo()
+
+declare i8 addrspace(1)* @some_function() "gc-leaf-function"
+
+declare void @some_function_consumer(i8 addrspace(1)*) "gc-leaf-function"
+
+declare dereferenceable(4) i8 addrspace(1)* @some_function_ret_deref() "gc-leaf-function"
+declare noalias i8 addrspace(1)* @some_function_ret_noalias() "gc-leaf-function"
define i8 addrspace(1)* @test_deref_arg(i8 addrspace(1)* dereferenceable(4) %a) gc "statepoint-example" {
-; CHECK: define i8 addrspace(1)* @test_deref_arg(i8 addrspace(1)* %a)
entry:
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
ret i8 addrspace(1)* %a
}
define i8 addrspace(1)* @test_deref_or_null_arg(i8 addrspace(1)* dereferenceable_or_null(4) %a) gc "statepoint-example" {
-; CHECK: define i8 addrspace(1)* @test_deref_or_null_arg(i8 addrspace(1)* %a)
entry:
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i8 addrspace(1)* %a
+}
+
+define i8 addrspace(1)* @test_noalias_arg(i8 addrspace(1)* noalias %a) gc "statepoint-example" {
+entry:
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
ret i8 addrspace(1)* %a
}
define i8 addrspace(1)* @test_deref_retval() gc "statepoint-example" {
; CHECK-LABEL: @test_deref_retval(
+; CHECK: %a = call i8 addrspace(1)* @some_function()
entry:
%a = call dereferenceable(4) i8 addrspace(1)* @some_function()
-; CHECK: %a = call i8 addrspace(1)* @some_function()
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
ret i8 addrspace(1)* %a
}
define i8 addrspace(1)* @test_deref_or_null_retval() gc "statepoint-example" {
; CHECK-LABEL: @test_deref_or_null_retval(
+; CHECK: %a = call i8 addrspace(1)* @some_function()
entry:
%a = call dereferenceable_or_null(4) i8 addrspace(1)* @some_function()
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ ret i8 addrspace(1)* %a
+}
+
+define i8 addrspace(1)* @test_noalias_retval() gc "statepoint-example" {
+; CHECK-LABEL: @test_noalias_retval(
; CHECK: %a = call i8 addrspace(1)* @some_function()
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+entry:
+ %a = call noalias i8 addrspace(1)* @some_function()
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
ret i8 addrspace(1)* %a
}
define i8 @test_md(i8 addrspace(1)* %ptr) gc "statepoint-example" {
; CHECK-LABEL: @test_md(
- entry:
; CHECK: %tmp = load i8, i8 addrspace(1)* %ptr, !tbaa !0
+entry:
%tmp = load i8, i8 addrspace(1)* %ptr, !tbaa !0
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
ret i8 %tmp
}
define i8 addrspace(1)* @test_decl_only_attribute(i8 addrspace(1)* %ptr) gc "statepoint-example" {
; CHECK-LABEL: @test_decl_only_attribute(
-entry:
; No change here, but the prototype of some_function_ret_deref should have changed.
; CHECK: call i8 addrspace(1)* @some_function_ret_deref()
- %a = call i8 addrspace(1)* @some_function_ret_deref()
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
- ret i8 addrspace(1)* %a
-}
-
-define i8 addrspace(1)* @test_callsite_arg_attribute(i8 addrspace(1)* %ptr) gc "statepoint-example" {
-; CHECK-LABEL: @test_callsite_arg_attribute(
-entry:
-; CHECK: call void @some_function_consumer(i8 addrspace(1)* %ptr)
- call void @some_function_consumer(i8 addrspace(1)* dereferenceable(4) %ptr)
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
- ret i8 addrspace(1)* %ptr
-}
-
-define i8 addrspace(1)* @test_noalias_arg(i8 addrspace(1)* noalias %a) gc "statepoint-example" {
-; CHECK: define i8 addrspace(1)* @test_noalias_arg(i8 addrspace(1)* %a)
entry:
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
- ret i8 addrspace(1)* %a
-}
-
-define i8 addrspace(1)* @test_noalias_retval() gc "statepoint-example" {
-; CHECK-LABEL: @test_noalias_retval(
-entry:
- %a = call noalias i8 addrspace(1)* @some_function()
-; CHECK: %a = call i8 addrspace(1)* @some_function()
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ %a = call i8 addrspace(1)* @some_function_ret_deref()
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
ret i8 addrspace(1)* %a
}
define i8 addrspace(1)* @test_decl_only_noalias(i8 addrspace(1)* %ptr) gc "statepoint-example" {
; CHECK-LABEL: @test_decl_only_noalias(
-entry:
; No change here, but the prototype of some_function_ret_noalias should have changed.
; CHECK: call i8 addrspace(1)* @some_function_ret_noalias()
+entry:
%a = call i8 addrspace(1)* @some_function_ret_noalias()
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
ret i8 addrspace(1)* %a
}
-define i8 addrspace(1)* @test_callsite_arg_noalias(i8 addrspace(1)* %ptr) gc "statepoint-example" {
-; CHECK-LABEL: @test_callsite_arg_noalias(
-entry:
+define i8 addrspace(1)* @test_callsite_arg_attribute(i8 addrspace(1)* %ptr) gc "statepoint-example" {
+; CHECK-LABEL: @test_callsite_arg_attribute(
; CHECK: call void @some_function_consumer(i8 addrspace(1)* %ptr)
- call void @some_function_consumer(i8 addrspace(1)* noalias %ptr)
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+; CHECK: !0 = !{!1, !1, i64 0}
+; CHECK: !1 = !{!"red", !2}
+; CHECK: !2 = !{!"blue"}
+entry:
+ call void @some_function_consumer(i8 addrspace(1)* dereferenceable(4) noalias %ptr)
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
ret i8 addrspace(1)* %ptr
}
-
-declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
-
!0 = !{!1, !1, i64 0, i64 1}
!1 = !{!"red", !2}
!2 = !{!"blue"}
-
-; CHECK: !0 = !{!1, !1, i64 0}
-; CHECK: !1 = !{!"red", !2}
-; CHECK: !2 = !{!"blue"}
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/gc-relocate-creation.ll b/test/Transforms/RewriteStatepointsForGC/gc-relocate-creation.ll
index 0d53af704df2..714d7399c5b3 100644
--- a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/gc-relocate-creation.ll
+++ b/test/Transforms/RewriteStatepointsForGC/gc-relocate-creation.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -S 2>&1 | FileCheck %s
+; RUN: opt < %s -rewrite-statepoints-for-gc -S | FileCheck %s
; This test is to verify gc.relocate can handle pointer to vector of
; pointers (<2 x i32 addrspace(1)*> addrspace(1)* in this case).
diff --git a/test/Transforms/RewriteStatepointsForGC/gc_relocate_creation.ll b/test/Transforms/RewriteStatepointsForGC/gc_relocate_creation.ll
deleted file mode 100644
index 3cd4bc65d1a5..000000000000
--- a/test/Transforms/RewriteStatepointsForGC/gc_relocate_creation.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -S 2>&1 | FileCheck %s
-; This test is to verify gc.relocate can handle pointer to vector of
-; pointers (<2 x i32 addrspace(1)*> addrspace(1)* in this case).
-; The old scheme to create a gc.relocate of <2 x i32 addrspace(1)*> addrspace(1)*
-; type will fail because llvm does not support mangling vector of pointers.
-; The new scheme will create all gc.relocate to i8 addrspace(1)* type and
-; then bitcast to the correct type.
-
-declare void @foo()
-declare void @use(...)
-declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
-
-define void @test1(<2 x i32 addrspace(1)*> addrspace(1)* %obj) gc "statepoint-example" {
-entry:
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
-; CHECK: %obj.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %safepoint_token, i32 7, i32 7)
-; CHECK-NEXT: %obj.relocated.casted = bitcast i8 addrspace(1)* %obj.relocated to <2 x i32 addrspace(1)*> addrspace(1)*
- call void (...) @use(<2 x i32 addrspace(1)*> addrspace(1)* %obj)
- ret void
-}
diff --git a/test/Transforms/PlaceSafepoints/invokes.ll b/test/Transforms/RewriteStatepointsForGC/invokes.ll
index a93e4545bc97..afcb6ad559de 100644
--- a/test/Transforms/PlaceSafepoints/invokes.ll
+++ b/test/Transforms/RewriteStatepointsForGC/invokes.ll
@@ -1,15 +1,15 @@
-; RUN: opt %s -S -place-safepoints | FileCheck %s
+; RUN: opt < %s -S -rewrite-statepoints-for-gc | FileCheck %s
-declare i64 addrspace(1)* @"some_call"(i64 addrspace(1)*)
-declare i32 @"personality_function"()
+declare i64 addrspace(1)* @some_call(i64 addrspace(1)*)
+declare i32 @personality_function()
-define i64 addrspace(1)* @test_basic(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1) gc "statepoint-example" personality i32 ()* @"personality_function" {
+define i64 addrspace(1)* @test_basic(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1) gc "statepoint-example" personality i32 ()* @personality_function {
; CHECK-LABEL: entry:
entry:
; CHECK: invoke
; CHECK: statepoint
; CHECK: some_call
- %ret_val = invoke i64 addrspace(1)* @"some_call"(i64 addrspace(1)* %obj)
+ %ret_val = invoke i64 addrspace(1)* @some_call(i64 addrspace(1)* %obj)
to label %normal_return unwind label %exceptional_return
; CHECK-LABEL: normal_return:
@@ -24,18 +24,18 @@ normal_return:
; CHECK: ret i64
exceptional_return:
- %landing_pad4 = landingpad {i8*, i32}
+ %landing_pad4 = landingpad token
cleanup
ret i64 addrspace(1)* %obj1
}
-define i64 addrspace(1)* @test_two_invokes(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1) gc "statepoint-example" personality i32 ()* @"personality_function" {
+define i64 addrspace(1)* @test_two_invokes(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1) gc "statepoint-example" personality i32 ()* @personality_function {
; CHECK-LABEL: entry:
entry:
; CHECK: invoke
; CHECK: statepoint
; CHECK: some_call
- %ret_val1 = invoke i64 addrspace(1)* @"some_call"(i64 addrspace(1)* %obj)
+ %ret_val1 = invoke i64 addrspace(1)* @some_call(i64 addrspace(1)* %obj)
to label %second_invoke unwind label %exceptional_return
; CHECK-LABEL: second_invoke:
@@ -43,7 +43,7 @@ second_invoke:
; CHECK: invoke
; CHECK: statepoint
; CHECK: some_call
- %ret_val2 = invoke i64 addrspace(1)* @"some_call"(i64 addrspace(1)* %ret_val1)
+ %ret_val2 = invoke i64 addrspace(1)* @some_call(i64 addrspace(1)* %ret_val1)
to label %normal_return unwind label %exceptional_return
; CHECK-LABEL: normal_return:
@@ -56,23 +56,23 @@ normal_return:
; CHECK: ret i64
exceptional_return:
- %landing_pad4 = landingpad {i8*, i32}
+ %landing_pad4 = landingpad token
cleanup
ret i64 addrspace(1)* %obj1
}
-define i64 addrspace(1)* @test_phi_node(i1 %cond, i64 addrspace(1)* %obj) gc "statepoint-example" personality i32 ()* @"personality_function" {
+define i64 addrspace(1)* @test_phi_node(i1 %cond, i64 addrspace(1)* %obj) gc "statepoint-example" personality i32 ()* @personality_function {
; CHECK-LABEL: @test_phi_node
; CHECK-LABEL: entry:
entry:
br i1 %cond, label %left, label %right
left:
- %ret_val_left = invoke i64 addrspace(1)* @"some_call"(i64 addrspace(1)* %obj)
+ %ret_val_left = invoke i64 addrspace(1)* @some_call(i64 addrspace(1)* %obj)
to label %merge unwind label %exceptional_return
right:
- %ret_val_right = invoke i64 addrspace(1)* @"some_call"(i64 addrspace(1)* %obj)
+ %ret_val_right = invoke i64 addrspace(1)* @some_call(i64 addrspace(1)* %obj)
to label %merge unwind label %exceptional_return
; CHECK: merge[[A:[0-9]]]:
@@ -94,7 +94,7 @@ merge:
; CHECK: ret i64 addrspace(1)*
exceptional_return:
- %landing_pad4 = landingpad {i8*, i32}
+ %landing_pad4 = landingpad token
cleanup
ret i64 addrspace(1)* %obj
}
diff --git a/test/Transforms/PlaceSafepoints/leaf-function.ll b/test/Transforms/RewriteStatepointsForGC/leaf-function.ll
index 2f4193827ae7..e2350d4f9e0a 100644
--- a/test/Transforms/PlaceSafepoints/leaf-function.ll
+++ b/test/Transforms/RewriteStatepointsForGC/leaf-function.ll
@@ -1,4 +1,4 @@
-; RUN: opt %s -S -place-safepoints | FileCheck %s
+; RUN: opt < %s -S -rewrite-statepoints-for-gc | FileCheck %s
declare void @foo() "gc-leaf-function"
declare void @bar()
@@ -7,8 +7,7 @@ declare void @bar()
; into a safepoint. An entry safepoint should get inserted, though.
define void @test_leaf_function() gc "statepoint-example" {
; CHECK-LABEL: test_leaf_function
-; CHECK: gc.statepoint.p0f_isVoidf
-; CHECK-NOT: statepoint
+; CHECK-NOT: gc.statepoint
; CHECK-NOT: gc.result
entry:
call void @foo()
@@ -17,8 +16,7 @@ entry:
define void @test_leaf_function_call() gc "statepoint-example" {
; CHECK-LABEL: test_leaf_function_call
-; CHECK: gc.statepoint.p0f_isVoidf
-; CHECK-NOT: statepoint
+; CHECK-NOT: gc.statepoint
; CHECK-NOT: gc.result
entry:
call void @bar() "gc-leaf-function"
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/live-vector-nosplit.ll b/test/Transforms/RewriteStatepointsForGC/live-vector-nosplit.ll
index ee578eb3d309..cc0140a97c5d 100644
--- a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/live-vector-nosplit.ll
+++ b/test/Transforms/RewriteStatepointsForGC/live-vector-nosplit.ll
@@ -1,6 +1,6 @@
; Test that we can correctly handle vectors of pointers in statepoint
; rewriting.
-; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -rs4gc-split-vector-values=0 -S | FileCheck %s
+; RUN: opt < %s -rewrite-statepoints-for-gc -S | FileCheck %s
; A non-vector relocation for comparison
define i64 addrspace(1)* @test(i64 addrspace(1)* %obj) gc "statepoint-example" {
@@ -73,9 +73,12 @@ exceptional_return: ; preds = %entry
define <2 x i64 addrspace(1)*> @test5(i64 addrspace(1)* %p) gc "statepoint-example" {
; CHECK-LABEL: test5
; CHECK: insertelement
+; CHECK-NEXT: insertelement
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: gc.relocate
; CHECK-NEXT: bitcast
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %vec.relocated.casted
entry:
%vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %p, i32 0
@@ -100,9 +103,12 @@ untaken: ; preds = %entry
merge: ; preds = %untaken, %taken
; CHECK-LABEL: merge:
; CHECK-NEXT: = phi
+; CHECK-NEXT: = phi
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: gc.relocate
; CHECK-NEXT: bitcast
+; CHECK-NEXT: gc.relocate
+; CHECK-NEXT: bitcast
; CHECK-NEXT: ret <2 x i64 addrspace(1)*>
%obj = phi <2 x i64 addrspace(1)*> [ %obja, %taken ], [ %objb, %untaken ]
call void @do_safepoint() [ "deopt"() ]
diff --git a/test/Transforms/RewriteStatepointsForGC/live-vector.ll b/test/Transforms/RewriteStatepointsForGC/live-vector.ll
deleted file mode 100644
index 2ec09d6acae6..000000000000
--- a/test/Transforms/RewriteStatepointsForGC/live-vector.ll
+++ /dev/null
@@ -1,152 +0,0 @@
-; Test that we can correctly handle vectors of pointers in statepoint
-; rewriting. Currently, we scalarize, but that's an implementation detail.
-; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-split-vector-values -S | FileCheck %s
-
-; A non-vector relocation for comparison
-define i64 addrspace(1)* @test(i64 addrspace(1)* %obj) gc "statepoint-example" {
-; CHECK-LABEL: test
-; CHECK: gc.statepoint
-; CHECK-NEXT: gc.relocate
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: ret i64 addrspace(1)* %obj.relocated.casted
-entry:
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
- ret i64 addrspace(1)* %obj
-}
-
-; A base vector from a argument
-define <2 x i64 addrspace(1)*> @test2(<2 x i64 addrspace(1)*> %obj) gc "statepoint-example" {
-; CHECK-LABEL: test2
-; CHECK: extractelement
-; CHECK-NEXT: extractelement
-; CHECK-NEXT: gc.statepoint
-; CHECK-NEXT: gc.relocate
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: gc.relocate
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: insertelement
-; CHECK-NEXT: insertelement
-; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %7
-entry:
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
- ret <2 x i64 addrspace(1)*> %obj
-}
-
-; A base vector from a load
-define <2 x i64 addrspace(1)*> @test3(<2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" {
-; CHECK-LABEL: test3
-; CHECK: load
-; CHECK-NEXT: extractelement
-; CHECK-NEXT: extractelement
-; CHECK-NEXT: gc.statepoint
-; CHECK-NEXT: gc.relocate
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: gc.relocate
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: insertelement
-; CHECK-NEXT: insertelement
-; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %7
-entry:
- %obj = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
- ret <2 x i64 addrspace(1)*> %obj
-}
-
-declare i32 @fake_personality_function()
-
-; When a statepoint is an invoke rather than a call
-define <2 x i64 addrspace(1)*> @test4(<2 x i64 addrspace(1)*>* %ptr) gc "statepoint-example" personality i32 ()* @fake_personality_function {
-; CHECK-LABEL: test4
-; CHECK: load
-; CHECK-NEXT: extractelement
-; CHECK-NEXT: extractelement
-; CHECK-NEXT: gc.statepoint
-entry:
- %obj = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
- invoke token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
- to label %normal_return unwind label %exceptional_return
-
-; CHECK-LABEL: normal_return:
-; CHECK: gc.relocate
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: gc.relocate
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: insertelement
-; CHECK-NEXT: insertelement
-; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %8
-normal_return: ; preds = %entry
- ret <2 x i64 addrspace(1)*> %obj
-
-; CHECK-LABEL: exceptional_return:
-; CHECK: gc.relocate
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: gc.relocate
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: insertelement
-; CHECK-NEXT: insertelement
-; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %14
-exceptional_return: ; preds = %entry
- %landing_pad4 = landingpad token
- cleanup
- ret <2 x i64 addrspace(1)*> %obj
-}
-
-; Can we handle an insert element with a constant offset? This effectively
-; tests both the equal and inequal case since we have to relocate both indices
-; in the vector.
-define <2 x i64 addrspace(1)*> @test5(i64 addrspace(1)* %p)
- gc "statepoint-example" {
-; CHECK-LABEL: test5
-; CHECK: insertelement
-; CHECK-NEXT: extractelement
-; CHECK-NEXT: extractelement
-; CHECK-NEXT: gc.statepoint
-; CHECK-NEXT: gc.relocate
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: gc.relocate
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: insertelement
-; CHECK-NEXT: insertelement
-; CHECK-NEXT: ret <2 x i64 addrspace(1)*> %7
-entry:
- %vec = insertelement <2 x i64 addrspace(1)*> undef, i64 addrspace(1)* %p, i32 0
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
- ret <2 x i64 addrspace(1)*> %vec
-}
-
-
-; A base vector from a load
-define <2 x i64 addrspace(1)*> @test6(i1 %cnd, <2 x i64 addrspace(1)*>* %ptr)
- gc "statepoint-example" {
-; CHECK-LABEL: test6
-; CHECK-LABEL: merge:
-; CHECK-NEXT: = phi
-; CHECK-NEXT: extractelement
-; CHECK-NEXT: extractelement
-; CHECK-NEXT: gc.statepoint
-; CHECK-NEXT: gc.relocate
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: gc.relocate
-; CHECK-NEXT: bitcast
-; CHECK-NEXT: insertelement
-; CHECK-NEXT: insertelement
-; CHECK-NEXT: ret <2 x i64 addrspace(1)*>
-entry:
- br i1 %cnd, label %taken, label %untaken
-taken:
- %obja = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
- br label %merge
-untaken:
- %objb = load <2 x i64 addrspace(1)*>, <2 x i64 addrspace(1)*>* %ptr
- br label %merge
-
-merge:
- %obj = phi <2 x i64 addrspace(1)*> [%obja, %taken], [%objb, %untaken]
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
- ret <2 x i64 addrspace(1)*> %obj
-}
-
-
-declare void @do_safepoint()
-
-declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/liveness-basics.ll b/test/Transforms/RewriteStatepointsForGC/liveness-basics.ll
index 207003c17b5f..9c848e52faf9 100644
--- a/test/Transforms/RewriteStatepointsForGC/liveness-basics.ll
+++ b/test/Transforms/RewriteStatepointsForGC/liveness-basics.ll
@@ -2,73 +2,72 @@
; correct live values at statepoints
; RUN: opt -rewrite-statepoints-for-gc -spp-rematerialization-threshold=0 -S < %s | FileCheck %s
-
; Tests to make sure we consider %obj live in both the taken and untaken
; predeccessor of merge.
+
define i64 addrspace(1)* @test1(i1 %cmp, i64 addrspace(1)* %obj) gc "statepoint-example" {
; CHECK-LABEL: @test1
entry:
br i1 %cmp, label %taken, label %untaken
-taken:
+taken: ; preds = %entry
; CHECK-LABEL: taken:
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
; CHECK-NEXT: bitcast
; CHECK-NEXT: br label %merge
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call void @foo() [ "deopt"() ]
br label %merge
-untaken:
+untaken: ; preds = %entry
; CHECK-LABEL: untaken:
; CHECK-NEXT: gc.statepoint
-; CHECK-NEXT: %obj.relocated1 = call coldcc i8 addrspace(1)*
+; CHECK-NEXT: %obj.relocated2 = call coldcc i8 addrspace(1)*
; CHECK-NEXT: bitcast
; CHECK-NEXT: br label %merge
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call void @foo() [ "deopt"() ]
br label %merge
-merge:
+merge: ; preds = %untaken, %taken
; CHECK-LABEL: merge:
-; CHECK-NEXT: %.0 = phi i64 addrspace(1)* [ %obj.relocated.casted, %taken ], [ %obj.relocated1.casted, %untaken ]
+; CHECK-NEXT: %.0 = phi i64 addrspace(1)* [ %obj.relocated.casted, %taken ], [ %obj.relocated2.casted, %untaken ]
; CHECK-NEXT: ret i64 addrspace(1)* %.0
+; A local kill should not effect liveness in predecessor block
ret i64 addrspace(1)* %obj
}
-; A local kill should not effect liveness in predecessor block
define i64 addrspace(1)* @test2(i1 %cmp, i64 addrspace(1)** %loc) gc "statepoint-example" {
; CHECK-LABEL: @test2
entry:
; CHECK-LABEL: entry:
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: br
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call void @foo() [ "deopt"() ]
br i1 %cmp, label %taken, label %untaken
-taken:
+taken: ; preds = %entry
; CHECK-LABEL: taken:
; CHECK-NEXT: %obj = load
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: gc.relocate
; CHECK-NEXT: bitcast
; CHECK-NEXT: ret i64 addrspace(1)* %obj.relocated.casted
-
+; A local kill should effect values live from a successor phi. Also, we
+; should only propagate liveness from a phi to the appropriate predecessors.
%obj = load i64 addrspace(1)*, i64 addrspace(1)** %loc
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call void @foo() [ "deopt"() ]
ret i64 addrspace(1)* %obj
-untaken:
+untaken: ; preds = %entry
ret i64 addrspace(1)* null
}
-; A local kill should effect values live from a successor phi. Also, we
-; should only propagate liveness from a phi to the appropriate predecessors.
define i64 addrspace(1)* @test3(i1 %cmp, i64 addrspace(1)** %loc) gc "statepoint-example" {
; CHECK-LABEL: @test3
entry:
br i1 %cmp, label %taken, label %untaken
-taken:
+taken: ; preds = %entry
; CHECK-LABEL: taken:
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: %obj = load
@@ -76,25 +75,25 @@ taken:
; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
; CHECK-NEXT: bitcast
; CHECK-NEXT: br label %merge
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call void @foo() [ "deopt"() ]
%obj = load i64 addrspace(1)*, i64 addrspace(1)** %loc
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call void @foo() [ "deopt"() ]
br label %merge
-untaken:
+untaken: ; preds = %entry
; CHECK-LABEL: taken:
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: br label %merge
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+; A base pointer must be live if it is needed at a later statepoint,
+; even if the base pointer is otherwise unused.
+ call void @foo() [ "deopt"() ]
br label %merge
-merge:
+merge: ; preds = %untaken, %taken
%phi = phi i64 addrspace(1)* [ %obj, %taken ], [ null, %untaken ]
ret i64 addrspace(1)* %phi
}
-; A base pointer must be live if it is needed at a later statepoint,
-; even if the base pointer is otherwise unused.
define i64 addrspace(1)* @test4(i1 %cmp, i64 addrspace(1)* %obj) gc "statepoint-example" {
; CHECK-LABEL: @test4
entry:
@@ -106,54 +105,55 @@ entry:
; CHECK-NEXT: %obj.relocated =
; CHECK-NEXT: bitcast
; CHECK-NEXT: gc.statepoint
-; CHECK-NEXT: %derived.relocated1 =
+; CHECK-NEXT: %derived.relocated2 =
; CHECK-NEXT: bitcast
+
; Note: It's legal to relocate obj again, but not strictly needed
-; CHECK-NEXT: %obj.relocated2 =
+; CHECK-NEXT: %obj.relocated3 =
; CHECK-NEXT: bitcast
-; CHECK-NEXT: ret i64 addrspace(1)* %derived.relocated1.casted
+; CHECK-NEXT: ret i64 addrspace(1)* %derived.relocated2.casted
;
+; Make sure that a phi def visited during iteration is considered a kill.
+; Also, liveness after base pointer analysis can change based on new uses,
+; not just new defs.
%derived = getelementptr i64, i64 addrspace(1)* %obj, i64 8
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
-
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call void @foo() [ "deopt"() ]
+ call void @foo() [ "deopt"() ]
ret i64 addrspace(1)* %derived
}
-declare void @consume(...) readonly
+declare void @consume(...) readonly "gc-leaf-function"
-; Make sure that a phi def visited during iteration is considered a kill.
-; Also, liveness after base pointer analysis can change based on new uses,
-; not just new defs.
define i64 addrspace(1)* @test5(i1 %cmp, i64 addrspace(1)* %obj) gc "statepoint-example" {
; CHECK-LABEL: @test5
entry:
br i1 %cmp, label %taken, label %untaken
-taken:
+taken: ; preds = %entry
; CHECK-LABEL: taken:
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
; CHECK-NEXT: bitcast
; CHECK-NEXT: br label %merge
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call void @foo() [ "deopt"() ]
br label %merge
-untaken:
+untaken: ; preds = %entry
; CHECK-LABEL: untaken:
; CHECK-NEXT: br label %merge
br label %merge
-merge:
+merge: ; preds = %untaken, %taken
; CHECK-LABEL: merge:
; CHECK-NEXT: %.0 = phi i64 addrspace(1)*
; CHECK-NEXT: %obj2a = phi
; CHECK-NEXT: @consume
; CHECK-NEXT: br label %final
- %obj2a = phi i64 addrspace(1)* [ %obj, %taken ], [null, %untaken]
+ %obj2a = phi i64 addrspace(1)* [ %obj, %taken ], [ null, %untaken ]
call void (...) @consume(i64 addrspace(1)* %obj2a)
br label %final
-final:
+
+final: ; preds = %merge
; CHECK-LABEL: final:
; CHECK-NEXT: @consume
; CHECK-NEXT: ret i64 addrspace(1)* %.0
@@ -163,4 +163,3 @@ final:
declare void @foo()
-declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/PlaceSafepoints/patchable-statepoints.ll b/test/Transforms/RewriteStatepointsForGC/patchable-statepoints.ll
index 2303ac7ef515..924620a64678 100644
--- a/test/Transforms/PlaceSafepoints/patchable-statepoints.ll
+++ b/test/Transforms/RewriteStatepointsForGC/patchable-statepoints.ll
@@ -1,4 +1,4 @@
-; RUN: opt -place-safepoints -S < %s | FileCheck %s
+; RUN: opt -S -rewrite-statepoints-for-gc < %s | FileCheck %s
declare void @f()
declare i32 @personality_function()
diff --git a/test/Transforms/RewriteStatepointsForGC/preprocess.ll b/test/Transforms/RewriteStatepointsForGC/preprocess.ll
index e1657497485b..df42eb14cfd6 100644
--- a/test/Transforms/RewriteStatepointsForGC/preprocess.ll
+++ b/test/Transforms/RewriteStatepointsForGC/preprocess.ll
@@ -1,65 +1,62 @@
; RUN: opt -rewrite-statepoints-for-gc -S < %s | FileCheck %s
-declare void @consume(...)
-
; Test to make sure we destroy LCSSA's single entry phi nodes before
; running liveness
+
+declare void @consume(...) "gc-leaf-function"
+
define void @test6(i64 addrspace(1)* %obj) gc "statepoint-example" {
; CHECK-LABEL: @test6
entry:
br label %next
-next:
+next: ; preds = %entry
; CHECK-LABEL: next:
; CHECK-NEXT: gc.statepoint
; CHECK-NEXT: gc.relocate
; CHECK-NEXT: bitcast
; CHECK-NEXT: @consume(i64 addrspace(1)* %obj.relocated.casted)
; CHECK-NEXT: @consume(i64 addrspace(1)* %obj.relocated.casted)
+; Need to delete unreachable gc.statepoint call
%obj2 = phi i64 addrspace(1)* [ %obj, %entry ]
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call void @foo() [ "deopt"() ]
call void (...) @consume(i64 addrspace(1)* %obj2)
call void (...) @consume(i64 addrspace(1)* %obj)
ret void
}
-declare void @some_call(i64 addrspace(1)*)
-
-; Need to delete unreachable gc.statepoint call
define void @test7() gc "statepoint-example" {
; CHECK-LABEL: test7
; CHECK-NOT: gc.statepoint
+; Need to delete unreachable gc.statepoint invoke - tested seperately given
+; a correct implementation could only remove the instructions, not the block
ret void
-unreached:
- %obj = phi i64 addrspace(1)* [null, %unreached]
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+unreached: ; preds = %unreached
+ %obj = phi i64 addrspace(1)* [ null, %unreached ]
+ call void @foo() [ "deopt"() ]
call void (...) @consume(i64 addrspace(1)* %obj)
br label %unreached
}
-; Need to delete unreachable gc.statepoint invoke - tested seperately given
-; a correct implementation could only remove the instructions, not the block
define void @test8() gc "statepoint-example" personality i32 ()* undef {
; CHECK-LABEL: test8
; CHECK-NOT: gc.statepoint
+; Bound the last check-not
ret void
-unreached:
- invoke token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+unreached: ; No predecessors!
+ invoke void @foo() [ "deopt"() ]
+; CHECK-LABEL: @foo
to label %normal_return unwind label %exceptional_return
-normal_return: ; preds = %entry
+normal_return: ; preds = %unreached
ret void
-exceptional_return: ; preds = %entry
+exceptional_return: ; preds = %unreached
%landing_pad4 = landingpad { i8*, i32 }
cleanup
ret void
}
declare void @foo()
-; Bound the last check-not
-; CHECK-LABEL: @foo
-
-declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/relocate-invoke-result.ll b/test/Transforms/RewriteStatepointsForGC/relocate-invoke-result.ll
index 688cf36168d4..b96ec3e3962d 100644
--- a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/relocate-invoke-result.ll
+++ b/test/Transforms/RewriteStatepointsForGC/relocate-invoke-result.ll
@@ -1,5 +1,5 @@
-;; RUN: opt -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -verify -S < %s | FileCheck %s
+;; RUN: opt -rewrite-statepoints-for-gc -verify -S < %s | FileCheck %s
;; This test is to verify that RewriteStatepointsForGC correctly relocates values
;; defined by invoke instruction results.
diff --git a/test/Transforms/RewriteStatepointsForGC/relocate_invoke_result.ll b/test/Transforms/RewriteStatepointsForGC/relocate_invoke_result.ll
deleted file mode 100644
index d11441e9346f..000000000000
--- a/test/Transforms/RewriteStatepointsForGC/relocate_invoke_result.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-;; RUN: opt -rewrite-statepoints-for-gc -verify -S < %s | FileCheck %s
-
-;; This test is to verify that RewriteStatepointsForGC correctly relocates values
-;; defined by invoke instruction results.
-
-declare i64* addrspace(1)* @non_gc_call()
-
-declare void @gc_call()
-
-declare i32* @fake_personality_function()
-
-; Function Attrs: nounwind
-define i64* addrspace(1)* @test() gc "statepoint-example" personality i32* ()* @fake_personality_function {
-entry:
- %obj = invoke i64* addrspace(1)* @non_gc_call()
- to label %normal_dest unwind label %unwind_dest
-
-unwind_dest:
- %lpad = landingpad { i8*, i32 }
- cleanup
- resume { i8*, i32 } undef
-
-normal_dest:
-;; CHECK-LABEL: normal_dest:
-;; CHECK-NEXT: gc.statepoint
-;; CHECK-NEXT: %obj.relocated = call coldcc i8 addrspace(1)*
-;; CHECK-NEXT: bitcast
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @gc_call, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
- ret i64* addrspace(1)* %obj
-}
-
-declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
-
diff --git a/test/Transforms/RewriteStatepointsForGC/relocation.ll b/test/Transforms/RewriteStatepointsForGC/relocation.ll
index deea377c5a28..eaa826c52dc2 100644
--- a/test/Transforms/RewriteStatepointsForGC/relocation.ll
+++ b/test/Transforms/RewriteStatepointsForGC/relocation.ll
@@ -1,27 +1,28 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -spp-rematerialization-threshold=0 -S 2>&1 | FileCheck %s
+; RUN: opt < %s -rewrite-statepoints-for-gc -spp-rematerialization-threshold=0 -S | FileCheck %s
declare void @foo()
-declare void @use(...)
+
+declare void @use(...) "gc-leaf-function"
define i64 addrspace(1)* @test1(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj2, i1 %condition) gc "statepoint-example" {
-entry:
; CHECK-LABEL: @test1
; CHECK-DAG: %obj.relocated
; CHECK-DAG: %obj2.relocated
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+entry:
+ call void @foo() [ "deopt"() ]
br label %joint
-joint:
+joint: ; preds = %joint2, %entry
; CHECK-LABEL: joint:
; CHECK: %phi1 = phi i64 addrspace(1)* [ %obj.relocated.casted, %entry ], [ %obj3, %joint2 ]
%phi1 = phi i64 addrspace(1)* [ %obj, %entry ], [ %obj3, %joint2 ]
br i1 %condition, label %use, label %joint2
-use:
+use: ; preds = %joint
br label %joint2
-joint2:
+joint2: ; preds = %use, %joint
; CHECK-LABEL: joint2:
; CHECK: %phi2 = phi i64 addrspace(1)* [ %obj.relocated.casted, %use ], [ %obj2.relocated.casted, %joint ]
; CHECK: %obj3 = getelementptr i64, i64 addrspace(1)* %obj2.relocated.casted, i32 1
@@ -30,11 +31,11 @@ joint2:
br label %joint
}
-declare i64 addrspace(1)* @generate_obj()
+declare i64 addrspace(1)* @generate_obj() "gc-leaf-function"
-declare void @consume_obj(i64 addrspace(1)*)
+declare void @consume_obj(i64 addrspace(1)*) "gc-leaf-function"
-declare i1 @rt()
+declare i1 @rt() "gc-leaf-function"
define void @test2() gc "statepoint-example" {
; CHECK-LABEL: @test2
@@ -43,60 +44,61 @@ entry:
%obj = getelementptr i64, i64 addrspace(1)* %obj_init, i32 42
br label %loop
-loop:
+loop: ; preds = %loop.backedge, %entry
; CHECK: loop:
; CHECK-DAG: [ %obj_init.relocated.casted, %loop.backedge ]
; CHECK-DAG: [ %obj_init, %entry ]
; CHECK-DAG: [ %obj.relocated.casted, %loop.backedge ]
; CHECK-DAG: [ %obj, %entry ]
- %index = phi i32 [ 0, %entry ], [ %index.inc, %loop.backedge ]
; CHECK-NOT: %location = getelementptr i64, i64 addrspace(1)* %obj, i32 %index
+ %index = phi i32 [ 0, %entry ], [ %index.inc, %loop.backedge ]
%location = getelementptr i64, i64 addrspace(1)* %obj, i32 %index
call void @consume_obj(i64 addrspace(1)* %location)
%index.inc = add i32 %index, 1
%condition = call i1 @rt()
br i1 %condition, label %loop_x, label %loop_y
-loop_x:
+loop_x: ; preds = %loop
br label %loop.backedge
-loop.backedge:
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
+loop.backedge: ; preds = %loop_y, %loop_x
+ call void @do_safepoint() [ "deopt"() ]
br label %loop
-loop_y:
+loop_y: ; preds = %loop
br label %loop.backedge
}
-declare void @some_call(i8 addrspace(1)*)
+declare void @some_call(i8 addrspace(1)*) "gc-leaf-function"
define void @relocate_merge(i1 %cnd, i8 addrspace(1)* %arg) gc "statepoint-example" {
; CHECK-LABEL: @relocate_merge
+
bci_0:
br i1 %cnd, label %if_branch, label %else_branch
-if_branch:
+if_branch: ; preds = %bci_0
; CHECK-LABEL: if_branch:
; CHECK: gc.statepoint
; CHECK: gc.relocate
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+ call void @foo() [ "deopt"() ]
br label %join
-else_branch:
+else_branch: ; preds = %bci_0
; CHECK-LABEL: else_branch:
; CHECK: gc.statepoint
; CHECK: gc.relocate
- %safepoint_token1 = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 0)
+; We need to end up with a single relocation phi updated from both paths
+ call void @foo() [ "deopt"() ]
br label %join
-join:
-; We need to end up with a single relocation phi updated from both paths
+join: ; preds = %else_branch, %if_branch
; CHECK-LABEL: join:
; CHECK: phi i8 addrspace(1)*
; CHECK-DAG: [ %arg.relocated, %if_branch ]
-; CHECK-DAG: [ %arg.relocated4, %else_branch ]
+; CHECK-DAG: [ %arg.relocated2, %else_branch ]
; CHECK-NOT: phi
- call void (i8 addrspace(1)*) @some_call(i8 addrspace(1)* %arg)
+ call void @some_call(i8 addrspace(1)* %arg)
ret void
}
@@ -104,14 +106,14 @@ join:
; This is basically just making sure that statepoints aren't accidentally
; treated specially.
define void @test3(i64 addrspace(1)* %obj) gc "statepoint-example" {
-entry:
; CHECK-LABEL: @test3
; CHECK: gc.statepoint
; CHECK-NEXT: gc.relocate
; CHECK-NEXT: bitcast
; CHECK-NEXT: gc.statepoint
- %safepoint_token = call token (i64, i32, void (i64)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidi64f(i64 0, i32 0, void (i64)* undef, i32 1, i32 0, i64 undef, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
- %safepoint_token1 = call token (i64, i32, i32 (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_i32p1i64f(i64 0, i32 0, i32 (i64 addrspace(1)*)* undef, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+entry:
+ call void undef(i64 undef) [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
+ %0 = call i32 undef(i64 addrspace(1)* %obj) [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
ret void
}
@@ -122,30 +124,28 @@ define void @test4() gc "statepoint-example" {
; CHECK: gc.statepoint
; CHECK: gc.result
; CHECK: gc.statepoint
-; CHECK: gc.relocate
-; CHECK: @use(i8 addrspace(1)* %res.relocated)
- %safepoint_token2 = tail call token (i64, i32, i8 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i8f(i64 0, i32 0, i8 addrspace(1)* ()* undef, i32 0, i32 0, i32 0, i32 0)
- %res = call i8 addrspace(1)* @llvm.experimental.gc.result.p1i8(token %safepoint_token2)
- call token (i64, i32, i8 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i8f(i64 0, i32 0, i8 addrspace(1)* ()* undef, i32 0, i32 0, i32 0, i32 0)
- call void (...) @use(i8 addrspace(1)* %res)
+; CHECK: [[RELOCATED:%[^ ]+]] = call {{.*}}gc.relocate
+; CHECK: @use(i8 addrspace(1)* [[RELOCATED]])
+ %1 = call i8 addrspace(1)* undef() [ "deopt"() ]
+ %2 = call i8 addrspace(1)* undef() [ "deopt"() ]
+ call void (...) @use(i8 addrspace(1)* %1)
unreachable
}
-
; Test updating a phi where not all inputs are live to begin with
define void @test5(i8 addrspace(1)* %arg) gc "statepoint-example" {
; CHECK-LABEL: test5
entry:
- call token (i64, i32, i8 addrspace(1)* ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i8f(i64 0, i32 0, i8 addrspace(1)* ()* undef, i32 0, i32 0, i32 0, i32 0)
+ %0 = call i8 addrspace(1)* undef() [ "deopt"() ]
switch i32 undef, label %kill [
i32 10, label %merge
i32 13, label %merge
]
-kill:
+kill: ; preds = %entry
br label %merge
-merge:
+merge: ; preds = %kill, %entry, %entry
; CHECK: merge:
; CHECK: %test = phi i8 addrspace(1)
; CHECK-DAG: [ null, %kill ]
@@ -156,24 +156,22 @@ merge:
unreachable
}
-
; Check to make sure we handle values live over an entry statepoint
-define void @test6(i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2,
- i8 addrspace(1)* %arg3) gc "statepoint-example" {
+define void @test6(i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2, i8 addrspace(1)* %arg3) gc "statepoint-example" {
; CHECK-LABEL: @test6
entry:
br i1 undef, label %gc.safepoint_poll.exit2, label %do_safepoint
-do_safepoint:
+do_safepoint: ; preds = %entry
; CHECK-LABEL: do_safepoint:
; CHECK: gc.statepoint
; CHECK: arg1.relocated =
; CHECK: arg2.relocated =
; CHECK: arg3.relocated =
- call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 3, i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2, i8 addrspace(1)* %arg3)
+ call void @foo() [ "deopt"(i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2, i8 addrspace(1)* %arg3) ]
br label %gc.safepoint_poll.exit2
-gc.safepoint_poll.exit2:
+gc.safepoint_poll.exit2: ; preds = %do_safepoint, %entry
; CHECK-LABEL: gc.safepoint_poll.exit2:
; CHECK: phi i8 addrspace(1)*
; CHECK-DAG: [ %arg3, %entry ]
@@ -190,44 +188,42 @@ gc.safepoint_poll.exit2:
; Check relocation in a loop nest where a relocation happens in the outer
; but not the inner loop
-define void @test_outer_loop(i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2,
- i1 %cmp) gc "statepoint-example" {
+define void @test_outer_loop(i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2, i1 %cmp) gc "statepoint-example" {
; CHECK-LABEL: @test_outer_loop
+
bci_0:
br label %outer-loop
-outer-loop:
+outer-loop: ; preds = %outer-inc, %bci_0
; CHECK-LABEL: outer-loop:
; CHECK: phi i8 addrspace(1)* [ %arg2, %bci_0 ], [ %arg2.relocated, %outer-inc ]
; CHECK: phi i8 addrspace(1)* [ %arg1, %bci_0 ], [ %arg1.relocated, %outer-inc ]
br label %inner-loop
-inner-loop:
+inner-loop: ; preds = %inner-loop, %outer-loop
br i1 %cmp, label %inner-loop, label %outer-inc
-outer-inc:
+outer-inc: ; preds = %inner-loop
; CHECK-LABEL: outer-inc:
; CHECK: %arg1.relocated
; CHECK: %arg2.relocated
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 2, i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2)
+ call void @foo() [ "deopt"(i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2) ]
br label %outer-loop
}
; Check that both inner and outer loops get phis when relocation is in
; inner loop
-define void @test_inner_loop(i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2,
- i1 %cmp) gc "statepoint-example" {
+define void @test_inner_loop(i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2, i1 %cmp) gc "statepoint-example" {
; CHECK-LABEL: @test_inner_loop
+
bci_0:
br label %outer-loop
-outer-loop:
+outer-loop: ; preds = %outer-inc, %bci_0
; CHECK-LABEL: outer-loop:
; CHECK: phi i8 addrspace(1)* [ %arg2, %bci_0 ], [ %arg2.relocated, %outer-inc ]
; CHECK: phi i8 addrspace(1)* [ %arg1, %bci_0 ], [ %arg1.relocated, %outer-inc ]
br label %inner-loop
-
-inner-loop:
; CHECK-LABEL: inner-loop
; CHECK: phi i8 addrspace(1)*
; CHECK-DAG: %outer-loop ]
@@ -238,42 +234,40 @@ inner-loop:
; CHECK: gc.statepoint
; CHECK: %arg1.relocated
; CHECK: %arg2.relocated
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 2, i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2)
+
+inner-loop: ; preds = %inner-loop, %outer-loop
+ call void @foo() [ "deopt"(i8 addrspace(1)* %arg1, i8 addrspace(1)* %arg2) ]
br i1 %cmp, label %inner-loop, label %outer-inc
-outer-inc:
+outer-inc: ; preds = %inner-loop
; CHECK-LABEL: outer-inc:
+; This test shows why updating just those uses of the original value being
+; relocated dominated by the inserted relocation is not always sufficient.
br label %outer-loop
}
-
-; This test shows why updating just those uses of the original value being
-; relocated dominated by the inserted relocation is not always sufficient.
define i64 addrspace(1)* @test7(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj2, i1 %condition) gc "statepoint-example" {
; CHECK-LABEL: @test7
entry:
br i1 %condition, label %branch2, label %join
-branch2:
+branch2: ; preds = %entry
br i1 %condition, label %callbb, label %join2
-callbb:
- %safepoint_token = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @foo, i32 0, i32 0, i32 0, i32 5, i32 0, i32 -1, i32 0, i32 0, i32 0)
+callbb: ; preds = %branch2
+ call void @foo() [ "deopt"(i32 0, i32 -1, i32 0, i32 0, i32 0) ]
br label %join
-join:
+join: ; preds = %callbb, %entry
; CHECK-LABEL: join:
; CHECK: phi i64 addrspace(1)* [ %obj.relocated.casted, %callbb ], [ %obj, %entry ]
; CHECK: phi i64 addrspace(1)*
; CHECK-DAG: [ %obj, %entry ]
; CHECK-DAG: [ %obj2.relocated.casted, %callbb ]
- ; This is a phi outside the dominator region of the new defs inserted by
- ; the safepoint, BUT we can't stop the search here or we miss the second
- ; phi below.
%phi1 = phi i64 addrspace(1)* [ %obj, %entry ], [ %obj2, %callbb ]
br label %join2
-join2:
+join2: ; preds = %join, %branch2
; CHECK-LABEL: join2:
; CHECK: phi2 = phi i64 addrspace(1)*
; CHECK-DAG: %join ]
@@ -282,14 +276,4 @@ join2:
ret i64 addrspace(1)* %phi2
}
-
declare void @do_safepoint()
-
-declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
-declare token @llvm.experimental.gc.statepoint.p0f_p1i8f(i64, i32, i8 addrspace(1)* ()*, i32, i32, ...)
-declare token @llvm.experimental.gc.statepoint.p0f_isVoidi64f(i64, i32, void (i64)*, i32, i32, ...)
-declare token @llvm.experimental.gc.statepoint.p0f_i32p1i64f(i64, i32, i32 (i64 addrspace(1)*)*, i32, i32, ...)
-declare i8 addrspace(1)* @llvm.experimental.gc.result.p1i8(token) #3
-
-
-
diff --git a/test/Transforms/RewriteStatepointsForGC/rematerialize-derived-pointers.ll b/test/Transforms/RewriteStatepointsForGC/rematerialize-derived-pointers.ll
index 445ab7bd768d..c4ec2ce5bf77 100644
--- a/test/Transforms/RewriteStatepointsForGC/rematerialize-derived-pointers.ll
+++ b/test/Transforms/RewriteStatepointsForGC/rematerialize-derived-pointers.ll
@@ -1,256 +1,261 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -S 2>&1 | FileCheck %s
+; RUN: opt < %s -rewrite-statepoints-for-gc -S | FileCheck %s
+
+
+declare void @use_obj16(i16 addrspace(1)*) "gc-leaf-function"
+declare void @use_obj32(i32 addrspace(1)*) "gc-leaf-function"
+declare void @use_obj64(i64 addrspace(1)*) "gc-leaf-function"
-declare void @use_obj16(i16 addrspace(1)*)
-declare void @use_obj32(i32 addrspace(1)*)
-declare void @use_obj64(i64 addrspace(1)*)
declare void @do_safepoint()
-define void @"test_gep_const"(i32 addrspace(1)* %base) gc "statepoint-example" {
+define void @test_gep_const(i32 addrspace(1)* %base) gc "statepoint-example" {
; CHECK-LABEL: test_gep_const
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %base, i32 15
- ; CHECK: getelementptr i32, i32 addrspace(1)* %base, i32 15
- %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
- ; CHECK: %base.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %sp, i32 7, i32 7)
- ; CHECK: bitcast i8 addrspace(1)* %base.relocated to i32 addrspace(1)*
- ; CHECK: getelementptr i32, i32 addrspace(1)* %base.relocated.casted, i32 15
+; CHECK: getelementptr i32, i32 addrspace(1)* %base, i32 15
+ call void @do_safepoint() [ "deopt"() ]
+; CHECK: %base.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %statepoint_token, i32 7, i32 7)
+; CHECK: bitcast i8 addrspace(1)* %base.relocated to i32 addrspace(1)*
+; CHECK: getelementptr i32, i32 addrspace(1)* %base.relocated.casted, i32 15
call void @use_obj32(i32 addrspace(1)* %base)
call void @use_obj32(i32 addrspace(1)* %ptr)
ret void
}
-define void @"test_gep_idx"(i32 addrspace(1)* %base, i32 %idx) gc "statepoint-example" {
+define void @test_gep_idx(i32 addrspace(1)* %base, i32 %idx) gc "statepoint-example" {
; CHECK-LABEL: test_gep_idx
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %base, i32 %idx
- ; CHECK: getelementptr
- %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
- ; CHECK: %base.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %sp, i32 7, i32 7)
- ; CHECK: %base.relocated.casted = bitcast i8 addrspace(1)* %base.relocated to i32 addrspace(1)*
- ; CHECK: getelementptr i32, i32 addrspace(1)* %base.relocated.casted, i32 %idx
+; CHECK: getelementptr
+ call void @do_safepoint() [ "deopt"() ]
+; CHECK: %base.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %statepoint_token, i32 7, i32 7)
+; CHECK: %base.relocated.casted = bitcast i8 addrspace(1)* %base.relocated to i32 addrspace(1)*
+; CHECK: getelementptr i32, i32 addrspace(1)* %base.relocated.casted, i32 %idx
call void @use_obj32(i32 addrspace(1)* %base)
call void @use_obj32(i32 addrspace(1)* %ptr)
ret void
}
-define void @"test_bitcast"(i32 addrspace(1)* %base) gc "statepoint-example" {
+define void @test_bitcast(i32 addrspace(1)* %base) gc "statepoint-example" {
; CHECK-LABEL: test_bitcast
entry:
%ptr = bitcast i32 addrspace(1)* %base to i64 addrspace(1)*
- ; CHECK: bitcast i32 addrspace(1)* %base to i64 addrspace(1)*
- %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
- ; CHECK: %base.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %sp, i32 7, i32 7)
- ; CHECK: %base.relocated.casted = bitcast i8 addrspace(1)* %base.relocated to i32 addrspace(1)*
- ; CHECK: bitcast i32 addrspace(1)* %base.relocated.casted to i64 addrspace(1)*
+; CHECK: bitcast i32 addrspace(1)* %base to i64 addrspace(1)*
+ call void @do_safepoint() [ "deopt"() ]
+; CHECK: %base.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %statepoint_token, i32 7, i32 7)
+; CHECK: %base.relocated.casted = bitcast i8 addrspace(1)* %base.relocated to i32 addrspace(1)*
+; CHECK: bitcast i32 addrspace(1)* %base.relocated.casted to i64 addrspace(1)*
call void @use_obj32(i32 addrspace(1)* %base)
call void @use_obj64(i64 addrspace(1)* %ptr)
ret void
}
-define void @"test_bitcast_bitcast"(i32 addrspace(1)* %base) gc "statepoint-example" {
+define void @test_bitcast_bitcast(i32 addrspace(1)* %base) gc "statepoint-example" {
; CHECK-LABEL: test_bitcast_bitcast
entry:
%ptr1 = bitcast i32 addrspace(1)* %base to i64 addrspace(1)*
%ptr2 = bitcast i64 addrspace(1)* %ptr1 to i16 addrspace(1)*
- ; CHECK: bitcast i32 addrspace(1)* %base to i64 addrspace(1)*
- ; CHECK: bitcast i64 addrspace(1)* %ptr1 to i16 addrspace(1)*
- %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
- ; CHECK: %base.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %sp, i32 7, i32 7)
- ; CHECK: %base.relocated.casted = bitcast i8 addrspace(1)* %base.relocated to i32 addrspace(1)*
- ; CHECK: bitcast i32 addrspace(1)* %base.relocated.casted to i64 addrspace(1)*
- ; CHECK: bitcast i64 addrspace(1)* %ptr1.remat to i16 addrspace(1)*
+; CHECK: bitcast i32 addrspace(1)* %base to i64 addrspace(1)*
+; CHECK: bitcast i64 addrspace(1)* %ptr1 to i16 addrspace(1)*
+ call void @do_safepoint() [ "deopt"() ]
+
+; CHECK: %base.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %statepoint_token, i32 7, i32 7)
+; CHECK: %base.relocated.casted = bitcast i8 addrspace(1)* %base.relocated to i32 addrspace(1)*
+; CHECK: bitcast i32 addrspace(1)* %base.relocated.casted to i64 addrspace(1)*
+; CHECK: bitcast i64 addrspace(1)* %ptr1.remat to i16 addrspace(1)*
call void @use_obj32(i32 addrspace(1)* %base)
call void @use_obj16(i16 addrspace(1)* %ptr2)
ret void
}
-define void @"test_addrspacecast_addrspacecast"(i32 addrspace(1)* %base) gc "statepoint-example" {
+define void @test_addrspacecast_addrspacecast(i32 addrspace(1)* %base) gc "statepoint-example" {
; CHECK-LABEL: test_addrspacecast_addrspacecast
entry:
%ptr1 = addrspacecast i32 addrspace(1)* %base to i32*
%ptr2 = addrspacecast i32* %ptr1 to i32 addrspace(1)*
- ; CHECK: addrspacecast i32 addrspace(1)* %base to i32*
- ; CHECK: addrspacecast i32* %ptr1 to i32 addrspace(1)*
- %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
- ; CHECK: %base.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %sp, i32 7, i32 7)
- ; CHECK: %base.relocated.casted = bitcast i8 addrspace(1)* %base.relocated to i32 addrspace(1)*
- ; CHECK: %ptr2.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %sp, i32 7, i32 8)
- ; CHECK: %ptr2.relocated.casted = bitcast i8 addrspace(1)* %ptr2.relocated to i32 addrspace(1)*
+; CHECK: addrspacecast i32 addrspace(1)* %base to i32*
+; CHECK: addrspacecast i32* %ptr1 to i32 addrspace(1)*
+ call void @do_safepoint() [ "deopt"() ]
+
+; CHECK: %ptr2.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %statepoint_token, i32 8, i32 7)
+; CHECK: %ptr2.relocated.casted = bitcast i8 addrspace(1)* %ptr2.relocated to i32 addrspace(1)*
+; CHECK: %base.relocated = call coldcc i8 addrspace(1)* @llvm.experimental.gc.relocate.p1i8(token %statepoint_token, i32 8, i32 8)
+; CHECK: %base.relocated.casted = bitcast i8 addrspace(1)* %base.relocated to i32 addrspace(1)*
call void @use_obj32(i32 addrspace(1)* %base)
call void @use_obj32(i32 addrspace(1)* %ptr2)
ret void
}
-define void @"test_bitcast_gep"(i32 addrspace(1)* %base) gc "statepoint-example" {
+define void @test_bitcast_gep(i32 addrspace(1)* %base) gc "statepoint-example" {
; CHECK-LABEL: test_bitcast_gep
entry:
%ptr.gep = getelementptr i32, i32 addrspace(1)* %base, i32 15
- ; CHECK: getelementptr
+; CHECK: getelementptr
+; CHECK: bitcast i32 addrspace(1)* %ptr.gep to i64 addrspace(1)*
%ptr.cast = bitcast i32 addrspace(1)* %ptr.gep to i64 addrspace(1)*
- ; CHECK: bitcast
- %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
- ; CHECK: gc.relocate
- ; CHECK: bitcast
- ; CHECK: getelementptr
- ; CHECK: bitcast
+ call void @do_safepoint() [ "deopt"() ]
+
+; CHECK: gc.relocate
+; CHECK: bitcast
+; CHECK: getelementptr
+; CHECK: bitcast
call void @use_obj32(i32 addrspace(1)* %base)
call void @use_obj64(i64 addrspace(1)* %ptr.cast)
ret void
}
-define void @"test_intersecting_chains"(i32 addrspace(1)* %base, i32 %idx) gc "statepoint-example" {
+define void @test_intersecting_chains(i32 addrspace(1)* %base, i32 %idx) gc "statepoint-example" {
; CHECK-LABEL: test_intersecting_chains
entry:
%ptr.gep = getelementptr i32, i32 addrspace(1)* %base, i32 15
- ; CHECK: getelementptr
+; CHECK: getelementptr
%ptr.cast = bitcast i32 addrspace(1)* %ptr.gep to i64 addrspace(1)*
- ; CHECK: bitcast
+; CHECK: bitcast
%ptr.cast2 = bitcast i32 addrspace(1)* %ptr.gep to i16 addrspace(1)*
- ; CHECK: bitcast
- %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
- ; CHECK: getelementptr
- ; CHECK: bitcast
- ; CHECK: getelementptr
- ; CHECK: bitcast
+; CHECK: bitcast
+ call void @do_safepoint() [ "deopt"() ]
+
+; CHECK: getelementptr
+; CHECK: bitcast
+; CHECK: getelementptr
+; CHECK: bitcast
call void @use_obj64(i64 addrspace(1)* %ptr.cast)
call void @use_obj16(i16 addrspace(1)* %ptr.cast2)
ret void
}
-define void @"test_cost_threshold"(i32 addrspace(1)* %base, i32 %idx1, i32 %idx2, i32 %idx3) gc "statepoint-example" {
+define void @test_cost_threshold(i32 addrspace(1)* %base, i32 %idx1, i32 %idx2, i32 %idx3) gc "statepoint-example" {
; CHECK-LABEL: test_cost_threshold
entry:
%ptr.gep = getelementptr i32, i32 addrspace(1)* %base, i32 15
- ; CHECK: getelementptr
+; CHECK: getelementptr
%ptr.gep2 = getelementptr i32, i32 addrspace(1)* %ptr.gep, i32 %idx1
- ; CHECK: getelementptr
+; CHECK: getelementptr
%ptr.gep3 = getelementptr i32, i32 addrspace(1)* %ptr.gep2, i32 %idx2
- ; CHECK: getelementptr
+; CHECK: getelementptr
%ptr.gep4 = getelementptr i32, i32 addrspace(1)* %ptr.gep3, i32 %idx3
- ; CHECK: getelementptr
+; CHECK: getelementptr
%ptr.cast = bitcast i32 addrspace(1)* %ptr.gep4 to i64 addrspace(1)*
- ; CHECK: bitcast
- %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
- ; CHECK: gc.relocate
- ; CHECK: bitcast
- ; CHECK: gc.relocate
- ; CHECK: bitcast
+ call void @do_safepoint() [ "deopt"() ]
+
+; CHECK: gc.relocate
+; CHECK: bitcast
+; CHECK: gc.relocate
+; CHECK: bitcast
call void @use_obj64(i64 addrspace(1)* %ptr.cast)
ret void
}
-define void @"test_two_derived"(i32 addrspace(1)* %base) gc "statepoint-example" {
+define void @test_two_derived(i32 addrspace(1)* %base) gc "statepoint-example" {
; CHECK-LABEL: test_two_derived
entry:
%ptr = getelementptr i32, i32 addrspace(1)* %base, i32 15
%ptr2 = getelementptr i32, i32 addrspace(1)* %base, i32 12
- ; CHECK: getelementptr
- ; CHECK: getelementptr
- %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
- ; CHECK: gc.relocate
- ; CHECK: bitcast
- ; CHECK: getelementptr
- ; CHECK: getelementptr
+; CHECK: getelementptr
+; CHECK: getelementptr
+ call void @do_safepoint() [ "deopt"() ]
+
+; CHECK: gc.relocate
+; CHECK: bitcast
+; CHECK: getelementptr
+; CHECK: getelementptr
call void @use_obj32(i32 addrspace(1)* %ptr)
call void @use_obj32(i32 addrspace(1)* %ptr2)
ret void
}
-define void @"test_gep_smallint_array"([3 x i32] addrspace(1)* %base) gc "statepoint-example" {
+define void @test_gep_smallint_array([3 x i32] addrspace(1)* %base) gc "statepoint-example" {
; CHECK-LABEL: test_gep_smallint_array
entry:
%ptr = getelementptr [3 x i32], [3 x i32] addrspace(1)* %base, i32 0, i32 2
- ; CHECK: getelementptr
- %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
- ; CHECK: gc.relocate
- ; CHECK: bitcast
- ; CHECK: getelementptr
+; CHECK: getelementptr
+ call void @do_safepoint() [ "deopt"() ]
+
+; CHECK: gc.relocate
+; CHECK: bitcast
+; CHECK: getelementptr
call void @use_obj32(i32 addrspace(1)* %ptr)
ret void
}
declare i32 @fake_personality_function()
-define void @"test_invoke"(i32 addrspace(1)* %base) gc "statepoint-example" personality i32 ()* @fake_personality_function {
+define void @test_invoke(i32 addrspace(1)* %base) gc "statepoint-example" personality i32 ()* @fake_personality_function {
; CHECK-LABEL: test_invoke
entry:
%ptr.gep = getelementptr i32, i32 addrspace(1)* %base, i32 15
- ; CHECK: getelementptr
+; CHECK: getelementptr
%ptr.cast = bitcast i32 addrspace(1)* %ptr.gep to i64 addrspace(1)*
- ; CHECK: bitcast
+; CHECK: bitcast
%ptr.cast2 = bitcast i32 addrspace(1)* %ptr.gep to i16 addrspace(1)*
- ; CHECK: bitcast
- %sp = invoke token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
- to label %normal unwind label %exception
+; CHECK: bitcast
+ invoke void @do_safepoint() [ "deopt"() ]
+ to label %normal unwind label %exception
normal:
- ; CHECK-LABEL: normal:
- ; CHECK: gc.relocate
- ; CHECK: bitcast
- ; CHECK: getelementptr
- ; CHECK: bitcast
- ; CHECK: getelementptr
- ; CHECK: bitcast
+; CHECK: normal:
+; CHECK: gc.relocate
+; CHECK: bitcast
+; CHECK: getelementptr
+; CHECK: bitcast
+; CHECK: getelementptr
+; CHECK: bitcast
call void @use_obj64(i64 addrspace(1)* %ptr.cast)
call void @use_obj16(i16 addrspace(1)* %ptr.cast2)
ret void
exception:
- ; CHECK-LABEL: exception:
+; CHECK: exception:
%landing_pad4 = landingpad token
cleanup
- ; CHECK: gc.relocate
- ; CHECK: bitcast
- ; CHECK: getelementptr
- ; CHECK: bitcast
- ; CHECK: getelementptr
- ; CHECK: bitcast
+; CHECK: gc.relocate
+; CHECK: bitcast
+; CHECK: getelementptr
+; CHECK: bitcast
+; CHECK: getelementptr
+; CHECK: bitcast
call void @use_obj64(i64 addrspace(1)* %ptr.cast)
call void @use_obj16(i16 addrspace(1)* %ptr.cast2)
ret void
}
-define void @"test_loop"(i32 addrspace(1)* %base) gc "statepoint-example" {
+define void @test_loop(i32 addrspace(1)* %base) gc "statepoint-example" {
; CHECK-LABEL: test_loop
entry:
%ptr.gep = getelementptr i32, i32 addrspace(1)* %base, i32 15
- ; CHECK: getelementptr
+; CHECK: getelementptr
br label %loop
-loop:
- ; CHECK: phi i32 addrspace(1)* [ %ptr.gep, %entry ], [ %ptr.gep.remat, %loop ]
- ; CHECK: phi i32 addrspace(1)* [ %base, %entry ], [ %base.relocated.casted, %loop ]
+loop: ; preds = %loop, %entry
+; CHECK: phi i32 addrspace(1)* [ %ptr.gep, %entry ], [ %ptr.gep.remat, %loop ]
+; CHECK: phi i32 addrspace(1)* [ %base, %entry ], [ %base.relocated.casted, %loop ]
call void @use_obj32(i32 addrspace(1)* %ptr.gep)
- %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
- ; CHECK: gc.relocate
- ; CHECK: bitcast
- ; CHECK: getelementptr
+ call void @do_safepoint() [ "deopt"() ]
+; CHECK: gc.relocate
+; CHECK: bitcast
+; CHECK: getelementptr
br label %loop
}
-define void @"test_too_long"(i32 addrspace(1)* %base) gc "statepoint-example" {
+define void @test_too_long(i32 addrspace(1)* %base) gc "statepoint-example" {
; CHECK-LABEL: test_too_long
entry:
- %ptr.gep = getelementptr i32, i32 addrspace(1)* %base, i32 15
- %ptr.gep1 = getelementptr i32, i32 addrspace(1)* %ptr.gep, i32 15
- %ptr.gep2 = getelementptr i32, i32 addrspace(1)* %ptr.gep1, i32 15
- %ptr.gep3 = getelementptr i32, i32 addrspace(1)* %ptr.gep2, i32 15
- %ptr.gep4 = getelementptr i32, i32 addrspace(1)* %ptr.gep3, i32 15
- %ptr.gep5 = getelementptr i32, i32 addrspace(1)* %ptr.gep4, i32 15
- %ptr.gep6 = getelementptr i32, i32 addrspace(1)* %ptr.gep5, i32 15
- %ptr.gep7 = getelementptr i32, i32 addrspace(1)* %ptr.gep6, i32 15
- %ptr.gep8 = getelementptr i32, i32 addrspace(1)* %ptr.gep7, i32 15
- %ptr.gep9 = getelementptr i32, i32 addrspace(1)* %ptr.gep8, i32 15
+ %ptr.gep = getelementptr i32, i32 addrspace(1)* %base, i32 15
+ %ptr.gep1 = getelementptr i32, i32 addrspace(1)* %ptr.gep, i32 15
+ %ptr.gep2 = getelementptr i32, i32 addrspace(1)* %ptr.gep1, i32 15
+ %ptr.gep3 = getelementptr i32, i32 addrspace(1)* %ptr.gep2, i32 15
+ %ptr.gep4 = getelementptr i32, i32 addrspace(1)* %ptr.gep3, i32 15
+ %ptr.gep5 = getelementptr i32, i32 addrspace(1)* %ptr.gep4, i32 15
+ %ptr.gep6 = getelementptr i32, i32 addrspace(1)* %ptr.gep5, i32 15
+ %ptr.gep7 = getelementptr i32, i32 addrspace(1)* %ptr.gep6, i32 15
+ %ptr.gep8 = getelementptr i32, i32 addrspace(1)* %ptr.gep7, i32 15
+ %ptr.gep9 = getelementptr i32, i32 addrspace(1)* %ptr.gep8, i32 15
%ptr.gep10 = getelementptr i32, i32 addrspace(1)* %ptr.gep9, i32 15
%ptr.gep11 = getelementptr i32, i32 addrspace(1)* %ptr.gep10, i32 15
- %sp = call token (i64, i32, void ()*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_isVoidf(i64 0, i32 0, void ()* @do_safepoint, i32 0, i32 0, i32 0, i32 0)
- ; CHECK: gc.relocate
- ; CHECK: bitcast
- ; CHECK: gc.relocate
- ; CHECK: bitcast
+ call void @do_safepoint() [ "deopt"() ]
+; CHECK: gc.relocate
+; CHECK: bitcast
+; CHECK: gc.relocate
+; CHECK: bitcast
call void @use_obj32(i32 addrspace(1)* %ptr.gep11)
ret void
}
-
-
-declare token @llvm.experimental.gc.statepoint.p0f_isVoidf(i64, i32, void ()*, i32, i32, ...)
diff --git a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/rewrite-invoke.ll b/test/Transforms/RewriteStatepointsForGC/rewrite-invoke.ll
index e1d0140c1dcd..91d4fa303b1b 100644
--- a/test/Transforms/RewriteStatepointsForGC/deopt-bundles/rewrite-invoke.ll
+++ b/test/Transforms/RewriteStatepointsForGC/rewrite-invoke.ll
@@ -1,4 +1,4 @@
-; RUN: opt -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -verify -S < %s | FileCheck %s
+; RUN: opt -rewrite-statepoints-for-gc -verify -S < %s | FileCheck %s
declare i8 addrspace(1)* @gc_call()
diff --git a/test/Transforms/PlaceSafepoints/statepoint-calling-conventions.ll b/test/Transforms/RewriteStatepointsForGC/statepoint-calling-conventions.ll
index eaefefa7ad1d..f40ff8f3a7d1 100644
--- a/test/Transforms/PlaceSafepoints/statepoint-calling-conventions.ll
+++ b/test/Transforms/RewriteStatepointsForGC/statepoint-calling-conventions.ll
@@ -1,4 +1,4 @@
-; RUN: opt -place-safepoints -S < %s | FileCheck %s
+; RUN: opt -rewrite-statepoints-for-gc -S < %s | FileCheck %s
; Ensure that the gc.statepoint calls / invokes we generate carry over
; the right calling conventions.
@@ -6,7 +6,7 @@
define i64 addrspace(1)* @test_invoke_format(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1) gc "statepoint-example" personality i32 ()* @personality {
; CHECK-LABEL: @test_invoke_format(
; CHECK-LABEL: entry:
-; CHECK: invoke coldcc token (i64, i32, i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64 2882400000, i32 0, i64 addrspace(1)* (i64 addrspace(1)*)* @callee, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 0)
+; CHECK: invoke coldcc token (i64, i32, i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64 2882400000, i32 0, i64 addrspace(1)* (i64 addrspace(1)*)* @callee, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 0
entry:
%ret_val = invoke coldcc i64 addrspace(1)* @callee(i64 addrspace(1)* %obj)
to label %normal_return unwind label %exceptional_return
@@ -15,7 +15,7 @@ normal_return:
ret i64 addrspace(1)* %ret_val
exceptional_return:
- %landing_pad4 = landingpad {i8*, i32}
+ %landing_pad4 = landingpad token
cleanup
ret i64 addrspace(1)* %obj1
}
@@ -23,7 +23,7 @@ exceptional_return:
define i64 addrspace(1)* @test_call_format(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1) gc "statepoint-example" {
; CHECK-LABEL: @test_call_format(
; CHECK-LABEL: entry:
-; CHECK: call coldcc token (i64, i32, i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64 2882400000, i32 0, i64 addrspace(1)* (i64 addrspace(1)*)* @callee, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 0)
+; CHECK: call coldcc token (i64, i32, i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64 2882400000, i32 0, i64 addrspace(1)* (i64 addrspace(1)*)* @callee, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 0
entry:
%ret_val = call coldcc i64 addrspace(1)* @callee(i64 addrspace(1)* %obj)
ret i64 addrspace(1)* %ret_val
diff --git a/test/Transforms/RewriteStatepointsForGC/statepoint-coreclr.ll b/test/Transforms/RewriteStatepointsForGC/statepoint-coreclr.ll
new file mode 100644
index 000000000000..a19196eab5cf
--- /dev/null
+++ b/test/Transforms/RewriteStatepointsForGC/statepoint-coreclr.ll
@@ -0,0 +1,31 @@
+; RUN: opt < %s -S -rewrite-statepoints-for-gc | FileCheck %s
+
+; Basic test to make sure that safepoints are placed
+; for CoreCLR GC
+
+declare void @foo()
+
+define void @test_simple_call() gc "coreclr" {
+; CHECK-LABEL: test_simple_call
+entry:
+ br label %other
+other:
+; CHECK-LABEL: other
+; CHECK: statepoint
+; CHECK-NOT: gc.result
+ call void @foo()
+ ret void
+}
+
+; This function is inlined when inserting a poll. To avoid recursive
+; issues, make sure we don't place safepoints in it.
+declare void @do_safepoint()
+define void @gc.safepoint_poll() {
+; CHECK-LABEL: gc.safepoint_poll
+; CHECK-LABEL: entry
+; CHECK-NEXT: do_safepoint
+; CHECK-NEXT: ret void
+entry:
+ call void @do_safepoint()
+ ret void
+}
diff --git a/test/Transforms/PlaceSafepoints/statepoint-format.ll b/test/Transforms/RewriteStatepointsForGC/statepoint-format.ll
index c3712a3ace00..029864e3efa0 100644
--- a/test/Transforms/PlaceSafepoints/statepoint-format.ll
+++ b/test/Transforms/RewriteStatepointsForGC/statepoint-format.ll
@@ -1,4 +1,4 @@
-; RUN: opt -place-safepoints -S < %s | FileCheck %s
+; RUN: opt -rewrite-statepoints-for-gc -S < %s | FileCheck %s
; Ensure that the gc.statepoint calls / invokes we generate have the
; set of arguments we expect it to have.
@@ -6,7 +6,7 @@
define i64 addrspace(1)* @test_invoke_format(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1) gc "statepoint-example" personality i32 ()* @personality {
; CHECK-LABEL: @test_invoke_format(
; CHECK-LABEL: entry:
-; CHECK: invoke token (i64, i32, i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64 2882400000, i32 0, i64 addrspace(1)* (i64 addrspace(1)*)* @callee, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 0)
+; CHECK: invoke token (i64, i32, i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64 2882400000, i32 0, i64 addrspace(1)* (i64 addrspace(1)*)* @callee, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 0, i64 addrspace(1)* %obj1, i64 addrspace(1)* %obj)
entry:
%ret_val = invoke i64 addrspace(1)* @callee(i64 addrspace(1)* %obj)
to label %normal_return unwind label %exceptional_return
@@ -15,7 +15,7 @@ normal_return:
ret i64 addrspace(1)* %ret_val
exceptional_return:
- %landing_pad4 = landingpad {i8*, i32}
+ %landing_pad4 = landingpad token
cleanup
ret i64 addrspace(1)* %obj1
}
@@ -23,7 +23,7 @@ exceptional_return:
define i64 addrspace(1)* @test_call_format(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1) gc "statepoint-example" {
; CHECK-LABEL: @test_call_format(
; CHECK-LABEL: entry:
-; CHECK: call token (i64, i32, i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64 2882400000, i32 0, i64 addrspace(1)* (i64 addrspace(1)*)* @callee, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 0)
+; CHECK: call token (i64, i32, i64 addrspace(1)* (i64 addrspace(1)*)*, i32, i32, ...) @llvm.experimental.gc.statepoint.p0f_p1i64p1i64f(i64 2882400000, i32 0, i64 addrspace(1)* (i64 addrspace(1)*)* @callee, i32 1, i32 0, i64 addrspace(1)* %obj, i32 0, i32 0, i64 addrspace(1)* %obj)
entry:
%ret_val = call i64 addrspace(1)* @callee(i64 addrspace(1)* %obj)
ret i64 addrspace(1)* %ret_val
diff --git a/test/Transforms/RewriteStatepointsForGC/two-invokes-one-landingpad.ll b/test/Transforms/RewriteStatepointsForGC/two-invokes-one-landingpad.ll
index d3d3c5a8d1ab..f35a3668a6b1 100644
--- a/test/Transforms/RewriteStatepointsForGC/two-invokes-one-landingpad.ll
+++ b/test/Transforms/RewriteStatepointsForGC/two-invokes-one-landingpad.ll
@@ -1,12 +1,12 @@
-; RUN: opt %s -rewrite-statepoints-for-gc -rs4gc-use-deopt-bundles -S | FileCheck %s
+; RUN: opt < %s -rewrite-statepoints-for-gc -S | FileCheck %s
declare void @some_call(i64 addrspace(1)*)
-declare i32 @"dummy_personality_function"()
+declare i32 @dummy_personality_function()
define i64 addrspace(1)* @test(i64 addrspace(1)* %obj, i64 addrspace(1)* %obj1)
gc "statepoint-example"
- personality i32 ()* @"dummy_personality_function" {
+ personality i32 ()* @dummy_personality_function {
entry:
invoke void @some_call(i64 addrspace(1)* %obj) [ "deopt"() ]
to label %second_invoke unwind label %exceptional_return
diff --git a/test/Transforms/SCCP/bitcast.ll b/test/Transforms/SCCP/bitcast.ll
new file mode 100644
index 000000000000..285823512e57
--- /dev/null
+++ b/test/Transforms/SCCP/bitcast.ll
@@ -0,0 +1,9 @@
+; RUN: opt < %s -ipsccp -S | FileCheck %s
+
+define i128 @vector_to_int_cast() {
+ %A = bitcast <4 x i32> <i32 1073741824, i32 1073741824, i32 1073741824, i32 1073741824> to i128
+ ret i128 %A
+}
+
+; CHECK: define i128 @vector_to_int_cast(
+; CHECK-NEXT: ret i128 85070591750041656499021422275829170176
diff --git a/test/Transforms/SCCP/comdat-ipo.ll b/test/Transforms/SCCP/comdat-ipo.ll
new file mode 100644
index 000000000000..618075fd5e3f
--- /dev/null
+++ b/test/Transforms/SCCP/comdat-ipo.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s -ipsccp -S | FileCheck %s
+
+; See PR26774
+
+define i32 @baz() {
+ ret i32 10
+}
+
+; We can const-prop @baz's return value *into* @foo, but cannot
+; constprop @foo's return value into bar.
+
+define linkonce_odr i32 @foo() {
+; CHECK-LABEL: @foo(
+; CHECK-NEXT: %val = call i32 @baz()
+; CHECK-NEXT: ret i32 10
+
+ %val = call i32 @baz()
+ ret i32 %val
+}
+
+define i32 @bar() {
+; CHECK-LABEL: @bar(
+; CHECK-NEXT: %val = call i32 @foo()
+; CHECK-NEXT: ret i32 %val
+
+ %val = call i32 @foo()
+ ret i32 %val
+}
diff --git a/test/Transforms/SCCP/constant-struct.ll b/test/Transforms/SCCP/constant-struct.ll
new file mode 100644
index 000000000000..2b33d5691678
--- /dev/null
+++ b/test/Transforms/SCCP/constant-struct.ll
@@ -0,0 +1,72 @@
+; Test that constant structs are folded.
+; RUN: opt %s -sccp -S | FileCheck %s
+
+define internal {i64} @struct1() {
+ %a = insertvalue {i64} undef, i64 24, 0
+ ret {i64} %a
+}
+
+; CHECK: define internal { i64 } @struct1() {
+; CHECK-NEXT: ret { i64 } { i64 24 }
+; CHECK-NEXT: }
+
+define internal {i64, i64} @struct2() {
+ %a = insertvalue {i64, i64} undef, i64 24, 0
+ ret {i64, i64} %a
+}
+
+; CHECK: define internal { i64, i64 } @struct2() {
+; CHECK-NEXT: ret { i64, i64 } { i64 24, i64 undef }
+; CHECK-NEXT: }
+
+define internal {i64, i64, i64} @struct3(i64 %x) {
+ %a = insertvalue {i64, i64, i64} undef, i64 24, 0
+ %b = insertvalue {i64, i64, i64} %a, i64 36, 1
+ %c = insertvalue {i64, i64, i64} %b, i64 %x, 2
+ ret {i64, i64, i64} %c
+}
+
+; CHECK: define internal { i64, i64, i64 } @struct3(i64 %x) {
+; CHECK-NEXT: %c = insertvalue { i64, i64, i64 } { i64 24, i64 36, i64 undef }, i64 %x, 2
+; CHECK-NEXT: ret { i64, i64, i64 } %c
+; CHECK-NEXT: }
+
+; Test(s) for overdefined values.
+define internal {i64, i32} @struct4(i32 %x) {
+ %a = insertvalue {i64, i32} {i64 12, i32 24}, i32 %x, 1
+ ret {i64, i32} %a
+}
+
+; CHECK: define internal { i64, i32 } @struct4(i32 %x) {
+; CHECK-NEXT: %a = insertvalue { i64, i32 } { i64 12, i32 24 }, i32 %x, 1
+; CHECK-NEXT: ret { i64, i32 } %a
+; CHECK-NEXT: }
+
+define internal {i32} @struct5(i32 %x) {
+ %a = insertvalue {i32} undef, i32 %x, 0
+ ret {i32} %a
+}
+
+; CHECK: define internal { i32 } @struct5(i32 %x) {
+; CHECK-NEXT: %a = insertvalue { i32 } undef, i32 %x, 0
+; CHECK-NEXT: ret { i32 } %a
+; CHECK-NEXT: }
+
+
+define internal {i32} @struct6({i32} %x) {
+ %a = insertvalue {i32} %x, i32 12, 0
+ ret {i32} %a
+}
+
+; CHECK: define internal { i32 } @struct6({ i32 } %x) {
+; CHECK-NEXT: ret { i32 } { i32 12 }
+; CHECK-NEXT: }
+
+define internal {i16} @struct7() {
+ %a = insertvalue {i16} {i16 4}, i16 7, 0
+ ret {i16} %a
+}
+
+; CHECK: define internal { i16 } @struct7() {
+; CHECK-NEXT: ret { i16 } { i16 7 }
+; CHECK-NEXT: }
diff --git a/test/Transforms/SCCP/global-alias-constprop.ll b/test/Transforms/SCCP/global-alias-constprop.ll
index be7e083e6a67..8eac3ac18059 100644
--- a/test/Transforms/SCCP/global-alias-constprop.ll
+++ b/test/Transforms/SCCP/global-alias-constprop.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -sccp -S | FileCheck %s
+; RUN: opt < %s -passes=sccp -S | FileCheck %s
@0 = private unnamed_addr constant [2 x i32] [i32 -1, i32 1]
@"\01??_7A@@6B@" = unnamed_addr alias i32, getelementptr inbounds ([2 x i32], [2 x i32]* @0, i32 0, i32 1)
diff --git a/test/Transforms/SCCP/ipsccp-basic.ll b/test/Transforms/SCCP/ipsccp-basic.ll
index bf37134545ed..917aaa02acac 100644
--- a/test/Transforms/SCCP/ipsccp-basic.ll
+++ b/test/Transforms/SCCP/ipsccp-basic.ll
@@ -82,6 +82,10 @@ define internal {i64,i64} @test4a() {
ret {i64,i64} %b
}
+; CHECK-LABEL: define internal { i64, i64 } @test4a(
+; CHECK-NEXT: ret { i64, i64 } { i64 5, i64 4 }
+; CHECK-NEXT: }
+
define i64 @test4b() personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
%a = invoke {i64,i64} @test4a()
to label %A unwind label %B
@@ -130,7 +134,7 @@ B:
; CHECK: define i64 @test5b()
; CHECK: A:
-; CHECK-NEXT: %c = call i64 @test5c({ i64, i64 } %a)
+; CHECK-NEXT: %c = call i64 @test5c({ i64, i64 } { i64 5, i64 4 })
; CHECK-NEXT: ret i64 5
define internal i64 @test5c({i64,i64} %a) {
@@ -163,8 +167,7 @@ define internal %T @test7a(i32 %A) {
%mrv1 = insertvalue %T %mrv0, i32 %A, 1
ret %T %mrv1
; CHECK-LABEL: @test7a(
-; CHECK-NEXT: %mrv0 = insertvalue %T undef, i32 18, 0
-; CHECK-NEXT: %mrv1 = insertvalue %T %mrv0, i32 17, 1
+; CHECK-NEXT: ret %T { i32 18, i32 17 }
}
define i32 @test7b() {
@@ -208,6 +211,12 @@ entry:
ret void
}
+; CHECK-LABEL: define void @test9(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %local_foo = alloca {}
+; CHECK-NEXT: store {} zeroinitializer, {}* %local_foo
+; CHECK-NEXT: ret void
+
declare i32 @__gxx_personality_v0(...)
;;======================== test10
diff --git a/test/Transforms/SCCP/pr27712.ll b/test/Transforms/SCCP/pr27712.ll
new file mode 100644
index 000000000000..b41c3981d53a
--- /dev/null
+++ b/test/Transforms/SCCP/pr27712.ll
@@ -0,0 +1,30 @@
+; RUN: opt -sccp -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @main() {
+entry:
+ br label %lbl_1154
+
+lbl_1154:
+ %b0.0 = phi i32 [ -119, %entry ], [ 0, %lbl_1154 ]
+ %cmp11 = icmp slt i32 %b0.0, 0
+ %shl.op = shl i32 33554432, %b0.0
+ %cmp1445 = icmp ult i32 %shl.op, 33554432
+ %cmp14 = or i1 %cmp11, %cmp1445
+ br i1 %cmp14, label %lbl_1154, label %if.end19
+
+if.end19:
+ br i1 %cmp11, label %if.then22, label %cleanup26
+
+if.then22:
+ tail call void @abort()
+ unreachable
+
+cleanup26:
+ ret i32 %shl.op
+}
+; CHECK-LABEL: define i32 @main(
+; CHECK-NOT: ret i32 undef
+
+declare void @abort()
diff --git a/test/Transforms/SCCP/ub-shift.ll b/test/Transforms/SCCP/ub-shift.ll
new file mode 100644
index 000000000000..3fb2d97457d9
--- /dev/null
+++ b/test/Transforms/SCCP/ub-shift.ll
@@ -0,0 +1,69 @@
+; RUN: opt < %s -sccp -S | FileCheck %s
+
+; CHECK-LABEL: shift_undef_64
+define void @shift_undef_64(i64* %p) {
+ %r1 = lshr i64 -1, 4294967296 ; 2^32
+ ; CHECK: store i64 undef
+ store i64 %r1, i64* %p
+
+ %r2 = ashr i64 -1, 4294967297 ; 2^32 + 1
+ ; CHECK: store i64 undef
+ store i64 %r2, i64* %p
+
+ %r3 = shl i64 -1, 4294967298 ; 2^32 + 2
+ ; CHECK: store i64 undef
+ store i64 %r3, i64* %p
+
+ ret void
+}
+
+; CHECK-LABEL: shift_undef_65
+define void @shift_undef_65(i65* %p) {
+ %r1 = lshr i65 2, 18446744073709551617
+ ; CHECK: store i65 undef
+ store i65 %r1, i65* %p
+
+ %r2 = ashr i65 4, 18446744073709551617
+ ; CHECK: store i65 undef
+ store i65 %r2, i65* %p
+
+ %r3 = shl i65 1, 18446744073709551617
+ ; CHECK: store i65 undef
+ store i65 %r3, i65* %p
+
+ ret void
+}
+
+; CHECK-LABEL: shift_undef_256
+define void @shift_undef_256(i256* %p) {
+ %r1 = lshr i256 2, 18446744073709551617
+ ; CHECK: store i256 undef
+ store i256 %r1, i256* %p
+
+ %r2 = ashr i256 4, 18446744073709551618
+ ; CHECK: store i256 undef
+ store i256 %r2, i256* %p
+
+ %r3 = shl i256 1, 18446744073709551619
+ ; CHECK: store i256 undef
+ store i256 %r3, i256* %p
+
+ ret void
+}
+
+; CHECK-LABEL: shift_undef_511
+define void @shift_undef_511(i511* %p) {
+ %r1 = lshr i511 -1, 1208925819614629174706276 ; 2^80 + 100
+ ; CHECK: store i511 undef
+ store i511 %r1, i511* %p
+
+ %r2 = ashr i511 -2, 1208925819614629174706200
+ ; CHECK: store i511 undef
+ store i511 %r2, i511* %p
+
+ %r3 = shl i511 -3, 1208925819614629174706180
+ ; CHECK: store i511 undef
+ store i511 %r3, i511* %p
+
+ ret void
+}
diff --git a/test/Transforms/SCCP/undef-resolve.ll b/test/Transforms/SCCP/undef-resolve.ll
index 2b40183c2cc5..fcfe3f573ea2 100644
--- a/test/Transforms/SCCP/undef-resolve.ll
+++ b/test/Transforms/SCCP/undef-resolve.ll
@@ -170,3 +170,13 @@ entry:
; CHECK-LABEL: @test10(
; CHECK: ret i64 undef
}
+
+@GV = external global i32
+
+define i32 @test11(i1 %tobool) {
+entry:
+ %shr4 = ashr i32 undef, zext (i1 icmp eq (i32* bitcast (i32 (i1)* @test11 to i32*), i32* @GV) to i32)
+ ret i32 %shr4
+; CHECK-LABEL: @test11(
+; CHECK: ret i32 -1
+}
diff --git a/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll b/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll
new file mode 100644
index 000000000000..d74e26ec20a3
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/AArch64/gather-reduce.ll
@@ -0,0 +1,258 @@
+; RUN: opt -S -slp-vectorizer -dce -instcombine < %s | FileCheck %s --check-prefix=GENERIC
+; RUN: opt -S -mcpu=kryo -slp-vectorizer -dce -instcombine < %s | FileCheck %s --check-prefix=KRYO
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+; These tests check that we vectorize the index calculations in the
+; gather-reduce pattern shown below. We check cases having i32 and i64
+; subtraction.
+;
+; int gather_reduce_8x16(short *a, short *b, short *g, int n) {
+; int sum = 0;
+; for (int i = 0; i < n ; ++i) {
+; sum += g[*a++ - b[0]]; sum += g[*a++ - b[1]];
+; sum += g[*a++ - b[2]]; sum += g[*a++ - b[3]];
+; sum += g[*a++ - b[4]]; sum += g[*a++ - b[5]];
+; sum += g[*a++ - b[6]]; sum += g[*a++ - b[7]];
+; }
+; return sum;
+; }
+
+; GENERIC-LABEL: @gather_reduce_8x16_i32
+;
+; GENERIC: [[L:%[a-zA-Z0-9.]+]] = load <8 x i16>
+; GENERIC: zext <8 x i16> [[L]] to <8 x i32>
+; GENERIC: [[S:%[a-zA-Z0-9.]+]] = sub nsw <8 x i32>
+; GENERIC: [[X:%[a-zA-Z0-9.]+]] = extractelement <8 x i32> [[S]]
+; GENERIC: sext i32 [[X]] to i64
+;
+define i32 @gather_reduce_8x16_i32(i16* nocapture readonly %a, i16* nocapture readonly %b, i16* nocapture readonly %g, i32 %n) {
+entry:
+ %cmp.99 = icmp sgt i32 %n, 0
+ br i1 %cmp.99, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+ br label %for.body
+
+for.cond.cleanup.loopexit:
+ br label %for.cond.cleanup
+
+for.cond.cleanup:
+ %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add66, %for.cond.cleanup.loopexit ]
+ ret i32 %sum.0.lcssa
+
+for.body:
+ %i.0103 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %sum.0102 = phi i32 [ %add66, %for.body ], [ 0, %for.body.preheader ]
+ %a.addr.0101 = phi i16* [ %incdec.ptr58, %for.body ], [ %a, %for.body.preheader ]
+ %incdec.ptr = getelementptr inbounds i16, i16* %a.addr.0101, i64 1
+ %0 = load i16, i16* %a.addr.0101, align 2
+ %conv = zext i16 %0 to i32
+ %incdec.ptr1 = getelementptr inbounds i16, i16* %b, i64 1
+ %1 = load i16, i16* %b, align 2
+ %conv2 = zext i16 %1 to i32
+ %sub = sub nsw i32 %conv, %conv2
+ %arrayidx = getelementptr inbounds i16, i16* %g, i32 %sub
+ %2 = load i16, i16* %arrayidx, align 2
+ %conv3 = zext i16 %2 to i32
+ %add = add nsw i32 %conv3, %sum.0102
+ %incdec.ptr4 = getelementptr inbounds i16, i16* %a.addr.0101, i64 2
+ %3 = load i16, i16* %incdec.ptr, align 2
+ %conv5 = zext i16 %3 to i32
+ %incdec.ptr6 = getelementptr inbounds i16, i16* %b, i64 2
+ %4 = load i16, i16* %incdec.ptr1, align 2
+ %conv7 = zext i16 %4 to i32
+ %sub8 = sub nsw i32 %conv5, %conv7
+ %arrayidx10 = getelementptr inbounds i16, i16* %g, i32 %sub8
+ %5 = load i16, i16* %arrayidx10, align 2
+ %conv11 = zext i16 %5 to i32
+ %add12 = add nsw i32 %add, %conv11
+ %incdec.ptr13 = getelementptr inbounds i16, i16* %a.addr.0101, i64 3
+ %6 = load i16, i16* %incdec.ptr4, align 2
+ %conv14 = zext i16 %6 to i32
+ %incdec.ptr15 = getelementptr inbounds i16, i16* %b, i64 3
+ %7 = load i16, i16* %incdec.ptr6, align 2
+ %conv16 = zext i16 %7 to i32
+ %sub17 = sub nsw i32 %conv14, %conv16
+ %arrayidx19 = getelementptr inbounds i16, i16* %g, i32 %sub17
+ %8 = load i16, i16* %arrayidx19, align 2
+ %conv20 = zext i16 %8 to i32
+ %add21 = add nsw i32 %add12, %conv20
+ %incdec.ptr22 = getelementptr inbounds i16, i16* %a.addr.0101, i64 4
+ %9 = load i16, i16* %incdec.ptr13, align 2
+ %conv23 = zext i16 %9 to i32
+ %incdec.ptr24 = getelementptr inbounds i16, i16* %b, i64 4
+ %10 = load i16, i16* %incdec.ptr15, align 2
+ %conv25 = zext i16 %10 to i32
+ %sub26 = sub nsw i32 %conv23, %conv25
+ %arrayidx28 = getelementptr inbounds i16, i16* %g, i32 %sub26
+ %11 = load i16, i16* %arrayidx28, align 2
+ %conv29 = zext i16 %11 to i32
+ %add30 = add nsw i32 %add21, %conv29
+ %incdec.ptr31 = getelementptr inbounds i16, i16* %a.addr.0101, i64 5
+ %12 = load i16, i16* %incdec.ptr22, align 2
+ %conv32 = zext i16 %12 to i32
+ %incdec.ptr33 = getelementptr inbounds i16, i16* %b, i64 5
+ %13 = load i16, i16* %incdec.ptr24, align 2
+ %conv34 = zext i16 %13 to i32
+ %sub35 = sub nsw i32 %conv32, %conv34
+ %arrayidx37 = getelementptr inbounds i16, i16* %g, i32 %sub35
+ %14 = load i16, i16* %arrayidx37, align 2
+ %conv38 = zext i16 %14 to i32
+ %add39 = add nsw i32 %add30, %conv38
+ %incdec.ptr40 = getelementptr inbounds i16, i16* %a.addr.0101, i64 6
+ %15 = load i16, i16* %incdec.ptr31, align 2
+ %conv41 = zext i16 %15 to i32
+ %incdec.ptr42 = getelementptr inbounds i16, i16* %b, i64 6
+ %16 = load i16, i16* %incdec.ptr33, align 2
+ %conv43 = zext i16 %16 to i32
+ %sub44 = sub nsw i32 %conv41, %conv43
+ %arrayidx46 = getelementptr inbounds i16, i16* %g, i32 %sub44
+ %17 = load i16, i16* %arrayidx46, align 2
+ %conv47 = zext i16 %17 to i32
+ %add48 = add nsw i32 %add39, %conv47
+ %incdec.ptr49 = getelementptr inbounds i16, i16* %a.addr.0101, i64 7
+ %18 = load i16, i16* %incdec.ptr40, align 2
+ %conv50 = zext i16 %18 to i32
+ %incdec.ptr51 = getelementptr inbounds i16, i16* %b, i64 7
+ %19 = load i16, i16* %incdec.ptr42, align 2
+ %conv52 = zext i16 %19 to i32
+ %sub53 = sub nsw i32 %conv50, %conv52
+ %arrayidx55 = getelementptr inbounds i16, i16* %g, i32 %sub53
+ %20 = load i16, i16* %arrayidx55, align 2
+ %conv56 = zext i16 %20 to i32
+ %add57 = add nsw i32 %add48, %conv56
+ %incdec.ptr58 = getelementptr inbounds i16, i16* %a.addr.0101, i64 8
+ %21 = load i16, i16* %incdec.ptr49, align 2
+ %conv59 = zext i16 %21 to i32
+ %22 = load i16, i16* %incdec.ptr51, align 2
+ %conv61 = zext i16 %22 to i32
+ %sub62 = sub nsw i32 %conv59, %conv61
+ %arrayidx64 = getelementptr inbounds i16, i16* %g, i32 %sub62
+ %23 = load i16, i16* %arrayidx64, align 2
+ %conv65 = zext i16 %23 to i32
+ %add66 = add nsw i32 %add57, %conv65
+ %inc = add nuw nsw i32 %i.0103, 1
+ %exitcond = icmp eq i32 %inc, %n
+ br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+; KRYO-LABEL: @gather_reduce_8x16_i64
+;
+; KRYO: [[L:%[a-zA-Z0-9.]+]] = load <8 x i16>
+; KRYO: zext <8 x i16> [[L]] to <8 x i32>
+; KRYO: [[S:%[a-zA-Z0-9.]+]] = sub nsw <8 x i32>
+; KRYO: [[X:%[a-zA-Z0-9.]+]] = extractelement <8 x i32> [[S]]
+; KRYO: sext i32 [[X]] to i64
+;
+define i32 @gather_reduce_8x16_i64(i16* nocapture readonly %a, i16* nocapture readonly %b, i16* nocapture readonly %g, i32 %n) {
+entry:
+ %cmp.99 = icmp sgt i32 %n, 0
+ br i1 %cmp.99, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+ br label %for.body
+
+for.cond.cleanup.loopexit:
+ br label %for.cond.cleanup
+
+for.cond.cleanup:
+ %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add66, %for.cond.cleanup.loopexit ]
+ ret i32 %sum.0.lcssa
+
+for.body:
+ %i.0103 = phi i32 [ %inc, %for.body ], [ 0, %for.body.preheader ]
+ %sum.0102 = phi i32 [ %add66, %for.body ], [ 0, %for.body.preheader ]
+ %a.addr.0101 = phi i16* [ %incdec.ptr58, %for.body ], [ %a, %for.body.preheader ]
+ %incdec.ptr = getelementptr inbounds i16, i16* %a.addr.0101, i64 1
+ %0 = load i16, i16* %a.addr.0101, align 2
+ %conv = zext i16 %0 to i64
+ %incdec.ptr1 = getelementptr inbounds i16, i16* %b, i64 1
+ %1 = load i16, i16* %b, align 2
+ %conv2 = zext i16 %1 to i64
+ %sub = sub nsw i64 %conv, %conv2
+ %arrayidx = getelementptr inbounds i16, i16* %g, i64 %sub
+ %2 = load i16, i16* %arrayidx, align 2
+ %conv3 = zext i16 %2 to i32
+ %add = add nsw i32 %conv3, %sum.0102
+ %incdec.ptr4 = getelementptr inbounds i16, i16* %a.addr.0101, i64 2
+ %3 = load i16, i16* %incdec.ptr, align 2
+ %conv5 = zext i16 %3 to i64
+ %incdec.ptr6 = getelementptr inbounds i16, i16* %b, i64 2
+ %4 = load i16, i16* %incdec.ptr1, align 2
+ %conv7 = zext i16 %4 to i64
+ %sub8 = sub nsw i64 %conv5, %conv7
+ %arrayidx10 = getelementptr inbounds i16, i16* %g, i64 %sub8
+ %5 = load i16, i16* %arrayidx10, align 2
+ %conv11 = zext i16 %5 to i32
+ %add12 = add nsw i32 %add, %conv11
+ %incdec.ptr13 = getelementptr inbounds i16, i16* %a.addr.0101, i64 3
+ %6 = load i16, i16* %incdec.ptr4, align 2
+ %conv14 = zext i16 %6 to i64
+ %incdec.ptr15 = getelementptr inbounds i16, i16* %b, i64 3
+ %7 = load i16, i16* %incdec.ptr6, align 2
+ %conv16 = zext i16 %7 to i64
+ %sub17 = sub nsw i64 %conv14, %conv16
+ %arrayidx19 = getelementptr inbounds i16, i16* %g, i64 %sub17
+ %8 = load i16, i16* %arrayidx19, align 2
+ %conv20 = zext i16 %8 to i32
+ %add21 = add nsw i32 %add12, %conv20
+ %incdec.ptr22 = getelementptr inbounds i16, i16* %a.addr.0101, i64 4
+ %9 = load i16, i16* %incdec.ptr13, align 2
+ %conv23 = zext i16 %9 to i64
+ %incdec.ptr24 = getelementptr inbounds i16, i16* %b, i64 4
+ %10 = load i16, i16* %incdec.ptr15, align 2
+ %conv25 = zext i16 %10 to i64
+ %sub26 = sub nsw i64 %conv23, %conv25
+ %arrayidx28 = getelementptr inbounds i16, i16* %g, i64 %sub26
+ %11 = load i16, i16* %arrayidx28, align 2
+ %conv29 = zext i16 %11 to i32
+ %add30 = add nsw i32 %add21, %conv29
+ %incdec.ptr31 = getelementptr inbounds i16, i16* %a.addr.0101, i64 5
+ %12 = load i16, i16* %incdec.ptr22, align 2
+ %conv32 = zext i16 %12 to i64
+ %incdec.ptr33 = getelementptr inbounds i16, i16* %b, i64 5
+ %13 = load i16, i16* %incdec.ptr24, align 2
+ %conv34 = zext i16 %13 to i64
+ %sub35 = sub nsw i64 %conv32, %conv34
+ %arrayidx37 = getelementptr inbounds i16, i16* %g, i64 %sub35
+ %14 = load i16, i16* %arrayidx37, align 2
+ %conv38 = zext i16 %14 to i32
+ %add39 = add nsw i32 %add30, %conv38
+ %incdec.ptr40 = getelementptr inbounds i16, i16* %a.addr.0101, i64 6
+ %15 = load i16, i16* %incdec.ptr31, align 2
+ %conv41 = zext i16 %15 to i64
+ %incdec.ptr42 = getelementptr inbounds i16, i16* %b, i64 6
+ %16 = load i16, i16* %incdec.ptr33, align 2
+ %conv43 = zext i16 %16 to i64
+ %sub44 = sub nsw i64 %conv41, %conv43
+ %arrayidx46 = getelementptr inbounds i16, i16* %g, i64 %sub44
+ %17 = load i16, i16* %arrayidx46, align 2
+ %conv47 = zext i16 %17 to i32
+ %add48 = add nsw i32 %add39, %conv47
+ %incdec.ptr49 = getelementptr inbounds i16, i16* %a.addr.0101, i64 7
+ %18 = load i16, i16* %incdec.ptr40, align 2
+ %conv50 = zext i16 %18 to i64
+ %incdec.ptr51 = getelementptr inbounds i16, i16* %b, i64 7
+ %19 = load i16, i16* %incdec.ptr42, align 2
+ %conv52 = zext i16 %19 to i64
+ %sub53 = sub nsw i64 %conv50, %conv52
+ %arrayidx55 = getelementptr inbounds i16, i16* %g, i64 %sub53
+ %20 = load i16, i16* %arrayidx55, align 2
+ %conv56 = zext i16 %20 to i32
+ %add57 = add nsw i32 %add48, %conv56
+ %incdec.ptr58 = getelementptr inbounds i16, i16* %a.addr.0101, i64 8
+ %21 = load i16, i16* %incdec.ptr49, align 2
+ %conv59 = zext i16 %21 to i64
+ %22 = load i16, i16* %incdec.ptr51, align 2
+ %conv61 = zext i16 %22 to i64
+ %sub62 = sub nsw i64 %conv59, %conv61
+ %arrayidx64 = getelementptr inbounds i16, i16* %g, i64 %sub62
+ %23 = load i16, i16* %arrayidx64, align 2
+ %conv65 = zext i16 %23 to i32
+ %add66 = add nsw i32 %add57, %conv65
+ %inc = add nuw nsw i32 %i.0103, 1
+ %exitcond = icmp eq i32 %inc, %n
+ br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}
diff --git a/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll b/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll
new file mode 100644
index 000000000000..e9b71963530c
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/AArch64/getelementptr.ll
@@ -0,0 +1,111 @@
+; RUN: opt -S -slp-vectorizer -slp-threshold=-18 -dce -instcombine < %s | FileCheck %s
+
+target datalayout = "e-m:e-i32:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+; These tests check that we remove from consideration pairs of seed
+; getelementptrs when they are known to have a constant difference. Such pairs
+; are likely not good candidates for vectorization since one can be computed
+; from the other. We use an unprofitable threshold to force vectorization.
+;
+; int getelementptr(int *g, int n, int w, int x, int y, int z) {
+; int sum = 0;
+; for (int i = 0; i < n ; ++i) {
+; sum += g[2*i + w]; sum += g[2*i + x];
+; sum += g[2*i + y]; sum += g[2*i + z];
+; }
+; return sum;
+; }
+;
+
+; CHECK-LABEL: @getelementptr_4x32
+;
+; CHECK: [[A:%[a-zA-Z0-9.]+]] = add nsw <4 x i32>
+; CHECK: [[X:%[a-zA-Z0-9.]+]] = extractelement <4 x i32> [[A]]
+; CHECK: sext i32 [[X]] to i64
+;
+define i32 @getelementptr_4x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 %y, i32 %z) {
+entry:
+ %cmp31 = icmp sgt i32 %n, 0
+ br i1 %cmp31, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+ br label %for.body
+
+for.cond.cleanup.loopexit:
+ br label %for.cond.cleanup
+
+for.cond.cleanup:
+ %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add16, %for.cond.cleanup.loopexit ]
+ ret i32 %sum.0.lcssa
+
+for.body:
+ %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %sum.032 = phi i32 [ 0, %for.body.preheader ], [ %add16, %for.body ]
+ %t4 = shl nsw i32 %indvars.iv, 1
+ %t5 = add nsw i32 %t4, 0
+ %arrayidx = getelementptr inbounds i32, i32* %g, i32 %t5
+ %t6 = load i32, i32* %arrayidx, align 4
+ %add1 = add nsw i32 %t6, %sum.032
+ %t7 = add nsw i32 %t4, %x
+ %arrayidx5 = getelementptr inbounds i32, i32* %g, i32 %t7
+ %t8 = load i32, i32* %arrayidx5, align 4
+ %add6 = add nsw i32 %add1, %t8
+ %t9 = add nsw i32 %t4, %y
+ %arrayidx10 = getelementptr inbounds i32, i32* %g, i32 %t9
+ %t10 = load i32, i32* %arrayidx10, align 4
+ %add11 = add nsw i32 %add6, %t10
+ %t11 = add nsw i32 %t4, %z
+ %arrayidx15 = getelementptr inbounds i32, i32* %g, i32 %t11
+ %t12 = load i32, i32* %arrayidx15, align 4
+ %add16 = add nsw i32 %add11, %t12
+ %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
+ %exitcond = icmp eq i32 %indvars.iv.next , %n
+ br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}
+
+; CHECK-LABEL: @getelementptr_2x32
+;
+; CHECK: [[A:%[a-zA-Z0-9.]+]] = add nsw <2 x i32>
+; CHECK: [[X:%[a-zA-Z0-9.]+]] = extractelement <2 x i32> [[A]]
+; CHECK: sext i32 [[X]] to i64
+;
+define i32 @getelementptr_2x32(i32* nocapture readonly %g, i32 %n, i32 %x, i32 %y, i32 %z) {
+entry:
+ %cmp31 = icmp sgt i32 %n, 0
+ br i1 %cmp31, label %for.body.preheader, label %for.cond.cleanup
+
+for.body.preheader:
+ br label %for.body
+
+for.cond.cleanup.loopexit:
+ br label %for.cond.cleanup
+
+for.cond.cleanup:
+ %sum.0.lcssa = phi i32 [ 0, %entry ], [ %add16, %for.cond.cleanup.loopexit ]
+ ret i32 %sum.0.lcssa
+
+for.body:
+ %indvars.iv = phi i32 [ 0, %for.body.preheader ], [ %indvars.iv.next, %for.body ]
+ %sum.032 = phi i32 [ 0, %for.body.preheader ], [ %add16, %for.body ]
+ %t4 = shl nsw i32 %indvars.iv, 1
+ %t5 = add nsw i32 %t4, 0
+ %arrayidx = getelementptr inbounds i32, i32* %g, i32 %t5
+ %t6 = load i32, i32* %arrayidx, align 4
+ %add1 = add nsw i32 %t6, %sum.032
+ %t7 = add nsw i32 %t4, 1
+ %arrayidx5 = getelementptr inbounds i32, i32* %g, i32 %t7
+ %t8 = load i32, i32* %arrayidx5, align 4
+ %add6 = add nsw i32 %add1, %t8
+ %t9 = add nsw i32 %t4, %y
+ %arrayidx10 = getelementptr inbounds i32, i32* %g, i32 %t9
+ %t10 = load i32, i32* %arrayidx10, align 4
+ %add11 = add nsw i32 %add6, %t10
+ %t11 = add nsw i32 %t4, %z
+ %arrayidx15 = getelementptr inbounds i32, i32* %g, i32 %t11
+ %t12 = load i32, i32* %arrayidx15, align 4
+ %add16 = add nsw i32 %add11, %t12
+ %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
+ %exitcond = icmp eq i32 %indvars.iv.next , %n
+ br i1 %exitcond, label %for.cond.cleanup.loopexit, label %for.body
+}
diff --git a/test/Transforms/SLPVectorizer/AArch64/minimum-sizes.ll b/test/Transforms/SLPVectorizer/AArch64/minimum-sizes.ll
new file mode 100644
index 000000000000..7e1d67095664
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/AArch64/minimum-sizes.ll
@@ -0,0 +1,55 @@
+; RUN: opt -S -slp-vectorizer < %s | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
+target triple = "aarch64--linux-gnu"
+
+; This test ensures that we do not regress due to PR26364. The vectorizer
+; should not compute a smaller size for %k.13 since it is in a use-def cycle
+; and cannot be demoted.
+;
+; CHECK-LABEL: @PR26364
+; CHECK: %k.13 = phi i32
+;
+define fastcc void @PR26364() {
+entry:
+ br i1 undef, label %for.end11, label %for.cond4
+
+for.cond4:
+ %k.13 = phi i32 [ undef, %entry ], [ %k.3, %for.cond4 ]
+ %e.02 = phi i32 [ 1, %entry ], [ 0, %for.cond4 ]
+ %e.1 = select i1 undef, i32 %e.02, i32 0
+ %k.3 = select i1 undef, i32 %k.13, i32 undef
+ br label %for.cond4
+
+for.end11:
+ ret void
+}
+
+; This test ensures that we do not regress due to PR26629. We must look at
+; every root in the vectorizable tree when computing minimum sizes since one
+; root may require fewer bits than another.
+;
+; CHECK-LABEL: @PR26629
+; CHECK-NOT: {{.*}} and <2 x i72>
+;
+define void @PR26629(i32* %c) {
+entry:
+ br i1 undef, label %for.ph, label %for.end
+
+for.ph:
+ %0 = load i32, i32* %c, align 4
+ br label %for.body
+
+for.body:
+ %d = phi i72 [ 576507472957710340, %for.ph ], [ %bf.set17, %for.body ]
+ %sub = sub i32 %0, undef
+ %bf.clear13 = and i72 %d, -576460748008464384
+ %1 = zext i32 %sub to i72
+ %bf.value15 = and i72 %1, 8191
+ %bf.clear16 = or i72 %bf.value15, %bf.clear13
+ %bf.set17 = or i72 %bf.clear16, undef
+ br label %for.body
+
+for.end:
+ ret void
+}
diff --git a/test/Transforms/SLPVectorizer/PowerPC/lit.local.cfg b/test/Transforms/SLPVectorizer/PowerPC/lit.local.cfg
new file mode 100644
index 000000000000..091332439b18
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/PowerPC/lit.local.cfg
@@ -0,0 +1,2 @@
+if not 'PowerPC' in config.root.targets:
+ config.unsupported = True
diff --git a/test/Transforms/SLPVectorizer/PowerPC/pr27897.ll b/test/Transforms/SLPVectorizer/PowerPC/pr27897.ll
new file mode 100644
index 000000000000..dabb3380ef1c
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/PowerPC/pr27897.ll
@@ -0,0 +1,29 @@
+; RUN: opt -S -mtriple=powerpc64-linux-gnu -mcpu=pwr8 -mattr=+vsx -slp-vectorizer < %s | FileCheck %s
+
+%struct.A = type { i8*, i8* }
+
+define i64 @foo(%struct.A* nocapture readonly %this) {
+entry:
+ %end.i = getelementptr inbounds %struct.A, %struct.A* %this, i64 0, i32 1
+ %0 = bitcast i8** %end.i to i64*
+ %1 = load i64, i64* %0, align 8
+ %2 = bitcast %struct.A* %this to i64*
+ %3 = load i64, i64* %2, align 8
+ %sub.ptr.sub.i = sub i64 %1, %3
+ %cmp = icmp sgt i64 %sub.ptr.sub.i, 9
+ br i1 %cmp, label %return, label %lor.lhs.false
+
+lor.lhs.false:
+ %4 = inttoptr i64 %3 to i8*
+ %5 = inttoptr i64 %1 to i8*
+ %cmp2 = icmp ugt i8* %5, %4
+ %. = select i1 %cmp2, i64 2, i64 -1
+ ret i64 %.
+
+return:
+ ret i64 2
+}
+
+; CHECK: load i64
+; CHECK-NOT: load <2 x i64>
+; CHECK-NOT: extractelement
diff --git a/test/Transforms/SLPVectorizer/X86/bitreverse.ll b/test/Transforms/SLPVectorizer/X86/bitreverse.ll
new file mode 100644
index 000000000000..c6d65bbe6840
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/bitreverse.ll
@@ -0,0 +1,741 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=XOP
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver4 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=XOP
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@src64 = common global [4 x i64] zeroinitializer, align 32
+@dst64 = common global [4 x i64] zeroinitializer, align 32
+@src32 = common global [8 x i32] zeroinitializer, align 32
+@dst32 = common global [8 x i32] zeroinitializer, align 32
+@src16 = common global [16 x i16] zeroinitializer, align 32
+@dst16 = common global [16 x i16] zeroinitializer, align 32
+@src8 = common global [32 x i8] zeroinitializer, align 32
+@dst8 = common global [32 x i8] zeroinitializer, align 32
+
+declare i64 @llvm.bitreverse.i64(i64)
+declare i32 @llvm.bitreverse.i32(i32)
+declare i16 @llvm.bitreverse.i16(i16)
+declare i8 @llvm.bitreverse.i8(i8)
+
+define void @bitreverse_2i64() #0 {
+; SSE-LABEL: @bitreverse_2i64(
+; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
+; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; SSE-NEXT: [[BITREVERSE0:%.*]] = call i64 @llvm.bitreverse.i64(i64 [[LD0]])
+; SSE-NEXT: [[BITREVERSE1:%.*]] = call i64 @llvm.bitreverse.i64(i64 [[LD1]])
+; SSE-NEXT: store i64 [[BITREVERSE0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
+; SSE-NEXT: store i64 [[BITREVERSE1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; SSE-NEXT: ret void
+;
+; AVX-LABEL: @bitreverse_2i64(
+; AVX-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
+; AVX-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; AVX-NEXT: [[BITREVERSE0:%.*]] = call i64 @llvm.bitreverse.i64(i64 [[LD0]])
+; AVX-NEXT: [[BITREVERSE1:%.*]] = call i64 @llvm.bitreverse.i64(i64 [[LD1]])
+; AVX-NEXT: store i64 [[BITREVERSE0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
+; AVX-NEXT: store i64 [[BITREVERSE1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; AVX-NEXT: ret void
+;
+; XOP-LABEL: @bitreverse_2i64(
+; XOP-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([4 x i64]* @src64 to <2 x i64>*), align 8
+; XOP-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.bitreverse.v2i64(<2 x i64> [[TMP1]])
+; XOP-NEXT: store <2 x i64> [[TMP2]], <2 x i64>* bitcast ([4 x i64]* @dst64 to <2 x i64>*), align 8
+; XOP-NEXT: ret void
+;
+ %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
+ %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+ %bitreverse0 = call i64 @llvm.bitreverse.i64(i64 %ld0)
+ %bitreverse1 = call i64 @llvm.bitreverse.i64(i64 %ld1)
+ store i64 %bitreverse0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
+ store i64 %bitreverse1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+ ret void
+}
+
+define void @bitreverse_4i64() #0 {
+; SSE-LABEL: @bitreverse_4i64(
+; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
+; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
+; SSE-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
+; SSE-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+; SSE-NEXT: [[BITREVERSE0:%.*]] = call i64 @llvm.bitreverse.i64(i64 [[LD0]])
+; SSE-NEXT: [[BITREVERSE1:%.*]] = call i64 @llvm.bitreverse.i64(i64 [[LD1]])
+; SSE-NEXT: [[BITREVERSE2:%.*]] = call i64 @llvm.bitreverse.i64(i64 [[LD2]])
+; SSE-NEXT: [[BITREVERSE3:%.*]] = call i64 @llvm.bitreverse.i64(i64 [[LD3]])
+; SSE-NEXT: store i64 [[BITREVERSE0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
+; SSE-NEXT: store i64 [[BITREVERSE1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
+; SSE-NEXT: store i64 [[BITREVERSE2]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
+; SSE-NEXT: store i64 [[BITREVERSE3]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+; SSE-NEXT: ret void
+;
+; AVX1-LABEL: @bitreverse_4i64(
+; AVX1-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
+; AVX1-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
+; AVX1-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
+; AVX1-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+; AVX1-NEXT: [[BITREVERSE0:%.*]] = call i64 @llvm.bitreverse.i64(i64 [[LD0]])
+; AVX1-NEXT: [[BITREVERSE1:%.*]] = call i64 @llvm.bitreverse.i64(i64 [[LD1]])
+; AVX1-NEXT: [[BITREVERSE2:%.*]] = call i64 @llvm.bitreverse.i64(i64 [[LD2]])
+; AVX1-NEXT: [[BITREVERSE3:%.*]] = call i64 @llvm.bitreverse.i64(i64 [[LD3]])
+; AVX1-NEXT: store i64 [[BITREVERSE0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
+; AVX1-NEXT: store i64 [[BITREVERSE1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
+; AVX1-NEXT: store i64 [[BITREVERSE2]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
+; AVX1-NEXT: store i64 [[BITREVERSE3]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+; AVX1-NEXT: ret void
+;
+; AVX2-LABEL: @bitreverse_4i64(
+; AVX2-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([4 x i64]* @src64 to <4 x i64>*), align 4
+; AVX2-NEXT: [[TMP2:%.*]] = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> [[TMP1]])
+; AVX2-NEXT: store <4 x i64> [[TMP2]], <4 x i64>* bitcast ([4 x i64]* @dst64 to <4 x i64>*), align 4
+; AVX2-NEXT: ret void
+;
+; XOP-LABEL: @bitreverse_4i64(
+; XOP-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([4 x i64]* @src64 to <4 x i64>*), align 4
+; XOP-NEXT: [[TMP2:%.*]] = call <4 x i64> @llvm.bitreverse.v4i64(<4 x i64> [[TMP1]])
+; XOP-NEXT: store <4 x i64> [[TMP2]], <4 x i64>* bitcast ([4 x i64]* @dst64 to <4 x i64>*), align 4
+; XOP-NEXT: ret void
+;
+ %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
+ %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
+ %ld2 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
+ %ld3 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+ %bitreverse0 = call i64 @llvm.bitreverse.i64(i64 %ld0)
+ %bitreverse1 = call i64 @llvm.bitreverse.i64(i64 %ld1)
+ %bitreverse2 = call i64 @llvm.bitreverse.i64(i64 %ld2)
+ %bitreverse3 = call i64 @llvm.bitreverse.i64(i64 %ld3)
+ store i64 %bitreverse0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
+ store i64 %bitreverse1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
+ store i64 %bitreverse2, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
+ store i64 %bitreverse3, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+ ret void
+}
+
+define void @bitreverse_4i32() #0 {
+; SSE-LABEL: @bitreverse_4i32(
+; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
+; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
+; SSE-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
+; SSE-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
+; SSE-NEXT: [[BITREVERSE0:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[LD0]])
+; SSE-NEXT: [[BITREVERSE1:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[LD1]])
+; SSE-NEXT: [[BITREVERSE2:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[LD2]])
+; SSE-NEXT: [[BITREVERSE3:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[LD3]])
+; SSE-NEXT: store i32 [[BITREVERSE0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
+; SSE-NEXT: store i32 [[BITREVERSE1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
+; SSE-NEXT: store i32 [[BITREVERSE2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
+; SSE-NEXT: store i32 [[BITREVERSE3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
+; SSE-NEXT: ret void
+;
+; AVX-LABEL: @bitreverse_4i32(
+; AVX-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4
+; AVX-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> [[TMP1]])
+; AVX-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 4
+; AVX-NEXT: ret void
+;
+; XOP-LABEL: @bitreverse_4i32(
+; XOP-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4
+; XOP-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.bitreverse.v4i32(<4 x i32> [[TMP1]])
+; XOP-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 4
+; XOP-NEXT: ret void
+;
+ %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
+ %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
+ %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
+ %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
+ %bitreverse0 = call i32 @llvm.bitreverse.i32(i32 %ld0)
+ %bitreverse1 = call i32 @llvm.bitreverse.i32(i32 %ld1)
+ %bitreverse2 = call i32 @llvm.bitreverse.i32(i32 %ld2)
+ %bitreverse3 = call i32 @llvm.bitreverse.i32(i32 %ld3)
+ store i32 %bitreverse0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
+ store i32 %bitreverse1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
+ store i32 %bitreverse2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
+ store i32 %bitreverse3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
+ ret void
+}
+
+define void @bitreverse_8i32() #0 {
+; SSE-LABEL: @bitreverse_8i32(
+; SSE-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
+; SSE-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
+; SSE-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
+; SSE-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
+; SSE-NEXT: [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
+; SSE-NEXT: [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
+; SSE-NEXT: [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
+; SSE-NEXT: [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
+; SSE-NEXT: [[BITREVERSE0:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[LD0]])
+; SSE-NEXT: [[BITREVERSE1:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[LD1]])
+; SSE-NEXT: [[BITREVERSE2:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[LD2]])
+; SSE-NEXT: [[BITREVERSE3:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[LD3]])
+; SSE-NEXT: [[BITREVERSE4:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[LD4]])
+; SSE-NEXT: [[BITREVERSE5:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[LD5]])
+; SSE-NEXT: [[BITREVERSE6:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[LD6]])
+; SSE-NEXT: [[BITREVERSE7:%.*]] = call i32 @llvm.bitreverse.i32(i32 [[LD7]])
+; SSE-NEXT: store i32 [[BITREVERSE0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
+; SSE-NEXT: store i32 [[BITREVERSE1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
+; SSE-NEXT: store i32 [[BITREVERSE2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
+; SSE-NEXT: store i32 [[BITREVERSE3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
+; SSE-NEXT: store i32 [[BITREVERSE4]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
+; SSE-NEXT: store i32 [[BITREVERSE5]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
+; SSE-NEXT: store i32 [[BITREVERSE6]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
+; SSE-NEXT: store i32 [[BITREVERSE7]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
+; SSE-NEXT: ret void
+;
+; AVX-LABEL: @bitreverse_8i32(
+; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 2
+; AVX-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> [[TMP1]])
+; AVX-NEXT: store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([8 x i32]* @dst32 to <8 x i32>*), align 2
+; AVX-NEXT: ret void
+;
+; XOP-LABEL: @bitreverse_8i32(
+; XOP-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 2
+; XOP-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.bitreverse.v8i32(<8 x i32> [[TMP1]])
+; XOP-NEXT: store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([8 x i32]* @dst32 to <8 x i32>*), align 2
+; XOP-NEXT: ret void
+;
+ %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
+ %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
+ %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
+ %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
+ %ld4 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
+ %ld5 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
+ %ld6 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
+ %ld7 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
+ %bitreverse0 = call i32 @llvm.bitreverse.i32(i32 %ld0)
+ %bitreverse1 = call i32 @llvm.bitreverse.i32(i32 %ld1)
+ %bitreverse2 = call i32 @llvm.bitreverse.i32(i32 %ld2)
+ %bitreverse3 = call i32 @llvm.bitreverse.i32(i32 %ld3)
+ %bitreverse4 = call i32 @llvm.bitreverse.i32(i32 %ld4)
+ %bitreverse5 = call i32 @llvm.bitreverse.i32(i32 %ld5)
+ %bitreverse6 = call i32 @llvm.bitreverse.i32(i32 %ld6)
+ %bitreverse7 = call i32 @llvm.bitreverse.i32(i32 %ld7)
+ store i32 %bitreverse0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
+ store i32 %bitreverse1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
+ store i32 %bitreverse2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
+ store i32 %bitreverse3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
+ store i32 %bitreverse4, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
+ store i32 %bitreverse5, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
+ store i32 %bitreverse6, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
+ store i32 %bitreverse7, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
+ ret void
+}
+
+define void @bitreverse_8i16() #0 {
+; SSE-LABEL: @bitreverse_8i16(
+; SSE-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
+; SSE-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
+; SSE-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
+; SSE-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
+; SSE-NEXT: [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
+; SSE-NEXT: [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
+; SSE-NEXT: [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
+; SSE-NEXT: [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
+; SSE-NEXT: [[BITREVERSE0:%.*]] = call i16 @llvm.bitreverse.i16(i16 [[LD0]])
+; SSE-NEXT: [[BITREVERSE1:%.*]] = call i16 @llvm.bitreverse.i16(i16 [[LD1]])
+; SSE-NEXT: [[BITREVERSE2:%.*]] = call i16 @llvm.bitreverse.i16(i16 [[LD2]])
+; SSE-NEXT: [[BITREVERSE3:%.*]] = call i16 @llvm.bitreverse.i16(i16 [[LD3]])
+; SSE-NEXT: [[BITREVERSE4:%.*]] = call i16 @llvm.bitreverse.i16(i16 [[LD4]])
+; SSE-NEXT: [[BITREVERSE5:%.*]] = call i16 @llvm.bitreverse.i16(i16 [[LD5]])
+; SSE-NEXT: [[BITREVERSE6:%.*]] = call i16 @llvm.bitreverse.i16(i16 [[LD6]])
+; SSE-NEXT: [[BITREVERSE7:%.*]] = call i16 @llvm.bitreverse.i16(i16 [[LD7]])
+; SSE-NEXT: store i16 [[BITREVERSE0]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
+; SSE-NEXT: store i16 [[BITREVERSE1]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
+; SSE-NEXT: store i16 [[BITREVERSE2]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
+; SSE-NEXT: store i16 [[BITREVERSE3]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
+; SSE-NEXT: store i16 [[BITREVERSE4]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
+; SSE-NEXT: store i16 [[BITREVERSE5]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
+; SSE-NEXT: store i16 [[BITREVERSE6]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
+; SSE-NEXT: store i16 [[BITREVERSE7]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
+; SSE-NEXT: ret void
+;
+; AVX-LABEL: @bitreverse_8i16(
+; AVX-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([16 x i16]* @src16 to <8 x i16>*), align 2
+; AVX-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> [[TMP1]])
+; AVX-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([16 x i16]* @dst16 to <8 x i16>*), align 2
+; AVX-NEXT: ret void
+;
+; XOP-LABEL: @bitreverse_8i16(
+; XOP-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([16 x i16]* @src16 to <8 x i16>*), align 2
+; XOP-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.bitreverse.v8i16(<8 x i16> [[TMP1]])
+; XOP-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([16 x i16]* @dst16 to <8 x i16>*), align 2
+; XOP-NEXT: ret void
+;
+ %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
+ %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
+ %ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
+ %ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
+ %ld4 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
+ %ld5 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
+ %ld6 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
+ %ld7 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
+ %bitreverse0 = call i16 @llvm.bitreverse.i16(i16 %ld0)
+ %bitreverse1 = call i16 @llvm.bitreverse.i16(i16 %ld1)
+ %bitreverse2 = call i16 @llvm.bitreverse.i16(i16 %ld2)
+ %bitreverse3 = call i16 @llvm.bitreverse.i16(i16 %ld3)
+ %bitreverse4 = call i16 @llvm.bitreverse.i16(i16 %ld4)
+ %bitreverse5 = call i16 @llvm.bitreverse.i16(i16 %ld5)
+ %bitreverse6 = call i16 @llvm.bitreverse.i16(i16 %ld6)
+ %bitreverse7 = call i16 @llvm.bitreverse.i16(i16 %ld7)
+ store i16 %bitreverse0, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
+ store i16 %bitreverse1, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
+ store i16 %bitreverse2, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
+ store i16 %bitreverse3, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
+ store i16 %bitreverse4, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
+ store i16 %bitreverse5, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
+ store i16 %bitreverse6, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
+ store i16 %bitreverse7, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
+ ret void
+}
+
+define void @bitreverse_16i16() #0 {
+; SSE-LABEL: @bitreverse_16i16(
+; SSE-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
+; SSE-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
+; SSE-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
+; SSE-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
+; SSE-NEXT: [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
+; SSE-NEXT: [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
+; SSE-NEXT: [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
+; SSE-NEXT: [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
+; SSE-NEXT: [[LD8:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 8), align 2
+; SSE-NEXT: [[LD9:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 9), align 2
+; SSE-NEXT: [[LD10:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 10), align 2
+; SSE-NEXT: [[LD11:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 11), align 2
+; SSE-NEXT: [[LD12:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 12), align 2
+; SSE-NEXT: [[LD13:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 13), align 2
+; SSE-NEXT: [[LD14:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 14), align 2
+; SSE-NEXT: [[LD15:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 15), align 2
+; SSE-NEXT: [[BITREVERSE0:%.*]] = call i16 @llvm.bitreverse.i16(i16 [[LD0]])
+; SSE-NEXT: [[BITREVERSE1:%.*]] = call i16 @llvm.bitreverse.i16(i16 [[LD1]])
+; SSE-NEXT: [[BITREVERSE2:%.*]] = call i16 @llvm.bitreverse.i16(i16 [[LD2]])
+; SSE-NEXT: [[BITREVERSE3:%.*]] = call i16 @llvm.bitreverse.i16(i16 [[LD3]])
+; SSE-NEXT: [[BITREVERSE4:%.*]] = call i16 @llvm.bitreverse.i16(i16 [[LD4]])
+; SSE-NEXT: [[BITREVERSE5:%.*]] = call i16 @llvm.bitreverse.i16(i16 [[LD5]])
+; SSE-NEXT: [[BITREVERSE6:%.*]] = call i16 @llvm.bitreverse.i16(i16 [[LD6]])
+; SSE-NEXT: [[BITREVERSE7:%.*]] = call i16 @llvm.bitreverse.i16(i16 [[LD7]])
+; SSE-NEXT: [[BITREVERSE8:%.*]] = call i16 @llvm.bitreverse.i16(i16 [[LD8]])
+; SSE-NEXT: [[BITREVERSE9:%.*]] = call i16 @llvm.bitreverse.i16(i16 [[LD9]])
+; SSE-NEXT: [[BITREVERSE10:%.*]] = call i16 @llvm.bitreverse.i16(i16 [[LD10]])
+; SSE-NEXT: [[BITREVERSE11:%.*]] = call i16 @llvm.bitreverse.i16(i16 [[LD11]])
+; SSE-NEXT: [[BITREVERSE12:%.*]] = call i16 @llvm.bitreverse.i16(i16 [[LD12]])
+; SSE-NEXT: [[BITREVERSE13:%.*]] = call i16 @llvm.bitreverse.i16(i16 [[LD13]])
+; SSE-NEXT: [[BITREVERSE14:%.*]] = call i16 @llvm.bitreverse.i16(i16 [[LD14]])
+; SSE-NEXT: [[BITREVERSE15:%.*]] = call i16 @llvm.bitreverse.i16(i16 [[LD15]])
+; SSE-NEXT: store i16 [[BITREVERSE0]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
+; SSE-NEXT: store i16 [[BITREVERSE1]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
+; SSE-NEXT: store i16 [[BITREVERSE2]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
+; SSE-NEXT: store i16 [[BITREVERSE3]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
+; SSE-NEXT: store i16 [[BITREVERSE4]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
+; SSE-NEXT: store i16 [[BITREVERSE5]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
+; SSE-NEXT: store i16 [[BITREVERSE6]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
+; SSE-NEXT: store i16 [[BITREVERSE7]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
+; SSE-NEXT: store i16 [[BITREVERSE8]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 8), align 2
+; SSE-NEXT: store i16 [[BITREVERSE9]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 9), align 2
+; SSE-NEXT: store i16 [[BITREVERSE10]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 10), align 2
+; SSE-NEXT: store i16 [[BITREVERSE11]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 11), align 2
+; SSE-NEXT: store i16 [[BITREVERSE12]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 12), align 2
+; SSE-NEXT: store i16 [[BITREVERSE13]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 13), align 2
+; SSE-NEXT: store i16 [[BITREVERSE14]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 14), align 2
+; SSE-NEXT: store i16 [[BITREVERSE15]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 15), align 2
+; SSE-NEXT: ret void
+;
+; AVX-LABEL: @bitreverse_16i16(
+; AVX-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([16 x i16]* @src16 to <16 x i16>*), align 2
+; AVX-NEXT: [[TMP2:%.*]] = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> [[TMP1]])
+; AVX-NEXT: store <16 x i16> [[TMP2]], <16 x i16>* bitcast ([16 x i16]* @dst16 to <16 x i16>*), align 2
+; AVX-NEXT: ret void
+;
+; XOP-LABEL: @bitreverse_16i16(
+; XOP-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([16 x i16]* @src16 to <16 x i16>*), align 2
+; XOP-NEXT: [[TMP2:%.*]] = call <16 x i16> @llvm.bitreverse.v16i16(<16 x i16> [[TMP1]])
+; XOP-NEXT: store <16 x i16> [[TMP2]], <16 x i16>* bitcast ([16 x i16]* @dst16 to <16 x i16>*), align 2
+; XOP-NEXT: ret void
+;
+ %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
+ %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
+ %ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
+ %ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
+ %ld4 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
+ %ld5 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
+ %ld6 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
+ %ld7 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
+ %ld8 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 8), align 2
+ %ld9 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 9), align 2
+ %ld10 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 10), align 2
+ %ld11 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 11), align 2
+ %ld12 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 12), align 2
+ %ld13 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 13), align 2
+ %ld14 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 14), align 2
+ %ld15 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 15), align 2
+ %bitreverse0 = call i16 @llvm.bitreverse.i16(i16 %ld0)
+ %bitreverse1 = call i16 @llvm.bitreverse.i16(i16 %ld1)
+ %bitreverse2 = call i16 @llvm.bitreverse.i16(i16 %ld2)
+ %bitreverse3 = call i16 @llvm.bitreverse.i16(i16 %ld3)
+ %bitreverse4 = call i16 @llvm.bitreverse.i16(i16 %ld4)
+ %bitreverse5 = call i16 @llvm.bitreverse.i16(i16 %ld5)
+ %bitreverse6 = call i16 @llvm.bitreverse.i16(i16 %ld6)
+ %bitreverse7 = call i16 @llvm.bitreverse.i16(i16 %ld7)
+ %bitreverse8 = call i16 @llvm.bitreverse.i16(i16 %ld8)
+ %bitreverse9 = call i16 @llvm.bitreverse.i16(i16 %ld9)
+ %bitreverse10 = call i16 @llvm.bitreverse.i16(i16 %ld10)
+ %bitreverse11 = call i16 @llvm.bitreverse.i16(i16 %ld11)
+ %bitreverse12 = call i16 @llvm.bitreverse.i16(i16 %ld12)
+ %bitreverse13 = call i16 @llvm.bitreverse.i16(i16 %ld13)
+ %bitreverse14 = call i16 @llvm.bitreverse.i16(i16 %ld14)
+ %bitreverse15 = call i16 @llvm.bitreverse.i16(i16 %ld15)
+ store i16 %bitreverse0 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
+ store i16 %bitreverse1 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
+ store i16 %bitreverse2 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
+ store i16 %bitreverse3 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
+ store i16 %bitreverse4 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
+ store i16 %bitreverse5 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
+ store i16 %bitreverse6 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
+ store i16 %bitreverse7 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
+ store i16 %bitreverse8 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 8), align 2
+ store i16 %bitreverse9 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 9), align 2
+ store i16 %bitreverse10, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 10), align 2
+ store i16 %bitreverse11, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 11), align 2
+ store i16 %bitreverse12, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 12), align 2
+ store i16 %bitreverse13, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 13), align 2
+ store i16 %bitreverse14, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 14), align 2
+ store i16 %bitreverse15, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 15), align 2
+ ret void
+}
+
+define void @bitreverse_16i8() #0 {
+; SSE-LABEL: @bitreverse_16i8(
+; SSE-NEXT: [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 0), align 1
+; SSE-NEXT: [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 1), align 1
+; SSE-NEXT: [[LD2:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 2), align 1
+; SSE-NEXT: [[LD3:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 3), align 1
+; SSE-NEXT: [[LD4:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 4), align 1
+; SSE-NEXT: [[LD5:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 5), align 1
+; SSE-NEXT: [[LD6:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 6), align 1
+; SSE-NEXT: [[LD7:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 7), align 1
+; SSE-NEXT: [[LD8:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 8), align 1
+; SSE-NEXT: [[LD9:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 9), align 1
+; SSE-NEXT: [[LD10:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
+; SSE-NEXT: [[LD11:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
+; SSE-NEXT: [[LD12:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
+; SSE-NEXT: [[LD13:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
+; SSE-NEXT: [[LD14:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
+; SSE-NEXT: [[LD15:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
+; SSE-NEXT: [[BITREVERSE0:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD0]])
+; SSE-NEXT: [[BITREVERSE1:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD1]])
+; SSE-NEXT: [[BITREVERSE2:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD2]])
+; SSE-NEXT: [[BITREVERSE3:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD3]])
+; SSE-NEXT: [[BITREVERSE4:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD4]])
+; SSE-NEXT: [[BITREVERSE5:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD5]])
+; SSE-NEXT: [[BITREVERSE6:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD6]])
+; SSE-NEXT: [[BITREVERSE7:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD7]])
+; SSE-NEXT: [[BITREVERSE8:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD8]])
+; SSE-NEXT: [[BITREVERSE9:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD9]])
+; SSE-NEXT: [[BITREVERSE10:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD10]])
+; SSE-NEXT: [[BITREVERSE11:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD11]])
+; SSE-NEXT: [[BITREVERSE12:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD12]])
+; SSE-NEXT: [[BITREVERSE13:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD13]])
+; SSE-NEXT: [[BITREVERSE14:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD14]])
+; SSE-NEXT: [[BITREVERSE15:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD15]])
+; SSE-NEXT: store i8 [[BITREVERSE0]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 0), align 1
+; SSE-NEXT: store i8 [[BITREVERSE1]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 1), align 1
+; SSE-NEXT: store i8 [[BITREVERSE2]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 2), align 1
+; SSE-NEXT: store i8 [[BITREVERSE3]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 3), align 1
+; SSE-NEXT: store i8 [[BITREVERSE4]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 4), align 1
+; SSE-NEXT: store i8 [[BITREVERSE5]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 5), align 1
+; SSE-NEXT: store i8 [[BITREVERSE6]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 6), align 1
+; SSE-NEXT: store i8 [[BITREVERSE7]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 7), align 1
+; SSE-NEXT: store i8 [[BITREVERSE8]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 8), align 1
+; SSE-NEXT: store i8 [[BITREVERSE9]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 9), align 1
+; SSE-NEXT: store i8 [[BITREVERSE10]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
+; SSE-NEXT: store i8 [[BITREVERSE11]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
+; SSE-NEXT: store i8 [[BITREVERSE12]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
+; SSE-NEXT: store i8 [[BITREVERSE13]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
+; SSE-NEXT: store i8 [[BITREVERSE14]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
+; SSE-NEXT: store i8 [[BITREVERSE15]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
+; SSE-NEXT: ret void
+;
+; AVX-LABEL: @bitreverse_16i8(
+; AVX-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([32 x i8]* @src8 to <16 x i8>*), align 1
+; AVX-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> [[TMP1]])
+; AVX-NEXT: store <16 x i8> [[TMP2]], <16 x i8>* bitcast ([32 x i8]* @dst8 to <16 x i8>*), align 1
+; AVX-NEXT: ret void
+;
+; XOP-LABEL: @bitreverse_16i8(
+; XOP-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([32 x i8]* @src8 to <16 x i8>*), align 1
+; XOP-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> [[TMP1]])
+; XOP-NEXT: store <16 x i8> [[TMP2]], <16 x i8>* bitcast ([32 x i8]* @dst8 to <16 x i8>*), align 1
+; XOP-NEXT: ret void
+;
+ %ld0 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 0), align 1
+ %ld1 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 1), align 1
+ %ld2 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 2), align 1
+ %ld3 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 3), align 1
+ %ld4 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 4), align 1
+ %ld5 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 5), align 1
+ %ld6 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 6), align 1
+ %ld7 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 7), align 1
+ %ld8 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 8), align 1
+ %ld9 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 9), align 1
+ %ld10 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
+ %ld11 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
+ %ld12 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
+ %ld13 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
+ %ld14 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
+ %ld15 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
+ %bitreverse0 = call i8 @llvm.bitreverse.i8(i8 %ld0)
+ %bitreverse1 = call i8 @llvm.bitreverse.i8(i8 %ld1)
+ %bitreverse2 = call i8 @llvm.bitreverse.i8(i8 %ld2)
+ %bitreverse3 = call i8 @llvm.bitreverse.i8(i8 %ld3)
+ %bitreverse4 = call i8 @llvm.bitreverse.i8(i8 %ld4)
+ %bitreverse5 = call i8 @llvm.bitreverse.i8(i8 %ld5)
+ %bitreverse6 = call i8 @llvm.bitreverse.i8(i8 %ld6)
+ %bitreverse7 = call i8 @llvm.bitreverse.i8(i8 %ld7)
+ %bitreverse8 = call i8 @llvm.bitreverse.i8(i8 %ld8)
+ %bitreverse9 = call i8 @llvm.bitreverse.i8(i8 %ld9)
+ %bitreverse10 = call i8 @llvm.bitreverse.i8(i8 %ld10)
+ %bitreverse11 = call i8 @llvm.bitreverse.i8(i8 %ld11)
+ %bitreverse12 = call i8 @llvm.bitreverse.i8(i8 %ld12)
+ %bitreverse13 = call i8 @llvm.bitreverse.i8(i8 %ld13)
+ %bitreverse14 = call i8 @llvm.bitreverse.i8(i8 %ld14)
+ %bitreverse15 = call i8 @llvm.bitreverse.i8(i8 %ld15)
+ store i8 %bitreverse0 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 0), align 1
+ store i8 %bitreverse1 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 1), align 1
+ store i8 %bitreverse2 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 2), align 1
+ store i8 %bitreverse3 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 3), align 1
+ store i8 %bitreverse4 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 4), align 1
+ store i8 %bitreverse5 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 5), align 1
+ store i8 %bitreverse6 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 6), align 1
+ store i8 %bitreverse7 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 7), align 1
+ store i8 %bitreverse8 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 8), align 1
+ store i8 %bitreverse9 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 9), align 1
+ store i8 %bitreverse10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
+ store i8 %bitreverse11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
+ store i8 %bitreverse12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
+ store i8 %bitreverse13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
+ store i8 %bitreverse14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
+ store i8 %bitreverse15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
+ ret void
+}
+
+define void @bitreverse_32i8() #0 {
+; SSE-LABEL: @bitreverse_32i8(
+; SSE-NEXT: [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 0), align 1
+; SSE-NEXT: [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 1), align 1
+; SSE-NEXT: [[LD2:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 2), align 1
+; SSE-NEXT: [[LD3:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 3), align 1
+; SSE-NEXT: [[LD4:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 4), align 1
+; SSE-NEXT: [[LD5:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 5), align 1
+; SSE-NEXT: [[LD6:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 6), align 1
+; SSE-NEXT: [[LD7:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 7), align 1
+; SSE-NEXT: [[LD8:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 8), align 1
+; SSE-NEXT: [[LD9:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 9), align 1
+; SSE-NEXT: [[LD10:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
+; SSE-NEXT: [[LD11:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
+; SSE-NEXT: [[LD12:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
+; SSE-NEXT: [[LD13:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
+; SSE-NEXT: [[LD14:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
+; SSE-NEXT: [[LD15:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
+; SSE-NEXT: [[LD16:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16), align 1
+; SSE-NEXT: [[LD17:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 17), align 1
+; SSE-NEXT: [[LD18:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 18), align 1
+; SSE-NEXT: [[LD19:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 19), align 1
+; SSE-NEXT: [[LD20:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 20), align 1
+; SSE-NEXT: [[LD21:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 21), align 1
+; SSE-NEXT: [[LD22:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 22), align 1
+; SSE-NEXT: [[LD23:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 23), align 1
+; SSE-NEXT: [[LD24:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 24), align 1
+; SSE-NEXT: [[LD25:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 25), align 1
+; SSE-NEXT: [[LD26:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 26), align 1
+; SSE-NEXT: [[LD27:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 27), align 1
+; SSE-NEXT: [[LD28:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 28), align 1
+; SSE-NEXT: [[LD29:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 29), align 1
+; SSE-NEXT: [[LD30:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 30), align 1
+; SSE-NEXT: [[LD31:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 31), align 1
+; SSE-NEXT: [[BITREVERSE0:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD0]])
+; SSE-NEXT: [[BITREVERSE1:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD1]])
+; SSE-NEXT: [[BITREVERSE2:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD2]])
+; SSE-NEXT: [[BITREVERSE3:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD3]])
+; SSE-NEXT: [[BITREVERSE4:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD4]])
+; SSE-NEXT: [[BITREVERSE5:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD5]])
+; SSE-NEXT: [[BITREVERSE6:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD6]])
+; SSE-NEXT: [[BITREVERSE7:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD7]])
+; SSE-NEXT: [[BITREVERSE8:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD8]])
+; SSE-NEXT: [[BITREVERSE9:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD9]])
+; SSE-NEXT: [[BITREVERSE10:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD10]])
+; SSE-NEXT: [[BITREVERSE11:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD11]])
+; SSE-NEXT: [[BITREVERSE12:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD12]])
+; SSE-NEXT: [[BITREVERSE13:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD13]])
+; SSE-NEXT: [[BITREVERSE14:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD14]])
+; SSE-NEXT: [[BITREVERSE15:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD15]])
+; SSE-NEXT: [[BITREVERSE16:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD16]])
+; SSE-NEXT: [[BITREVERSE17:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD17]])
+; SSE-NEXT: [[BITREVERSE18:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD18]])
+; SSE-NEXT: [[BITREVERSE19:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD19]])
+; SSE-NEXT: [[BITREVERSE20:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD20]])
+; SSE-NEXT: [[BITREVERSE21:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD21]])
+; SSE-NEXT: [[BITREVERSE22:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD22]])
+; SSE-NEXT: [[BITREVERSE23:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD23]])
+; SSE-NEXT: [[BITREVERSE24:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD24]])
+; SSE-NEXT: [[BITREVERSE25:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD25]])
+; SSE-NEXT: [[BITREVERSE26:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD26]])
+; SSE-NEXT: [[BITREVERSE27:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD27]])
+; SSE-NEXT: [[BITREVERSE28:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD28]])
+; SSE-NEXT: [[BITREVERSE29:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD29]])
+; SSE-NEXT: [[BITREVERSE30:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD30]])
+; SSE-NEXT: [[BITREVERSE31:%.*]] = call i8 @llvm.bitreverse.i8(i8 [[LD31]])
+; SSE-NEXT: store i8 [[BITREVERSE0]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 0), align 1
+; SSE-NEXT: store i8 [[BITREVERSE1]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 1), align 1
+; SSE-NEXT: store i8 [[BITREVERSE2]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 2), align 1
+; SSE-NEXT: store i8 [[BITREVERSE3]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 3), align 1
+; SSE-NEXT: store i8 [[BITREVERSE4]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 4), align 1
+; SSE-NEXT: store i8 [[BITREVERSE5]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 5), align 1
+; SSE-NEXT: store i8 [[BITREVERSE6]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 6), align 1
+; SSE-NEXT: store i8 [[BITREVERSE7]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 7), align 1
+; SSE-NEXT: store i8 [[BITREVERSE8]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 8), align 1
+; SSE-NEXT: store i8 [[BITREVERSE9]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 9), align 1
+; SSE-NEXT: store i8 [[BITREVERSE10]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
+; SSE-NEXT: store i8 [[BITREVERSE11]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
+; SSE-NEXT: store i8 [[BITREVERSE12]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
+; SSE-NEXT: store i8 [[BITREVERSE13]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
+; SSE-NEXT: store i8 [[BITREVERSE14]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
+; SSE-NEXT: store i8 [[BITREVERSE15]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
+; SSE-NEXT: store i8 [[BITREVERSE16]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16), align 1
+; SSE-NEXT: store i8 [[BITREVERSE17]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 17), align 1
+; SSE-NEXT: store i8 [[BITREVERSE18]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 18), align 1
+; SSE-NEXT: store i8 [[BITREVERSE19]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 19), align 1
+; SSE-NEXT: store i8 [[BITREVERSE20]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 20), align 1
+; SSE-NEXT: store i8 [[BITREVERSE21]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 21), align 1
+; SSE-NEXT: store i8 [[BITREVERSE22]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 22), align 1
+; SSE-NEXT: store i8 [[BITREVERSE23]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 23), align 1
+; SSE-NEXT: store i8 [[BITREVERSE24]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 24), align 1
+; SSE-NEXT: store i8 [[BITREVERSE25]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 25), align 1
+; SSE-NEXT: store i8 [[BITREVERSE26]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 26), align 1
+; SSE-NEXT: store i8 [[BITREVERSE27]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 27), align 1
+; SSE-NEXT: store i8 [[BITREVERSE28]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 28), align 1
+; SSE-NEXT: store i8 [[BITREVERSE29]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 29), align 1
+; SSE-NEXT: store i8 [[BITREVERSE30]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 30), align 1
+; SSE-NEXT: store i8 [[BITREVERSE31]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 31), align 1
+; SSE-NEXT: ret void
+;
+; AVX-LABEL: @bitreverse_32i8(
+; AVX-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([32 x i8]* @src8 to <16 x i8>*), align 1
+; AVX-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16) to <16 x i8>*), align 1
+; AVX-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> [[TMP1]])
+; AVX-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> [[TMP2]])
+; AVX-NEXT: store <16 x i8> [[TMP3]], <16 x i8>* bitcast ([32 x i8]* @dst8 to <16 x i8>*), align 1
+; AVX-NEXT: store <16 x i8> [[TMP4]], <16 x i8>* bitcast (i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16) to <16 x i8>*), align 1
+; AVX-NEXT: ret void
+;
+; XOP-LABEL: @bitreverse_32i8(
+; XOP-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([32 x i8]* @src8 to <16 x i8>*), align 1
+; XOP-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16) to <16 x i8>*), align 1
+; XOP-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> [[TMP1]])
+; XOP-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.bitreverse.v16i8(<16 x i8> [[TMP2]])
+; XOP-NEXT: store <16 x i8> [[TMP3]], <16 x i8>* bitcast ([32 x i8]* @dst8 to <16 x i8>*), align 1
+; XOP-NEXT: store <16 x i8> [[TMP4]], <16 x i8>* bitcast (i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16) to <16 x i8>*), align 1
+; XOP-NEXT: ret void
+;
+ %ld0 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 0), align 1
+ %ld1 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 1), align 1
+ %ld2 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 2), align 1
+ %ld3 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 3), align 1
+ %ld4 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 4), align 1
+ %ld5 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 5), align 1
+ %ld6 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 6), align 1
+ %ld7 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 7), align 1
+ %ld8 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 8), align 1
+ %ld9 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 9), align 1
+ %ld10 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
+ %ld11 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
+ %ld12 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
+ %ld13 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
+ %ld14 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
+ %ld15 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
+ %ld16 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16), align 1
+ %ld17 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 17), align 1
+ %ld18 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 18), align 1
+ %ld19 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 19), align 1
+ %ld20 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 20), align 1
+ %ld21 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 21), align 1
+ %ld22 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 22), align 1
+ %ld23 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 23), align 1
+ %ld24 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 24), align 1
+ %ld25 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 25), align 1
+ %ld26 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 26), align 1
+ %ld27 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 27), align 1
+ %ld28 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 28), align 1
+ %ld29 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 29), align 1
+ %ld30 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 30), align 1
+ %ld31 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 31), align 1
+ %bitreverse0 = call i8 @llvm.bitreverse.i8(i8 %ld0)
+ %bitreverse1 = call i8 @llvm.bitreverse.i8(i8 %ld1)
+ %bitreverse2 = call i8 @llvm.bitreverse.i8(i8 %ld2)
+ %bitreverse3 = call i8 @llvm.bitreverse.i8(i8 %ld3)
+ %bitreverse4 = call i8 @llvm.bitreverse.i8(i8 %ld4)
+ %bitreverse5 = call i8 @llvm.bitreverse.i8(i8 %ld5)
+ %bitreverse6 = call i8 @llvm.bitreverse.i8(i8 %ld6)
+ %bitreverse7 = call i8 @llvm.bitreverse.i8(i8 %ld7)
+ %bitreverse8 = call i8 @llvm.bitreverse.i8(i8 %ld8)
+ %bitreverse9 = call i8 @llvm.bitreverse.i8(i8 %ld9)
+ %bitreverse10 = call i8 @llvm.bitreverse.i8(i8 %ld10)
+ %bitreverse11 = call i8 @llvm.bitreverse.i8(i8 %ld11)
+ %bitreverse12 = call i8 @llvm.bitreverse.i8(i8 %ld12)
+ %bitreverse13 = call i8 @llvm.bitreverse.i8(i8 %ld13)
+ %bitreverse14 = call i8 @llvm.bitreverse.i8(i8 %ld14)
+ %bitreverse15 = call i8 @llvm.bitreverse.i8(i8 %ld15)
+ %bitreverse16 = call i8 @llvm.bitreverse.i8(i8 %ld16)
+ %bitreverse17 = call i8 @llvm.bitreverse.i8(i8 %ld17)
+ %bitreverse18 = call i8 @llvm.bitreverse.i8(i8 %ld18)
+ %bitreverse19 = call i8 @llvm.bitreverse.i8(i8 %ld19)
+ %bitreverse20 = call i8 @llvm.bitreverse.i8(i8 %ld20)
+ %bitreverse21 = call i8 @llvm.bitreverse.i8(i8 %ld21)
+ %bitreverse22 = call i8 @llvm.bitreverse.i8(i8 %ld22)
+ %bitreverse23 = call i8 @llvm.bitreverse.i8(i8 %ld23)
+ %bitreverse24 = call i8 @llvm.bitreverse.i8(i8 %ld24)
+ %bitreverse25 = call i8 @llvm.bitreverse.i8(i8 %ld25)
+ %bitreverse26 = call i8 @llvm.bitreverse.i8(i8 %ld26)
+ %bitreverse27 = call i8 @llvm.bitreverse.i8(i8 %ld27)
+ %bitreverse28 = call i8 @llvm.bitreverse.i8(i8 %ld28)
+ %bitreverse29 = call i8 @llvm.bitreverse.i8(i8 %ld29)
+ %bitreverse30 = call i8 @llvm.bitreverse.i8(i8 %ld30)
+ %bitreverse31 = call i8 @llvm.bitreverse.i8(i8 %ld31)
+ store i8 %bitreverse0 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 0), align 1
+ store i8 %bitreverse1 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 1), align 1
+ store i8 %bitreverse2 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 2), align 1
+ store i8 %bitreverse3 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 3), align 1
+ store i8 %bitreverse4 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 4), align 1
+ store i8 %bitreverse5 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 5), align 1
+ store i8 %bitreverse6 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 6), align 1
+ store i8 %bitreverse7 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 7), align 1
+ store i8 %bitreverse8 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 8), align 1
+ store i8 %bitreverse9 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 9), align 1
+ store i8 %bitreverse10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
+ store i8 %bitreverse11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
+ store i8 %bitreverse12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
+ store i8 %bitreverse13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
+ store i8 %bitreverse14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
+ store i8 %bitreverse15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
+ store i8 %bitreverse16, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16), align 1
+ store i8 %bitreverse17, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 17), align 1
+ store i8 %bitreverse18, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 18), align 1
+ store i8 %bitreverse19, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 19), align 1
+ store i8 %bitreverse20, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 20), align 1
+ store i8 %bitreverse21, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 21), align 1
+ store i8 %bitreverse22, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 22), align 1
+ store i8 %bitreverse23, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 23), align 1
+ store i8 %bitreverse24, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 24), align 1
+ store i8 %bitreverse25, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 25), align 1
+ store i8 %bitreverse26, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 26), align 1
+ store i8 %bitreverse27, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 27), align 1
+ store i8 %bitreverse28, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 28), align 1
+ store i8 %bitreverse29, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 29), align 1
+ store i8 %bitreverse30, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 30), align 1
+ store i8 %bitreverse31, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 31), align 1
+ ret void
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/Transforms/SLPVectorizer/X86/bswap.ll b/test/Transforms/SLPVectorizer/X86/bswap.ll
new file mode 100644
index 000000000000..79ce24f59119
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/bswap.ll
@@ -0,0 +1,247 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@src64 = common global [4 x i64] zeroinitializer, align 32
+@dst64 = common global [4 x i64] zeroinitializer, align 32
+@src32 = common global [8 x i32] zeroinitializer, align 32
+@dst32 = common global [8 x i32] zeroinitializer, align 32
+@src16 = common global [16 x i16] zeroinitializer, align 32
+@dst16 = common global [16 x i16] zeroinitializer, align 32
+
+declare i64 @llvm.bswap.i64(i64)
+declare i32 @llvm.bswap.i32(i32)
+declare i16 @llvm.bswap.i16(i16)
+
+define void @bswap_2i64() #0 {
+; SSE-LABEL: @bswap_2i64(
+; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
+; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; SSE-NEXT: [[BSWAP0:%.*]] = call i64 @llvm.bswap.i64(i64 [[LD0]])
+; SSE-NEXT: [[BSWAP1:%.*]] = call i64 @llvm.bswap.i64(i64 [[LD1]])
+; SSE-NEXT: store i64 [[BSWAP0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
+; SSE-NEXT: store i64 [[BSWAP1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; SSE-NEXT: ret void
+;
+; AVX-LABEL: @bswap_2i64(
+; AVX-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([4 x i64]* @src64 to <2 x i64>*), align 8
+; AVX-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.bswap.v2i64(<2 x i64> [[TMP1]])
+; AVX-NEXT: store <2 x i64> [[TMP2]], <2 x i64>* bitcast ([4 x i64]* @dst64 to <2 x i64>*), align 8
+; AVX-NEXT: ret void
+;
+ %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
+ %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+ %bswap0 = call i64 @llvm.bswap.i64(i64 %ld0)
+ %bswap1 = call i64 @llvm.bswap.i64(i64 %ld1)
+ store i64 %bswap0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
+ store i64 %bswap1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+ ret void
+}
+
+define void @bswap_4i64() #0 {
+; SSE-LABEL: @bswap_4i64(
+; SSE-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
+; SSE-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
+; SSE-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
+; SSE-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+; SSE-NEXT: [[BSWAP0:%.*]] = call i64 @llvm.bswap.i64(i64 [[LD0]])
+; SSE-NEXT: [[BSWAP1:%.*]] = call i64 @llvm.bswap.i64(i64 [[LD1]])
+; SSE-NEXT: [[BSWAP2:%.*]] = call i64 @llvm.bswap.i64(i64 [[LD2]])
+; SSE-NEXT: [[BSWAP3:%.*]] = call i64 @llvm.bswap.i64(i64 [[LD3]])
+; SSE-NEXT: store i64 [[BSWAP0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
+; SSE-NEXT: store i64 [[BSWAP1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
+; SSE-NEXT: store i64 [[BSWAP2]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
+; SSE-NEXT: store i64 [[BSWAP3]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+; SSE-NEXT: ret void
+;
+; AVX-LABEL: @bswap_4i64(
+; AVX-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([4 x i64]* @src64 to <4 x i64>*), align 4
+; AVX-NEXT: [[TMP2:%.*]] = call <4 x i64> @llvm.bswap.v4i64(<4 x i64> [[TMP1]])
+; AVX-NEXT: store <4 x i64> [[TMP2]], <4 x i64>* bitcast ([4 x i64]* @dst64 to <4 x i64>*), align 4
+; AVX-NEXT: ret void
+;
+ %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
+ %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
+ %ld2 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
+ %ld3 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+ %bswap0 = call i64 @llvm.bswap.i64(i64 %ld0)
+ %bswap1 = call i64 @llvm.bswap.i64(i64 %ld1)
+ %bswap2 = call i64 @llvm.bswap.i64(i64 %ld2)
+ %bswap3 = call i64 @llvm.bswap.i64(i64 %ld3)
+ store i64 %bswap0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
+ store i64 %bswap1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
+ store i64 %bswap2, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
+ store i64 %bswap3, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+ ret void
+}
+
+define void @bswap_4i32() #0 {
+; CHECK-LABEL: @bswap_4i32(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4
+; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> [[TMP1]])
+; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 4
+; CHECK-NEXT: ret void
+;
+ %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
+ %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
+ %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
+ %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
+ %bswap0 = call i32 @llvm.bswap.i32(i32 %ld0)
+ %bswap1 = call i32 @llvm.bswap.i32(i32 %ld1)
+ %bswap2 = call i32 @llvm.bswap.i32(i32 %ld2)
+ %bswap3 = call i32 @llvm.bswap.i32(i32 %ld3)
+ store i32 %bswap0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
+ store i32 %bswap1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
+ store i32 %bswap2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
+ store i32 %bswap3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
+ ret void
+}
+
+define void @bswap_8i32() #0 {
+; SSE-LABEL: @bswap_8i32(
+; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 2
+; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 2
+; SSE-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> [[TMP1]])
+; SSE-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.bswap.v4i32(<4 x i32> [[TMP2]])
+; SSE-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 2
+; SSE-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4) to <4 x i32>*), align 2
+; SSE-NEXT: ret void
+;
+; AVX-LABEL: @bswap_8i32(
+; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 2
+; AVX-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.bswap.v8i32(<8 x i32> [[TMP1]])
+; AVX-NEXT: store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([8 x i32]* @dst32 to <8 x i32>*), align 2
+; AVX-NEXT: ret void
+;
+ %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
+ %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
+ %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
+ %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
+ %ld4 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
+ %ld5 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
+ %ld6 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
+ %ld7 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
+ %bswap0 = call i32 @llvm.bswap.i32(i32 %ld0)
+ %bswap1 = call i32 @llvm.bswap.i32(i32 %ld1)
+ %bswap2 = call i32 @llvm.bswap.i32(i32 %ld2)
+ %bswap3 = call i32 @llvm.bswap.i32(i32 %ld3)
+ %bswap4 = call i32 @llvm.bswap.i32(i32 %ld4)
+ %bswap5 = call i32 @llvm.bswap.i32(i32 %ld5)
+ %bswap6 = call i32 @llvm.bswap.i32(i32 %ld6)
+ %bswap7 = call i32 @llvm.bswap.i32(i32 %ld7)
+ store i32 %bswap0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
+ store i32 %bswap1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
+ store i32 %bswap2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
+ store i32 %bswap3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
+ store i32 %bswap4, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
+ store i32 %bswap5, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
+ store i32 %bswap6, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
+ store i32 %bswap7, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
+ ret void
+}
+
+define void @bswap_8i16() #0 {
+; CHECK-LABEL: @bswap_8i16(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([16 x i16]* @src16 to <8 x i16>*), align 2
+; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> [[TMP1]])
+; CHECK-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([16 x i16]* @dst16 to <8 x i16>*), align 2
+; CHECK-NEXT: ret void
+;
+ %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
+ %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
+ %ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
+ %ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
+ %ld4 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
+ %ld5 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
+ %ld6 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
+ %ld7 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
+ %bswap0 = call i16 @llvm.bswap.i16(i16 %ld0)
+ %bswap1 = call i16 @llvm.bswap.i16(i16 %ld1)
+ %bswap2 = call i16 @llvm.bswap.i16(i16 %ld2)
+ %bswap3 = call i16 @llvm.bswap.i16(i16 %ld3)
+ %bswap4 = call i16 @llvm.bswap.i16(i16 %ld4)
+ %bswap5 = call i16 @llvm.bswap.i16(i16 %ld5)
+ %bswap6 = call i16 @llvm.bswap.i16(i16 %ld6)
+ %bswap7 = call i16 @llvm.bswap.i16(i16 %ld7)
+ store i16 %bswap0, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
+ store i16 %bswap1, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
+ store i16 %bswap2, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
+ store i16 %bswap3, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
+ store i16 %bswap4, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
+ store i16 %bswap5, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
+ store i16 %bswap6, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
+ store i16 %bswap7, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
+ ret void
+}
+
+define void @bswap_16i16() #0 {
+; SSE-LABEL: @bswap_16i16(
+; SSE-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([16 x i16]* @src16 to <8 x i16>*), align 2
+; SSE-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> [[TMP1]])
+; SSE-NEXT: [[TMP4:%.*]] = call <8 x i16> @llvm.bswap.v8i16(<8 x i16> [[TMP2]])
+; SSE-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* bitcast ([16 x i16]* @dst16 to <8 x i16>*), align 2
+; SSE-NEXT: store <8 x i16> [[TMP4]], <8 x i16>* bitcast (i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT: ret void
+;
+; AVX-LABEL: @bswap_16i16(
+; AVX-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([16 x i16]* @src16 to <16 x i16>*), align 2
+; AVX-NEXT: [[TMP2:%.*]] = call <16 x i16> @llvm.bswap.v16i16(<16 x i16> [[TMP1]])
+; AVX-NEXT: store <16 x i16> [[TMP2]], <16 x i16>* bitcast ([16 x i16]* @dst16 to <16 x i16>*), align 2
+; AVX-NEXT: ret void
+;
+ %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
+ %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
+ %ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
+ %ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
+ %ld4 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
+ %ld5 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
+ %ld6 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
+ %ld7 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
+ %ld8 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 8), align 2
+ %ld9 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 9), align 2
+ %ld10 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 10), align 2
+ %ld11 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 11), align 2
+ %ld12 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 12), align 2
+ %ld13 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 13), align 2
+ %ld14 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 14), align 2
+ %ld15 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 15), align 2
+ %bswap0 = call i16 @llvm.bswap.i16(i16 %ld0)
+ %bswap1 = call i16 @llvm.bswap.i16(i16 %ld1)
+ %bswap2 = call i16 @llvm.bswap.i16(i16 %ld2)
+ %bswap3 = call i16 @llvm.bswap.i16(i16 %ld3)
+ %bswap4 = call i16 @llvm.bswap.i16(i16 %ld4)
+ %bswap5 = call i16 @llvm.bswap.i16(i16 %ld5)
+ %bswap6 = call i16 @llvm.bswap.i16(i16 %ld6)
+ %bswap7 = call i16 @llvm.bswap.i16(i16 %ld7)
+ %bswap8 = call i16 @llvm.bswap.i16(i16 %ld8)
+ %bswap9 = call i16 @llvm.bswap.i16(i16 %ld9)
+ %bswap10 = call i16 @llvm.bswap.i16(i16 %ld10)
+ %bswap11 = call i16 @llvm.bswap.i16(i16 %ld11)
+ %bswap12 = call i16 @llvm.bswap.i16(i16 %ld12)
+ %bswap13 = call i16 @llvm.bswap.i16(i16 %ld13)
+ %bswap14 = call i16 @llvm.bswap.i16(i16 %ld14)
+ %bswap15 = call i16 @llvm.bswap.i16(i16 %ld15)
+ store i16 %bswap0 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
+ store i16 %bswap1 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
+ store i16 %bswap2 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
+ store i16 %bswap3 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
+ store i16 %bswap4 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
+ store i16 %bswap5 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
+ store i16 %bswap6 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
+ store i16 %bswap7 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
+ store i16 %bswap8 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 8), align 2
+ store i16 %bswap9 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 9), align 2
+ store i16 %bswap10, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 10), align 2
+ store i16 %bswap11, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 11), align 2
+ store i16 %bswap12, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 12), align 2
+ store i16 %bswap13, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 13), align 2
+ store i16 %bswap14, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 14), align 2
+ store i16 %bswap15, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 15), align 2
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/Transforms/SLPVectorizer/X86/call.ll b/test/Transforms/SLPVectorizer/X86/call.ll
index a55cc1bf793f..d6c0ebd6b075 100644
--- a/test/Transforms/SLPVectorizer/X86/call.ll
+++ b/test/Transforms/SLPVectorizer/X86/call.ll
@@ -7,6 +7,7 @@ declare double @sin(double)
declare double @cos(double)
declare double @pow(double, double)
declare double @exp2(double)
+declare double @sqrt(double)
declare i64 @round(i64)
@@ -96,6 +97,28 @@ entry:
}
+; CHECK: sqrt_libm
+; CHECK: call <2 x double> @llvm.sqrt.v2f64
+; CHECK: ret void
+define void @sqrt_libm(double* %a, double* %b, double* %c) {
+entry:
+ %i0 = load double, double* %a, align 8
+ %i1 = load double, double* %b, align 8
+ %mul = fmul double %i0, %i1
+ %call = tail call nnan double @sqrt(double %mul) nounwind readnone
+ %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
+ %i3 = load double, double* %arrayidx3, align 8
+ %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
+ %i4 = load double, double* %arrayidx4, align 8
+ %mul5 = fmul double %i3, %i4
+ %call5 = tail call nnan double @sqrt(double %mul5) nounwind readnone
+ store double %call, double* %c, align 8
+ %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
+ store double %call5, double* %arrayidx5, align 8
+ ret void
+}
+
+
; Negative test case
; CHECK: round_custom
; CHECK-NOT: load <4 x i64>
diff --git a/test/Transforms/SLPVectorizer/X86/cast.ll b/test/Transforms/SLPVectorizer/X86/cast.ll
index 044db5d694b6..5d7118753e92 100644
--- a/test/Transforms/SLPVectorizer/X86/cast.ll
+++ b/test/Transforms/SLPVectorizer/X86/cast.ll
@@ -1,19 +1,26 @@
-; RUN: opt < %s -basicaa -slp-vectorizer -dce -S -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7 -basicaa -slp-vectorizer -dce -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
+; RUN: opt < %s -mtriple=x86_64-apple-macosx10.8.0 -mcpu=corei7-avx -basicaa -slp-vectorizer -dce -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-apple-macosx10.9.0"
-; int foo(int * restrict A, char * restrict B) {
+; int test_sext_4i8_to_4i32(int * restrict A, char * restrict B) {
; A[0] = B[0];
; A[1] = B[1];
; A[2] = B[2];
; A[3] = B[3];
; }
-;CHECK-LABEL: @foo(
-;CHECK: load <4 x i8>
-;CHECK: sext
-;CHECK: store <4 x i32>
-define i32 @foo(i32* noalias nocapture %A, i8* noalias nocapture %B) {
+
+define i32 @test_sext_4i8_to_4i32(i32* noalias nocapture %A, i8* noalias nocapture %B) {
+; CHECK-LABEL: @test_sext_4i8_to_4i32(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i8* %B to <4 x i8>*
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i8>, <4 x i8>* [[TMP0]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = sext <4 x i8> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* %A to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT: ret i32 undef
+;
entry:
%0 = load i8, i8* %B, align 1
%conv = sext i8 %0 to i32
@@ -36,3 +43,82 @@ entry:
ret i32 undef
}
+define i32 @test_zext_4i16_to_4i32(i32* noalias nocapture %A, i16* noalias nocapture %B) {
+; CHECK-LABEL: @test_zext_4i16_to_4i32(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP0:%.*]] = bitcast i16* %B to <4 x i16>*
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 1
+; CHECK-NEXT: [[TMP2:%.*]] = zext <4 x i16> [[TMP1]] to <4 x i32>
+; CHECK-NEXT: [[TMP3:%.*]] = bitcast i32* %A to <4 x i32>*
+; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* [[TMP3]], align 4
+; CHECK-NEXT: ret i32 undef
+;
+entry:
+ %0 = load i16, i16* %B, align 1
+ %conv = zext i16 %0 to i32
+ store i32 %conv, i32* %A, align 4
+ %arrayidx2 = getelementptr inbounds i16, i16* %B, i64 1
+ %1 = load i16, i16* %arrayidx2, align 1
+ %conv3 = zext i16 %1 to i32
+ %arrayidx4 = getelementptr inbounds i32, i32* %A, i64 1
+ store i32 %conv3, i32* %arrayidx4, align 4
+ %arrayidx5 = getelementptr inbounds i16, i16* %B, i64 2
+ %2 = load i16, i16* %arrayidx5, align 1
+ %conv6 = zext i16 %2 to i32
+ %arrayidx7 = getelementptr inbounds i32, i32* %A, i64 2
+ store i32 %conv6, i32* %arrayidx7, align 4
+ %arrayidx8 = getelementptr inbounds i16, i16* %B, i64 3
+ %3 = load i16, i16* %arrayidx8, align 1
+ %conv9 = zext i16 %3 to i32
+ %arrayidx10 = getelementptr inbounds i32, i32* %A, i64 3
+ store i32 %conv9, i32* %arrayidx10, align 4
+ ret i32 undef
+}
+
+define i64 @test_sext_4i16_to_4i64(i64* noalias nocapture %A, i16* noalias nocapture %B) {
+; SSE-LABEL: @test_sext_4i16_to_4i64(
+; SSE-NEXT: entry:
+; SSE-NEXT: [[TMP0:%.*]] = bitcast i16* %B to <2 x i16>*
+; SSE-NEXT: [[TMP1:%.*]] = load <2 x i16>, <2 x i16>* [[TMP0]], align 1
+; SSE-NEXT: [[TMP2:%.*]] = sext <2 x i16> [[TMP1]] to <2 x i64>
+; SSE-NEXT: [[TMP3:%.*]] = bitcast i64* %A to <2 x i64>*
+; SSE-NEXT: store <2 x i64> [[TMP2]], <2 x i64>* [[TMP3]], align 4
+; SSE-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* %B, i64 2
+; SSE-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i64, i64* %A, i64 2
+; SSE-NEXT: [[TMP4:%.*]] = bitcast i16* [[ARRAYIDX5]] to <2 x i16>*
+; SSE-NEXT: [[TMP5:%.*]] = load <2 x i16>, <2 x i16>* [[TMP4]], align 1
+; SSE-NEXT: [[TMP6:%.*]] = sext <2 x i16> [[TMP5]] to <2 x i64>
+; SSE-NEXT: [[TMP7:%.*]] = bitcast i64* [[ARRAYIDX7]] to <2 x i64>*
+; SSE-NEXT: store <2 x i64> [[TMP6]], <2 x i64>* [[TMP7]], align 4
+; SSE-NEXT: ret i64 undef
+;
+; AVX-LABEL: @test_sext_4i16_to_4i64(
+; AVX-NEXT: entry:
+; AVX-NEXT: [[TMP0:%.*]] = bitcast i16* %B to <4 x i16>*
+; AVX-NEXT: [[TMP1:%.*]] = load <4 x i16>, <4 x i16>* [[TMP0]], align 1
+; AVX-NEXT: [[TMP2:%.*]] = sext <4 x i16> [[TMP1]] to <4 x i64>
+; AVX-NEXT: [[TMP3:%.*]] = bitcast i64* %A to <4 x i64>*
+; AVX-NEXT: store <4 x i64> [[TMP2]], <4 x i64>* [[TMP3]], align 4
+; AVX-NEXT: ret i64 undef
+;
+entry:
+ %0 = load i16, i16* %B, align 1
+ %conv = sext i16 %0 to i64
+ store i64 %conv, i64* %A, align 4
+ %arrayidx2 = getelementptr inbounds i16, i16* %B, i64 1
+ %1 = load i16, i16* %arrayidx2, align 1
+ %conv3 = sext i16 %1 to i64
+ %arrayidx4 = getelementptr inbounds i64, i64* %A, i64 1
+ store i64 %conv3, i64* %arrayidx4, align 4
+ %arrayidx5 = getelementptr inbounds i16, i16* %B, i64 2
+ %2 = load i16, i16* %arrayidx5, align 1
+ %conv6 = sext i16 %2 to i64
+ %arrayidx7 = getelementptr inbounds i64, i64* %A, i64 2
+ store i64 %conv6, i64* %arrayidx7, align 4
+ %arrayidx8 = getelementptr inbounds i16, i16* %B, i64 3
+ %3 = load i16, i16* %arrayidx8, align 1
+ %conv9 = sext i16 %3 to i64
+ %arrayidx10 = getelementptr inbounds i64, i64* %A, i64 3
+ store i64 %conv9, i64* %arrayidx10, align 4
+ ret i64 undef
+}
diff --git a/test/Transforms/SLPVectorizer/X86/ctlz.ll b/test/Transforms/SLPVectorizer/X86/ctlz.ll
new file mode 100644
index 000000000000..8e281971fd18
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/ctlz.ll
@@ -0,0 +1,1222 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@src64 = common global [4 x i64] zeroinitializer, align 32
+@dst64 = common global [4 x i64] zeroinitializer, align 32
+@src32 = common global [8 x i32] zeroinitializer, align 32
+@dst32 = common global [8 x i32] zeroinitializer, align 32
+@src16 = common global [16 x i16] zeroinitializer, align 32
+@dst16 = common global [16 x i16] zeroinitializer, align 32
+@src8 = common global [32 x i8] zeroinitializer, align 32
+@dst8 = common global [32 x i8] zeroinitializer, align 32
+
+declare i64 @llvm.ctlz.i64(i64, i1)
+declare i32 @llvm.ctlz.i32(i32, i1)
+declare i16 @llvm.ctlz.i16(i16, i1)
+declare i8 @llvm.ctlz.i8(i8, i1)
+
+;
+; CTLZ
+;
+
+define void @ctlz_2i64() #0 {
+; CHECK-LABEL: @ctlz_2i64(
+; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
+; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; CHECK-NEXT: [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 false)
+; CHECK-NEXT: [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 false)
+; CHECK-NEXT: store i64 [[CTLZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
+; CHECK-NEXT: store i64 [[CTLZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; CHECK-NEXT: ret void
+;
+ %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
+ %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+ %ctlz0 = call i64 @llvm.ctlz.i64(i64 %ld0, i1 0)
+ %ctlz1 = call i64 @llvm.ctlz.i64(i64 %ld1, i1 0)
+ store i64 %ctlz0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
+ store i64 %ctlz1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+ ret void
+}
+
+define void @ctlz_4i64() #0 {
+; CHECK-LABEL: @ctlz_4i64(
+; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
+; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
+; CHECK-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
+; CHECK-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+; CHECK-NEXT: [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 false)
+; CHECK-NEXT: [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 false)
+; CHECK-NEXT: [[CTLZ2:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD2]], i1 false)
+; CHECK-NEXT: [[CTLZ3:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD3]], i1 false)
+; CHECK-NEXT: store i64 [[CTLZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
+; CHECK-NEXT: store i64 [[CTLZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
+; CHECK-NEXT: store i64 [[CTLZ2]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
+; CHECK-NEXT: store i64 [[CTLZ3]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+; CHECK-NEXT: ret void
+;
+ %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
+ %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
+ %ld2 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
+ %ld3 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+ %ctlz0 = call i64 @llvm.ctlz.i64(i64 %ld0, i1 0)
+ %ctlz1 = call i64 @llvm.ctlz.i64(i64 %ld1, i1 0)
+ %ctlz2 = call i64 @llvm.ctlz.i64(i64 %ld2, i1 0)
+ %ctlz3 = call i64 @llvm.ctlz.i64(i64 %ld3, i1 0)
+ store i64 %ctlz0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
+ store i64 %ctlz1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
+ store i64 %ctlz2, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
+ store i64 %ctlz3, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+ ret void
+}
+
+define void @ctlz_4i32() #0 {
+; CHECK-LABEL: @ctlz_4i32(
+; CHECK-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
+; CHECK-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
+; CHECK-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
+; CHECK-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
+; CHECK-NEXT: [[CTLZ0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD0]], i1 false)
+; CHECK-NEXT: [[CTLZ1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD1]], i1 false)
+; CHECK-NEXT: [[CTLZ2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD2]], i1 false)
+; CHECK-NEXT: [[CTLZ3:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD3]], i1 false)
+; CHECK-NEXT: store i32 [[CTLZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
+; CHECK-NEXT: store i32 [[CTLZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
+; CHECK-NEXT: store i32 [[CTLZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
+; CHECK-NEXT: store i32 [[CTLZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
+; CHECK-NEXT: ret void
+;
+ %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
+ %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
+ %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
+ %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
+ %ctlz0 = call i32 @llvm.ctlz.i32(i32 %ld0, i1 0)
+ %ctlz1 = call i32 @llvm.ctlz.i32(i32 %ld1, i1 0)
+ %ctlz2 = call i32 @llvm.ctlz.i32(i32 %ld2, i1 0)
+ %ctlz3 = call i32 @llvm.ctlz.i32(i32 %ld3, i1 0)
+ store i32 %ctlz0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
+ store i32 %ctlz1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
+ store i32 %ctlz2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
+ store i32 %ctlz3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
+ ret void
+}
+
+define void @ctlz_8i32() #0 {
+; CHECK-LABEL: @ctlz_8i32(
+; CHECK-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
+; CHECK-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
+; CHECK-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
+; CHECK-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
+; CHECK-NEXT: [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
+; CHECK-NEXT: [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
+; CHECK-NEXT: [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
+; CHECK-NEXT: [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
+; CHECK-NEXT: [[CTLZ0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD0]], i1 false)
+; CHECK-NEXT: [[CTLZ1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD1]], i1 false)
+; CHECK-NEXT: [[CTLZ2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD2]], i1 false)
+; CHECK-NEXT: [[CTLZ3:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD3]], i1 false)
+; CHECK-NEXT: [[CTLZ4:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD4]], i1 false)
+; CHECK-NEXT: [[CTLZ5:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD5]], i1 false)
+; CHECK-NEXT: [[CTLZ6:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD6]], i1 false)
+; CHECK-NEXT: [[CTLZ7:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD7]], i1 false)
+; CHECK-NEXT: store i32 [[CTLZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
+; CHECK-NEXT: store i32 [[CTLZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
+; CHECK-NEXT: store i32 [[CTLZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
+; CHECK-NEXT: store i32 [[CTLZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
+; CHECK-NEXT: store i32 [[CTLZ4]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
+; CHECK-NEXT: store i32 [[CTLZ5]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
+; CHECK-NEXT: store i32 [[CTLZ6]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
+; CHECK-NEXT: store i32 [[CTLZ7]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
+; CHECK-NEXT: ret void
+;
+ %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
+ %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
+ %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
+ %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
+ %ld4 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
+ %ld5 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
+ %ld6 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
+ %ld7 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
+ %ctlz0 = call i32 @llvm.ctlz.i32(i32 %ld0, i1 0)
+ %ctlz1 = call i32 @llvm.ctlz.i32(i32 %ld1, i1 0)
+ %ctlz2 = call i32 @llvm.ctlz.i32(i32 %ld2, i1 0)
+ %ctlz3 = call i32 @llvm.ctlz.i32(i32 %ld3, i1 0)
+ %ctlz4 = call i32 @llvm.ctlz.i32(i32 %ld4, i1 0)
+ %ctlz5 = call i32 @llvm.ctlz.i32(i32 %ld5, i1 0)
+ %ctlz6 = call i32 @llvm.ctlz.i32(i32 %ld6, i1 0)
+ %ctlz7 = call i32 @llvm.ctlz.i32(i32 %ld7, i1 0)
+ store i32 %ctlz0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
+ store i32 %ctlz1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
+ store i32 %ctlz2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
+ store i32 %ctlz3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
+ store i32 %ctlz4, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
+ store i32 %ctlz5, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
+ store i32 %ctlz6, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
+ store i32 %ctlz7, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
+ ret void
+}
+
+define void @ctlz_8i16() #0 {
+; CHECK-LABEL: @ctlz_8i16(
+; CHECK-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
+; CHECK-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
+; CHECK-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
+; CHECK-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
+; CHECK-NEXT: [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
+; CHECK-NEXT: [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
+; CHECK-NEXT: [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
+; CHECK-NEXT: [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
+; CHECK-NEXT: [[CTLZ0:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD0]], i1 false)
+; CHECK-NEXT: [[CTLZ1:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD1]], i1 false)
+; CHECK-NEXT: [[CTLZ2:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD2]], i1 false)
+; CHECK-NEXT: [[CTLZ3:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD3]], i1 false)
+; CHECK-NEXT: [[CTLZ4:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD4]], i1 false)
+; CHECK-NEXT: [[CTLZ5:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD5]], i1 false)
+; CHECK-NEXT: [[CTLZ6:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD6]], i1 false)
+; CHECK-NEXT: [[CTLZ7:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD7]], i1 false)
+; CHECK-NEXT: store i16 [[CTLZ0]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
+; CHECK-NEXT: store i16 [[CTLZ1]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
+; CHECK-NEXT: store i16 [[CTLZ2]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
+; CHECK-NEXT: store i16 [[CTLZ3]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
+; CHECK-NEXT: store i16 [[CTLZ4]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
+; CHECK-NEXT: store i16 [[CTLZ5]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
+; CHECK-NEXT: store i16 [[CTLZ6]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
+; CHECK-NEXT: store i16 [[CTLZ7]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
+; CHECK-NEXT: ret void
+;
+ %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
+ %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
+ %ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
+ %ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
+ %ld4 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
+ %ld5 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
+ %ld6 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
+ %ld7 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
+ %ctlz0 = call i16 @llvm.ctlz.i16(i16 %ld0, i1 0)
+ %ctlz1 = call i16 @llvm.ctlz.i16(i16 %ld1, i1 0)
+ %ctlz2 = call i16 @llvm.ctlz.i16(i16 %ld2, i1 0)
+ %ctlz3 = call i16 @llvm.ctlz.i16(i16 %ld3, i1 0)
+ %ctlz4 = call i16 @llvm.ctlz.i16(i16 %ld4, i1 0)
+ %ctlz5 = call i16 @llvm.ctlz.i16(i16 %ld5, i1 0)
+ %ctlz6 = call i16 @llvm.ctlz.i16(i16 %ld6, i1 0)
+ %ctlz7 = call i16 @llvm.ctlz.i16(i16 %ld7, i1 0)
+ store i16 %ctlz0, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
+ store i16 %ctlz1, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
+ store i16 %ctlz2, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
+ store i16 %ctlz3, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
+ store i16 %ctlz4, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
+ store i16 %ctlz5, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
+ store i16 %ctlz6, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
+ store i16 %ctlz7, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
+ ret void
+}
+
+define void @ctlz_16i16() #0 {
+; CHECK-LABEL: @ctlz_16i16(
+; CHECK-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
+; CHECK-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
+; CHECK-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
+; CHECK-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
+; CHECK-NEXT: [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
+; CHECK-NEXT: [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
+; CHECK-NEXT: [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
+; CHECK-NEXT: [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
+; CHECK-NEXT: [[LD8:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 8), align 2
+; CHECK-NEXT: [[LD9:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 9), align 2
+; CHECK-NEXT: [[LD10:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 10), align 2
+; CHECK-NEXT: [[LD11:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 11), align 2
+; CHECK-NEXT: [[LD12:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 12), align 2
+; CHECK-NEXT: [[LD13:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 13), align 2
+; CHECK-NEXT: [[LD14:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 14), align 2
+; CHECK-NEXT: [[LD15:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 15), align 2
+; CHECK-NEXT: [[CTLZ0:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD0]], i1 false)
+; CHECK-NEXT: [[CTLZ1:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD1]], i1 false)
+; CHECK-NEXT: [[CTLZ2:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD2]], i1 false)
+; CHECK-NEXT: [[CTLZ3:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD3]], i1 false)
+; CHECK-NEXT: [[CTLZ4:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD4]], i1 false)
+; CHECK-NEXT: [[CTLZ5:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD5]], i1 false)
+; CHECK-NEXT: [[CTLZ6:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD6]], i1 false)
+; CHECK-NEXT: [[CTLZ7:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD7]], i1 false)
+; CHECK-NEXT: [[CTLZ8:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD8]], i1 false)
+; CHECK-NEXT: [[CTLZ9:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD9]], i1 false)
+; CHECK-NEXT: [[CTLZ10:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD10]], i1 false)
+; CHECK-NEXT: [[CTLZ11:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD11]], i1 false)
+; CHECK-NEXT: [[CTLZ12:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD12]], i1 false)
+; CHECK-NEXT: [[CTLZ13:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD13]], i1 false)
+; CHECK-NEXT: [[CTLZ14:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD14]], i1 false)
+; CHECK-NEXT: [[CTLZ15:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD15]], i1 false)
+; CHECK-NEXT: store i16 [[CTLZ0]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
+; CHECK-NEXT: store i16 [[CTLZ1]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
+; CHECK-NEXT: store i16 [[CTLZ2]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
+; CHECK-NEXT: store i16 [[CTLZ3]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
+; CHECK-NEXT: store i16 [[CTLZ4]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
+; CHECK-NEXT: store i16 [[CTLZ5]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
+; CHECK-NEXT: store i16 [[CTLZ6]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
+; CHECK-NEXT: store i16 [[CTLZ7]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
+; CHECK-NEXT: store i16 [[CTLZ8]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 8), align 2
+; CHECK-NEXT: store i16 [[CTLZ9]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 9), align 2
+; CHECK-NEXT: store i16 [[CTLZ10]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 10), align 2
+; CHECK-NEXT: store i16 [[CTLZ11]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 11), align 2
+; CHECK-NEXT: store i16 [[CTLZ12]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 12), align 2
+; CHECK-NEXT: store i16 [[CTLZ13]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 13), align 2
+; CHECK-NEXT: store i16 [[CTLZ14]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 14), align 2
+; CHECK-NEXT: store i16 [[CTLZ15]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 15), align 2
+; CHECK-NEXT: ret void
+;
+ %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
+ %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
+ %ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
+ %ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
+ %ld4 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
+ %ld5 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
+ %ld6 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
+ %ld7 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
+ %ld8 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 8), align 2
+ %ld9 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 9), align 2
+ %ld10 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 10), align 2
+ %ld11 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 11), align 2
+ %ld12 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 12), align 2
+ %ld13 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 13), align 2
+ %ld14 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 14), align 2
+ %ld15 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 15), align 2
+ %ctlz0 = call i16 @llvm.ctlz.i16(i16 %ld0, i1 0)
+ %ctlz1 = call i16 @llvm.ctlz.i16(i16 %ld1, i1 0)
+ %ctlz2 = call i16 @llvm.ctlz.i16(i16 %ld2, i1 0)
+ %ctlz3 = call i16 @llvm.ctlz.i16(i16 %ld3, i1 0)
+ %ctlz4 = call i16 @llvm.ctlz.i16(i16 %ld4, i1 0)
+ %ctlz5 = call i16 @llvm.ctlz.i16(i16 %ld5, i1 0)
+ %ctlz6 = call i16 @llvm.ctlz.i16(i16 %ld6, i1 0)
+ %ctlz7 = call i16 @llvm.ctlz.i16(i16 %ld7, i1 0)
+ %ctlz8 = call i16 @llvm.ctlz.i16(i16 %ld8, i1 0)
+ %ctlz9 = call i16 @llvm.ctlz.i16(i16 %ld9, i1 0)
+ %ctlz10 = call i16 @llvm.ctlz.i16(i16 %ld10, i1 0)
+ %ctlz11 = call i16 @llvm.ctlz.i16(i16 %ld11, i1 0)
+ %ctlz12 = call i16 @llvm.ctlz.i16(i16 %ld12, i1 0)
+ %ctlz13 = call i16 @llvm.ctlz.i16(i16 %ld13, i1 0)
+ %ctlz14 = call i16 @llvm.ctlz.i16(i16 %ld14, i1 0)
+ %ctlz15 = call i16 @llvm.ctlz.i16(i16 %ld15, i1 0)
+ store i16 %ctlz0 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
+ store i16 %ctlz1 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
+ store i16 %ctlz2 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
+ store i16 %ctlz3 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
+ store i16 %ctlz4 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
+ store i16 %ctlz5 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
+ store i16 %ctlz6 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
+ store i16 %ctlz7 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
+ store i16 %ctlz8 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 8), align 2
+ store i16 %ctlz9 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 9), align 2
+ store i16 %ctlz10, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 10), align 2
+ store i16 %ctlz11, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 11), align 2
+ store i16 %ctlz12, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 12), align 2
+ store i16 %ctlz13, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 13), align 2
+ store i16 %ctlz14, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 14), align 2
+ store i16 %ctlz15, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 15), align 2
+ ret void
+}
+
+define void @ctlz_16i8() #0 {
+; CHECK-LABEL: @ctlz_16i8(
+; CHECK-NEXT: [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 0), align 1
+; CHECK-NEXT: [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 1), align 1
+; CHECK-NEXT: [[LD2:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 2), align 1
+; CHECK-NEXT: [[LD3:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 3), align 1
+; CHECK-NEXT: [[LD4:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 4), align 1
+; CHECK-NEXT: [[LD5:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 5), align 1
+; CHECK-NEXT: [[LD6:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 6), align 1
+; CHECK-NEXT: [[LD7:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 7), align 1
+; CHECK-NEXT: [[LD8:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 8), align 1
+; CHECK-NEXT: [[LD9:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 9), align 1
+; CHECK-NEXT: [[LD10:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
+; CHECK-NEXT: [[LD11:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
+; CHECK-NEXT: [[LD12:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
+; CHECK-NEXT: [[LD13:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
+; CHECK-NEXT: [[LD14:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
+; CHECK-NEXT: [[LD15:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
+; CHECK-NEXT: [[CTLZ0:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD0]], i1 false)
+; CHECK-NEXT: [[CTLZ1:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD1]], i1 false)
+; CHECK-NEXT: [[CTLZ2:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD2]], i1 false)
+; CHECK-NEXT: [[CTLZ3:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD3]], i1 false)
+; CHECK-NEXT: [[CTLZ4:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD4]], i1 false)
+; CHECK-NEXT: [[CTLZ5:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD5]], i1 false)
+; CHECK-NEXT: [[CTLZ6:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD6]], i1 false)
+; CHECK-NEXT: [[CTLZ7:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD7]], i1 false)
+; CHECK-NEXT: [[CTLZ8:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD8]], i1 false)
+; CHECK-NEXT: [[CTLZ9:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD9]], i1 false)
+; CHECK-NEXT: [[CTLZ10:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD10]], i1 false)
+; CHECK-NEXT: [[CTLZ11:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD11]], i1 false)
+; CHECK-NEXT: [[CTLZ12:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD12]], i1 false)
+; CHECK-NEXT: [[CTLZ13:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD13]], i1 false)
+; CHECK-NEXT: [[CTLZ14:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD14]], i1 false)
+; CHECK-NEXT: [[CTLZ15:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD15]], i1 false)
+; CHECK-NEXT: store i8 [[CTLZ0]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 0), align 1
+; CHECK-NEXT: store i8 [[CTLZ1]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 1), align 1
+; CHECK-NEXT: store i8 [[CTLZ2]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 2), align 1
+; CHECK-NEXT: store i8 [[CTLZ3]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 3), align 1
+; CHECK-NEXT: store i8 [[CTLZ4]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 4), align 1
+; CHECK-NEXT: store i8 [[CTLZ5]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 5), align 1
+; CHECK-NEXT: store i8 [[CTLZ6]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 6), align 1
+; CHECK-NEXT: store i8 [[CTLZ7]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 7), align 1
+; CHECK-NEXT: store i8 [[CTLZ8]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 8), align 1
+; CHECK-NEXT: store i8 [[CTLZ9]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 9), align 1
+; CHECK-NEXT: store i8 [[CTLZ10]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
+; CHECK-NEXT: store i8 [[CTLZ11]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
+; CHECK-NEXT: store i8 [[CTLZ12]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
+; CHECK-NEXT: store i8 [[CTLZ13]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
+; CHECK-NEXT: store i8 [[CTLZ14]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
+; CHECK-NEXT: store i8 [[CTLZ15]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
+; CHECK-NEXT: ret void
+;
+ %ld0 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 0), align 1
+ %ld1 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 1), align 1
+ %ld2 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 2), align 1
+ %ld3 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 3), align 1
+ %ld4 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 4), align 1
+ %ld5 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 5), align 1
+ %ld6 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 6), align 1
+ %ld7 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 7), align 1
+ %ld8 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 8), align 1
+ %ld9 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 9), align 1
+ %ld10 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
+ %ld11 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
+ %ld12 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
+ %ld13 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
+ %ld14 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
+ %ld15 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
+ %ctlz0 = call i8 @llvm.ctlz.i8(i8 %ld0, i1 0)
+ %ctlz1 = call i8 @llvm.ctlz.i8(i8 %ld1, i1 0)
+ %ctlz2 = call i8 @llvm.ctlz.i8(i8 %ld2, i1 0)
+ %ctlz3 = call i8 @llvm.ctlz.i8(i8 %ld3, i1 0)
+ %ctlz4 = call i8 @llvm.ctlz.i8(i8 %ld4, i1 0)
+ %ctlz5 = call i8 @llvm.ctlz.i8(i8 %ld5, i1 0)
+ %ctlz6 = call i8 @llvm.ctlz.i8(i8 %ld6, i1 0)
+ %ctlz7 = call i8 @llvm.ctlz.i8(i8 %ld7, i1 0)
+ %ctlz8 = call i8 @llvm.ctlz.i8(i8 %ld8, i1 0)
+ %ctlz9 = call i8 @llvm.ctlz.i8(i8 %ld9, i1 0)
+ %ctlz10 = call i8 @llvm.ctlz.i8(i8 %ld10, i1 0)
+ %ctlz11 = call i8 @llvm.ctlz.i8(i8 %ld11, i1 0)
+ %ctlz12 = call i8 @llvm.ctlz.i8(i8 %ld12, i1 0)
+ %ctlz13 = call i8 @llvm.ctlz.i8(i8 %ld13, i1 0)
+ %ctlz14 = call i8 @llvm.ctlz.i8(i8 %ld14, i1 0)
+ %ctlz15 = call i8 @llvm.ctlz.i8(i8 %ld15, i1 0)
+ store i8 %ctlz0 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 0), align 1
+ store i8 %ctlz1 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 1), align 1
+ store i8 %ctlz2 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 2), align 1
+ store i8 %ctlz3 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 3), align 1
+ store i8 %ctlz4 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 4), align 1
+ store i8 %ctlz5 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 5), align 1
+ store i8 %ctlz6 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 6), align 1
+ store i8 %ctlz7 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 7), align 1
+ store i8 %ctlz8 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 8), align 1
+ store i8 %ctlz9 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 9), align 1
+ store i8 %ctlz10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
+ store i8 %ctlz11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
+ store i8 %ctlz12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
+ store i8 %ctlz13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
+ store i8 %ctlz14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
+ store i8 %ctlz15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
+ ret void
+}
+
+; 32 x i8 ctlz, is_zero_undef = false (i1 0). The autogenerated CHECK lines
+; below pin the current SLPVectorizer output: every load, @llvm.ctlz.i8 call,
+; and store stays scalar — i.e. this pattern is (currently) not vectorized.
+; NOTE(review): do not hand-edit the CHECK lines; FileCheck matches them
+; verbatim against the pass output.
+define void @ctlz_32i8() #0 {
+; CHECK-LABEL: @ctlz_32i8(
+; CHECK-NEXT:    [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 0), align 1
+; CHECK-NEXT:    [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 1), align 1
+; CHECK-NEXT:    [[LD2:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 2), align 1
+; CHECK-NEXT:    [[LD3:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 3), align 1
+; CHECK-NEXT:    [[LD4:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 4), align 1
+; CHECK-NEXT:    [[LD5:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 5), align 1
+; CHECK-NEXT:    [[LD6:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 6), align 1
+; CHECK-NEXT:    [[LD7:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 7), align 1
+; CHECK-NEXT:    [[LD8:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 8), align 1
+; CHECK-NEXT:    [[LD9:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 9), align 1
+; CHECK-NEXT:    [[LD10:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
+; CHECK-NEXT:    [[LD11:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
+; CHECK-NEXT:    [[LD12:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
+; CHECK-NEXT:    [[LD13:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
+; CHECK-NEXT:    [[LD14:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
+; CHECK-NEXT:    [[LD15:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
+; CHECK-NEXT:    [[LD16:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16), align 1
+; CHECK-NEXT:    [[LD17:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 17), align 1
+; CHECK-NEXT:    [[LD18:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 18), align 1
+; CHECK-NEXT:    [[LD19:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 19), align 1
+; CHECK-NEXT:    [[LD20:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 20), align 1
+; CHECK-NEXT:    [[LD21:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 21), align 1
+; CHECK-NEXT:    [[LD22:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 22), align 1
+; CHECK-NEXT:    [[LD23:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 23), align 1
+; CHECK-NEXT:    [[LD24:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 24), align 1
+; CHECK-NEXT:    [[LD25:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 25), align 1
+; CHECK-NEXT:    [[LD26:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 26), align 1
+; CHECK-NEXT:    [[LD27:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 27), align 1
+; CHECK-NEXT:    [[LD28:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 28), align 1
+; CHECK-NEXT:    [[LD29:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 29), align 1
+; CHECK-NEXT:    [[LD30:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 30), align 1
+; CHECK-NEXT:    [[LD31:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 31), align 1
+; CHECK-NEXT:    [[CTLZ0:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD0]], i1 false)
+; CHECK-NEXT:    [[CTLZ1:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD1]], i1 false)
+; CHECK-NEXT:    [[CTLZ2:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD2]], i1 false)
+; CHECK-NEXT:    [[CTLZ3:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD3]], i1 false)
+; CHECK-NEXT:    [[CTLZ4:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD4]], i1 false)
+; CHECK-NEXT:    [[CTLZ5:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD5]], i1 false)
+; CHECK-NEXT:    [[CTLZ6:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD6]], i1 false)
+; CHECK-NEXT:    [[CTLZ7:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD7]], i1 false)
+; CHECK-NEXT:    [[CTLZ8:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD8]], i1 false)
+; CHECK-NEXT:    [[CTLZ9:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD9]], i1 false)
+; CHECK-NEXT:    [[CTLZ10:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD10]], i1 false)
+; CHECK-NEXT:    [[CTLZ11:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD11]], i1 false)
+; CHECK-NEXT:    [[CTLZ12:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD12]], i1 false)
+; CHECK-NEXT:    [[CTLZ13:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD13]], i1 false)
+; CHECK-NEXT:    [[CTLZ14:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD14]], i1 false)
+; CHECK-NEXT:    [[CTLZ15:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD15]], i1 false)
+; CHECK-NEXT:    [[CTLZ16:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD16]], i1 false)
+; CHECK-NEXT:    [[CTLZ17:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD17]], i1 false)
+; CHECK-NEXT:    [[CTLZ18:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD18]], i1 false)
+; CHECK-NEXT:    [[CTLZ19:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD19]], i1 false)
+; CHECK-NEXT:    [[CTLZ20:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD20]], i1 false)
+; CHECK-NEXT:    [[CTLZ21:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD21]], i1 false)
+; CHECK-NEXT:    [[CTLZ22:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD22]], i1 false)
+; CHECK-NEXT:    [[CTLZ23:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD23]], i1 false)
+; CHECK-NEXT:    [[CTLZ24:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD24]], i1 false)
+; CHECK-NEXT:    [[CTLZ25:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD25]], i1 false)
+; CHECK-NEXT:    [[CTLZ26:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD26]], i1 false)
+; CHECK-NEXT:    [[CTLZ27:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD27]], i1 false)
+; CHECK-NEXT:    [[CTLZ28:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD28]], i1 false)
+; CHECK-NEXT:    [[CTLZ29:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD29]], i1 false)
+; CHECK-NEXT:    [[CTLZ30:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD30]], i1 false)
+; CHECK-NEXT:    [[CTLZ31:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD31]], i1 false)
+; CHECK-NEXT:    store i8 [[CTLZ0]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 0), align 1
+; CHECK-NEXT:    store i8 [[CTLZ1]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 1), align 1
+; CHECK-NEXT:    store i8 [[CTLZ2]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 2), align 1
+; CHECK-NEXT:    store i8 [[CTLZ3]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 3), align 1
+; CHECK-NEXT:    store i8 [[CTLZ4]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 4), align 1
+; CHECK-NEXT:    store i8 [[CTLZ5]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 5), align 1
+; CHECK-NEXT:    store i8 [[CTLZ6]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 6), align 1
+; CHECK-NEXT:    store i8 [[CTLZ7]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 7), align 1
+; CHECK-NEXT:    store i8 [[CTLZ8]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 8), align 1
+; CHECK-NEXT:    store i8 [[CTLZ9]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 9), align 1
+; CHECK-NEXT:    store i8 [[CTLZ10]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
+; CHECK-NEXT:    store i8 [[CTLZ11]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
+; CHECK-NEXT:    store i8 [[CTLZ12]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
+; CHECK-NEXT:    store i8 [[CTLZ13]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
+; CHECK-NEXT:    store i8 [[CTLZ14]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
+; CHECK-NEXT:    store i8 [[CTLZ15]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
+; CHECK-NEXT:    store i8 [[CTLZ16]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16), align 1
+; CHECK-NEXT:    store i8 [[CTLZ17]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 17), align 1
+; CHECK-NEXT:    store i8 [[CTLZ18]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 18), align 1
+; CHECK-NEXT:    store i8 [[CTLZ19]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 19), align 1
+; CHECK-NEXT:    store i8 [[CTLZ20]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 20), align 1
+; CHECK-NEXT:    store i8 [[CTLZ21]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 21), align 1
+; CHECK-NEXT:    store i8 [[CTLZ22]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 22), align 1
+; CHECK-NEXT:    store i8 [[CTLZ23]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 23), align 1
+; CHECK-NEXT:    store i8 [[CTLZ24]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 24), align 1
+; CHECK-NEXT:    store i8 [[CTLZ25]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 25), align 1
+; CHECK-NEXT:    store i8 [[CTLZ26]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 26), align 1
+; CHECK-NEXT:    store i8 [[CTLZ27]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 27), align 1
+; CHECK-NEXT:    store i8 [[CTLZ28]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 28), align 1
+; CHECK-NEXT:    store i8 [[CTLZ29]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 29), align 1
+; CHECK-NEXT:    store i8 [[CTLZ30]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 30), align 1
+; CHECK-NEXT:    store i8 [[CTLZ31]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 31), align 1
+; CHECK-NEXT:    ret void
+;
+; Input IR: 32 consecutive scalar load -> ctlz -> store chains over @src8/@dst8.
+  %ld0  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 0), align 1
+  %ld1  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 1), align 1
+  %ld2  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 2), align 1
+  %ld3  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 3), align 1
+  %ld4  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 4), align 1
+  %ld5  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 5), align 1
+  %ld6  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 6), align 1
+  %ld7  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 7), align 1
+  %ld8  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 8), align 1
+  %ld9  = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 9), align 1
+  %ld10 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
+  %ld11 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
+  %ld12 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
+  %ld13 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
+  %ld14 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
+  %ld15 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
+  %ld16 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16), align 1
+  %ld17 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 17), align 1
+  %ld18 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 18), align 1
+  %ld19 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 19), align 1
+  %ld20 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 20), align 1
+  %ld21 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 21), align 1
+  %ld22 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 22), align 1
+  %ld23 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 23), align 1
+  %ld24 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 24), align 1
+  %ld25 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 25), align 1
+  %ld26 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 26), align 1
+  %ld27 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 27), align 1
+  %ld28 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 28), align 1
+  %ld29 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 29), align 1
+  %ld30 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 30), align 1
+  %ld31 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 31), align 1
+  %ctlz0  = call i8 @llvm.ctlz.i8(i8 %ld0, i1 0)
+  %ctlz1  = call i8 @llvm.ctlz.i8(i8 %ld1, i1 0)
+  %ctlz2  = call i8 @llvm.ctlz.i8(i8 %ld2, i1 0)
+  %ctlz3  = call i8 @llvm.ctlz.i8(i8 %ld3, i1 0)
+  %ctlz4  = call i8 @llvm.ctlz.i8(i8 %ld4, i1 0)
+  %ctlz5  = call i8 @llvm.ctlz.i8(i8 %ld5, i1 0)
+  %ctlz6  = call i8 @llvm.ctlz.i8(i8 %ld6, i1 0)
+  %ctlz7  = call i8 @llvm.ctlz.i8(i8 %ld7, i1 0)
+  %ctlz8  = call i8 @llvm.ctlz.i8(i8 %ld8, i1 0)
+  %ctlz9  = call i8 @llvm.ctlz.i8(i8 %ld9, i1 0)
+  %ctlz10 = call i8 @llvm.ctlz.i8(i8 %ld10, i1 0)
+  %ctlz11 = call i8 @llvm.ctlz.i8(i8 %ld11, i1 0)
+  %ctlz12 = call i8 @llvm.ctlz.i8(i8 %ld12, i1 0)
+  %ctlz13 = call i8 @llvm.ctlz.i8(i8 %ld13, i1 0)
+  %ctlz14 = call i8 @llvm.ctlz.i8(i8 %ld14, i1 0)
+  %ctlz15 = call i8 @llvm.ctlz.i8(i8 %ld15, i1 0)
+  %ctlz16 = call i8 @llvm.ctlz.i8(i8 %ld16, i1 0)
+  %ctlz17 = call i8 @llvm.ctlz.i8(i8 %ld17, i1 0)
+  %ctlz18 = call i8 @llvm.ctlz.i8(i8 %ld18, i1 0)
+  %ctlz19 = call i8 @llvm.ctlz.i8(i8 %ld19, i1 0)
+  %ctlz20 = call i8 @llvm.ctlz.i8(i8 %ld20, i1 0)
+  %ctlz21 = call i8 @llvm.ctlz.i8(i8 %ld21, i1 0)
+  %ctlz22 = call i8 @llvm.ctlz.i8(i8 %ld22, i1 0)
+  %ctlz23 = call i8 @llvm.ctlz.i8(i8 %ld23, i1 0)
+  %ctlz24 = call i8 @llvm.ctlz.i8(i8 %ld24, i1 0)
+  %ctlz25 = call i8 @llvm.ctlz.i8(i8 %ld25, i1 0)
+  %ctlz26 = call i8 @llvm.ctlz.i8(i8 %ld26, i1 0)
+  %ctlz27 = call i8 @llvm.ctlz.i8(i8 %ld27, i1 0)
+  %ctlz28 = call i8 @llvm.ctlz.i8(i8 %ld28, i1 0)
+  %ctlz29 = call i8 @llvm.ctlz.i8(i8 %ld29, i1 0)
+  %ctlz30 = call i8 @llvm.ctlz.i8(i8 %ld30, i1 0)
+  %ctlz31 = call i8 @llvm.ctlz.i8(i8 %ld31, i1 0)
+  store i8 %ctlz0 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 0), align 1
+  store i8 %ctlz1 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 1), align 1
+  store i8 %ctlz2 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 2), align 1
+  store i8 %ctlz3 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 3), align 1
+  store i8 %ctlz4 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 4), align 1
+  store i8 %ctlz5 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 5), align 1
+  store i8 %ctlz6 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 6), align 1
+  store i8 %ctlz7 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 7), align 1
+  store i8 %ctlz8 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 8), align 1
+  store i8 %ctlz9 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 9), align 1
+  store i8 %ctlz10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
+  store i8 %ctlz11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
+  store i8 %ctlz12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
+  store i8 %ctlz13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
+  store i8 %ctlz14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
+  store i8 %ctlz15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
+  store i8 %ctlz16, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16), align 1
+  store i8 %ctlz17, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 17), align 1
+  store i8 %ctlz18, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 18), align 1
+  store i8 %ctlz19, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 19), align 1
+  store i8 %ctlz20, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 20), align 1
+  store i8 %ctlz21, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 21), align 1
+  store i8 %ctlz22, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 22), align 1
+  store i8 %ctlz23, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 23), align 1
+  store i8 %ctlz24, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 24), align 1
+  store i8 %ctlz25, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 25), align 1
+  store i8 %ctlz26, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 26), align 1
+  store i8 %ctlz27, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 27), align 1
+  store i8 %ctlz28, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 28), align 1
+  store i8 %ctlz29, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 29), align 1
+  store i8 %ctlz30, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 30), align 1
+  store i8 %ctlz31, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 31), align 1
+  ret void
+}
+
+;
+; CTLZ_ZERO_UNDEF - the following tests pass is_zero_undef=true (spelled
+; "i1 -1" in the input, printed back as "i1 true"), so the result is
+; undefined when the argument is zero.
+;
+
+; 2 x i64 ctlz with the zero-is-undef flag set (i1 -1, printed as i1 true).
+; The CHECK lines record the current SLPVectorizer output: both calls remain
+; scalar for this pattern.
+define void @ctlz_undef_2i64() #0 {
+; CHECK-LABEL: @ctlz_undef_2i64(
+; CHECK-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
+; CHECK-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; CHECK-NEXT:    [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 true)
+; CHECK-NEXT:    [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 true)
+; CHECK-NEXT:    store i64 [[CTLZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
+; CHECK-NEXT:    store i64 [[CTLZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; CHECK-NEXT:    ret void
+;
+  %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
+  %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+  %ctlz0 = call i64 @llvm.ctlz.i64(i64 %ld0, i1 -1)
+  %ctlz1 = call i64 @llvm.ctlz.i64(i64 %ld1, i1 -1)
+  store i64 %ctlz0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
+  store i64 %ctlz1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+  ret void
+}
+
+; 4 x i64 ctlz, zero-is-undef, with under-aligned (align 4) i64 accesses.
+; CHECK lines pin the current SLPVectorizer output: all four calls stay scalar.
+define void @ctlz_undef_4i64() #0 {
+; CHECK-LABEL: @ctlz_undef_4i64(
+; CHECK-NEXT:    [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
+; CHECK-NEXT:    [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
+; CHECK-NEXT:    [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
+; CHECK-NEXT:    [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+; CHECK-NEXT:    [[CTLZ0:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD0]], i1 true)
+; CHECK-NEXT:    [[CTLZ1:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD1]], i1 true)
+; CHECK-NEXT:    [[CTLZ2:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD2]], i1 true)
+; CHECK-NEXT:    [[CTLZ3:%.*]] = call i64 @llvm.ctlz.i64(i64 [[LD3]], i1 true)
+; CHECK-NEXT:    store i64 [[CTLZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
+; CHECK-NEXT:    store i64 [[CTLZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
+; CHECK-NEXT:    store i64 [[CTLZ2]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
+; CHECK-NEXT:    store i64 [[CTLZ3]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+; CHECK-NEXT:    ret void
+;
+  %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
+  %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
+  %ld2 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
+  %ld3 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+  %ctlz0 = call i64 @llvm.ctlz.i64(i64 %ld0, i1 -1)
+  %ctlz1 = call i64 @llvm.ctlz.i64(i64 %ld1, i1 -1)
+  %ctlz2 = call i64 @llvm.ctlz.i64(i64 %ld2, i1 -1)
+  %ctlz3 = call i64 @llvm.ctlz.i64(i64 %ld3, i1 -1)
+  store i64 %ctlz0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
+  store i64 %ctlz1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
+  store i64 %ctlz2, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
+  store i64 %ctlz3, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+  ret void
+}
+
+; 4 x i32 ctlz, zero-is-undef. CHECK lines pin the current SLPVectorizer
+; output: the four calls remain scalar.
+define void @ctlz_undef_4i32() #0 {
+; CHECK-LABEL: @ctlz_undef_4i32(
+; CHECK-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
+; CHECK-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
+; CHECK-NEXT:    [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
+; CHECK-NEXT:    [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
+; CHECK-NEXT:    [[CTLZ0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD0]], i1 true)
+; CHECK-NEXT:    [[CTLZ1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD1]], i1 true)
+; CHECK-NEXT:    [[CTLZ2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD2]], i1 true)
+; CHECK-NEXT:    [[CTLZ3:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD3]], i1 true)
+; CHECK-NEXT:    store i32 [[CTLZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
+; CHECK-NEXT:    store i32 [[CTLZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
+; CHECK-NEXT:    store i32 [[CTLZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
+; CHECK-NEXT:    store i32 [[CTLZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
+; CHECK-NEXT:    ret void
+;
+  %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
+  %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
+  %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
+  %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
+  %ctlz0 = call i32 @llvm.ctlz.i32(i32 %ld0, i1 -1)
+  %ctlz1 = call i32 @llvm.ctlz.i32(i32 %ld1, i1 -1)
+  %ctlz2 = call i32 @llvm.ctlz.i32(i32 %ld2, i1 -1)
+  %ctlz3 = call i32 @llvm.ctlz.i32(i32 %ld3, i1 -1)
+  store i32 %ctlz0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
+  store i32 %ctlz1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
+  store i32 %ctlz2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
+  store i32 %ctlz3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
+  ret void
+}
+
+; 8 x i32 ctlz, zero-is-undef, with under-aligned (align 2) i32 accesses.
+; CHECK lines pin the current SLPVectorizer output: all eight calls stay
+; scalar.
+define void @ctlz_undef_8i32() #0 {
+; CHECK-LABEL: @ctlz_undef_8i32(
+; CHECK-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
+; CHECK-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
+; CHECK-NEXT:    [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
+; CHECK-NEXT:    [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
+; CHECK-NEXT:    [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
+; CHECK-NEXT:    [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
+; CHECK-NEXT:    [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
+; CHECK-NEXT:    [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
+; CHECK-NEXT:    [[CTLZ0:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD0]], i1 true)
+; CHECK-NEXT:    [[CTLZ1:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD1]], i1 true)
+; CHECK-NEXT:    [[CTLZ2:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD2]], i1 true)
+; CHECK-NEXT:    [[CTLZ3:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD3]], i1 true)
+; CHECK-NEXT:    [[CTLZ4:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD4]], i1 true)
+; CHECK-NEXT:    [[CTLZ5:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD5]], i1 true)
+; CHECK-NEXT:    [[CTLZ6:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD6]], i1 true)
+; CHECK-NEXT:    [[CTLZ7:%.*]] = call i32 @llvm.ctlz.i32(i32 [[LD7]], i1 true)
+; CHECK-NEXT:    store i32 [[CTLZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
+; CHECK-NEXT:    store i32 [[CTLZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
+; CHECK-NEXT:    store i32 [[CTLZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
+; CHECK-NEXT:    store i32 [[CTLZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
+; CHECK-NEXT:    store i32 [[CTLZ4]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
+; CHECK-NEXT:    store i32 [[CTLZ5]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
+; CHECK-NEXT:    store i32 [[CTLZ6]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
+; CHECK-NEXT:    store i32 [[CTLZ7]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
+; CHECK-NEXT:    ret void
+;
+  %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
+  %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
+  %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
+  %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
+  %ld4 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
+  %ld5 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
+  %ld6 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
+  %ld7 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
+  %ctlz0 = call i32 @llvm.ctlz.i32(i32 %ld0, i1 -1)
+  %ctlz1 = call i32 @llvm.ctlz.i32(i32 %ld1, i1 -1)
+  %ctlz2 = call i32 @llvm.ctlz.i32(i32 %ld2, i1 -1)
+  %ctlz3 = call i32 @llvm.ctlz.i32(i32 %ld3, i1 -1)
+  %ctlz4 = call i32 @llvm.ctlz.i32(i32 %ld4, i1 -1)
+  %ctlz5 = call i32 @llvm.ctlz.i32(i32 %ld5, i1 -1)
+  %ctlz6 = call i32 @llvm.ctlz.i32(i32 %ld6, i1 -1)
+  %ctlz7 = call i32 @llvm.ctlz.i32(i32 %ld7, i1 -1)
+  store i32 %ctlz0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
+  store i32 %ctlz1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
+  store i32 %ctlz2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
+  store i32 %ctlz3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
+  store i32 %ctlz4, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
+  store i32 %ctlz5, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
+  store i32 %ctlz6, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
+  store i32 %ctlz7, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
+  ret void
+}
+
+; Tests llvm.ctlz on 8 consecutive i16 lanes of @src16 with the is_zero_undef
+; flag set ("i1 -1" is the canonical integer spelling of "i1 true").  The
+; CHECK lines below were autogenerated (update_test_checks.py style) and pin
+; the expected output: 8 scalar ctlz calls, i.e. the vectorizer is expected to
+; leave this scalar — presumably because the target has no profitable vector
+; lzcnt lowering; TODO(review) confirm against the RUN lines at file top.
+; NOTE(review): the GEP first index is typed i16 rather than the customary
+; i32/i64 — harmless (the value is 0) but unusual; kept as-is since the CHECK
+; text must match the IR byte-for-byte.
+define void @ctlz_undef_8i16() #0 {
+; CHECK-LABEL: @ctlz_undef_8i16(
+; CHECK-NEXT:    [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
+; CHECK-NEXT:    [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
+; CHECK-NEXT:    [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
+; CHECK-NEXT:    [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
+; CHECK-NEXT:    [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
+; CHECK-NEXT:    [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
+; CHECK-NEXT:    [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
+; CHECK-NEXT:    [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
+; CHECK-NEXT:    [[CTLZ0:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD0]], i1 true)
+; CHECK-NEXT:    [[CTLZ1:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD1]], i1 true)
+; CHECK-NEXT:    [[CTLZ2:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD2]], i1 true)
+; CHECK-NEXT:    [[CTLZ3:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD3]], i1 true)
+; CHECK-NEXT:    [[CTLZ4:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD4]], i1 true)
+; CHECK-NEXT:    [[CTLZ5:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD5]], i1 true)
+; CHECK-NEXT:    [[CTLZ6:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD6]], i1 true)
+; CHECK-NEXT:    [[CTLZ7:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD7]], i1 true)
+; CHECK-NEXT:    store i16 [[CTLZ0]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
+; CHECK-NEXT:    store i16 [[CTLZ1]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
+; CHECK-NEXT:    store i16 [[CTLZ2]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
+; CHECK-NEXT:    store i16 [[CTLZ3]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
+; CHECK-NEXT:    store i16 [[CTLZ4]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
+; CHECK-NEXT:    store i16 [[CTLZ5]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
+; CHECK-NEXT:    store i16 [[CTLZ6]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
+; CHECK-NEXT:    store i16 [[CTLZ7]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
+; CHECK-NEXT:    ret void
+;
+  %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
+  %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
+  %ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
+  %ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
+  %ld4 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
+  %ld5 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
+  %ld6 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
+  %ld7 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
+  %ctlz0 = call i16 @llvm.ctlz.i16(i16 %ld0, i1 -1)
+  %ctlz1 = call i16 @llvm.ctlz.i16(i16 %ld1, i1 -1)
+  %ctlz2 = call i16 @llvm.ctlz.i16(i16 %ld2, i1 -1)
+  %ctlz3 = call i16 @llvm.ctlz.i16(i16 %ld3, i1 -1)
+  %ctlz4 = call i16 @llvm.ctlz.i16(i16 %ld4, i1 -1)
+  %ctlz5 = call i16 @llvm.ctlz.i16(i16 %ld5, i1 -1)
+  %ctlz6 = call i16 @llvm.ctlz.i16(i16 %ld6, i1 -1)
+  %ctlz7 = call i16 @llvm.ctlz.i16(i16 %ld7, i1 -1)
+  store i16 %ctlz0, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
+  store i16 %ctlz1, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
+  store i16 %ctlz2, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
+  store i16 %ctlz3, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
+  store i16 %ctlz4, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
+  store i16 %ctlz5, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
+  store i16 %ctlz6, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
+  store i16 %ctlz7, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
+  ret void
+}
+
+; Same pattern as the 8-lane variant, widened to all 16 i16 lanes of @src16:
+; per-lane llvm.ctlz.i16 with is_zero_undef = true ("i1 -1" == "i1 true"),
+; results stored to @dst16.  The autogenerated CHECK lines pin 16 scalar ctlz
+; calls — i.e. the expectation is that the vectorizer leaves this scalar;
+; TODO(review) confirm the rationale against the RUN lines at file top.
+; The extra space in "store i16 %ctlz0 ," (lanes 0-9) is column alignment with
+; the two-digit lanes and is preserved verbatim.
+define void @ctlz_undef_16i16() #0 {
+; CHECK-LABEL: @ctlz_undef_16i16(
+; CHECK-NEXT:    [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
+; CHECK-NEXT:    [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
+; CHECK-NEXT:    [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
+; CHECK-NEXT:    [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
+; CHECK-NEXT:    [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
+; CHECK-NEXT:    [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
+; CHECK-NEXT:    [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
+; CHECK-NEXT:    [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
+; CHECK-NEXT:    [[LD8:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 8), align 2
+; CHECK-NEXT:    [[LD9:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 9), align 2
+; CHECK-NEXT:    [[LD10:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 10), align 2
+; CHECK-NEXT:    [[LD11:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 11), align 2
+; CHECK-NEXT:    [[LD12:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 12), align 2
+; CHECK-NEXT:    [[LD13:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 13), align 2
+; CHECK-NEXT:    [[LD14:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 14), align 2
+; CHECK-NEXT:    [[LD15:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 15), align 2
+; CHECK-NEXT:    [[CTLZ0:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD0]], i1 true)
+; CHECK-NEXT:    [[CTLZ1:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD1]], i1 true)
+; CHECK-NEXT:    [[CTLZ2:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD2]], i1 true)
+; CHECK-NEXT:    [[CTLZ3:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD3]], i1 true)
+; CHECK-NEXT:    [[CTLZ4:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD4]], i1 true)
+; CHECK-NEXT:    [[CTLZ5:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD5]], i1 true)
+; CHECK-NEXT:    [[CTLZ6:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD6]], i1 true)
+; CHECK-NEXT:    [[CTLZ7:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD7]], i1 true)
+; CHECK-NEXT:    [[CTLZ8:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD8]], i1 true)
+; CHECK-NEXT:    [[CTLZ9:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD9]], i1 true)
+; CHECK-NEXT:    [[CTLZ10:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD10]], i1 true)
+; CHECK-NEXT:    [[CTLZ11:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD11]], i1 true)
+; CHECK-NEXT:    [[CTLZ12:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD12]], i1 true)
+; CHECK-NEXT:    [[CTLZ13:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD13]], i1 true)
+; CHECK-NEXT:    [[CTLZ14:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD14]], i1 true)
+; CHECK-NEXT:    [[CTLZ15:%.*]] = call i16 @llvm.ctlz.i16(i16 [[LD15]], i1 true)
+; CHECK-NEXT:    store i16 [[CTLZ0]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
+; CHECK-NEXT:    store i16 [[CTLZ1]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
+; CHECK-NEXT:    store i16 [[CTLZ2]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
+; CHECK-NEXT:    store i16 [[CTLZ3]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
+; CHECK-NEXT:    store i16 [[CTLZ4]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
+; CHECK-NEXT:    store i16 [[CTLZ5]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
+; CHECK-NEXT:    store i16 [[CTLZ6]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
+; CHECK-NEXT:    store i16 [[CTLZ7]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
+; CHECK-NEXT:    store i16 [[CTLZ8]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 8), align 2
+; CHECK-NEXT:    store i16 [[CTLZ9]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 9), align 2
+; CHECK-NEXT:    store i16 [[CTLZ10]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 10), align 2
+; CHECK-NEXT:    store i16 [[CTLZ11]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 11), align 2
+; CHECK-NEXT:    store i16 [[CTLZ12]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 12), align 2
+; CHECK-NEXT:    store i16 [[CTLZ13]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 13), align 2
+; CHECK-NEXT:    store i16 [[CTLZ14]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 14), align 2
+; CHECK-NEXT:    store i16 [[CTLZ15]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 15), align 2
+; CHECK-NEXT:    ret void
+;
+  %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
+  %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
+  %ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
+  %ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
+  %ld4 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
+  %ld5 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
+  %ld6 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
+  %ld7 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
+  %ld8 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 8), align 2
+  %ld9 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 9), align 2
+  %ld10 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 10), align 2
+  %ld11 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 11), align 2
+  %ld12 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 12), align 2
+  %ld13 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 13), align 2
+  %ld14 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 14), align 2
+  %ld15 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 15), align 2
+  %ctlz0 = call i16 @llvm.ctlz.i16(i16 %ld0, i1 -1)
+  %ctlz1 = call i16 @llvm.ctlz.i16(i16 %ld1, i1 -1)
+  %ctlz2 = call i16 @llvm.ctlz.i16(i16 %ld2, i1 -1)
+  %ctlz3 = call i16 @llvm.ctlz.i16(i16 %ld3, i1 -1)
+  %ctlz4 = call i16 @llvm.ctlz.i16(i16 %ld4, i1 -1)
+  %ctlz5 = call i16 @llvm.ctlz.i16(i16 %ld5, i1 -1)
+  %ctlz6 = call i16 @llvm.ctlz.i16(i16 %ld6, i1 -1)
+  %ctlz7 = call i16 @llvm.ctlz.i16(i16 %ld7, i1 -1)
+  %ctlz8 = call i16 @llvm.ctlz.i16(i16 %ld8, i1 -1)
+  %ctlz9 = call i16 @llvm.ctlz.i16(i16 %ld9, i1 -1)
+  %ctlz10 = call i16 @llvm.ctlz.i16(i16 %ld10, i1 -1)
+  %ctlz11 = call i16 @llvm.ctlz.i16(i16 %ld11, i1 -1)
+  %ctlz12 = call i16 @llvm.ctlz.i16(i16 %ld12, i1 -1)
+  %ctlz13 = call i16 @llvm.ctlz.i16(i16 %ld13, i1 -1)
+  %ctlz14 = call i16 @llvm.ctlz.i16(i16 %ld14, i1 -1)
+  %ctlz15 = call i16 @llvm.ctlz.i16(i16 %ld15, i1 -1)
+  store i16 %ctlz0 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
+  store i16 %ctlz1 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
+  store i16 %ctlz2 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
+  store i16 %ctlz3 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
+  store i16 %ctlz4 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
+  store i16 %ctlz5 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
+  store i16 %ctlz6 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
+  store i16 %ctlz7 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
+  store i16 %ctlz8 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 8), align 2
+  store i16 %ctlz9 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 9), align 2
+  store i16 %ctlz10, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 10), align 2
+  store i16 %ctlz11, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 11), align 2
+  store i16 %ctlz12, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 12), align 2
+  store i16 %ctlz13, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 13), align 2
+  store i16 %ctlz14, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 14), align 2
+  store i16 %ctlz15, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 15), align 2
+  ret void
+}
+
+; Per-lane llvm.ctlz.i8 with is_zero_undef = true ("i1 -1" == "i1 true") over
+; the first 16 of the 32 i8 lanes of @src8, results stored to @dst8.  The
+; autogenerated CHECK lines pin 16 scalar ctlz calls — i.e. the expectation is
+; that the vectorizer leaves this scalar; TODO(review) confirm the rationale
+; against the RUN lines at file top.  The extra space in "store i8 %ctlz0 ,"
+; (lanes 0-9) is column alignment with the two-digit lanes, kept verbatim.
+define void @ctlz_undef_16i8() #0 {
+; CHECK-LABEL: @ctlz_undef_16i8(
+; CHECK-NEXT:    [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 0), align 1
+; CHECK-NEXT:    [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 1), align 1
+; CHECK-NEXT:    [[LD2:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 2), align 1
+; CHECK-NEXT:    [[LD3:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 3), align 1
+; CHECK-NEXT:    [[LD4:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 4), align 1
+; CHECK-NEXT:    [[LD5:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 5), align 1
+; CHECK-NEXT:    [[LD6:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 6), align 1
+; CHECK-NEXT:    [[LD7:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 7), align 1
+; CHECK-NEXT:    [[LD8:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 8), align 1
+; CHECK-NEXT:    [[LD9:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 9), align 1
+; CHECK-NEXT:    [[LD10:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
+; CHECK-NEXT:    [[LD11:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
+; CHECK-NEXT:    [[LD12:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
+; CHECK-NEXT:    [[LD13:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
+; CHECK-NEXT:    [[LD14:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
+; CHECK-NEXT:    [[LD15:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
+; CHECK-NEXT:    [[CTLZ0:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD0]], i1 true)
+; CHECK-NEXT:    [[CTLZ1:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD1]], i1 true)
+; CHECK-NEXT:    [[CTLZ2:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD2]], i1 true)
+; CHECK-NEXT:    [[CTLZ3:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD3]], i1 true)
+; CHECK-NEXT:    [[CTLZ4:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD4]], i1 true)
+; CHECK-NEXT:    [[CTLZ5:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD5]], i1 true)
+; CHECK-NEXT:    [[CTLZ6:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD6]], i1 true)
+; CHECK-NEXT:    [[CTLZ7:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD7]], i1 true)
+; CHECK-NEXT:    [[CTLZ8:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD8]], i1 true)
+; CHECK-NEXT:    [[CTLZ9:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD9]], i1 true)
+; CHECK-NEXT:    [[CTLZ10:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD10]], i1 true)
+; CHECK-NEXT:    [[CTLZ11:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD11]], i1 true)
+; CHECK-NEXT:    [[CTLZ12:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD12]], i1 true)
+; CHECK-NEXT:    [[CTLZ13:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD13]], i1 true)
+; CHECK-NEXT:    [[CTLZ14:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD14]], i1 true)
+; CHECK-NEXT:    [[CTLZ15:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD15]], i1 true)
+; CHECK-NEXT:    store i8 [[CTLZ0]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 0), align 1
+; CHECK-NEXT:    store i8 [[CTLZ1]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 1), align 1
+; CHECK-NEXT:    store i8 [[CTLZ2]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 2), align 1
+; CHECK-NEXT:    store i8 [[CTLZ3]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 3), align 1
+; CHECK-NEXT:    store i8 [[CTLZ4]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 4), align 1
+; CHECK-NEXT:    store i8 [[CTLZ5]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 5), align 1
+; CHECK-NEXT:    store i8 [[CTLZ6]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 6), align 1
+; CHECK-NEXT:    store i8 [[CTLZ7]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 7), align 1
+; CHECK-NEXT:    store i8 [[CTLZ8]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 8), align 1
+; CHECK-NEXT:    store i8 [[CTLZ9]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 9), align 1
+; CHECK-NEXT:    store i8 [[CTLZ10]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
+; CHECK-NEXT:    store i8 [[CTLZ11]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
+; CHECK-NEXT:    store i8 [[CTLZ12]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
+; CHECK-NEXT:    store i8 [[CTLZ13]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
+; CHECK-NEXT:    store i8 [[CTLZ14]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
+; CHECK-NEXT:    store i8 [[CTLZ15]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
+; CHECK-NEXT:    ret void
+;
+  %ld0 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 0), align 1
+  %ld1 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 1), align 1
+  %ld2 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 2), align 1
+  %ld3 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 3), align 1
+  %ld4 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 4), align 1
+  %ld5 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 5), align 1
+  %ld6 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 6), align 1
+  %ld7 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 7), align 1
+  %ld8 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 8), align 1
+  %ld9 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 9), align 1
+  %ld10 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
+  %ld11 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
+  %ld12 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
+  %ld13 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
+  %ld14 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
+  %ld15 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
+  %ctlz0 = call i8 @llvm.ctlz.i8(i8 %ld0, i1 -1)
+  %ctlz1 = call i8 @llvm.ctlz.i8(i8 %ld1, i1 -1)
+  %ctlz2 = call i8 @llvm.ctlz.i8(i8 %ld2, i1 -1)
+  %ctlz3 = call i8 @llvm.ctlz.i8(i8 %ld3, i1 -1)
+  %ctlz4 = call i8 @llvm.ctlz.i8(i8 %ld4, i1 -1)
+  %ctlz5 = call i8 @llvm.ctlz.i8(i8 %ld5, i1 -1)
+  %ctlz6 = call i8 @llvm.ctlz.i8(i8 %ld6, i1 -1)
+  %ctlz7 = call i8 @llvm.ctlz.i8(i8 %ld7, i1 -1)
+  %ctlz8 = call i8 @llvm.ctlz.i8(i8 %ld8, i1 -1)
+  %ctlz9 = call i8 @llvm.ctlz.i8(i8 %ld9, i1 -1)
+  %ctlz10 = call i8 @llvm.ctlz.i8(i8 %ld10, i1 -1)
+  %ctlz11 = call i8 @llvm.ctlz.i8(i8 %ld11, i1 -1)
+  %ctlz12 = call i8 @llvm.ctlz.i8(i8 %ld12, i1 -1)
+  %ctlz13 = call i8 @llvm.ctlz.i8(i8 %ld13, i1 -1)
+  %ctlz14 = call i8 @llvm.ctlz.i8(i8 %ld14, i1 -1)
+  %ctlz15 = call i8 @llvm.ctlz.i8(i8 %ld15, i1 -1)
+  store i8 %ctlz0 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 0), align 1
+  store i8 %ctlz1 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 1), align 1
+  store i8 %ctlz2 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 2), align 1
+  store i8 %ctlz3 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 3), align 1
+  store i8 %ctlz4 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 4), align 1
+  store i8 %ctlz5 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 5), align 1
+  store i8 %ctlz6 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 6), align 1
+  store i8 %ctlz7 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 7), align 1
+  store i8 %ctlz8 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 8), align 1
+  store i8 %ctlz9 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 9), align 1
+  store i8 %ctlz10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
+  store i8 %ctlz11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
+  store i8 %ctlz12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
+  store i8 %ctlz13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
+  store i8 %ctlz14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
+  store i8 %ctlz15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
+  ret void
+}
+
+define void @ctlz_undef_32i8() #0 {
+; CHECK-LABEL: @ctlz_undef_32i8(
+; CHECK-NEXT: [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 0), align 1
+; CHECK-NEXT: [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 1), align 1
+; CHECK-NEXT: [[LD2:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 2), align 1
+; CHECK-NEXT: [[LD3:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 3), align 1
+; CHECK-NEXT: [[LD4:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 4), align 1
+; CHECK-NEXT: [[LD5:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 5), align 1
+; CHECK-NEXT: [[LD6:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 6), align 1
+; CHECK-NEXT: [[LD7:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 7), align 1
+; CHECK-NEXT: [[LD8:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 8), align 1
+; CHECK-NEXT: [[LD9:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 9), align 1
+; CHECK-NEXT: [[LD10:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
+; CHECK-NEXT: [[LD11:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
+; CHECK-NEXT: [[LD12:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
+; CHECK-NEXT: [[LD13:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
+; CHECK-NEXT: [[LD14:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
+; CHECK-NEXT: [[LD15:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
+; CHECK-NEXT: [[LD16:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16), align 1
+; CHECK-NEXT: [[LD17:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 17), align 1
+; CHECK-NEXT: [[LD18:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 18), align 1
+; CHECK-NEXT: [[LD19:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 19), align 1
+; CHECK-NEXT: [[LD20:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 20), align 1
+; CHECK-NEXT: [[LD21:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 21), align 1
+; CHECK-NEXT: [[LD22:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 22), align 1
+; CHECK-NEXT: [[LD23:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 23), align 1
+; CHECK-NEXT: [[LD24:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 24), align 1
+; CHECK-NEXT: [[LD25:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 25), align 1
+; CHECK-NEXT: [[LD26:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 26), align 1
+; CHECK-NEXT: [[LD27:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 27), align 1
+; CHECK-NEXT: [[LD28:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 28), align 1
+; CHECK-NEXT: [[LD29:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 29), align 1
+; CHECK-NEXT: [[LD30:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 30), align 1
+; CHECK-NEXT: [[LD31:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 31), align 1
+; CHECK-NEXT: [[CTLZ0:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD0]], i1 true)
+; CHECK-NEXT: [[CTLZ1:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD1]], i1 true)
+; CHECK-NEXT: [[CTLZ2:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD2]], i1 true)
+; CHECK-NEXT: [[CTLZ3:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD3]], i1 true)
+; CHECK-NEXT: [[CTLZ4:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD4]], i1 true)
+; CHECK-NEXT: [[CTLZ5:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD5]], i1 true)
+; CHECK-NEXT: [[CTLZ6:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD6]], i1 true)
+; CHECK-NEXT: [[CTLZ7:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD7]], i1 true)
+; CHECK-NEXT: [[CTLZ8:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD8]], i1 true)
+; CHECK-NEXT: [[CTLZ9:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD9]], i1 true)
+; CHECK-NEXT: [[CTLZ10:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD10]], i1 true)
+; CHECK-NEXT: [[CTLZ11:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD11]], i1 true)
+; CHECK-NEXT: [[CTLZ12:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD12]], i1 true)
+; CHECK-NEXT: [[CTLZ13:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD13]], i1 true)
+; CHECK-NEXT: [[CTLZ14:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD14]], i1 true)
+; CHECK-NEXT: [[CTLZ15:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD15]], i1 true)
+; CHECK-NEXT: [[CTLZ16:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD16]], i1 true)
+; CHECK-NEXT: [[CTLZ17:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD17]], i1 true)
+; CHECK-NEXT: [[CTLZ18:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD18]], i1 true)
+; CHECK-NEXT: [[CTLZ19:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD19]], i1 true)
+; CHECK-NEXT: [[CTLZ20:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD20]], i1 true)
+; CHECK-NEXT: [[CTLZ21:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD21]], i1 true)
+; CHECK-NEXT: [[CTLZ22:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD22]], i1 true)
+; CHECK-NEXT: [[CTLZ23:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD23]], i1 true)
+; CHECK-NEXT: [[CTLZ24:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD24]], i1 true)
+; CHECK-NEXT: [[CTLZ25:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD25]], i1 true)
+; CHECK-NEXT: [[CTLZ26:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD26]], i1 true)
+; CHECK-NEXT: [[CTLZ27:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD27]], i1 true)
+; CHECK-NEXT: [[CTLZ28:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD28]], i1 true)
+; CHECK-NEXT: [[CTLZ29:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD29]], i1 true)
+; CHECK-NEXT: [[CTLZ30:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD30]], i1 true)
+; CHECK-NEXT: [[CTLZ31:%.*]] = call i8 @llvm.ctlz.i8(i8 [[LD31]], i1 true)
+; CHECK-NEXT: store i8 [[CTLZ0]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 0), align 1
+; CHECK-NEXT: store i8 [[CTLZ1]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 1), align 1
+; CHECK-NEXT: store i8 [[CTLZ2]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 2), align 1
+; CHECK-NEXT: store i8 [[CTLZ3]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 3), align 1
+; CHECK-NEXT: store i8 [[CTLZ4]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 4), align 1
+; CHECK-NEXT: store i8 [[CTLZ5]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 5), align 1
+; CHECK-NEXT: store i8 [[CTLZ6]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 6), align 1
+; CHECK-NEXT: store i8 [[CTLZ7]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 7), align 1
+; CHECK-NEXT: store i8 [[CTLZ8]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 8), align 1
+; CHECK-NEXT: store i8 [[CTLZ9]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 9), align 1
+; CHECK-NEXT: store i8 [[CTLZ10]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
+; CHECK-NEXT: store i8 [[CTLZ11]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
+; CHECK-NEXT: store i8 [[CTLZ12]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
+; CHECK-NEXT: store i8 [[CTLZ13]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
+; CHECK-NEXT: store i8 [[CTLZ14]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
+; CHECK-NEXT: store i8 [[CTLZ15]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
+; CHECK-NEXT: store i8 [[CTLZ16]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16), align 1
+; CHECK-NEXT: store i8 [[CTLZ17]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 17), align 1
+; CHECK-NEXT: store i8 [[CTLZ18]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 18), align 1
+; CHECK-NEXT: store i8 [[CTLZ19]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 19), align 1
+; CHECK-NEXT: store i8 [[CTLZ20]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 20), align 1
+; CHECK-NEXT: store i8 [[CTLZ21]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 21), align 1
+; CHECK-NEXT: store i8 [[CTLZ22]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 22), align 1
+; CHECK-NEXT: store i8 [[CTLZ23]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 23), align 1
+; CHECK-NEXT: store i8 [[CTLZ24]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 24), align 1
+; CHECK-NEXT: store i8 [[CTLZ25]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 25), align 1
+; CHECK-NEXT: store i8 [[CTLZ26]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 26), align 1
+; CHECK-NEXT: store i8 [[CTLZ27]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 27), align 1
+; CHECK-NEXT: store i8 [[CTLZ28]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 28), align 1
+; CHECK-NEXT: store i8 [[CTLZ29]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 29), align 1
+; CHECK-NEXT: store i8 [[CTLZ30]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 30), align 1
+; CHECK-NEXT: store i8 [[CTLZ31]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 31), align 1
+; CHECK-NEXT: ret void
+;
+ %ld0 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 0), align 1
+ %ld1 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 1), align 1
+ %ld2 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 2), align 1
+ %ld3 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 3), align 1
+ %ld4 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 4), align 1
+ %ld5 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 5), align 1
+ %ld6 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 6), align 1
+ %ld7 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 7), align 1
+ %ld8 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 8), align 1
+ %ld9 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 9), align 1
+ %ld10 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
+ %ld11 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
+ %ld12 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
+ %ld13 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
+ %ld14 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
+ %ld15 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
+ %ld16 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16), align 1
+ %ld17 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 17), align 1
+ %ld18 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 18), align 1
+ %ld19 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 19), align 1
+ %ld20 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 20), align 1
+ %ld21 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 21), align 1
+ %ld22 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 22), align 1
+ %ld23 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 23), align 1
+ %ld24 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 24), align 1
+ %ld25 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 25), align 1
+ %ld26 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 26), align 1
+ %ld27 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 27), align 1
+ %ld28 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 28), align 1
+ %ld29 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 29), align 1
+ %ld30 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 30), align 1
+ %ld31 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 31), align 1
+ %ctlz0 = call i8 @llvm.ctlz.i8(i8 %ld0, i1 -1)
+ %ctlz1 = call i8 @llvm.ctlz.i8(i8 %ld1, i1 -1)
+ %ctlz2 = call i8 @llvm.ctlz.i8(i8 %ld2, i1 -1)
+ %ctlz3 = call i8 @llvm.ctlz.i8(i8 %ld3, i1 -1)
+ %ctlz4 = call i8 @llvm.ctlz.i8(i8 %ld4, i1 -1)
+ %ctlz5 = call i8 @llvm.ctlz.i8(i8 %ld5, i1 -1)
+ %ctlz6 = call i8 @llvm.ctlz.i8(i8 %ld6, i1 -1)
+ %ctlz7 = call i8 @llvm.ctlz.i8(i8 %ld7, i1 -1)
+ %ctlz8 = call i8 @llvm.ctlz.i8(i8 %ld8, i1 -1)
+ %ctlz9 = call i8 @llvm.ctlz.i8(i8 %ld9, i1 -1)
+ %ctlz10 = call i8 @llvm.ctlz.i8(i8 %ld10, i1 -1)
+ %ctlz11 = call i8 @llvm.ctlz.i8(i8 %ld11, i1 -1)
+ %ctlz12 = call i8 @llvm.ctlz.i8(i8 %ld12, i1 -1)
+ %ctlz13 = call i8 @llvm.ctlz.i8(i8 %ld13, i1 -1)
+ %ctlz14 = call i8 @llvm.ctlz.i8(i8 %ld14, i1 -1)
+ %ctlz15 = call i8 @llvm.ctlz.i8(i8 %ld15, i1 -1)
+ %ctlz16 = call i8 @llvm.ctlz.i8(i8 %ld16, i1 -1)
+ %ctlz17 = call i8 @llvm.ctlz.i8(i8 %ld17, i1 -1)
+ %ctlz18 = call i8 @llvm.ctlz.i8(i8 %ld18, i1 -1)
+ %ctlz19 = call i8 @llvm.ctlz.i8(i8 %ld19, i1 -1)
+ %ctlz20 = call i8 @llvm.ctlz.i8(i8 %ld20, i1 -1)
+ %ctlz21 = call i8 @llvm.ctlz.i8(i8 %ld21, i1 -1)
+ %ctlz22 = call i8 @llvm.ctlz.i8(i8 %ld22, i1 -1)
+ %ctlz23 = call i8 @llvm.ctlz.i8(i8 %ld23, i1 -1)
+ %ctlz24 = call i8 @llvm.ctlz.i8(i8 %ld24, i1 -1)
+ %ctlz25 = call i8 @llvm.ctlz.i8(i8 %ld25, i1 -1)
+ %ctlz26 = call i8 @llvm.ctlz.i8(i8 %ld26, i1 -1)
+ %ctlz27 = call i8 @llvm.ctlz.i8(i8 %ld27, i1 -1)
+ %ctlz28 = call i8 @llvm.ctlz.i8(i8 %ld28, i1 -1)
+ %ctlz29 = call i8 @llvm.ctlz.i8(i8 %ld29, i1 -1)
+ %ctlz30 = call i8 @llvm.ctlz.i8(i8 %ld30, i1 -1)
+ %ctlz31 = call i8 @llvm.ctlz.i8(i8 %ld31, i1 -1)
+ store i8 %ctlz0 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 0), align 1
+ store i8 %ctlz1 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 1), align 1
+ store i8 %ctlz2 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 2), align 1
+ store i8 %ctlz3 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 3), align 1
+ store i8 %ctlz4 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 4), align 1
+ store i8 %ctlz5 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 5), align 1
+ store i8 %ctlz6 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 6), align 1
+ store i8 %ctlz7 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 7), align 1
+ store i8 %ctlz8 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 8), align 1
+ store i8 %ctlz9 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 9), align 1
+ store i8 %ctlz10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
+ store i8 %ctlz11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
+ store i8 %ctlz12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
+ store i8 %ctlz13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
+ store i8 %ctlz14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
+ store i8 %ctlz15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
+ store i8 %ctlz16, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16), align 1
+ store i8 %ctlz17, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 17), align 1
+ store i8 %ctlz18, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 18), align 1
+ store i8 %ctlz19, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 19), align 1
+ store i8 %ctlz20, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 20), align 1
+ store i8 %ctlz21, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 21), align 1
+ store i8 %ctlz22, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 22), align 1
+ store i8 %ctlz23, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 23), align 1
+ store i8 %ctlz24, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 24), align 1
+ store i8 %ctlz25, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 25), align 1
+ store i8 %ctlz26, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 26), align 1
+ store i8 %ctlz27, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 27), align 1
+ store i8 %ctlz28, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 28), align 1
+ store i8 %ctlz29, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 29), align 1
+ store i8 %ctlz30, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 30), align 1
+ store i8 %ctlz31, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 31), align 1
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/Transforms/SLPVectorizer/X86/ctpop.ll b/test/Transforms/SLPVectorizer/X86/ctpop.ll
new file mode 100644
index 000000000000..b3cfc6a77de5
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/ctpop.ll
@@ -0,0 +1,403 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@src64 = common global [4 x i64] zeroinitializer, align 32
+@dst64 = common global [4 x i64] zeroinitializer, align 32
+@src32 = common global [8 x i32] zeroinitializer, align 32
+@dst32 = common global [8 x i32] zeroinitializer, align 32
+@src16 = common global [16 x i16] zeroinitializer, align 32
+@dst16 = common global [16 x i16] zeroinitializer, align 32
+@src8 = common global [32 x i8] zeroinitializer, align 32
+@dst8 = common global [32 x i8] zeroinitializer, align 32
+
+declare i64 @llvm.ctpop.i64(i64)
+declare i32 @llvm.ctpop.i32(i32)
+declare i16 @llvm.ctpop.i16(i16)
+declare i8 @llvm.ctpop.i8(i8)
+
+define void @ctpop_2i64() #0 {
+; CHECK-LABEL: @ctpop_2i64(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([4 x i64]* @src64 to <2 x i64>*), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> [[TMP1]])
+; CHECK-NEXT: store <2 x i64> [[TMP2]], <2 x i64>* bitcast ([4 x i64]* @dst64 to <2 x i64>*), align 8
+; CHECK-NEXT: ret void
+;
+ %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
+ %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+ %ctpop0 = call i64 @llvm.ctpop.i64(i64 %ld0)
+ %ctpop1 = call i64 @llvm.ctpop.i64(i64 %ld1)
+ store i64 %ctpop0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
+ store i64 %ctpop1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+ ret void
+}
+
+define void @ctpop_4i64() #0 {
+; SSE-LABEL: @ctpop_4i64(
+; SSE-NEXT: [[TMP1:%.*]] = load <2 x i64>, <2 x i64>* bitcast ([4 x i64]* @src64 to <2 x i64>*), align 4
+; SSE-NEXT: [[TMP2:%.*]] = load <2 x i64>, <2 x i64>* bitcast (i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2) to <2 x i64>*), align 4
+; SSE-NEXT: [[TMP3:%.*]] = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> [[TMP1]])
+; SSE-NEXT: [[TMP4:%.*]] = call <2 x i64> @llvm.ctpop.v2i64(<2 x i64> [[TMP2]])
+; SSE-NEXT: store <2 x i64> [[TMP3]], <2 x i64>* bitcast ([4 x i64]* @dst64 to <2 x i64>*), align 4
+; SSE-NEXT: store <2 x i64> [[TMP4]], <2 x i64>* bitcast (i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2) to <2 x i64>*), align 4
+; SSE-NEXT: ret void
+;
+; AVX-LABEL: @ctpop_4i64(
+; AVX-NEXT: [[TMP1:%.*]] = load <4 x i64>, <4 x i64>* bitcast ([4 x i64]* @src64 to <4 x i64>*), align 4
+; AVX-NEXT: [[TMP2:%.*]] = call <4 x i64> @llvm.ctpop.v4i64(<4 x i64> [[TMP1]])
+; AVX-NEXT: store <4 x i64> [[TMP2]], <4 x i64>* bitcast ([4 x i64]* @dst64 to <4 x i64>*), align 4
+; AVX-NEXT: ret void
+;
+ %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
+ %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
+ %ld2 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
+ %ld3 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+ %ctpop0 = call i64 @llvm.ctpop.i64(i64 %ld0)
+ %ctpop1 = call i64 @llvm.ctpop.i64(i64 %ld1)
+ %ctpop2 = call i64 @llvm.ctpop.i64(i64 %ld2)
+ %ctpop3 = call i64 @llvm.ctpop.i64(i64 %ld3)
+ store i64 %ctpop0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
+ store i64 %ctpop1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
+ store i64 %ctpop2, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
+ store i64 %ctpop3, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+ ret void
+}
+
+define void @ctpop_4i32() #0 {
+; CHECK-LABEL: @ctpop_4i32(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 4
+; CHECK-NEXT: [[TMP2:%.*]] = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> [[TMP1]])
+; CHECK-NEXT: store <4 x i32> [[TMP2]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 4
+; CHECK-NEXT: ret void
+;
+ %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
+ %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
+ %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
+ %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
+ %ctpop0 = call i32 @llvm.ctpop.i32(i32 %ld0)
+ %ctpop1 = call i32 @llvm.ctpop.i32(i32 %ld1)
+ %ctpop2 = call i32 @llvm.ctpop.i32(i32 %ld2)
+ %ctpop3 = call i32 @llvm.ctpop.i32(i32 %ld3)
+ store i32 %ctpop0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
+ store i32 %ctpop1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
+ store i32 %ctpop2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
+ store i32 %ctpop3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
+ ret void
+}
+
+define void @ctpop_8i32() #0 {
+; SSE-LABEL: @ctpop_8i32(
+; SSE-NEXT: [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* bitcast ([8 x i32]* @src32 to <4 x i32>*), align 2
+; SSE-NEXT: [[TMP2:%.*]] = load <4 x i32>, <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4) to <4 x i32>*), align 2
+; SSE-NEXT: [[TMP3:%.*]] = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> [[TMP1]])
+; SSE-NEXT: [[TMP4:%.*]] = call <4 x i32> @llvm.ctpop.v4i32(<4 x i32> [[TMP2]])
+; SSE-NEXT: store <4 x i32> [[TMP3]], <4 x i32>* bitcast ([8 x i32]* @dst32 to <4 x i32>*), align 2
+; SSE-NEXT: store <4 x i32> [[TMP4]], <4 x i32>* bitcast (i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4) to <4 x i32>*), align 2
+; SSE-NEXT: ret void
+;
+; AVX-LABEL: @ctpop_8i32(
+; AVX-NEXT: [[TMP1:%.*]] = load <8 x i32>, <8 x i32>* bitcast ([8 x i32]* @src32 to <8 x i32>*), align 2
+; AVX-NEXT: [[TMP2:%.*]] = call <8 x i32> @llvm.ctpop.v8i32(<8 x i32> [[TMP1]])
+; AVX-NEXT: store <8 x i32> [[TMP2]], <8 x i32>* bitcast ([8 x i32]* @dst32 to <8 x i32>*), align 2
+; AVX-NEXT: ret void
+;
+ %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
+ %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
+ %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
+ %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
+ %ld4 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
+ %ld5 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
+ %ld6 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
+ %ld7 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
+ %ctpop0 = call i32 @llvm.ctpop.i32(i32 %ld0)
+ %ctpop1 = call i32 @llvm.ctpop.i32(i32 %ld1)
+ %ctpop2 = call i32 @llvm.ctpop.i32(i32 %ld2)
+ %ctpop3 = call i32 @llvm.ctpop.i32(i32 %ld3)
+ %ctpop4 = call i32 @llvm.ctpop.i32(i32 %ld4)
+ %ctpop5 = call i32 @llvm.ctpop.i32(i32 %ld5)
+ %ctpop6 = call i32 @llvm.ctpop.i32(i32 %ld6)
+ %ctpop7 = call i32 @llvm.ctpop.i32(i32 %ld7)
+ store i32 %ctpop0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
+ store i32 %ctpop1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
+ store i32 %ctpop2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
+ store i32 %ctpop3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
+ store i32 %ctpop4, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
+ store i32 %ctpop5, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
+ store i32 %ctpop6, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
+ store i32 %ctpop7, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
+ ret void
+}
+
+define void @ctpop_8i16() #0 {
+; CHECK-LABEL: @ctpop_8i16(
+; CHECK-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([16 x i16]* @src16 to <8 x i16>*), align 2
+; CHECK-NEXT: [[TMP2:%.*]] = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> [[TMP1]])
+; CHECK-NEXT: store <8 x i16> [[TMP2]], <8 x i16>* bitcast ([16 x i16]* @dst16 to <8 x i16>*), align 2
+; CHECK-NEXT: ret void
+;
+ %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
+ %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
+ %ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
+ %ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
+ %ld4 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
+ %ld5 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
+ %ld6 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
+ %ld7 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
+ %ctpop0 = call i16 @llvm.ctpop.i16(i16 %ld0)
+ %ctpop1 = call i16 @llvm.ctpop.i16(i16 %ld1)
+ %ctpop2 = call i16 @llvm.ctpop.i16(i16 %ld2)
+ %ctpop3 = call i16 @llvm.ctpop.i16(i16 %ld3)
+ %ctpop4 = call i16 @llvm.ctpop.i16(i16 %ld4)
+ %ctpop5 = call i16 @llvm.ctpop.i16(i16 %ld5)
+ %ctpop6 = call i16 @llvm.ctpop.i16(i16 %ld6)
+ %ctpop7 = call i16 @llvm.ctpop.i16(i16 %ld7)
+ store i16 %ctpop0, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
+ store i16 %ctpop1, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
+ store i16 %ctpop2, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
+ store i16 %ctpop3, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
+ store i16 %ctpop4, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
+ store i16 %ctpop5, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
+ store i16 %ctpop6, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
+ store i16 %ctpop7, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
+ ret void
+}
+
+define void @ctpop_16i16() #0 {
+; SSE-LABEL: @ctpop_16i16(
+; SSE-NEXT: [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* bitcast ([16 x i16]* @src16 to <8 x i16>*), align 2
+; SSE-NEXT: [[TMP2:%.*]] = load <8 x i16>, <8 x i16>* bitcast (i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT: [[TMP3:%.*]] = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> [[TMP1]])
+; SSE-NEXT: [[TMP4:%.*]] = call <8 x i16> @llvm.ctpop.v8i16(<8 x i16> [[TMP2]])
+; SSE-NEXT: store <8 x i16> [[TMP3]], <8 x i16>* bitcast ([16 x i16]* @dst16 to <8 x i16>*), align 2
+; SSE-NEXT: store <8 x i16> [[TMP4]], <8 x i16>* bitcast (i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 8) to <8 x i16>*), align 2
+; SSE-NEXT: ret void
+;
+; AVX-LABEL: @ctpop_16i16(
+; AVX-NEXT: [[TMP1:%.*]] = load <16 x i16>, <16 x i16>* bitcast ([16 x i16]* @src16 to <16 x i16>*), align 2
+; AVX-NEXT: [[TMP2:%.*]] = call <16 x i16> @llvm.ctpop.v16i16(<16 x i16> [[TMP1]])
+; AVX-NEXT: store <16 x i16> [[TMP2]], <16 x i16>* bitcast ([16 x i16]* @dst16 to <16 x i16>*), align 2
+; AVX-NEXT: ret void
+;
+ %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
+ %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
+ %ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
+ %ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
+ %ld4 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
+ %ld5 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
+ %ld6 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
+ %ld7 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
+ %ld8 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 8), align 2
+ %ld9 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 9), align 2
+ %ld10 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 10), align 2
+ %ld11 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 11), align 2
+ %ld12 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 12), align 2
+ %ld13 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 13), align 2
+ %ld14 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 14), align 2
+ %ld15 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 15), align 2
+ %ctpop0 = call i16 @llvm.ctpop.i16(i16 %ld0)
+ %ctpop1 = call i16 @llvm.ctpop.i16(i16 %ld1)
+ %ctpop2 = call i16 @llvm.ctpop.i16(i16 %ld2)
+ %ctpop3 = call i16 @llvm.ctpop.i16(i16 %ld3)
+ %ctpop4 = call i16 @llvm.ctpop.i16(i16 %ld4)
+ %ctpop5 = call i16 @llvm.ctpop.i16(i16 %ld5)
+ %ctpop6 = call i16 @llvm.ctpop.i16(i16 %ld6)
+ %ctpop7 = call i16 @llvm.ctpop.i16(i16 %ld7)
+ %ctpop8 = call i16 @llvm.ctpop.i16(i16 %ld8)
+ %ctpop9 = call i16 @llvm.ctpop.i16(i16 %ld9)
+ %ctpop10 = call i16 @llvm.ctpop.i16(i16 %ld10)
+ %ctpop11 = call i16 @llvm.ctpop.i16(i16 %ld11)
+ %ctpop12 = call i16 @llvm.ctpop.i16(i16 %ld12)
+ %ctpop13 = call i16 @llvm.ctpop.i16(i16 %ld13)
+ %ctpop14 = call i16 @llvm.ctpop.i16(i16 %ld14)
+ %ctpop15 = call i16 @llvm.ctpop.i16(i16 %ld15)
+ store i16 %ctpop0 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
+ store i16 %ctpop1 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
+ store i16 %ctpop2 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
+ store i16 %ctpop3 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
+ store i16 %ctpop4 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
+ store i16 %ctpop5 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
+ store i16 %ctpop6 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
+ store i16 %ctpop7 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
+ store i16 %ctpop8 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 8), align 2
+ store i16 %ctpop9 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 9), align 2
+ store i16 %ctpop10, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 10), align 2
+ store i16 %ctpop11, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 11), align 2
+ store i16 %ctpop12, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 12), align 2
+ store i16 %ctpop13, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 13), align 2
+ store i16 %ctpop14, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 14), align 2
+ store i16 %ctpop15, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 15), align 2
+ ret void
+}
+
+define void @ctpop_16i8() #0 {
+; CHECK-LABEL: @ctpop_16i8(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([32 x i8]* @src8 to <16 x i8>*), align 1
+; CHECK-NEXT: [[TMP2:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> [[TMP1]])
+; CHECK-NEXT: store <16 x i8> [[TMP2]], <16 x i8>* bitcast ([32 x i8]* @dst8 to <16 x i8>*), align 1
+; CHECK-NEXT: ret void
+;
+ %ld0 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 0), align 1
+ %ld1 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 1), align 1
+ %ld2 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 2), align 1
+ %ld3 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 3), align 1
+ %ld4 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 4), align 1
+ %ld5 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 5), align 1
+ %ld6 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 6), align 1
+ %ld7 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 7), align 1
+ %ld8 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 8), align 1
+ %ld9 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 9), align 1
+ %ld10 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
+ %ld11 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
+ %ld12 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
+ %ld13 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
+ %ld14 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
+ %ld15 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
+ %ctpop0 = call i8 @llvm.ctpop.i8(i8 %ld0)
+ %ctpop1 = call i8 @llvm.ctpop.i8(i8 %ld1)
+ %ctpop2 = call i8 @llvm.ctpop.i8(i8 %ld2)
+ %ctpop3 = call i8 @llvm.ctpop.i8(i8 %ld3)
+ %ctpop4 = call i8 @llvm.ctpop.i8(i8 %ld4)
+ %ctpop5 = call i8 @llvm.ctpop.i8(i8 %ld5)
+ %ctpop6 = call i8 @llvm.ctpop.i8(i8 %ld6)
+ %ctpop7 = call i8 @llvm.ctpop.i8(i8 %ld7)
+ %ctpop8 = call i8 @llvm.ctpop.i8(i8 %ld8)
+ %ctpop9 = call i8 @llvm.ctpop.i8(i8 %ld9)
+ %ctpop10 = call i8 @llvm.ctpop.i8(i8 %ld10)
+ %ctpop11 = call i8 @llvm.ctpop.i8(i8 %ld11)
+ %ctpop12 = call i8 @llvm.ctpop.i8(i8 %ld12)
+ %ctpop13 = call i8 @llvm.ctpop.i8(i8 %ld13)
+ %ctpop14 = call i8 @llvm.ctpop.i8(i8 %ld14)
+ %ctpop15 = call i8 @llvm.ctpop.i8(i8 %ld15)
+ store i8 %ctpop0 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 0), align 1
+ store i8 %ctpop1 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 1), align 1
+ store i8 %ctpop2 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 2), align 1
+ store i8 %ctpop3 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 3), align 1
+ store i8 %ctpop4 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 4), align 1
+ store i8 %ctpop5 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 5), align 1
+ store i8 %ctpop6 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 6), align 1
+ store i8 %ctpop7 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 7), align 1
+ store i8 %ctpop8 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 8), align 1
+ store i8 %ctpop9 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 9), align 1
+ store i8 %ctpop10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
+ store i8 %ctpop11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
+ store i8 %ctpop12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
+ store i8 %ctpop13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
+ store i8 %ctpop14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
+ store i8 %ctpop15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
+ ret void
+}
+
+define void @ctpop_32i8() #0 {
+; CHECK-LABEL: @ctpop_32i8(
+; CHECK-NEXT: [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* bitcast ([32 x i8]* @src8 to <16 x i8>*), align 1
+; CHECK-NEXT: [[TMP2:%.*]] = load <16 x i8>, <16 x i8>* bitcast (i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16) to <16 x i8>*), align 1
+; CHECK-NEXT: [[TMP3:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> [[TMP1]])
+; CHECK-NEXT: [[TMP4:%.*]] = call <16 x i8> @llvm.ctpop.v16i8(<16 x i8> [[TMP2]])
+; CHECK-NEXT: store <16 x i8> [[TMP3]], <16 x i8>* bitcast ([32 x i8]* @dst8 to <16 x i8>*), align 1
+; CHECK-NEXT: store <16 x i8> [[TMP4]], <16 x i8>* bitcast (i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16) to <16 x i8>*), align 1
+; CHECK-NEXT: ret void
+;
+ %ld0 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 0), align 1
+ %ld1 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 1), align 1
+ %ld2 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 2), align 1
+ %ld3 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 3), align 1
+ %ld4 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 4), align 1
+ %ld5 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 5), align 1
+ %ld6 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 6), align 1
+ %ld7 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 7), align 1
+ %ld8 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 8), align 1
+ %ld9 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 9), align 1
+ %ld10 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
+ %ld11 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
+ %ld12 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
+ %ld13 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
+ %ld14 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
+ %ld15 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
+ %ld16 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16), align 1
+ %ld17 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 17), align 1
+ %ld18 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 18), align 1
+ %ld19 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 19), align 1
+ %ld20 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 20), align 1
+ %ld21 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 21), align 1
+ %ld22 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 22), align 1
+ %ld23 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 23), align 1
+ %ld24 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 24), align 1
+ %ld25 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 25), align 1
+ %ld26 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 26), align 1
+ %ld27 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 27), align 1
+ %ld28 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 28), align 1
+ %ld29 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 29), align 1
+ %ld30 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 30), align 1
+ %ld31 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 31), align 1
+ %ctpop0 = call i8 @llvm.ctpop.i8(i8 %ld0)
+ %ctpop1 = call i8 @llvm.ctpop.i8(i8 %ld1)
+ %ctpop2 = call i8 @llvm.ctpop.i8(i8 %ld2)
+ %ctpop3 = call i8 @llvm.ctpop.i8(i8 %ld3)
+ %ctpop4 = call i8 @llvm.ctpop.i8(i8 %ld4)
+ %ctpop5 = call i8 @llvm.ctpop.i8(i8 %ld5)
+ %ctpop6 = call i8 @llvm.ctpop.i8(i8 %ld6)
+ %ctpop7 = call i8 @llvm.ctpop.i8(i8 %ld7)
+ %ctpop8 = call i8 @llvm.ctpop.i8(i8 %ld8)
+ %ctpop9 = call i8 @llvm.ctpop.i8(i8 %ld9)
+ %ctpop10 = call i8 @llvm.ctpop.i8(i8 %ld10)
+ %ctpop11 = call i8 @llvm.ctpop.i8(i8 %ld11)
+ %ctpop12 = call i8 @llvm.ctpop.i8(i8 %ld12)
+ %ctpop13 = call i8 @llvm.ctpop.i8(i8 %ld13)
+ %ctpop14 = call i8 @llvm.ctpop.i8(i8 %ld14)
+ %ctpop15 = call i8 @llvm.ctpop.i8(i8 %ld15)
+ %ctpop16 = call i8 @llvm.ctpop.i8(i8 %ld16)
+ %ctpop17 = call i8 @llvm.ctpop.i8(i8 %ld17)
+ %ctpop18 = call i8 @llvm.ctpop.i8(i8 %ld18)
+ %ctpop19 = call i8 @llvm.ctpop.i8(i8 %ld19)
+ %ctpop20 = call i8 @llvm.ctpop.i8(i8 %ld20)
+ %ctpop21 = call i8 @llvm.ctpop.i8(i8 %ld21)
+ %ctpop22 = call i8 @llvm.ctpop.i8(i8 %ld22)
+ %ctpop23 = call i8 @llvm.ctpop.i8(i8 %ld23)
+ %ctpop24 = call i8 @llvm.ctpop.i8(i8 %ld24)
+ %ctpop25 = call i8 @llvm.ctpop.i8(i8 %ld25)
+ %ctpop26 = call i8 @llvm.ctpop.i8(i8 %ld26)
+ %ctpop27 = call i8 @llvm.ctpop.i8(i8 %ld27)
+ %ctpop28 = call i8 @llvm.ctpop.i8(i8 %ld28)
+ %ctpop29 = call i8 @llvm.ctpop.i8(i8 %ld29)
+ %ctpop30 = call i8 @llvm.ctpop.i8(i8 %ld30)
+ %ctpop31 = call i8 @llvm.ctpop.i8(i8 %ld31)
+ store i8 %ctpop0 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 0), align 1
+ store i8 %ctpop1 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 1), align 1
+ store i8 %ctpop2 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 2), align 1
+ store i8 %ctpop3 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 3), align 1
+ store i8 %ctpop4 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 4), align 1
+ store i8 %ctpop5 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 5), align 1
+ store i8 %ctpop6 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 6), align 1
+ store i8 %ctpop7 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 7), align 1
+ store i8 %ctpop8 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 8), align 1
+ store i8 %ctpop9 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 9), align 1
+ store i8 %ctpop10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
+ store i8 %ctpop11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
+ store i8 %ctpop12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
+ store i8 %ctpop13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
+ store i8 %ctpop14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
+ store i8 %ctpop15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
+ store i8 %ctpop16, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16), align 1
+ store i8 %ctpop17, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 17), align 1
+ store i8 %ctpop18, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 18), align 1
+ store i8 %ctpop19, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 19), align 1
+ store i8 %ctpop20, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 20), align 1
+ store i8 %ctpop21, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 21), align 1
+ store i8 %ctpop22, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 22), align 1
+ store i8 %ctpop23, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 23), align 1
+ store i8 %ctpop24, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 24), align 1
+ store i8 %ctpop25, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 25), align 1
+ store i8 %ctpop26, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 26), align 1
+ store i8 %ctpop27, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 27), align 1
+ store i8 %ctpop28, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 28), align 1
+ store i8 %ctpop29, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 29), align 1
+ store i8 %ctpop30, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 30), align 1
+ store i8 %ctpop31, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 31), align 1
+ ret void
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/Transforms/SLPVectorizer/X86/cttz.ll b/test/Transforms/SLPVectorizer/X86/cttz.ll
new file mode 100644
index 000000000000..1ede06524935
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/cttz.ll
@@ -0,0 +1,1222 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@src64 = common global [4 x i64] zeroinitializer, align 32
+@dst64 = common global [4 x i64] zeroinitializer, align 32
+@src32 = common global [8 x i32] zeroinitializer, align 32
+@dst32 = common global [8 x i32] zeroinitializer, align 32
+@src16 = common global [16 x i16] zeroinitializer, align 32
+@dst16 = common global [16 x i16] zeroinitializer, align 32
+@src8 = common global [32 x i8] zeroinitializer, align 32
+@dst8 = common global [32 x i8] zeroinitializer, align 32
+
+declare i64 @llvm.cttz.i64(i64, i1)
+declare i32 @llvm.cttz.i32(i32, i1)
+declare i16 @llvm.cttz.i16(i16, i1)
+declare i8 @llvm.cttz.i8(i8, i1)
+
+;
+; CTTZ
+;
+
+define void @cttz_2i64() #0 {
+; CHECK-LABEL: @cttz_2i64(
+; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
+; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; CHECK-NEXT: [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 false)
+; CHECK-NEXT: [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 false)
+; CHECK-NEXT: store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
+; CHECK-NEXT: store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; CHECK-NEXT: ret void
+;
+ %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
+ %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+ %cttz0 = call i64 @llvm.cttz.i64(i64 %ld0, i1 0)
+ %cttz1 = call i64 @llvm.cttz.i64(i64 %ld1, i1 0)
+ store i64 %cttz0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
+ store i64 %cttz1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+ ret void
+}
+
+define void @cttz_4i64() #0 {
+; CHECK-LABEL: @cttz_4i64(
+; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
+; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
+; CHECK-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
+; CHECK-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+; CHECK-NEXT: [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 false)
+; CHECK-NEXT: [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 false)
+; CHECK-NEXT: [[CTTZ2:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD2]], i1 false)
+; CHECK-NEXT: [[CTTZ3:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD3]], i1 false)
+; CHECK-NEXT: store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
+; CHECK-NEXT: store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
+; CHECK-NEXT: store i64 [[CTTZ2]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
+; CHECK-NEXT: store i64 [[CTTZ3]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+; CHECK-NEXT: ret void
+;
+ %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
+ %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
+ %ld2 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
+ %ld3 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+ %cttz0 = call i64 @llvm.cttz.i64(i64 %ld0, i1 0)
+ %cttz1 = call i64 @llvm.cttz.i64(i64 %ld1, i1 0)
+ %cttz2 = call i64 @llvm.cttz.i64(i64 %ld2, i1 0)
+ %cttz3 = call i64 @llvm.cttz.i64(i64 %ld3, i1 0)
+ store i64 %cttz0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
+ store i64 %cttz1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
+ store i64 %cttz2, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
+ store i64 %cttz3, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+ ret void
+}
+
+define void @cttz_4i32() #0 {
+; CHECK-LABEL: @cttz_4i32(
+; CHECK-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
+; CHECK-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
+; CHECK-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
+; CHECK-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
+; CHECK-NEXT: [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 false)
+; CHECK-NEXT: [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 false)
+; CHECK-NEXT: [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 false)
+; CHECK-NEXT: [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 false)
+; CHECK-NEXT: store i32 [[CTTZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
+; CHECK-NEXT: store i32 [[CTTZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
+; CHECK-NEXT: store i32 [[CTTZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
+; CHECK-NEXT: store i32 [[CTTZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
+; CHECK-NEXT: ret void
+;
+ %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
+ %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
+ %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
+ %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
+ %cttz0 = call i32 @llvm.cttz.i32(i32 %ld0, i1 0)
+ %cttz1 = call i32 @llvm.cttz.i32(i32 %ld1, i1 0)
+ %cttz2 = call i32 @llvm.cttz.i32(i32 %ld2, i1 0)
+ %cttz3 = call i32 @llvm.cttz.i32(i32 %ld3, i1 0)
+ store i32 %cttz0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
+ store i32 %cttz1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
+ store i32 %cttz2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
+ store i32 %cttz3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
+ ret void
+}
+
+define void @cttz_8i32() #0 {
+; CHECK-LABEL: @cttz_8i32(
+; CHECK-NEXT: [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
+; CHECK-NEXT: [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
+; CHECK-NEXT: [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
+; CHECK-NEXT: [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
+; CHECK-NEXT: [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
+; CHECK-NEXT: [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
+; CHECK-NEXT: [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
+; CHECK-NEXT: [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
+; CHECK-NEXT: [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 false)
+; CHECK-NEXT: [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 false)
+; CHECK-NEXT: [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 false)
+; CHECK-NEXT: [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 false)
+; CHECK-NEXT: [[CTTZ4:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD4]], i1 false)
+; CHECK-NEXT: [[CTTZ5:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD5]], i1 false)
+; CHECK-NEXT: [[CTTZ6:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD6]], i1 false)
+; CHECK-NEXT: [[CTTZ7:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD7]], i1 false)
+; CHECK-NEXT: store i32 [[CTTZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
+; CHECK-NEXT: store i32 [[CTTZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
+; CHECK-NEXT: store i32 [[CTTZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
+; CHECK-NEXT: store i32 [[CTTZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
+; CHECK-NEXT: store i32 [[CTTZ4]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
+; CHECK-NEXT: store i32 [[CTTZ5]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
+; CHECK-NEXT: store i32 [[CTTZ6]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
+; CHECK-NEXT: store i32 [[CTTZ7]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
+; CHECK-NEXT: ret void
+;
+ %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
+ %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
+ %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
+ %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
+ %ld4 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
+ %ld5 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
+ %ld6 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
+ %ld7 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
+ %cttz0 = call i32 @llvm.cttz.i32(i32 %ld0, i1 0)
+ %cttz1 = call i32 @llvm.cttz.i32(i32 %ld1, i1 0)
+ %cttz2 = call i32 @llvm.cttz.i32(i32 %ld2, i1 0)
+ %cttz3 = call i32 @llvm.cttz.i32(i32 %ld3, i1 0)
+ %cttz4 = call i32 @llvm.cttz.i32(i32 %ld4, i1 0)
+ %cttz5 = call i32 @llvm.cttz.i32(i32 %ld5, i1 0)
+ %cttz6 = call i32 @llvm.cttz.i32(i32 %ld6, i1 0)
+ %cttz7 = call i32 @llvm.cttz.i32(i32 %ld7, i1 0)
+ store i32 %cttz0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
+ store i32 %cttz1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
+ store i32 %cttz2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
+ store i32 %cttz3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
+ store i32 %cttz4, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
+ store i32 %cttz5, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
+ store i32 %cttz6, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
+ store i32 %cttz7, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
+ ret void
+}
+
+define void @cttz_8i16() #0 {
+; CHECK-LABEL: @cttz_8i16(
+; CHECK-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
+; CHECK-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
+; CHECK-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
+; CHECK-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
+; CHECK-NEXT: [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
+; CHECK-NEXT: [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
+; CHECK-NEXT: [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
+; CHECK-NEXT: [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
+; CHECK-NEXT: [[CTTZ0:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD0]], i1 false)
+; CHECK-NEXT: [[CTTZ1:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD1]], i1 false)
+; CHECK-NEXT: [[CTTZ2:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD2]], i1 false)
+; CHECK-NEXT: [[CTTZ3:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD3]], i1 false)
+; CHECK-NEXT: [[CTTZ4:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD4]], i1 false)
+; CHECK-NEXT: [[CTTZ5:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD5]], i1 false)
+; CHECK-NEXT: [[CTTZ6:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD6]], i1 false)
+; CHECK-NEXT: [[CTTZ7:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD7]], i1 false)
+; CHECK-NEXT: store i16 [[CTTZ0]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
+; CHECK-NEXT: store i16 [[CTTZ1]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
+; CHECK-NEXT: store i16 [[CTTZ2]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
+; CHECK-NEXT: store i16 [[CTTZ3]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
+; CHECK-NEXT: store i16 [[CTTZ4]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
+; CHECK-NEXT: store i16 [[CTTZ5]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
+; CHECK-NEXT: store i16 [[CTTZ6]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
+; CHECK-NEXT: store i16 [[CTTZ7]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
+; CHECK-NEXT: ret void
+;
+ %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
+ %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
+ %ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
+ %ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
+ %ld4 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
+ %ld5 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
+ %ld6 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
+ %ld7 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
+ %cttz0 = call i16 @llvm.cttz.i16(i16 %ld0, i1 0)
+ %cttz1 = call i16 @llvm.cttz.i16(i16 %ld1, i1 0)
+ %cttz2 = call i16 @llvm.cttz.i16(i16 %ld2, i1 0)
+ %cttz3 = call i16 @llvm.cttz.i16(i16 %ld3, i1 0)
+ %cttz4 = call i16 @llvm.cttz.i16(i16 %ld4, i1 0)
+ %cttz5 = call i16 @llvm.cttz.i16(i16 %ld5, i1 0)
+ %cttz6 = call i16 @llvm.cttz.i16(i16 %ld6, i1 0)
+ %cttz7 = call i16 @llvm.cttz.i16(i16 %ld7, i1 0)
+ store i16 %cttz0, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
+ store i16 %cttz1, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
+ store i16 %cttz2, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
+ store i16 %cttz3, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
+ store i16 %cttz4, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
+ store i16 %cttz5, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
+ store i16 %cttz6, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
+ store i16 %cttz7, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
+ ret void
+}
+
+define void @cttz_16i16() #0 {
+; CHECK-LABEL: @cttz_16i16(
+; CHECK-NEXT: [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
+; CHECK-NEXT: [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
+; CHECK-NEXT: [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
+; CHECK-NEXT: [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
+; CHECK-NEXT: [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
+; CHECK-NEXT: [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
+; CHECK-NEXT: [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
+; CHECK-NEXT: [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
+; CHECK-NEXT: [[LD8:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 8), align 2
+; CHECK-NEXT: [[LD9:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 9), align 2
+; CHECK-NEXT: [[LD10:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 10), align 2
+; CHECK-NEXT: [[LD11:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 11), align 2
+; CHECK-NEXT: [[LD12:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 12), align 2
+; CHECK-NEXT: [[LD13:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 13), align 2
+; CHECK-NEXT: [[LD14:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 14), align 2
+; CHECK-NEXT: [[LD15:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 15), align 2
+; CHECK-NEXT: [[CTTZ0:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD0]], i1 false)
+; CHECK-NEXT: [[CTTZ1:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD1]], i1 false)
+; CHECK-NEXT: [[CTTZ2:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD2]], i1 false)
+; CHECK-NEXT: [[CTTZ3:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD3]], i1 false)
+; CHECK-NEXT: [[CTTZ4:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD4]], i1 false)
+; CHECK-NEXT: [[CTTZ5:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD5]], i1 false)
+; CHECK-NEXT: [[CTTZ6:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD6]], i1 false)
+; CHECK-NEXT: [[CTTZ7:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD7]], i1 false)
+; CHECK-NEXT: [[CTTZ8:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD8]], i1 false)
+; CHECK-NEXT: [[CTTZ9:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD9]], i1 false)
+; CHECK-NEXT: [[CTTZ10:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD10]], i1 false)
+; CHECK-NEXT: [[CTTZ11:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD11]], i1 false)
+; CHECK-NEXT: [[CTTZ12:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD12]], i1 false)
+; CHECK-NEXT: [[CTTZ13:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD13]], i1 false)
+; CHECK-NEXT: [[CTTZ14:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD14]], i1 false)
+; CHECK-NEXT: [[CTTZ15:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD15]], i1 false)
+; CHECK-NEXT: store i16 [[CTTZ0]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
+; CHECK-NEXT: store i16 [[CTTZ1]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
+; CHECK-NEXT: store i16 [[CTTZ2]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
+; CHECK-NEXT: store i16 [[CTTZ3]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
+; CHECK-NEXT: store i16 [[CTTZ4]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
+; CHECK-NEXT: store i16 [[CTTZ5]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
+; CHECK-NEXT: store i16 [[CTTZ6]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
+; CHECK-NEXT: store i16 [[CTTZ7]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
+; CHECK-NEXT: store i16 [[CTTZ8]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 8), align 2
+; CHECK-NEXT: store i16 [[CTTZ9]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 9), align 2
+; CHECK-NEXT: store i16 [[CTTZ10]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 10), align 2
+; CHECK-NEXT: store i16 [[CTTZ11]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 11), align 2
+; CHECK-NEXT: store i16 [[CTTZ12]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 12), align 2
+; CHECK-NEXT: store i16 [[CTTZ13]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 13), align 2
+; CHECK-NEXT: store i16 [[CTTZ14]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 14), align 2
+; CHECK-NEXT: store i16 [[CTTZ15]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 15), align 2
+; CHECK-NEXT: ret void
+;
+ %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
+ %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
+ %ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
+ %ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
+ %ld4 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
+ %ld5 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
+ %ld6 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
+ %ld7 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
+ %ld8 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 8), align 2
+ %ld9 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 9), align 2
+ %ld10 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 10), align 2
+ %ld11 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 11), align 2
+ %ld12 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 12), align 2
+ %ld13 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 13), align 2
+ %ld14 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 14), align 2
+ %ld15 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 15), align 2
+ %cttz0 = call i16 @llvm.cttz.i16(i16 %ld0, i1 0)
+ %cttz1 = call i16 @llvm.cttz.i16(i16 %ld1, i1 0)
+ %cttz2 = call i16 @llvm.cttz.i16(i16 %ld2, i1 0)
+ %cttz3 = call i16 @llvm.cttz.i16(i16 %ld3, i1 0)
+ %cttz4 = call i16 @llvm.cttz.i16(i16 %ld4, i1 0)
+ %cttz5 = call i16 @llvm.cttz.i16(i16 %ld5, i1 0)
+ %cttz6 = call i16 @llvm.cttz.i16(i16 %ld6, i1 0)
+ %cttz7 = call i16 @llvm.cttz.i16(i16 %ld7, i1 0)
+ %cttz8 = call i16 @llvm.cttz.i16(i16 %ld8, i1 0)
+ %cttz9 = call i16 @llvm.cttz.i16(i16 %ld9, i1 0)
+ %cttz10 = call i16 @llvm.cttz.i16(i16 %ld10, i1 0)
+ %cttz11 = call i16 @llvm.cttz.i16(i16 %ld11, i1 0)
+ %cttz12 = call i16 @llvm.cttz.i16(i16 %ld12, i1 0)
+ %cttz13 = call i16 @llvm.cttz.i16(i16 %ld13, i1 0)
+ %cttz14 = call i16 @llvm.cttz.i16(i16 %ld14, i1 0)
+ %cttz15 = call i16 @llvm.cttz.i16(i16 %ld15, i1 0)
+ store i16 %cttz0 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
+ store i16 %cttz1 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
+ store i16 %cttz2 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
+ store i16 %cttz3 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
+ store i16 %cttz4 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
+ store i16 %cttz5 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
+ store i16 %cttz6 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
+ store i16 %cttz7 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
+ store i16 %cttz8 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 8), align 2
+ store i16 %cttz9 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 9), align 2
+ store i16 %cttz10, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 10), align 2
+ store i16 %cttz11, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 11), align 2
+ store i16 %cttz12, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 12), align 2
+ store i16 %cttz13, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 13), align 2
+ store i16 %cttz14, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 14), align 2
+ store i16 %cttz15, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 15), align 2
+ ret void
+}
+
+define void @cttz_16i8() #0 {
+; CHECK-LABEL: @cttz_16i8(
+; CHECK-NEXT: [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 0), align 1
+; CHECK-NEXT: [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 1), align 1
+; CHECK-NEXT: [[LD2:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 2), align 1
+; CHECK-NEXT: [[LD3:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 3), align 1
+; CHECK-NEXT: [[LD4:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 4), align 1
+; CHECK-NEXT: [[LD5:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 5), align 1
+; CHECK-NEXT: [[LD6:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 6), align 1
+; CHECK-NEXT: [[LD7:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 7), align 1
+; CHECK-NEXT: [[LD8:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 8), align 1
+; CHECK-NEXT: [[LD9:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 9), align 1
+; CHECK-NEXT: [[LD10:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
+; CHECK-NEXT: [[LD11:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
+; CHECK-NEXT: [[LD12:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
+; CHECK-NEXT: [[LD13:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
+; CHECK-NEXT: [[LD14:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
+; CHECK-NEXT: [[LD15:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
+; CHECK-NEXT: [[CTTZ0:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD0]], i1 false)
+; CHECK-NEXT: [[CTTZ1:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD1]], i1 false)
+; CHECK-NEXT: [[CTTZ2:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD2]], i1 false)
+; CHECK-NEXT: [[CTTZ3:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD3]], i1 false)
+; CHECK-NEXT: [[CTTZ4:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD4]], i1 false)
+; CHECK-NEXT: [[CTTZ5:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD5]], i1 false)
+; CHECK-NEXT: [[CTTZ6:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD6]], i1 false)
+; CHECK-NEXT: [[CTTZ7:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD7]], i1 false)
+; CHECK-NEXT: [[CTTZ8:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD8]], i1 false)
+; CHECK-NEXT: [[CTTZ9:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD9]], i1 false)
+; CHECK-NEXT: [[CTTZ10:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD10]], i1 false)
+; CHECK-NEXT: [[CTTZ11:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD11]], i1 false)
+; CHECK-NEXT: [[CTTZ12:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD12]], i1 false)
+; CHECK-NEXT: [[CTTZ13:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD13]], i1 false)
+; CHECK-NEXT: [[CTTZ14:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD14]], i1 false)
+; CHECK-NEXT: [[CTTZ15:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD15]], i1 false)
+; CHECK-NEXT: store i8 [[CTTZ0]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 0), align 1
+; CHECK-NEXT: store i8 [[CTTZ1]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 1), align 1
+; CHECK-NEXT: store i8 [[CTTZ2]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 2), align 1
+; CHECK-NEXT: store i8 [[CTTZ3]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 3), align 1
+; CHECK-NEXT: store i8 [[CTTZ4]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 4), align 1
+; CHECK-NEXT: store i8 [[CTTZ5]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 5), align 1
+; CHECK-NEXT: store i8 [[CTTZ6]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 6), align 1
+; CHECK-NEXT: store i8 [[CTTZ7]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 7), align 1
+; CHECK-NEXT: store i8 [[CTTZ8]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 8), align 1
+; CHECK-NEXT: store i8 [[CTTZ9]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 9), align 1
+; CHECK-NEXT: store i8 [[CTTZ10]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
+; CHECK-NEXT: store i8 [[CTTZ11]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
+; CHECK-NEXT: store i8 [[CTTZ12]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
+; CHECK-NEXT: store i8 [[CTTZ13]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
+; CHECK-NEXT: store i8 [[CTTZ14]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
+; CHECK-NEXT: store i8 [[CTTZ15]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
+; CHECK-NEXT: ret void
+;
+ %ld0 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 0), align 1
+ %ld1 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 1), align 1
+ %ld2 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 2), align 1
+ %ld3 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 3), align 1
+ %ld4 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 4), align 1
+ %ld5 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 5), align 1
+ %ld6 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 6), align 1
+ %ld7 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 7), align 1
+ %ld8 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 8), align 1
+ %ld9 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 9), align 1
+ %ld10 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
+ %ld11 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
+ %ld12 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
+ %ld13 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
+ %ld14 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
+ %ld15 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
+ %cttz0 = call i8 @llvm.cttz.i8(i8 %ld0, i1 0)
+ %cttz1 = call i8 @llvm.cttz.i8(i8 %ld1, i1 0)
+ %cttz2 = call i8 @llvm.cttz.i8(i8 %ld2, i1 0)
+ %cttz3 = call i8 @llvm.cttz.i8(i8 %ld3, i1 0)
+ %cttz4 = call i8 @llvm.cttz.i8(i8 %ld4, i1 0)
+ %cttz5 = call i8 @llvm.cttz.i8(i8 %ld5, i1 0)
+ %cttz6 = call i8 @llvm.cttz.i8(i8 %ld6, i1 0)
+ %cttz7 = call i8 @llvm.cttz.i8(i8 %ld7, i1 0)
+ %cttz8 = call i8 @llvm.cttz.i8(i8 %ld8, i1 0)
+ %cttz9 = call i8 @llvm.cttz.i8(i8 %ld9, i1 0)
+ %cttz10 = call i8 @llvm.cttz.i8(i8 %ld10, i1 0)
+ %cttz11 = call i8 @llvm.cttz.i8(i8 %ld11, i1 0)
+ %cttz12 = call i8 @llvm.cttz.i8(i8 %ld12, i1 0)
+ %cttz13 = call i8 @llvm.cttz.i8(i8 %ld13, i1 0)
+ %cttz14 = call i8 @llvm.cttz.i8(i8 %ld14, i1 0)
+ %cttz15 = call i8 @llvm.cttz.i8(i8 %ld15, i1 0)
+ store i8 %cttz0 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 0), align 1
+ store i8 %cttz1 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 1), align 1
+ store i8 %cttz2 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 2), align 1
+ store i8 %cttz3 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 3), align 1
+ store i8 %cttz4 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 4), align 1
+ store i8 %cttz5 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 5), align 1
+ store i8 %cttz6 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 6), align 1
+ store i8 %cttz7 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 7), align 1
+ store i8 %cttz8 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 8), align 1
+ store i8 %cttz9 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 9), align 1
+ store i8 %cttz10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
+ store i8 %cttz11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
+ store i8 %cttz12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
+ store i8 %cttz13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
+ store i8 %cttz14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
+ store i8 %cttz15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
+ ret void
+}
+
+define void @cttz_32i8() #0 {
+; CHECK-LABEL: @cttz_32i8(
+; CHECK-NEXT: [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 0), align 1
+; CHECK-NEXT: [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 1), align 1
+; CHECK-NEXT: [[LD2:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 2), align 1
+; CHECK-NEXT: [[LD3:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 3), align 1
+; CHECK-NEXT: [[LD4:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 4), align 1
+; CHECK-NEXT: [[LD5:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 5), align 1
+; CHECK-NEXT: [[LD6:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 6), align 1
+; CHECK-NEXT: [[LD7:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 7), align 1
+; CHECK-NEXT: [[LD8:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 8), align 1
+; CHECK-NEXT: [[LD9:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 9), align 1
+; CHECK-NEXT: [[LD10:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
+; CHECK-NEXT: [[LD11:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
+; CHECK-NEXT: [[LD12:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
+; CHECK-NEXT: [[LD13:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
+; CHECK-NEXT: [[LD14:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
+; CHECK-NEXT: [[LD15:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
+; CHECK-NEXT: [[LD16:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16), align 1
+; CHECK-NEXT: [[LD17:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 17), align 1
+; CHECK-NEXT: [[LD18:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 18), align 1
+; CHECK-NEXT: [[LD19:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 19), align 1
+; CHECK-NEXT: [[LD20:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 20), align 1
+; CHECK-NEXT: [[LD21:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 21), align 1
+; CHECK-NEXT: [[LD22:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 22), align 1
+; CHECK-NEXT: [[LD23:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 23), align 1
+; CHECK-NEXT: [[LD24:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 24), align 1
+; CHECK-NEXT: [[LD25:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 25), align 1
+; CHECK-NEXT: [[LD26:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 26), align 1
+; CHECK-NEXT: [[LD27:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 27), align 1
+; CHECK-NEXT: [[LD28:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 28), align 1
+; CHECK-NEXT: [[LD29:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 29), align 1
+; CHECK-NEXT: [[LD30:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 30), align 1
+; CHECK-NEXT: [[LD31:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 31), align 1
+; CHECK-NEXT: [[CTTZ0:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD0]], i1 false)
+; CHECK-NEXT: [[CTTZ1:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD1]], i1 false)
+; CHECK-NEXT: [[CTTZ2:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD2]], i1 false)
+; CHECK-NEXT: [[CTTZ3:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD3]], i1 false)
+; CHECK-NEXT: [[CTTZ4:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD4]], i1 false)
+; CHECK-NEXT: [[CTTZ5:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD5]], i1 false)
+; CHECK-NEXT: [[CTTZ6:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD6]], i1 false)
+; CHECK-NEXT: [[CTTZ7:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD7]], i1 false)
+; CHECK-NEXT: [[CTTZ8:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD8]], i1 false)
+; CHECK-NEXT: [[CTTZ9:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD9]], i1 false)
+; CHECK-NEXT: [[CTTZ10:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD10]], i1 false)
+; CHECK-NEXT: [[CTTZ11:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD11]], i1 false)
+; CHECK-NEXT: [[CTTZ12:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD12]], i1 false)
+; CHECK-NEXT: [[CTTZ13:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD13]], i1 false)
+; CHECK-NEXT: [[CTTZ14:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD14]], i1 false)
+; CHECK-NEXT: [[CTTZ15:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD15]], i1 false)
+; CHECK-NEXT: [[CTTZ16:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD16]], i1 false)
+; CHECK-NEXT: [[CTTZ17:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD17]], i1 false)
+; CHECK-NEXT: [[CTTZ18:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD18]], i1 false)
+; CHECK-NEXT: [[CTTZ19:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD19]], i1 false)
+; CHECK-NEXT: [[CTTZ20:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD20]], i1 false)
+; CHECK-NEXT: [[CTTZ21:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD21]], i1 false)
+; CHECK-NEXT: [[CTTZ22:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD22]], i1 false)
+; CHECK-NEXT: [[CTTZ23:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD23]], i1 false)
+; CHECK-NEXT: [[CTTZ24:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD24]], i1 false)
+; CHECK-NEXT: [[CTTZ25:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD25]], i1 false)
+; CHECK-NEXT: [[CTTZ26:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD26]], i1 false)
+; CHECK-NEXT: [[CTTZ27:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD27]], i1 false)
+; CHECK-NEXT: [[CTTZ28:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD28]], i1 false)
+; CHECK-NEXT: [[CTTZ29:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD29]], i1 false)
+; CHECK-NEXT: [[CTTZ30:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD30]], i1 false)
+; CHECK-NEXT: [[CTTZ31:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD31]], i1 false)
+; CHECK-NEXT: store i8 [[CTTZ0]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 0), align 1
+; CHECK-NEXT: store i8 [[CTTZ1]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 1), align 1
+; CHECK-NEXT: store i8 [[CTTZ2]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 2), align 1
+; CHECK-NEXT: store i8 [[CTTZ3]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 3), align 1
+; CHECK-NEXT: store i8 [[CTTZ4]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 4), align 1
+; CHECK-NEXT: store i8 [[CTTZ5]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 5), align 1
+; CHECK-NEXT: store i8 [[CTTZ6]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 6), align 1
+; CHECK-NEXT: store i8 [[CTTZ7]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 7), align 1
+; CHECK-NEXT: store i8 [[CTTZ8]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 8), align 1
+; CHECK-NEXT: store i8 [[CTTZ9]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 9), align 1
+; CHECK-NEXT: store i8 [[CTTZ10]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
+; CHECK-NEXT: store i8 [[CTTZ11]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
+; CHECK-NEXT: store i8 [[CTTZ12]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
+; CHECK-NEXT: store i8 [[CTTZ13]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
+; CHECK-NEXT: store i8 [[CTTZ14]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
+; CHECK-NEXT: store i8 [[CTTZ15]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
+; CHECK-NEXT: store i8 [[CTTZ16]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16), align 1
+; CHECK-NEXT: store i8 [[CTTZ17]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 17), align 1
+; CHECK-NEXT: store i8 [[CTTZ18]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 18), align 1
+; CHECK-NEXT: store i8 [[CTTZ19]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 19), align 1
+; CHECK-NEXT: store i8 [[CTTZ20]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 20), align 1
+; CHECK-NEXT: store i8 [[CTTZ21]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 21), align 1
+; CHECK-NEXT: store i8 [[CTTZ22]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 22), align 1
+; CHECK-NEXT: store i8 [[CTTZ23]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 23), align 1
+; CHECK-NEXT: store i8 [[CTTZ24]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 24), align 1
+; CHECK-NEXT: store i8 [[CTTZ25]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 25), align 1
+; CHECK-NEXT: store i8 [[CTTZ26]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 26), align 1
+; CHECK-NEXT: store i8 [[CTTZ27]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 27), align 1
+; CHECK-NEXT: store i8 [[CTTZ28]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 28), align 1
+; CHECK-NEXT: store i8 [[CTTZ29]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 29), align 1
+; CHECK-NEXT: store i8 [[CTTZ30]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 30), align 1
+; CHECK-NEXT: store i8 [[CTTZ31]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 31), align 1
+; CHECK-NEXT: ret void
+;
+ %ld0 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 0), align 1
+ %ld1 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 1), align 1
+ %ld2 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 2), align 1
+ %ld3 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 3), align 1
+ %ld4 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 4), align 1
+ %ld5 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 5), align 1
+ %ld6 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 6), align 1
+ %ld7 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 7), align 1
+ %ld8 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 8), align 1
+ %ld9 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 9), align 1
+ %ld10 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
+ %ld11 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
+ %ld12 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
+ %ld13 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
+ %ld14 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
+ %ld15 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
+ %ld16 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16), align 1
+ %ld17 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 17), align 1
+ %ld18 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 18), align 1
+ %ld19 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 19), align 1
+ %ld20 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 20), align 1
+ %ld21 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 21), align 1
+ %ld22 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 22), align 1
+ %ld23 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 23), align 1
+ %ld24 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 24), align 1
+ %ld25 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 25), align 1
+ %ld26 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 26), align 1
+ %ld27 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 27), align 1
+ %ld28 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 28), align 1
+ %ld29 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 29), align 1
+ %ld30 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 30), align 1
+ %ld31 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 31), align 1
+ %cttz0 = call i8 @llvm.cttz.i8(i8 %ld0, i1 0)
+ %cttz1 = call i8 @llvm.cttz.i8(i8 %ld1, i1 0)
+ %cttz2 = call i8 @llvm.cttz.i8(i8 %ld2, i1 0)
+ %cttz3 = call i8 @llvm.cttz.i8(i8 %ld3, i1 0)
+ %cttz4 = call i8 @llvm.cttz.i8(i8 %ld4, i1 0)
+ %cttz5 = call i8 @llvm.cttz.i8(i8 %ld5, i1 0)
+ %cttz6 = call i8 @llvm.cttz.i8(i8 %ld6, i1 0)
+ %cttz7 = call i8 @llvm.cttz.i8(i8 %ld7, i1 0)
+ %cttz8 = call i8 @llvm.cttz.i8(i8 %ld8, i1 0)
+ %cttz9 = call i8 @llvm.cttz.i8(i8 %ld9, i1 0)
+ %cttz10 = call i8 @llvm.cttz.i8(i8 %ld10, i1 0)
+ %cttz11 = call i8 @llvm.cttz.i8(i8 %ld11, i1 0)
+ %cttz12 = call i8 @llvm.cttz.i8(i8 %ld12, i1 0)
+ %cttz13 = call i8 @llvm.cttz.i8(i8 %ld13, i1 0)
+ %cttz14 = call i8 @llvm.cttz.i8(i8 %ld14, i1 0)
+ %cttz15 = call i8 @llvm.cttz.i8(i8 %ld15, i1 0)
+ %cttz16 = call i8 @llvm.cttz.i8(i8 %ld16, i1 0)
+ %cttz17 = call i8 @llvm.cttz.i8(i8 %ld17, i1 0)
+ %cttz18 = call i8 @llvm.cttz.i8(i8 %ld18, i1 0)
+ %cttz19 = call i8 @llvm.cttz.i8(i8 %ld19, i1 0)
+ %cttz20 = call i8 @llvm.cttz.i8(i8 %ld20, i1 0)
+ %cttz21 = call i8 @llvm.cttz.i8(i8 %ld21, i1 0)
+ %cttz22 = call i8 @llvm.cttz.i8(i8 %ld22, i1 0)
+ %cttz23 = call i8 @llvm.cttz.i8(i8 %ld23, i1 0)
+ %cttz24 = call i8 @llvm.cttz.i8(i8 %ld24, i1 0)
+ %cttz25 = call i8 @llvm.cttz.i8(i8 %ld25, i1 0)
+ %cttz26 = call i8 @llvm.cttz.i8(i8 %ld26, i1 0)
+ %cttz27 = call i8 @llvm.cttz.i8(i8 %ld27, i1 0)
+ %cttz28 = call i8 @llvm.cttz.i8(i8 %ld28, i1 0)
+ %cttz29 = call i8 @llvm.cttz.i8(i8 %ld29, i1 0)
+ %cttz30 = call i8 @llvm.cttz.i8(i8 %ld30, i1 0)
+ %cttz31 = call i8 @llvm.cttz.i8(i8 %ld31, i1 0)
+ store i8 %cttz0 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 0), align 1
+ store i8 %cttz1 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 1), align 1
+ store i8 %cttz2 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 2), align 1
+ store i8 %cttz3 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 3), align 1
+ store i8 %cttz4 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 4), align 1
+ store i8 %cttz5 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 5), align 1
+ store i8 %cttz6 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 6), align 1
+ store i8 %cttz7 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 7), align 1
+ store i8 %cttz8 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 8), align 1
+ store i8 %cttz9 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 9), align 1
+ store i8 %cttz10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
+ store i8 %cttz11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
+ store i8 %cttz12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
+ store i8 %cttz13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
+ store i8 %cttz14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
+ store i8 %cttz15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
+ store i8 %cttz16, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16), align 1
+ store i8 %cttz17, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 17), align 1
+ store i8 %cttz18, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 18), align 1
+ store i8 %cttz19, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 19), align 1
+ store i8 %cttz20, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 20), align 1
+ store i8 %cttz21, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 21), align 1
+ store i8 %cttz22, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 22), align 1
+ store i8 %cttz23, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 23), align 1
+ store i8 %cttz24, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 24), align 1
+ store i8 %cttz25, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 25), align 1
+ store i8 %cttz26, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 26), align 1
+ store i8 %cttz27, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 27), align 1
+ store i8 %cttz28, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 28), align 1
+ store i8 %cttz29, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 29), align 1
+ store i8 %cttz30, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 30), align 1
+ store i8 %cttz31, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 31), align 1
+ ret void
+}
+
+;
+; CTTZ_ZERO_UNDEF
+;
+
+define void @cttz_undef_2i64() #0 {
+; CHECK-LABEL: @cttz_undef_2i64(
+; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
+; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+; CHECK-NEXT: [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 true)
+; CHECK-NEXT: [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 true)
+; CHECK-NEXT: store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
+; CHECK-NEXT: store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+; CHECK-NEXT: ret void
+;
+ %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 0), align 8
+ %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i32 0, i64 1), align 8
+ %cttz0 = call i64 @llvm.cttz.i64(i64 %ld0, i1 -1)
+ %cttz1 = call i64 @llvm.cttz.i64(i64 %ld1, i1 -1)
+ store i64 %cttz0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 0), align 8
+ store i64 %cttz1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i32 0, i64 1), align 8
+ ret void
+}
+
+define void @cttz_undef_4i64() #0 {
+; CHECK-LABEL: @cttz_undef_4i64(
+; CHECK-NEXT: [[LD0:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
+; CHECK-NEXT: [[LD1:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
+; CHECK-NEXT: [[LD2:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
+; CHECK-NEXT: [[LD3:%.*]] = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+; CHECK-NEXT: [[CTTZ0:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD0]], i1 true)
+; CHECK-NEXT: [[CTTZ1:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD1]], i1 true)
+; CHECK-NEXT: [[CTTZ2:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD2]], i1 true)
+; CHECK-NEXT: [[CTTZ3:%.*]] = call i64 @llvm.cttz.i64(i64 [[LD3]], i1 true)
+; CHECK-NEXT: store i64 [[CTTZ0]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
+; CHECK-NEXT: store i64 [[CTTZ1]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
+; CHECK-NEXT: store i64 [[CTTZ2]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
+; CHECK-NEXT: store i64 [[CTTZ3]], i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+; CHECK-NEXT: ret void
+;
+ %ld0 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 0), align 4
+ %ld1 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 1), align 4
+ %ld2 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 2), align 4
+ %ld3 = load i64, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @src64, i64 0, i64 3), align 4
+ %cttz0 = call i64 @llvm.cttz.i64(i64 %ld0, i1 -1)
+ %cttz1 = call i64 @llvm.cttz.i64(i64 %ld1, i1 -1)
+ %cttz2 = call i64 @llvm.cttz.i64(i64 %ld2, i1 -1)
+ %cttz3 = call i64 @llvm.cttz.i64(i64 %ld3, i1 -1)
+ store i64 %cttz0, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 0), align 4
+ store i64 %cttz1, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 1), align 4
+ store i64 %cttz2, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 2), align 4
+ store i64 %cttz3, i64* getelementptr inbounds ([4 x i64], [4 x i64]* @dst64, i64 0, i64 3), align 4
+ ret void
+}
+
+; 4 x i32 cttz with is_zero_undef (the `i1 -1` flag; printed back as `i1 true`).
+; The CHECK lines expect SLP to leave all four cttz calls SCALAR on this target
+; (no profitable vector CTTZ lowering), so loads/calls/stores stay element-wise.
+define void @cttz_undef_4i32() #0 {
+; CHECK-LABEL: @cttz_undef_4i32(
+; CHECK-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
+; CHECK-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
+; CHECK-NEXT:    [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
+; CHECK-NEXT:    [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
+; CHECK-NEXT:    [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 true)
+; CHECK-NEXT:    [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 true)
+; CHECK-NEXT:    [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 true)
+; CHECK-NEXT:    [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 true)
+; CHECK-NEXT:    store i32 [[CTTZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
+; CHECK-NEXT:    store i32 [[CTTZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
+; CHECK-NEXT:    store i32 [[CTTZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
+; CHECK-NEXT:    store i32 [[CTTZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
+; CHECK-NEXT:    ret void
+;
+  %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 4
+  %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 4
+  %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 4
+  %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 4
+  %cttz0 = call i32 @llvm.cttz.i32(i32 %ld0, i1 -1)
+  %cttz1 = call i32 @llvm.cttz.i32(i32 %ld1, i1 -1)
+  %cttz2 = call i32 @llvm.cttz.i32(i32 %ld2, i1 -1)
+  %cttz3 = call i32 @llvm.cttz.i32(i32 %ld3, i1 -1)
+  store i32 %cttz0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 4
+  store i32 %cttz1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 4
+  store i32 %cttz2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 4
+  store i32 %cttz3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 4
+  ret void
+}
+
+; 8 x i32 cttz, is_zero_undef, under-aligned accesses (align 2 < natural 4).
+; CHECK lines expect the eight cttz calls to remain SCALAR after SLP.
+define void @cttz_undef_8i32() #0 {
+; CHECK-LABEL: @cttz_undef_8i32(
+; CHECK-NEXT:    [[LD0:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
+; CHECK-NEXT:    [[LD1:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
+; CHECK-NEXT:    [[LD2:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
+; CHECK-NEXT:    [[LD3:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
+; CHECK-NEXT:    [[LD4:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
+; CHECK-NEXT:    [[LD5:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
+; CHECK-NEXT:    [[LD6:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
+; CHECK-NEXT:    [[LD7:%.*]] = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
+; CHECK-NEXT:    [[CTTZ0:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD0]], i1 true)
+; CHECK-NEXT:    [[CTTZ1:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD1]], i1 true)
+; CHECK-NEXT:    [[CTTZ2:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD2]], i1 true)
+; CHECK-NEXT:    [[CTTZ3:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD3]], i1 true)
+; CHECK-NEXT:    [[CTTZ4:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD4]], i1 true)
+; CHECK-NEXT:    [[CTTZ5:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD5]], i1 true)
+; CHECK-NEXT:    [[CTTZ6:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD6]], i1 true)
+; CHECK-NEXT:    [[CTTZ7:%.*]] = call i32 @llvm.cttz.i32(i32 [[LD7]], i1 true)
+; CHECK-NEXT:    store i32 [[CTTZ0]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
+; CHECK-NEXT:    store i32 [[CTTZ1]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
+; CHECK-NEXT:    store i32 [[CTTZ2]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
+; CHECK-NEXT:    store i32 [[CTTZ3]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
+; CHECK-NEXT:    store i32 [[CTTZ4]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
+; CHECK-NEXT:    store i32 [[CTTZ5]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
+; CHECK-NEXT:    store i32 [[CTTZ6]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
+; CHECK-NEXT:    store i32 [[CTTZ7]], i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
+; CHECK-NEXT:    ret void
+;
+  %ld0 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 0), align 2
+  %ld1 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 1), align 2
+  %ld2 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 2), align 2
+  %ld3 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 3), align 2
+  %ld4 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 4), align 2
+  %ld5 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 5), align 2
+  %ld6 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 6), align 2
+  %ld7 = load i32, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @src32, i32 0, i64 7), align 2
+  %cttz0 = call i32 @llvm.cttz.i32(i32 %ld0, i1 -1)
+  %cttz1 = call i32 @llvm.cttz.i32(i32 %ld1, i1 -1)
+  %cttz2 = call i32 @llvm.cttz.i32(i32 %ld2, i1 -1)
+  %cttz3 = call i32 @llvm.cttz.i32(i32 %ld3, i1 -1)
+  %cttz4 = call i32 @llvm.cttz.i32(i32 %ld4, i1 -1)
+  %cttz5 = call i32 @llvm.cttz.i32(i32 %ld5, i1 -1)
+  %cttz6 = call i32 @llvm.cttz.i32(i32 %ld6, i1 -1)
+  %cttz7 = call i32 @llvm.cttz.i32(i32 %ld7, i1 -1)
+  store i32 %cttz0, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 0), align 2
+  store i32 %cttz1, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 1), align 2
+  store i32 %cttz2, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 2), align 2
+  store i32 %cttz3, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 3), align 2
+  store i32 %cttz4, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 4), align 2
+  store i32 %cttz5, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 5), align 2
+  store i32 %cttz6, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 6), align 2
+  store i32 %cttz7, i32* getelementptr inbounds ([8 x i32], [8 x i32]* @dst32, i32 0, i64 7), align 2
+  ret void
+}
+
+; 8 x i16 cttz, is_zero_undef, naturally-aligned (align 2) loads/stores.
+; CHECK lines expect the eight cttz calls to remain SCALAR after SLP.
+define void @cttz_undef_8i16() #0 {
+; CHECK-LABEL: @cttz_undef_8i16(
+; CHECK-NEXT:    [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
+; CHECK-NEXT:    [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
+; CHECK-NEXT:    [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
+; CHECK-NEXT:    [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
+; CHECK-NEXT:    [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
+; CHECK-NEXT:    [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
+; CHECK-NEXT:    [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
+; CHECK-NEXT:    [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
+; CHECK-NEXT:    [[CTTZ0:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD0]], i1 true)
+; CHECK-NEXT:    [[CTTZ1:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD1]], i1 true)
+; CHECK-NEXT:    [[CTTZ2:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD2]], i1 true)
+; CHECK-NEXT:    [[CTTZ3:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD3]], i1 true)
+; CHECK-NEXT:    [[CTTZ4:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD4]], i1 true)
+; CHECK-NEXT:    [[CTTZ5:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD5]], i1 true)
+; CHECK-NEXT:    [[CTTZ6:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD6]], i1 true)
+; CHECK-NEXT:    [[CTTZ7:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD7]], i1 true)
+; CHECK-NEXT:    store i16 [[CTTZ0]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
+; CHECK-NEXT:    store i16 [[CTTZ1]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
+; CHECK-NEXT:    store i16 [[CTTZ2]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
+; CHECK-NEXT:    store i16 [[CTTZ3]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
+; CHECK-NEXT:    store i16 [[CTTZ4]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
+; CHECK-NEXT:    store i16 [[CTTZ5]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
+; CHECK-NEXT:    store i16 [[CTTZ6]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
+; CHECK-NEXT:    store i16 [[CTTZ7]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
+; CHECK-NEXT:    ret void
+;
+  %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
+  %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
+  %ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
+  %ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
+  %ld4 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
+  %ld5 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
+  %ld6 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
+  %ld7 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
+  %cttz0 = call i16 @llvm.cttz.i16(i16 %ld0, i1 -1)
+  %cttz1 = call i16 @llvm.cttz.i16(i16 %ld1, i1 -1)
+  %cttz2 = call i16 @llvm.cttz.i16(i16 %ld2, i1 -1)
+  %cttz3 = call i16 @llvm.cttz.i16(i16 %ld3, i1 -1)
+  %cttz4 = call i16 @llvm.cttz.i16(i16 %ld4, i1 -1)
+  %cttz5 = call i16 @llvm.cttz.i16(i16 %ld5, i1 -1)
+  %cttz6 = call i16 @llvm.cttz.i16(i16 %ld6, i1 -1)
+  %cttz7 = call i16 @llvm.cttz.i16(i16 %ld7, i1 -1)
+  store i16 %cttz0, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
+  store i16 %cttz1, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
+  store i16 %cttz2, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
+  store i16 %cttz3, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
+  store i16 %cttz4, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
+  store i16 %cttz5, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
+  store i16 %cttz6, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
+  store i16 %cttz7, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
+  ret void
+}
+
+; 16 x i16 cttz, is_zero_undef, full @src16/@dst16 arrays (indices 0..15).
+; CHECK lines expect all sixteen cttz calls to remain SCALAR after SLP.
+define void @cttz_undef_16i16() #0 {
+; CHECK-LABEL: @cttz_undef_16i16(
+; CHECK-NEXT:    [[LD0:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
+; CHECK-NEXT:    [[LD1:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
+; CHECK-NEXT:    [[LD2:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
+; CHECK-NEXT:    [[LD3:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
+; CHECK-NEXT:    [[LD4:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
+; CHECK-NEXT:    [[LD5:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
+; CHECK-NEXT:    [[LD6:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
+; CHECK-NEXT:    [[LD7:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
+; CHECK-NEXT:    [[LD8:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 8), align 2
+; CHECK-NEXT:    [[LD9:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 9), align 2
+; CHECK-NEXT:    [[LD10:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 10), align 2
+; CHECK-NEXT:    [[LD11:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 11), align 2
+; CHECK-NEXT:    [[LD12:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 12), align 2
+; CHECK-NEXT:    [[LD13:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 13), align 2
+; CHECK-NEXT:    [[LD14:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 14), align 2
+; CHECK-NEXT:    [[LD15:%.*]] = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 15), align 2
+; CHECK-NEXT:    [[CTTZ0:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD0]], i1 true)
+; CHECK-NEXT:    [[CTTZ1:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD1]], i1 true)
+; CHECK-NEXT:    [[CTTZ2:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD2]], i1 true)
+; CHECK-NEXT:    [[CTTZ3:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD3]], i1 true)
+; CHECK-NEXT:    [[CTTZ4:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD4]], i1 true)
+; CHECK-NEXT:    [[CTTZ5:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD5]], i1 true)
+; CHECK-NEXT:    [[CTTZ6:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD6]], i1 true)
+; CHECK-NEXT:    [[CTTZ7:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD7]], i1 true)
+; CHECK-NEXT:    [[CTTZ8:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD8]], i1 true)
+; CHECK-NEXT:    [[CTTZ9:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD9]], i1 true)
+; CHECK-NEXT:    [[CTTZ10:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD10]], i1 true)
+; CHECK-NEXT:    [[CTTZ11:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD11]], i1 true)
+; CHECK-NEXT:    [[CTTZ12:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD12]], i1 true)
+; CHECK-NEXT:    [[CTTZ13:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD13]], i1 true)
+; CHECK-NEXT:    [[CTTZ14:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD14]], i1 true)
+; CHECK-NEXT:    [[CTTZ15:%.*]] = call i16 @llvm.cttz.i16(i16 [[LD15]], i1 true)
+; CHECK-NEXT:    store i16 [[CTTZ0]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
+; CHECK-NEXT:    store i16 [[CTTZ1]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
+; CHECK-NEXT:    store i16 [[CTTZ2]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
+; CHECK-NEXT:    store i16 [[CTTZ3]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
+; CHECK-NEXT:    store i16 [[CTTZ4]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
+; CHECK-NEXT:    store i16 [[CTTZ5]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
+; CHECK-NEXT:    store i16 [[CTTZ6]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
+; CHECK-NEXT:    store i16 [[CTTZ7]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
+; CHECK-NEXT:    store i16 [[CTTZ8]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 8), align 2
+; CHECK-NEXT:    store i16 [[CTTZ9]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 9), align 2
+; CHECK-NEXT:    store i16 [[CTTZ10]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 10), align 2
+; CHECK-NEXT:    store i16 [[CTTZ11]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 11), align 2
+; CHECK-NEXT:    store i16 [[CTTZ12]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 12), align 2
+; CHECK-NEXT:    store i16 [[CTTZ13]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 13), align 2
+; CHECK-NEXT:    store i16 [[CTTZ14]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 14), align 2
+; CHECK-NEXT:    store i16 [[CTTZ15]], i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 15), align 2
+; CHECK-NEXT:    ret void
+;
+  %ld0 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 0), align 2
+  %ld1 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 1), align 2
+  %ld2 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 2), align 2
+  %ld3 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 3), align 2
+  %ld4 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 4), align 2
+  %ld5 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 5), align 2
+  %ld6 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 6), align 2
+  %ld7 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 7), align 2
+  %ld8 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 8), align 2
+  %ld9 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 9), align 2
+  %ld10 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 10), align 2
+  %ld11 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 11), align 2
+  %ld12 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 12), align 2
+  %ld13 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 13), align 2
+  %ld14 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 14), align 2
+  %ld15 = load i16, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @src16, i16 0, i64 15), align 2
+  %cttz0  = call i16 @llvm.cttz.i16(i16 %ld0, i1 -1)
+  %cttz1  = call i16 @llvm.cttz.i16(i16 %ld1, i1 -1)
+  %cttz2  = call i16 @llvm.cttz.i16(i16 %ld2, i1 -1)
+  %cttz3  = call i16 @llvm.cttz.i16(i16 %ld3, i1 -1)
+  %cttz4  = call i16 @llvm.cttz.i16(i16 %ld4, i1 -1)
+  %cttz5  = call i16 @llvm.cttz.i16(i16 %ld5, i1 -1)
+  %cttz6  = call i16 @llvm.cttz.i16(i16 %ld6, i1 -1)
+  %cttz7  = call i16 @llvm.cttz.i16(i16 %ld7, i1 -1)
+  %cttz8  = call i16 @llvm.cttz.i16(i16 %ld8, i1 -1)
+  %cttz9  = call i16 @llvm.cttz.i16(i16 %ld9, i1 -1)
+  %cttz10 = call i16 @llvm.cttz.i16(i16 %ld10, i1 -1)
+  %cttz11 = call i16 @llvm.cttz.i16(i16 %ld11, i1 -1)
+  %cttz12 = call i16 @llvm.cttz.i16(i16 %ld12, i1 -1)
+  %cttz13 = call i16 @llvm.cttz.i16(i16 %ld13, i1 -1)
+  %cttz14 = call i16 @llvm.cttz.i16(i16 %ld14, i1 -1)
+  %cttz15 = call i16 @llvm.cttz.i16(i16 %ld15, i1 -1)
+  store i16 %cttz0 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 0), align 2
+  store i16 %cttz1 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 1), align 2
+  store i16 %cttz2 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 2), align 2
+  store i16 %cttz3 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 3), align 2
+  store i16 %cttz4 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 4), align 2
+  store i16 %cttz5 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 5), align 2
+  store i16 %cttz6 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 6), align 2
+  store i16 %cttz7 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 7), align 2
+  store i16 %cttz8 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 8), align 2
+  store i16 %cttz9 , i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 9), align 2
+  store i16 %cttz10, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 10), align 2
+  store i16 %cttz11, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 11), align 2
+  store i16 %cttz12, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 12), align 2
+  store i16 %cttz13, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 13), align 2
+  store i16 %cttz14, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 14), align 2
+  store i16 %cttz15, i16* getelementptr inbounds ([16 x i16], [16 x i16]* @dst16, i16 0, i64 15), align 2
+  ret void
+}
+
+; 16 x i8 cttz, is_zero_undef, first half of the 32-element @src8/@dst8 arrays.
+; CHECK lines expect all sixteen cttz calls to remain SCALAR after SLP.
+define void @cttz_undef_16i8() #0 {
+; CHECK-LABEL: @cttz_undef_16i8(
+; CHECK-NEXT:    [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 0), align 1
+; CHECK-NEXT:    [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 1), align 1
+; CHECK-NEXT:    [[LD2:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 2), align 1
+; CHECK-NEXT:    [[LD3:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 3), align 1
+; CHECK-NEXT:    [[LD4:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 4), align 1
+; CHECK-NEXT:    [[LD5:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 5), align 1
+; CHECK-NEXT:    [[LD6:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 6), align 1
+; CHECK-NEXT:    [[LD7:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 7), align 1
+; CHECK-NEXT:    [[LD8:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 8), align 1
+; CHECK-NEXT:    [[LD9:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 9), align 1
+; CHECK-NEXT:    [[LD10:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
+; CHECK-NEXT:    [[LD11:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
+; CHECK-NEXT:    [[LD12:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
+; CHECK-NEXT:    [[LD13:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
+; CHECK-NEXT:    [[LD14:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
+; CHECK-NEXT:    [[LD15:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
+; CHECK-NEXT:    [[CTTZ0:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD0]], i1 true)
+; CHECK-NEXT:    [[CTTZ1:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD1]], i1 true)
+; CHECK-NEXT:    [[CTTZ2:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD2]], i1 true)
+; CHECK-NEXT:    [[CTTZ3:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD3]], i1 true)
+; CHECK-NEXT:    [[CTTZ4:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD4]], i1 true)
+; CHECK-NEXT:    [[CTTZ5:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD5]], i1 true)
+; CHECK-NEXT:    [[CTTZ6:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD6]], i1 true)
+; CHECK-NEXT:    [[CTTZ7:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD7]], i1 true)
+; CHECK-NEXT:    [[CTTZ8:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD8]], i1 true)
+; CHECK-NEXT:    [[CTTZ9:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD9]], i1 true)
+; CHECK-NEXT:    [[CTTZ10:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD10]], i1 true)
+; CHECK-NEXT:    [[CTTZ11:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD11]], i1 true)
+; CHECK-NEXT:    [[CTTZ12:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD12]], i1 true)
+; CHECK-NEXT:    [[CTTZ13:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD13]], i1 true)
+; CHECK-NEXT:    [[CTTZ14:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD14]], i1 true)
+; CHECK-NEXT:    [[CTTZ15:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD15]], i1 true)
+; CHECK-NEXT:    store i8 [[CTTZ0]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 0), align 1
+; CHECK-NEXT:    store i8 [[CTTZ1]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 1), align 1
+; CHECK-NEXT:    store i8 [[CTTZ2]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 2), align 1
+; CHECK-NEXT:    store i8 [[CTTZ3]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 3), align 1
+; CHECK-NEXT:    store i8 [[CTTZ4]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 4), align 1
+; CHECK-NEXT:    store i8 [[CTTZ5]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 5), align 1
+; CHECK-NEXT:    store i8 [[CTTZ6]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 6), align 1
+; CHECK-NEXT:    store i8 [[CTTZ7]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 7), align 1
+; CHECK-NEXT:    store i8 [[CTTZ8]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 8), align 1
+; CHECK-NEXT:    store i8 [[CTTZ9]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 9), align 1
+; CHECK-NEXT:    store i8 [[CTTZ10]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
+; CHECK-NEXT:    store i8 [[CTTZ11]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
+; CHECK-NEXT:    store i8 [[CTTZ12]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
+; CHECK-NEXT:    store i8 [[CTTZ13]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
+; CHECK-NEXT:    store i8 [[CTTZ14]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
+; CHECK-NEXT:    store i8 [[CTTZ15]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
+; CHECK-NEXT:    ret void
+;
+  %ld0 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 0), align 1
+  %ld1 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 1), align 1
+  %ld2 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 2), align 1
+  %ld3 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 3), align 1
+  %ld4 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 4), align 1
+  %ld5 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 5), align 1
+  %ld6 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 6), align 1
+  %ld7 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 7), align 1
+  %ld8 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 8), align 1
+  %ld9 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 9), align 1
+  %ld10 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
+  %ld11 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
+  %ld12 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
+  %ld13 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
+  %ld14 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
+  %ld15 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
+  %cttz0  = call i8 @llvm.cttz.i8(i8 %ld0, i1 -1)
+  %cttz1  = call i8 @llvm.cttz.i8(i8 %ld1, i1 -1)
+  %cttz2  = call i8 @llvm.cttz.i8(i8 %ld2, i1 -1)
+  %cttz3  = call i8 @llvm.cttz.i8(i8 %ld3, i1 -1)
+  %cttz4  = call i8 @llvm.cttz.i8(i8 %ld4, i1 -1)
+  %cttz5  = call i8 @llvm.cttz.i8(i8 %ld5, i1 -1)
+  %cttz6  = call i8 @llvm.cttz.i8(i8 %ld6, i1 -1)
+  %cttz7  = call i8 @llvm.cttz.i8(i8 %ld7, i1 -1)
+  %cttz8  = call i8 @llvm.cttz.i8(i8 %ld8, i1 -1)
+  %cttz9  = call i8 @llvm.cttz.i8(i8 %ld9, i1 -1)
+  %cttz10 = call i8 @llvm.cttz.i8(i8 %ld10, i1 -1)
+  %cttz11 = call i8 @llvm.cttz.i8(i8 %ld11, i1 -1)
+  %cttz12 = call i8 @llvm.cttz.i8(i8 %ld12, i1 -1)
+  %cttz13 = call i8 @llvm.cttz.i8(i8 %ld13, i1 -1)
+  %cttz14 = call i8 @llvm.cttz.i8(i8 %ld14, i1 -1)
+  %cttz15 = call i8 @llvm.cttz.i8(i8 %ld15, i1 -1)
+  store i8 %cttz0 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 0), align 1
+  store i8 %cttz1 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 1), align 1
+  store i8 %cttz2 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 2), align 1
+  store i8 %cttz3 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 3), align 1
+  store i8 %cttz4 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 4), align 1
+  store i8 %cttz5 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 5), align 1
+  store i8 %cttz6 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 6), align 1
+  store i8 %cttz7 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 7), align 1
+  store i8 %cttz8 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 8), align 1
+  store i8 %cttz9 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 9), align 1
+  store i8 %cttz10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
+  store i8 %cttz11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
+  store i8 %cttz12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
+  store i8 %cttz13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
+  store i8 %cttz14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
+  store i8 %cttz15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
+  ret void
+}
+
+define void @cttz_undef_32i8() #0 {
+; CHECK-LABEL: @cttz_undef_32i8(
+; CHECK-NEXT: [[LD0:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 0), align 1
+; CHECK-NEXT: [[LD1:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 1), align 1
+; CHECK-NEXT: [[LD2:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 2), align 1
+; CHECK-NEXT: [[LD3:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 3), align 1
+; CHECK-NEXT: [[LD4:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 4), align 1
+; CHECK-NEXT: [[LD5:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 5), align 1
+; CHECK-NEXT: [[LD6:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 6), align 1
+; CHECK-NEXT: [[LD7:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 7), align 1
+; CHECK-NEXT: [[LD8:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 8), align 1
+; CHECK-NEXT: [[LD9:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 9), align 1
+; CHECK-NEXT: [[LD10:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
+; CHECK-NEXT: [[LD11:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
+; CHECK-NEXT: [[LD12:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
+; CHECK-NEXT: [[LD13:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
+; CHECK-NEXT: [[LD14:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
+; CHECK-NEXT: [[LD15:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
+; CHECK-NEXT: [[LD16:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16), align 1
+; CHECK-NEXT: [[LD17:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 17), align 1
+; CHECK-NEXT: [[LD18:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 18), align 1
+; CHECK-NEXT: [[LD19:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 19), align 1
+; CHECK-NEXT: [[LD20:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 20), align 1
+; CHECK-NEXT: [[LD21:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 21), align 1
+; CHECK-NEXT: [[LD22:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 22), align 1
+; CHECK-NEXT: [[LD23:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 23), align 1
+; CHECK-NEXT: [[LD24:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 24), align 1
+; CHECK-NEXT: [[LD25:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 25), align 1
+; CHECK-NEXT: [[LD26:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 26), align 1
+; CHECK-NEXT: [[LD27:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 27), align 1
+; CHECK-NEXT: [[LD28:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 28), align 1
+; CHECK-NEXT: [[LD29:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 29), align 1
+; CHECK-NEXT: [[LD30:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 30), align 1
+; CHECK-NEXT: [[LD31:%.*]] = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 31), align 1
+; CHECK-NEXT: [[CTTZ0:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD0]], i1 true)
+; CHECK-NEXT: [[CTTZ1:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD1]], i1 true)
+; CHECK-NEXT: [[CTTZ2:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD2]], i1 true)
+; CHECK-NEXT: [[CTTZ3:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD3]], i1 true)
+; CHECK-NEXT: [[CTTZ4:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD4]], i1 true)
+; CHECK-NEXT: [[CTTZ5:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD5]], i1 true)
+; CHECK-NEXT: [[CTTZ6:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD6]], i1 true)
+; CHECK-NEXT: [[CTTZ7:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD7]], i1 true)
+; CHECK-NEXT: [[CTTZ8:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD8]], i1 true)
+; CHECK-NEXT: [[CTTZ9:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD9]], i1 true)
+; CHECK-NEXT: [[CTTZ10:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD10]], i1 true)
+; CHECK-NEXT: [[CTTZ11:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD11]], i1 true)
+; CHECK-NEXT: [[CTTZ12:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD12]], i1 true)
+; CHECK-NEXT: [[CTTZ13:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD13]], i1 true)
+; CHECK-NEXT: [[CTTZ14:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD14]], i1 true)
+; CHECK-NEXT: [[CTTZ15:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD15]], i1 true)
+; CHECK-NEXT: [[CTTZ16:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD16]], i1 true)
+; CHECK-NEXT: [[CTTZ17:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD17]], i1 true)
+; CHECK-NEXT: [[CTTZ18:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD18]], i1 true)
+; CHECK-NEXT: [[CTTZ19:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD19]], i1 true)
+; CHECK-NEXT: [[CTTZ20:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD20]], i1 true)
+; CHECK-NEXT: [[CTTZ21:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD21]], i1 true)
+; CHECK-NEXT: [[CTTZ22:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD22]], i1 true)
+; CHECK-NEXT: [[CTTZ23:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD23]], i1 true)
+; CHECK-NEXT: [[CTTZ24:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD24]], i1 true)
+; CHECK-NEXT: [[CTTZ25:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD25]], i1 true)
+; CHECK-NEXT: [[CTTZ26:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD26]], i1 true)
+; CHECK-NEXT: [[CTTZ27:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD27]], i1 true)
+; CHECK-NEXT: [[CTTZ28:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD28]], i1 true)
+; CHECK-NEXT: [[CTTZ29:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD29]], i1 true)
+; CHECK-NEXT: [[CTTZ30:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD30]], i1 true)
+; CHECK-NEXT: [[CTTZ31:%.*]] = call i8 @llvm.cttz.i8(i8 [[LD31]], i1 true)
+; CHECK-NEXT: store i8 [[CTTZ0]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 0), align 1
+; CHECK-NEXT: store i8 [[CTTZ1]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 1), align 1
+; CHECK-NEXT: store i8 [[CTTZ2]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 2), align 1
+; CHECK-NEXT: store i8 [[CTTZ3]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 3), align 1
+; CHECK-NEXT: store i8 [[CTTZ4]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 4), align 1
+; CHECK-NEXT: store i8 [[CTTZ5]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 5), align 1
+; CHECK-NEXT: store i8 [[CTTZ6]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 6), align 1
+; CHECK-NEXT: store i8 [[CTTZ7]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 7), align 1
+; CHECK-NEXT: store i8 [[CTTZ8]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 8), align 1
+; CHECK-NEXT: store i8 [[CTTZ9]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 9), align 1
+; CHECK-NEXT: store i8 [[CTTZ10]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
+; CHECK-NEXT: store i8 [[CTTZ11]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
+; CHECK-NEXT: store i8 [[CTTZ12]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
+; CHECK-NEXT: store i8 [[CTTZ13]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
+; CHECK-NEXT: store i8 [[CTTZ14]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
+; CHECK-NEXT: store i8 [[CTTZ15]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
+; CHECK-NEXT: store i8 [[CTTZ16]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16), align 1
+; CHECK-NEXT: store i8 [[CTTZ17]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 17), align 1
+; CHECK-NEXT: store i8 [[CTTZ18]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 18), align 1
+; CHECK-NEXT: store i8 [[CTTZ19]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 19), align 1
+; CHECK-NEXT: store i8 [[CTTZ20]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 20), align 1
+; CHECK-NEXT: store i8 [[CTTZ21]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 21), align 1
+; CHECK-NEXT: store i8 [[CTTZ22]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 22), align 1
+; CHECK-NEXT: store i8 [[CTTZ23]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 23), align 1
+; CHECK-NEXT: store i8 [[CTTZ24]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 24), align 1
+; CHECK-NEXT: store i8 [[CTTZ25]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 25), align 1
+; CHECK-NEXT: store i8 [[CTTZ26]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 26), align 1
+; CHECK-NEXT: store i8 [[CTTZ27]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 27), align 1
+; CHECK-NEXT: store i8 [[CTTZ28]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 28), align 1
+; CHECK-NEXT: store i8 [[CTTZ29]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 29), align 1
+; CHECK-NEXT: store i8 [[CTTZ30]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 30), align 1
+; CHECK-NEXT: store i8 [[CTTZ31]], i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 31), align 1
+; CHECK-NEXT: ret void
+;
+ %ld0 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 0), align 1
+ %ld1 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 1), align 1
+ %ld2 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 2), align 1
+ %ld3 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 3), align 1
+ %ld4 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 4), align 1
+ %ld5 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 5), align 1
+ %ld6 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 6), align 1
+ %ld7 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 7), align 1
+ %ld8 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 8), align 1
+ %ld9 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 9), align 1
+ %ld10 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 10), align 1
+ %ld11 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 11), align 1
+ %ld12 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 12), align 1
+ %ld13 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 13), align 1
+ %ld14 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 14), align 1
+ %ld15 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 15), align 1
+ %ld16 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 16), align 1
+ %ld17 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 17), align 1
+ %ld18 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 18), align 1
+ %ld19 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 19), align 1
+ %ld20 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 20), align 1
+ %ld21 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 21), align 1
+ %ld22 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 22), align 1
+ %ld23 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 23), align 1
+ %ld24 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 24), align 1
+ %ld25 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 25), align 1
+ %ld26 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 26), align 1
+ %ld27 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 27), align 1
+ %ld28 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 28), align 1
+ %ld29 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 29), align 1
+ %ld30 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 30), align 1
+ %ld31 = load i8, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @src8, i8 0, i64 31), align 1
+ %cttz0 = call i8 @llvm.cttz.i8(i8 %ld0, i1 -1)
+ %cttz1 = call i8 @llvm.cttz.i8(i8 %ld1, i1 -1)
+ %cttz2 = call i8 @llvm.cttz.i8(i8 %ld2, i1 -1)
+ %cttz3 = call i8 @llvm.cttz.i8(i8 %ld3, i1 -1)
+ %cttz4 = call i8 @llvm.cttz.i8(i8 %ld4, i1 -1)
+ %cttz5 = call i8 @llvm.cttz.i8(i8 %ld5, i1 -1)
+ %cttz6 = call i8 @llvm.cttz.i8(i8 %ld6, i1 -1)
+ %cttz7 = call i8 @llvm.cttz.i8(i8 %ld7, i1 -1)
+ %cttz8 = call i8 @llvm.cttz.i8(i8 %ld8, i1 -1)
+ %cttz9 = call i8 @llvm.cttz.i8(i8 %ld9, i1 -1)
+ %cttz10 = call i8 @llvm.cttz.i8(i8 %ld10, i1 -1)
+ %cttz11 = call i8 @llvm.cttz.i8(i8 %ld11, i1 -1)
+ %cttz12 = call i8 @llvm.cttz.i8(i8 %ld12, i1 -1)
+ %cttz13 = call i8 @llvm.cttz.i8(i8 %ld13, i1 -1)
+ %cttz14 = call i8 @llvm.cttz.i8(i8 %ld14, i1 -1)
+ %cttz15 = call i8 @llvm.cttz.i8(i8 %ld15, i1 -1)
+ %cttz16 = call i8 @llvm.cttz.i8(i8 %ld16, i1 -1)
+ %cttz17 = call i8 @llvm.cttz.i8(i8 %ld17, i1 -1)
+ %cttz18 = call i8 @llvm.cttz.i8(i8 %ld18, i1 -1)
+ %cttz19 = call i8 @llvm.cttz.i8(i8 %ld19, i1 -1)
+ %cttz20 = call i8 @llvm.cttz.i8(i8 %ld20, i1 -1)
+ %cttz21 = call i8 @llvm.cttz.i8(i8 %ld21, i1 -1)
+ %cttz22 = call i8 @llvm.cttz.i8(i8 %ld22, i1 -1)
+ %cttz23 = call i8 @llvm.cttz.i8(i8 %ld23, i1 -1)
+ %cttz24 = call i8 @llvm.cttz.i8(i8 %ld24, i1 -1)
+ %cttz25 = call i8 @llvm.cttz.i8(i8 %ld25, i1 -1)
+ %cttz26 = call i8 @llvm.cttz.i8(i8 %ld26, i1 -1)
+ %cttz27 = call i8 @llvm.cttz.i8(i8 %ld27, i1 -1)
+ %cttz28 = call i8 @llvm.cttz.i8(i8 %ld28, i1 -1)
+ %cttz29 = call i8 @llvm.cttz.i8(i8 %ld29, i1 -1)
+ %cttz30 = call i8 @llvm.cttz.i8(i8 %ld30, i1 -1)
+ %cttz31 = call i8 @llvm.cttz.i8(i8 %ld31, i1 -1)
+ store i8 %cttz0 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 0), align 1
+ store i8 %cttz1 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 1), align 1
+ store i8 %cttz2 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 2), align 1
+ store i8 %cttz3 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 3), align 1
+ store i8 %cttz4 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 4), align 1
+ store i8 %cttz5 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 5), align 1
+ store i8 %cttz6 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 6), align 1
+ store i8 %cttz7 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 7), align 1
+ store i8 %cttz8 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 8), align 1
+ store i8 %cttz9 , i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 9), align 1
+ store i8 %cttz10, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 10), align 1
+ store i8 %cttz11, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 11), align 1
+ store i8 %cttz12, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 12), align 1
+ store i8 %cttz13, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 13), align 1
+ store i8 %cttz14, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 14), align 1
+ store i8 %cttz15, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 15), align 1
+ store i8 %cttz16, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 16), align 1
+ store i8 %cttz17, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 17), align 1
+ store i8 %cttz18, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 18), align 1
+ store i8 %cttz19, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 19), align 1
+ store i8 %cttz20, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 20), align 1
+ store i8 %cttz21, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 21), align 1
+ store i8 %cttz22, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 22), align 1
+ store i8 %cttz23, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 23), align 1
+ store i8 %cttz24, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 24), align 1
+ store i8 %cttz25, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 25), align 1
+ store i8 %cttz26, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 26), align 1
+ store i8 %cttz27, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 27), align 1
+ store i8 %cttz28, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 28), align 1
+ store i8 %cttz29, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 29), align 1
+ store i8 %cttz30, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 30), align 1
+ store i8 %cttz31, i8* getelementptr inbounds ([32 x i8], [32 x i8]* @dst8, i8 0, i64 31), align 1
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/Transforms/SLPVectorizer/X86/debug_info.ll b/test/Transforms/SLPVectorizer/X86/debug_info.ll
index 4472225811b1..5f2cdd295c20 100644
--- a/test/Transforms/SLPVectorizer/X86/debug_info.ll
+++ b/test/Transforms/SLPVectorizer/X86/debug_info.ll
@@ -25,8 +25,8 @@ define i32 @depth(double* nocapture %A, i32 %m) #0 !dbg !4 {
entry:
tail call void @llvm.dbg.value(metadata double* %A, i64 0, metadata !12, metadata !DIExpression()), !dbg !19
tail call void @llvm.dbg.value(metadata i32 %m, i64 0, metadata !13, metadata !DIExpression()), !dbg !19
- tail call void @llvm.dbg.value(metadata i32 00, i64 0, metadata !14, metadata !DIExpression()), !dbg !21
- tail call void @llvm.dbg.value(metadata i32 02, i64 0, metadata !15, metadata !DIExpression()), !dbg !21
+ tail call void @llvm.dbg.value(metadata double 0.0, i64 0, metadata !14, metadata !DIExpression()), !dbg !21
+ tail call void @llvm.dbg.value(metadata double 0.2, i64 0, metadata !15, metadata !DIExpression()), !dbg !21
tail call void @llvm.dbg.value(metadata i32 0, i64 0, metadata !16, metadata !DIExpression()), !dbg !23
%cmp8 = icmp sgt i32 %m, 0, !dbg !23
br i1 %cmp8, label %for.body.lr.ph, label %for.end, !dbg !23
@@ -57,11 +57,10 @@ attributes #1 = { nounwind readnone }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!18, !32}
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 187335) (llvm/trunk 187335:187340M)", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 187335) (llvm/trunk 187335:187340M)", isOptimized: true, emissionKind: FullDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "file.c", directory: "/Users/nadav")
!2 = !{}
-!3 = !{!4}
-!4 = distinct !DISubprogram(name: "depth", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !11)
+!4 = distinct !DISubprogram(name: "depth", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !11)
!5 = !DIFile(filename: "file.c", directory: "/Users/nadav")
!6 = !DISubroutineType(types: !7)
!7 = !{!8, !9, !8}
diff --git a/test/Transforms/SLPVectorizer/X86/fma.ll b/test/Transforms/SLPVectorizer/X86/fma.ll
new file mode 100644
index 000000000000..b30c34061611
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/fma.ll
@@ -0,0 +1,562 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=NO-FMA
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=NO-FMA
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver1 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=FMA --check-prefix=FMA256
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=FMA --check-prefix=FMA256
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=FMA --check-prefix=FMA512
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@srcA64 = common global [8 x double] zeroinitializer, align 64
+@srcB64 = common global [8 x double] zeroinitializer, align 64
+@srcC64 = common global [8 x double] zeroinitializer, align 64
+@srcA32 = common global [16 x float] zeroinitializer, align 64
+@srcB32 = common global [16 x float] zeroinitializer, align 64
+@srcC32 = common global [16 x float] zeroinitializer, align 64
+@dst64 = common global [8 x double] zeroinitializer, align 64
+@dst32 = common global [16 x float] zeroinitializer, align 64
+
+declare float @llvm.fma.f32(float, float, float)
+declare double @llvm.fma.f64(double, double, double)
+
+;
+; FMA
+;
+
+define void @fma_2f64() #0 {
+; NO-FMA-LABEL: @fma_2f64(
+; NO-FMA-NEXT: [[A0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 0), align 8
+; NO-FMA-NEXT: [[A1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 1), align 8
+; NO-FMA-NEXT: [[B0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 0), align 8
+; NO-FMA-NEXT: [[B1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 1), align 8
+; NO-FMA-NEXT: [[C0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 0), align 8
+; NO-FMA-NEXT: [[C1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 1), align 8
+; NO-FMA-NEXT: [[FMA0:%.*]] = call double @llvm.fma.f64(double [[A0]], double [[B0]], double [[C0]])
+; NO-FMA-NEXT: [[FMA1:%.*]] = call double @llvm.fma.f64(double [[A1]], double [[B1]], double [[C1]])
+; NO-FMA-NEXT: store double [[FMA0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+; NO-FMA-NEXT: store double [[FMA1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; NO-FMA-NEXT: ret void
+;
+; FMA-LABEL: @fma_2f64(
+; FMA-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @srcA64 to <2 x double>*), align 8
+; FMA-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @srcB64 to <2 x double>*), align 8
+; FMA-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @srcC64 to <2 x double>*), align 8
+; FMA-NEXT: [[TMP4:%.*]] = call <2 x double> @llvm.fma.v2f64(<2 x double> [[TMP1]], <2 x double> [[TMP2]], <2 x double> [[TMP3]])
+; FMA-NEXT: store <2 x double> [[TMP4]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; FMA-NEXT: ret void
+;
+ %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 0), align 8
+ %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 1), align 8
+ %b0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 0), align 8
+ %b1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 1), align 8
+ %c0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 0), align 8
+ %c1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 1), align 8
+ %fma0 = call double @llvm.fma.f64(double %a0, double %b0, double %c0)
+ %fma1 = call double @llvm.fma.f64(double %a1, double %b1, double %c1)
+ store double %fma0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+ store double %fma1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+ ret void
+}
+
+define void @fma_4f64() #0 {
+; NO-FMA-LABEL: @fma_4f64(
+; NO-FMA-NEXT: [[A0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 0), align 8
+; NO-FMA-NEXT: [[A1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 1), align 8
+; NO-FMA-NEXT: [[A2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 2), align 8
+; NO-FMA-NEXT: [[A3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 3), align 8
+; NO-FMA-NEXT: [[B0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 0), align 8
+; NO-FMA-NEXT: [[B1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 1), align 8
+; NO-FMA-NEXT: [[B2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 2), align 8
+; NO-FMA-NEXT: [[B3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 3), align 8
+; NO-FMA-NEXT: [[C0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 0), align 8
+; NO-FMA-NEXT: [[C1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 1), align 8
+; NO-FMA-NEXT: [[C2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 2), align 8
+; NO-FMA-NEXT: [[C3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 3), align 8
+; NO-FMA-NEXT: [[FMA0:%.*]] = call double @llvm.fma.f64(double [[A0]], double [[B0]], double [[C0]])
+; NO-FMA-NEXT: [[FMA1:%.*]] = call double @llvm.fma.f64(double [[A1]], double [[B1]], double [[C1]])
+; NO-FMA-NEXT: [[FMA2:%.*]] = call double @llvm.fma.f64(double [[A2]], double [[B2]], double [[C2]])
+; NO-FMA-NEXT: [[FMA3:%.*]] = call double @llvm.fma.f64(double [[A3]], double [[B3]], double [[C3]])
+; NO-FMA-NEXT: store double [[FMA0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+; NO-FMA-NEXT: store double [[FMA1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; NO-FMA-NEXT: store double [[FMA2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
+; NO-FMA-NEXT: store double [[FMA3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+; NO-FMA-NEXT: ret void
+;
+; FMA-LABEL: @fma_4f64(
+; FMA-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @srcA64 to <4 x double>*), align 8
+; FMA-NEXT: [[TMP2:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @srcB64 to <4 x double>*), align 8
+; FMA-NEXT: [[TMP3:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @srcC64 to <4 x double>*), align 8
+; FMA-NEXT: [[TMP4:%.*]] = call <4 x double> @llvm.fma.v4f64(<4 x double> [[TMP1]], <4 x double> [[TMP2]], <4 x double> [[TMP3]])
+; FMA-NEXT: store <4 x double> [[TMP4]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
+; FMA-NEXT: ret void
+;
+ %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 0), align 8
+ %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 1), align 8
+ %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 2), align 8
+ %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 3), align 8
+ %b0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 0), align 8
+ %b1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 1), align 8
+ %b2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 2), align 8
+ %b3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 3), align 8
+ %c0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 0), align 8
+ %c1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 1), align 8
+ %c2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 2), align 8
+ %c3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 3), align 8
+ %fma0 = call double @llvm.fma.f64(double %a0, double %b0, double %c0)
+ %fma1 = call double @llvm.fma.f64(double %a1, double %b1, double %c1)
+ %fma2 = call double @llvm.fma.f64(double %a2, double %b2, double %c2)
+ %fma3 = call double @llvm.fma.f64(double %a3, double %b3, double %c3)
+ store double %fma0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+ store double %fma1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+ store double %fma2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
+ store double %fma3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+ ret void
+}
+
+define void @fma_8f64() #0 {
+; NO-FMA-LABEL: @fma_8f64(
+; NO-FMA-NEXT: [[A0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 0), align 4
+; NO-FMA-NEXT: [[A1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 1), align 4
+; NO-FMA-NEXT: [[A2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 2), align 4
+; NO-FMA-NEXT: [[A3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 3), align 4
+; NO-FMA-NEXT: [[A4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 4), align 4
+; NO-FMA-NEXT: [[A5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 5), align 4
+; NO-FMA-NEXT: [[A6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 6), align 4
+; NO-FMA-NEXT: [[A7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 7), align 4
+; NO-FMA-NEXT: [[B0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 0), align 4
+; NO-FMA-NEXT: [[B1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 1), align 4
+; NO-FMA-NEXT: [[B2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 2), align 4
+; NO-FMA-NEXT: [[B3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 3), align 4
+; NO-FMA-NEXT: [[B4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 4), align 4
+; NO-FMA-NEXT: [[B5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 5), align 4
+; NO-FMA-NEXT: [[B6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 6), align 4
+; NO-FMA-NEXT: [[B7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 7), align 4
+; NO-FMA-NEXT: [[C0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 0), align 4
+; NO-FMA-NEXT: [[C1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 1), align 4
+; NO-FMA-NEXT: [[C2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 2), align 4
+; NO-FMA-NEXT: [[C3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 3), align 4
+; NO-FMA-NEXT: [[C4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 4), align 4
+; NO-FMA-NEXT: [[C5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 5), align 4
+; NO-FMA-NEXT: [[C6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 6), align 4
+; NO-FMA-NEXT: [[C7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 7), align 4
+; NO-FMA-NEXT: [[FMA0:%.*]] = call double @llvm.fma.f64(double [[A0]], double [[B0]], double [[C0]])
+; NO-FMA-NEXT: [[FMA1:%.*]] = call double @llvm.fma.f64(double [[A1]], double [[B1]], double [[C1]])
+; NO-FMA-NEXT: [[FMA2:%.*]] = call double @llvm.fma.f64(double [[A2]], double [[B2]], double [[C2]])
+; NO-FMA-NEXT: [[FMA3:%.*]] = call double @llvm.fma.f64(double [[A3]], double [[B3]], double [[C3]])
+; NO-FMA-NEXT: [[FMA4:%.*]] = call double @llvm.fma.f64(double [[A4]], double [[B4]], double [[C4]])
+; NO-FMA-NEXT: [[FMA5:%.*]] = call double @llvm.fma.f64(double [[A5]], double [[B5]], double [[C5]])
+; NO-FMA-NEXT: [[FMA6:%.*]] = call double @llvm.fma.f64(double [[A6]], double [[B6]], double [[C6]])
+; NO-FMA-NEXT: [[FMA7:%.*]] = call double @llvm.fma.f64(double [[A7]], double [[B7]], double [[C7]])
+; NO-FMA-NEXT: store double [[FMA0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 4
+; NO-FMA-NEXT: store double [[FMA1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 4
+; NO-FMA-NEXT: store double [[FMA2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 4
+; NO-FMA-NEXT: store double [[FMA3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 4
+; NO-FMA-NEXT: store double [[FMA4]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 4
+; NO-FMA-NEXT: store double [[FMA5]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 4
+; NO-FMA-NEXT: store double [[FMA6]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 4
+; NO-FMA-NEXT: store double [[FMA7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 4
+; NO-FMA-NEXT: ret void
+;
+; FMA256-LABEL: @fma_8f64(
+; FMA256-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @srcA64 to <4 x double>*), align 4
+; FMA256-NEXT: [[TMP2:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 4) to <4 x double>*), align 4
+; FMA256-NEXT: [[TMP3:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @srcB64 to <4 x double>*), align 4
+; FMA256-NEXT: [[TMP4:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 4) to <4 x double>*), align 4
+; FMA256-NEXT: [[TMP5:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @srcC64 to <4 x double>*), align 4
+; FMA256-NEXT: [[TMP6:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 4) to <4 x double>*), align 4
+; FMA256-NEXT: [[TMP7:%.*]] = call <4 x double> @llvm.fma.v4f64(<4 x double> [[TMP1]], <4 x double> [[TMP3]], <4 x double> [[TMP5]])
+; FMA256-NEXT: [[TMP8:%.*]] = call <4 x double> @llvm.fma.v4f64(<4 x double> [[TMP2]], <4 x double> [[TMP4]], <4 x double> [[TMP6]])
+; FMA256-NEXT: store <4 x double> [[TMP7]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 4
+; FMA256-NEXT: store <4 x double> [[TMP8]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 4
+; FMA256-NEXT: ret void
+;
+; FMA512-LABEL: @fma_8f64(
+; FMA512-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @srcA64 to <8 x double>*), align 4
+; FMA512-NEXT: [[TMP2:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @srcB64 to <8 x double>*), align 4
+; FMA512-NEXT: [[TMP3:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @srcC64 to <8 x double>*), align 4
+; FMA512-NEXT: [[TMP4:%.*]] = call <8 x double> @llvm.fma.v8f64(<8 x double> [[TMP1]], <8 x double> [[TMP2]], <8 x double> [[TMP3]])
+; FMA512-NEXT: store <8 x double> [[TMP4]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 4
+; FMA512-NEXT: ret void
+;
+ %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 0), align 4
+ %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 1), align 4
+ %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 2), align 4
+ %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 3), align 4
+ %a4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 4), align 4
+ %a5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 5), align 4
+ %a6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 6), align 4
+ %a7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcA64, i32 0, i64 7), align 4
+ %b0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 0), align 4
+ %b1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 1), align 4
+ %b2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 2), align 4
+ %b3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 3), align 4
+ %b4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 4), align 4
+ %b5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 5), align 4
+ %b6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 6), align 4
+ %b7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcB64, i32 0, i64 7), align 4
+ %c0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 0), align 4
+ %c1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 1), align 4
+ %c2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 2), align 4
+ %c3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 3), align 4
+ %c4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 4), align 4
+ %c5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 5), align 4
+ %c6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 6), align 4
+ %c7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @srcC64, i32 0, i64 7), align 4
+ %fma0 = call double @llvm.fma.f64(double %a0, double %b0, double %c0)
+ %fma1 = call double @llvm.fma.f64(double %a1, double %b1, double %c1)
+ %fma2 = call double @llvm.fma.f64(double %a2, double %b2, double %c2)
+ %fma3 = call double @llvm.fma.f64(double %a3, double %b3, double %c3)
+ %fma4 = call double @llvm.fma.f64(double %a4, double %b4, double %c4)
+ %fma5 = call double @llvm.fma.f64(double %a5, double %b5, double %c5)
+ %fma6 = call double @llvm.fma.f64(double %a6, double %b6, double %c6)
+ %fma7 = call double @llvm.fma.f64(double %a7, double %b7, double %c7)
+ store double %fma0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 4
+ store double %fma1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 4
+ store double %fma2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 4
+ store double %fma3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 4
+ store double %fma4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 4
+ store double %fma5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 4
+ store double %fma6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 4
+ store double %fma7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 4
+ ret void
+}
+
+define void @fma_4f32() #0 {
+; NO-FMA-LABEL: @fma_4f32(
+; NO-FMA-NEXT: [[A0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 0), align 4
+; NO-FMA-NEXT: [[A1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 1), align 4
+; NO-FMA-NEXT: [[A2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 2), align 4
+; NO-FMA-NEXT: [[A3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 3), align 4
+; NO-FMA-NEXT: [[B0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 0), align 4
+; NO-FMA-NEXT: [[B1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 1), align 4
+; NO-FMA-NEXT: [[B2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 2), align 4
+; NO-FMA-NEXT: [[B3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 3), align 4
+; NO-FMA-NEXT: [[C0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 0), align 4
+; NO-FMA-NEXT: [[C1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 1), align 4
+; NO-FMA-NEXT: [[C2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 2), align 4
+; NO-FMA-NEXT: [[C3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 3), align 4
+; NO-FMA-NEXT: [[FMA0:%.*]] = call float @llvm.fma.f32(float [[A0]], float [[B0]], float [[C0]])
+; NO-FMA-NEXT: [[FMA1:%.*]] = call float @llvm.fma.f32(float [[A1]], float [[B1]], float [[C1]])
+; NO-FMA-NEXT: [[FMA2:%.*]] = call float @llvm.fma.f32(float [[A2]], float [[B2]], float [[C2]])
+; NO-FMA-NEXT: [[FMA3:%.*]] = call float @llvm.fma.f32(float [[A3]], float [[B3]], float [[C3]])
+; NO-FMA-NEXT: store float [[FMA0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
+; NO-FMA-NEXT: store float [[FMA1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+; NO-FMA-NEXT: store float [[FMA2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
+; NO-FMA-NEXT: store float [[FMA3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+; NO-FMA-NEXT: ret void
+;
+; FMA-LABEL: @fma_4f32(
+; FMA-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @srcA32 to <4 x float>*), align 4
+; FMA-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @srcB32 to <4 x float>*), align 4
+; FMA-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @srcC32 to <4 x float>*), align 4
+; FMA-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.fma.v4f32(<4 x float> [[TMP1]], <4 x float> [[TMP2]], <4 x float> [[TMP3]])
+; FMA-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; FMA-NEXT: ret void
+;
+ %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 0), align 4
+ %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 1), align 4
+ %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 2), align 4
+ %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 3), align 4
+ %b0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 0), align 4
+ %b1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 1), align 4
+ %b2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 2), align 4
+ %b3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 3), align 4
+ %c0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 0), align 4
+ %c1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 1), align 4
+ %c2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 2), align 4
+ %c3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 3), align 4
+ %fma0 = call float @llvm.fma.f32(float %a0, float %b0, float %c0)
+ %fma1 = call float @llvm.fma.f32(float %a1, float %b1, float %c1)
+ %fma2 = call float @llvm.fma.f32(float %a2, float %b2, float %c2)
+ %fma3 = call float @llvm.fma.f32(float %a3, float %b3, float %c3)
+ store float %fma0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
+ store float %fma1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+ store float %fma2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
+ store float %fma3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+ ret void
+}
+
+define void @fma_8f32() #0 {
+; NO-FMA-LABEL: @fma_8f32(
+; NO-FMA-NEXT: [[A0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 0), align 4
+; NO-FMA-NEXT: [[A1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 1), align 4
+; NO-FMA-NEXT: [[A2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 2), align 4
+; NO-FMA-NEXT: [[A3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 3), align 4
+; NO-FMA-NEXT: [[A4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 4), align 4
+; NO-FMA-NEXT: [[A5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 5), align 4
+; NO-FMA-NEXT: [[A6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 6), align 4
+; NO-FMA-NEXT: [[A7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 7), align 4
+; NO-FMA-NEXT: [[B0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 0), align 4
+; NO-FMA-NEXT: [[B1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 1), align 4
+; NO-FMA-NEXT: [[B2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 2), align 4
+; NO-FMA-NEXT: [[B3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 3), align 4
+; NO-FMA-NEXT: [[B4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 4), align 4
+; NO-FMA-NEXT: [[B5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 5), align 4
+; NO-FMA-NEXT: [[B6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 6), align 4
+; NO-FMA-NEXT: [[B7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 7), align 4
+; NO-FMA-NEXT: [[C0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 0), align 4
+; NO-FMA-NEXT: [[C1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 1), align 4
+; NO-FMA-NEXT: [[C2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 2), align 4
+; NO-FMA-NEXT: [[C3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 3), align 4
+; NO-FMA-NEXT: [[C4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 4), align 4
+; NO-FMA-NEXT: [[C5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 5), align 4
+; NO-FMA-NEXT: [[C6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 6), align 4
+; NO-FMA-NEXT: [[C7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 7), align 4
+; NO-FMA-NEXT: [[FMA0:%.*]] = call float @llvm.fma.f32(float [[A0]], float [[B0]], float [[C0]])
+; NO-FMA-NEXT: [[FMA1:%.*]] = call float @llvm.fma.f32(float [[A1]], float [[B1]], float [[C1]])
+; NO-FMA-NEXT: [[FMA2:%.*]] = call float @llvm.fma.f32(float [[A2]], float [[B2]], float [[C2]])
+; NO-FMA-NEXT: [[FMA3:%.*]] = call float @llvm.fma.f32(float [[A3]], float [[B3]], float [[C3]])
+; NO-FMA-NEXT: [[FMA4:%.*]] = call float @llvm.fma.f32(float [[A4]], float [[B4]], float [[C4]])
+; NO-FMA-NEXT: [[FMA5:%.*]] = call float @llvm.fma.f32(float [[A5]], float [[B5]], float [[C5]])
+; NO-FMA-NEXT: [[FMA6:%.*]] = call float @llvm.fma.f32(float [[A6]], float [[B6]], float [[C6]])
+; NO-FMA-NEXT: [[FMA7:%.*]] = call float @llvm.fma.f32(float [[A7]], float [[B7]], float [[C7]])
+; NO-FMA-NEXT: store float [[FMA0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
+; NO-FMA-NEXT: store float [[FMA1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+; NO-FMA-NEXT: store float [[FMA2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
+; NO-FMA-NEXT: store float [[FMA3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+; NO-FMA-NEXT: store float [[FMA4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
+; NO-FMA-NEXT: store float [[FMA5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
+; NO-FMA-NEXT: store float [[FMA6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
+; NO-FMA-NEXT: store float [[FMA7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+; NO-FMA-NEXT: ret void
+;
+; FMA-LABEL: @fma_8f32(
+; FMA-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @srcA32 to <8 x float>*), align 4
+; FMA-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @srcB32 to <8 x float>*), align 4
+; FMA-NEXT: [[TMP3:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @srcC32 to <8 x float>*), align 4
+; FMA-NEXT: [[TMP4:%.*]] = call <8 x float> @llvm.fma.v8f32(<8 x float> [[TMP1]], <8 x float> [[TMP2]], <8 x float> [[TMP3]])
+; FMA-NEXT: store <8 x float> [[TMP4]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
+; FMA-NEXT: ret void
+;
+ %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 0), align 4
+ %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 1), align 4
+ %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 2), align 4
+ %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 3), align 4
+ %a4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 4), align 4
+ %a5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 5), align 4
+ %a6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 6), align 4
+ %a7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 7), align 4
+ %b0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 0), align 4
+ %b1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 1), align 4
+ %b2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 2), align 4
+ %b3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 3), align 4
+ %b4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 4), align 4
+ %b5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 5), align 4
+ %b6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 6), align 4
+ %b7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 7), align 4
+ %c0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 0), align 4
+ %c1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 1), align 4
+ %c2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 2), align 4
+ %c3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 3), align 4
+ %c4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 4), align 4
+ %c5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 5), align 4
+ %c6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 6), align 4
+ %c7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 7), align 4
+ %fma0 = call float @llvm.fma.f32(float %a0, float %b0, float %c0)
+ %fma1 = call float @llvm.fma.f32(float %a1, float %b1, float %c1)
+ %fma2 = call float @llvm.fma.f32(float %a2, float %b2, float %c2)
+ %fma3 = call float @llvm.fma.f32(float %a3, float %b3, float %c3)
+ %fma4 = call float @llvm.fma.f32(float %a4, float %b4, float %c4)
+ %fma5 = call float @llvm.fma.f32(float %a5, float %b5, float %c5)
+ %fma6 = call float @llvm.fma.f32(float %a6, float %b6, float %c6)
+ %fma7 = call float @llvm.fma.f32(float %a7, float %b7, float %c7)
+ store float %fma0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
+ store float %fma1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+ store float %fma2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
+ store float %fma3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+ store float %fma4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
+ store float %fma5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
+ store float %fma6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
+ store float %fma7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+ ret void
+}
+
+define void @fma_16f32() #0 { ; 16 scalar llvm.fma.f32 over @srcA32/@srcB32/@srcC32 into @dst32; SLP expected: 2x <8 x float> (FMA256) or 1x <16 x float> (FMA512), scalar calls kept under NO-FMA
+; NO-FMA-LABEL: @fma_16f32(
+; NO-FMA-NEXT: [[A0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 0), align 4
+; NO-FMA-NEXT: [[A1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 1), align 4
+; NO-FMA-NEXT: [[A2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 2), align 4
+; NO-FMA-NEXT: [[A3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 3), align 4
+; NO-FMA-NEXT: [[A4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 4), align 4
+; NO-FMA-NEXT: [[A5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 5), align 4
+; NO-FMA-NEXT: [[A6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 6), align 4
+; NO-FMA-NEXT: [[A7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 7), align 4
+; NO-FMA-NEXT: [[A8:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 8), align 4
+; NO-FMA-NEXT: [[A9:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 9), align 4
+; NO-FMA-NEXT: [[A10:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 10), align 4
+; NO-FMA-NEXT: [[A11:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 11), align 4
+; NO-FMA-NEXT: [[A12:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 12), align 4
+; NO-FMA-NEXT: [[A13:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 13), align 4
+; NO-FMA-NEXT: [[A14:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 14), align 4
+; NO-FMA-NEXT: [[A15:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 15), align 4
+; NO-FMA-NEXT: [[B0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 0), align 4
+; NO-FMA-NEXT: [[B1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 1), align 4
+; NO-FMA-NEXT: [[B2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 2), align 4
+; NO-FMA-NEXT: [[B3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 3), align 4
+; NO-FMA-NEXT: [[B4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 4), align 4
+; NO-FMA-NEXT: [[B5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 5), align 4
+; NO-FMA-NEXT: [[B6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 6), align 4
+; NO-FMA-NEXT: [[B7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 7), align 4
+; NO-FMA-NEXT: [[B8:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 8), align 4
+; NO-FMA-NEXT: [[B9:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 9), align 4
+; NO-FMA-NEXT: [[B10:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 10), align 4
+; NO-FMA-NEXT: [[B11:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 11), align 4
+; NO-FMA-NEXT: [[B12:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 12), align 4
+; NO-FMA-NEXT: [[B13:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 13), align 4
+; NO-FMA-NEXT: [[B14:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 14), align 4
+; NO-FMA-NEXT: [[B15:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 15), align 4
+; NO-FMA-NEXT: [[C0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 0), align 4
+; NO-FMA-NEXT: [[C1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 1), align 4
+; NO-FMA-NEXT: [[C2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 2), align 4
+; NO-FMA-NEXT: [[C3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 3), align 4
+; NO-FMA-NEXT: [[C4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 4), align 4
+; NO-FMA-NEXT: [[C5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 5), align 4
+; NO-FMA-NEXT: [[C6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 6), align 4
+; NO-FMA-NEXT: [[C7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 7), align 4
+; NO-FMA-NEXT: [[C8:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 8), align 4
+; NO-FMA-NEXT: [[C9:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 9), align 4
+; NO-FMA-NEXT: [[C10:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 10), align 4
+; NO-FMA-NEXT: [[C11:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 11), align 4
+; NO-FMA-NEXT: [[C12:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 12), align 4
+; NO-FMA-NEXT: [[C13:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 13), align 4
+; NO-FMA-NEXT: [[C14:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 14), align 4
+; NO-FMA-NEXT: [[C15:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 15), align 4
+; NO-FMA-NEXT: [[FMA0:%.*]] = call float @llvm.fma.f32(float [[A0]], float [[B0]], float [[C0]])
+; NO-FMA-NEXT: [[FMA1:%.*]] = call float @llvm.fma.f32(float [[A1]], float [[B1]], float [[C1]])
+; NO-FMA-NEXT: [[FMA2:%.*]] = call float @llvm.fma.f32(float [[A2]], float [[B2]], float [[C2]])
+; NO-FMA-NEXT: [[FMA3:%.*]] = call float @llvm.fma.f32(float [[A3]], float [[B3]], float [[C3]])
+; NO-FMA-NEXT: [[FMA4:%.*]] = call float @llvm.fma.f32(float [[A4]], float [[B4]], float [[C4]])
+; NO-FMA-NEXT: [[FMA5:%.*]] = call float @llvm.fma.f32(float [[A5]], float [[B5]], float [[C5]])
+; NO-FMA-NEXT: [[FMA6:%.*]] = call float @llvm.fma.f32(float [[A6]], float [[B6]], float [[C6]])
+; NO-FMA-NEXT: [[FMA7:%.*]] = call float @llvm.fma.f32(float [[A7]], float [[B7]], float [[C7]])
+; NO-FMA-NEXT: [[FMA8:%.*]] = call float @llvm.fma.f32(float [[A8]], float [[B8]], float [[C8]])
+; NO-FMA-NEXT: [[FMA9:%.*]] = call float @llvm.fma.f32(float [[A9]], float [[B9]], float [[C9]])
+; NO-FMA-NEXT: [[FMA10:%.*]] = call float @llvm.fma.f32(float [[A10]], float [[B10]], float [[C10]])
+; NO-FMA-NEXT: [[FMA11:%.*]] = call float @llvm.fma.f32(float [[A11]], float [[B11]], float [[C11]])
+; NO-FMA-NEXT: [[FMA12:%.*]] = call float @llvm.fma.f32(float [[A12]], float [[B12]], float [[C12]])
+; NO-FMA-NEXT: [[FMA13:%.*]] = call float @llvm.fma.f32(float [[A13]], float [[B13]], float [[C13]])
+; NO-FMA-NEXT: [[FMA14:%.*]] = call float @llvm.fma.f32(float [[A14]], float [[B14]], float [[C14]])
+; NO-FMA-NEXT: [[FMA15:%.*]] = call float @llvm.fma.f32(float [[A15]], float [[B15]], float [[C15]])
+; NO-FMA-NEXT: store float [[FMA0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
+; NO-FMA-NEXT: store float [[FMA1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+; NO-FMA-NEXT: store float [[FMA2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
+; NO-FMA-NEXT: store float [[FMA3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+; NO-FMA-NEXT: store float [[FMA4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
+; NO-FMA-NEXT: store float [[FMA5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
+; NO-FMA-NEXT: store float [[FMA6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
+; NO-FMA-NEXT: store float [[FMA7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+; NO-FMA-NEXT: store float [[FMA8]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8), align 4
+; NO-FMA-NEXT: store float [[FMA9]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9), align 4
+; NO-FMA-NEXT: store float [[FMA10]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 4
+; NO-FMA-NEXT: store float [[FMA11]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
+; NO-FMA-NEXT: store float [[FMA12]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 4
+; NO-FMA-NEXT: store float [[FMA13]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
+; NO-FMA-NEXT: store float [[FMA14]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 4
+; NO-FMA-NEXT: store float [[FMA15]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+; NO-FMA-NEXT: ret void
+;
+; FMA256-LABEL: @fma_16f32(
+; FMA256-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @srcA32 to <8 x float>*), align 4
+; FMA256-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 8) to <8 x float>*), align 4
+; FMA256-NEXT: [[TMP3:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @srcB32 to <8 x float>*), align 4
+; FMA256-NEXT: [[TMP4:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 8) to <8 x float>*), align 4
+; FMA256-NEXT: [[TMP5:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @srcC32 to <8 x float>*), align 4
+; FMA256-NEXT: [[TMP6:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 8) to <8 x float>*), align 4
+; FMA256-NEXT: [[TMP7:%.*]] = call <8 x float> @llvm.fma.v8f32(<8 x float> [[TMP1]], <8 x float> [[TMP3]], <8 x float> [[TMP5]])
+; FMA256-NEXT: [[TMP8:%.*]] = call <8 x float> @llvm.fma.v8f32(<8 x float> [[TMP2]], <8 x float> [[TMP4]], <8 x float> [[TMP6]])
+; FMA256-NEXT: store <8 x float> [[TMP7]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
+; FMA256-NEXT: store <8 x float> [[TMP8]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 4
+; FMA256-NEXT: ret void
+;
+; FMA512-LABEL: @fma_16f32(
+; FMA512-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* bitcast ([16 x float]* @srcA32 to <16 x float>*), align 4
+; FMA512-NEXT: [[TMP2:%.*]] = load <16 x float>, <16 x float>* bitcast ([16 x float]* @srcB32 to <16 x float>*), align 4
+; FMA512-NEXT: [[TMP3:%.*]] = load <16 x float>, <16 x float>* bitcast ([16 x float]* @srcC32 to <16 x float>*), align 4
+; FMA512-NEXT: [[TMP4:%.*]] = call <16 x float> @llvm.fma.v16f32(<16 x float> [[TMP1]], <16 x float> [[TMP2]], <16 x float> [[TMP3]])
+; FMA512-NEXT: store <16 x float> [[TMP4]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 4
+; FMA512-NEXT: ret void
+;
+ %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 0), align 4 ; 16 consecutive scalar loads from each of @srcA32/@srcB32/@srcC32
+ %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 1), align 4
+ %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 2), align 4
+ %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 3), align 4
+ %a4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 4), align 4
+ %a5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 5), align 4
+ %a6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 6), align 4
+ %a7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 7), align 4
+ %a8 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 8), align 4
+ %a9 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 9), align 4
+ %a10 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 10), align 4
+ %a11 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 11), align 4
+ %a12 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 12), align 4
+ %a13 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 13), align 4
+ %a14 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 14), align 4
+ %a15 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcA32, i32 0, i64 15), align 4
+ %b0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 0), align 4
+ %b1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 1), align 4
+ %b2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 2), align 4
+ %b3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 3), align 4
+ %b4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 4), align 4
+ %b5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 5), align 4
+ %b6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 6), align 4
+ %b7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 7), align 4
+ %b8 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 8), align 4
+ %b9 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 9), align 4
+ %b10 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 10), align 4
+ %b11 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 11), align 4
+ %b12 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 12), align 4
+ %b13 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 13), align 4
+ %b14 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 14), align 4
+ %b15 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcB32, i32 0, i64 15), align 4
+ %c0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 0), align 4
+ %c1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 1), align 4
+ %c2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 2), align 4
+ %c3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 3), align 4
+ %c4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 4), align 4
+ %c5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 5), align 4
+ %c6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 6), align 4
+ %c7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 7), align 4
+ %c8 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 8), align 4
+ %c9 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 9), align 4
+ %c10 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 10), align 4
+ %c11 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 11), align 4
+ %c12 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 12), align 4
+ %c13 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 13), align 4
+ %c14 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 14), align 4
+ %c15 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @srcC32, i32 0, i64 15), align 4
+ %fma0 = call float @llvm.fma.f32(float %a0 , float %b0 , float %c0 ) ; element-wise dst[i] = fma(a[i], b[i], c[i]) for i = 0..15
+ %fma1 = call float @llvm.fma.f32(float %a1 , float %b1 , float %c1 )
+ %fma2 = call float @llvm.fma.f32(float %a2 , float %b2 , float %c2 )
+ %fma3 = call float @llvm.fma.f32(float %a3 , float %b3 , float %c3 )
+ %fma4 = call float @llvm.fma.f32(float %a4 , float %b4 , float %c4 )
+ %fma5 = call float @llvm.fma.f32(float %a5 , float %b5 , float %c5 )
+ %fma6 = call float @llvm.fma.f32(float %a6 , float %b6 , float %c6 )
+ %fma7 = call float @llvm.fma.f32(float %a7 , float %b7 , float %c7 )
+ %fma8 = call float @llvm.fma.f32(float %a8 , float %b8 , float %c8 )
+ %fma9 = call float @llvm.fma.f32(float %a9 , float %b9 , float %c9 )
+ %fma10 = call float @llvm.fma.f32(float %a10, float %b10, float %c10)
+ %fma11 = call float @llvm.fma.f32(float %a11, float %b11, float %c11)
+ %fma12 = call float @llvm.fma.f32(float %a12, float %b12, float %c12)
+ %fma13 = call float @llvm.fma.f32(float %a13, float %b13, float %c13)
+ %fma14 = call float @llvm.fma.f32(float %a14, float %b14, float %c14)
+ %fma15 = call float @llvm.fma.f32(float %a15, float %b15, float %c15)
+ store float %fma0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4 ; 16 consecutive scalar stores into @dst32
+ store float %fma1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+ store float %fma2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
+ store float %fma3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+ store float %fma4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
+ store float %fma5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
+ store float %fma6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
+ store float %fma7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+ store float %fma8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8), align 4
+ store float %fma9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9), align 4
+ store float %fma10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 4
+ store float %fma11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
+ store float %fma12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 4
+ store float %fma13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
+ store float %fma14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 4
+ store float %fma15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/Transforms/SLPVectorizer/X86/fround.ll b/test/Transforms/SLPVectorizer/X86/fround.ll
new file mode 100644
index 000000000000..c4e6e682b99e
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/fround.ll
@@ -0,0 +1,2158 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE2
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE --check-prefix=SSE41
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX1
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX2
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@src64 = common global [8 x double] zeroinitializer, align 64
+@dst64 = common global [8 x double] zeroinitializer, align 64
+@src32 = common global [16 x float] zeroinitializer, align 64
+@dst32 = common global [16 x float] zeroinitializer, align 64
+
+declare double @llvm.ceil.f64(double %p)
+declare double @llvm.floor.f64(double %p)
+declare double @llvm.nearbyint.f64(double %p)
+declare double @llvm.rint.f64(double %p)
+declare double @llvm.trunc.f64(double %p)
+
+declare float @llvm.ceil.f32(float %p)
+declare float @llvm.floor.f32(float %p)
+declare float @llvm.nearbyint.f32(float %p)
+declare float @llvm.rint.f32(float %p)
+declare float @llvm.trunc.f32(float %p)
+
+define void @ceil_2f64() #0 {
+; SSE2-LABEL: @ceil_2f64(
+; SSE2-NEXT: [[LD0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
+; SSE2-NEXT: [[LD1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+; SSE2-NEXT: [[CEIL0:%.*]] = call double @llvm.ceil.f64(double [[LD0]])
+; SSE2-NEXT: [[CEIL1:%.*]] = call double @llvm.ceil.f64(double [[LD1]])
+; SSE2-NEXT: store double [[CEIL0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+; SSE2-NEXT: store double [[CEIL1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; SSE2-NEXT: ret void
+;
+; SSE41-LABEL: @ceil_2f64(
+; SSE41-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; SSE41-NEXT: [[TMP2:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[TMP1]])
+; SSE41-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; SSE41-NEXT: ret void
+;
+; AVX-LABEL: @ceil_2f64(
+; AVX-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; AVX-NEXT: [[TMP2:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[TMP1]])
+; AVX-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; AVX-NEXT: ret void
+;
+ %ld0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
+ %ld1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+ %ceil0 = call double @llvm.ceil.f64(double %ld0)
+ %ceil1 = call double @llvm.ceil.f64(double %ld1)
+ store double %ceil0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+ store double %ceil1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+ ret void
+}
+
+define void @ceil_4f64() #0 {
+; SSE2-LABEL: @ceil_4f64(
+; SSE2-NEXT: [[LD0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
+; SSE2-NEXT: [[LD1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+; SSE2-NEXT: [[LD2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
+; SSE2-NEXT: [[LD3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
+; SSE2-NEXT: [[CEIL0:%.*]] = call double @llvm.ceil.f64(double [[LD0]])
+; SSE2-NEXT: [[CEIL1:%.*]] = call double @llvm.ceil.f64(double [[LD1]])
+; SSE2-NEXT: [[CEIL2:%.*]] = call double @llvm.ceil.f64(double [[LD2]])
+; SSE2-NEXT: [[CEIL3:%.*]] = call double @llvm.ceil.f64(double [[LD3]])
+; SSE2-NEXT: store double [[CEIL0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+; SSE2-NEXT: store double [[CEIL1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; SSE2-NEXT: store double [[CEIL2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
+; SSE2-NEXT: store double [[CEIL3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+; SSE2-NEXT: ret void
+;
+; SSE41-LABEL: @ceil_4f64(
+; SSE41-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; SSE41-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[TMP1]])
+; SSE41-NEXT: [[TMP4:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[TMP2]])
+; SSE41-NEXT: store <2 x double> [[TMP3]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; SSE41-NEXT: store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT: ret void
+;
+; AVX-LABEL: @ceil_4f64(
+; AVX-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX-NEXT: [[TMP2:%.*]] = call <4 x double> @llvm.ceil.v4f64(<4 x double> [[TMP1]])
+; AVX-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
+; AVX-NEXT: ret void
+;
+ %ld0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
+ %ld1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+ %ld2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
+ %ld3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
+ %ceil0 = call double @llvm.ceil.f64(double %ld0)
+ %ceil1 = call double @llvm.ceil.f64(double %ld1)
+ %ceil2 = call double @llvm.ceil.f64(double %ld2)
+ %ceil3 = call double @llvm.ceil.f64(double %ld3)
+ store double %ceil0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+ store double %ceil1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+ store double %ceil2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
+ store double %ceil3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+ ret void
+}
+
+define void @ceil_8f64() #0 {
+; SSE2-LABEL: @ceil_8f64(
+; SSE2-NEXT: [[LD0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
+; SSE2-NEXT: [[LD1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+; SSE2-NEXT: [[LD2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
+; SSE2-NEXT: [[LD3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
+; SSE2-NEXT: [[LD4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
+; SSE2-NEXT: [[LD5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
+; SSE2-NEXT: [[LD6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
+; SSE2-NEXT: [[LD7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+; SSE2-NEXT: [[CEIL0:%.*]] = call double @llvm.ceil.f64(double [[LD0]])
+; SSE2-NEXT: [[CEIL1:%.*]] = call double @llvm.ceil.f64(double [[LD1]])
+; SSE2-NEXT: [[CEIL2:%.*]] = call double @llvm.ceil.f64(double [[LD2]])
+; SSE2-NEXT: [[CEIL3:%.*]] = call double @llvm.ceil.f64(double [[LD3]])
+; SSE2-NEXT: [[CEIL4:%.*]] = call double @llvm.ceil.f64(double [[LD4]])
+; SSE2-NEXT: [[CEIL5:%.*]] = call double @llvm.ceil.f64(double [[LD5]])
+; SSE2-NEXT: [[CEIL6:%.*]] = call double @llvm.ceil.f64(double [[LD6]])
+; SSE2-NEXT: [[CEIL7:%.*]] = call double @llvm.ceil.f64(double [[LD7]])
+; SSE2-NEXT: store double [[CEIL0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+; SSE2-NEXT: store double [[CEIL1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; SSE2-NEXT: store double [[CEIL2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
+; SSE2-NEXT: store double [[CEIL3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+; SSE2-NEXT: store double [[CEIL4]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 8
+; SSE2-NEXT: store double [[CEIL5]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
+; SSE2-NEXT: store double [[CEIL6]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 8
+; SSE2-NEXT: store double [[CEIL7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+; SSE2-NEXT: ret void
+;
+; SSE41-LABEL: @ceil_8f64(
+; SSE41-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; SSE41-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <2 x double>*), align 8
+; SSE41-NEXT: [[TMP4:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6) to <2 x double>*), align 8
+; SSE41-NEXT: [[TMP5:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[TMP1]])
+; SSE41-NEXT: [[TMP6:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[TMP2]])
+; SSE41-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[TMP3]])
+; SSE41-NEXT: [[TMP8:%.*]] = call <2 x double> @llvm.ceil.v2f64(<2 x double> [[TMP4]])
+; SSE41-NEXT: store <2 x double> [[TMP5]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; SSE41-NEXT: store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT: store <2 x double> [[TMP7]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 8
+; SSE41-NEXT: store <2 x double> [[TMP8]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 8
+; SSE41-NEXT: ret void
+;
+; AVX1-LABEL: @ceil_8f64(
+; AVX1-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX1-NEXT: [[TMP2:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX1-NEXT: [[TMP3:%.*]] = call <4 x double> @llvm.ceil.v4f64(<4 x double> [[TMP1]])
+; AVX1-NEXT: [[TMP4:%.*]] = call <4 x double> @llvm.ceil.v4f64(<4 x double> [[TMP2]])
+; AVX1-NEXT: store <4 x double> [[TMP3]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
+; AVX1-NEXT: store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX1-NEXT: ret void
+;
+; AVX2-LABEL: @ceil_8f64(
+; AVX2-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX2-NEXT: [[TMP2:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX2-NEXT: [[TMP3:%.*]] = call <4 x double> @llvm.ceil.v4f64(<4 x double> [[TMP1]])
+; AVX2-NEXT: [[TMP4:%.*]] = call <4 x double> @llvm.ceil.v4f64(<4 x double> [[TMP2]])
+; AVX2-NEXT: store <4 x double> [[TMP3]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
+; AVX2-NEXT: store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX2-NEXT: ret void
+;
+; AVX512-LABEL: @ceil_8f64(
+; AVX512-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
+; AVX512-NEXT: [[TMP2:%.*]] = call <8 x double> @llvm.ceil.v8f64(<8 x double> [[TMP1]])
+; AVX512-NEXT: store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 8
+; AVX512-NEXT: ret void
+;
+ %ld0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
+ %ld1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+ %ld2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
+ %ld3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
+ %ld4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
+ %ld5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
+ %ld6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
+ %ld7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+ %ceil0 = call double @llvm.ceil.f64(double %ld0)
+ %ceil1 = call double @llvm.ceil.f64(double %ld1)
+ %ceil2 = call double @llvm.ceil.f64(double %ld2)
+ %ceil3 = call double @llvm.ceil.f64(double %ld3)
+ %ceil4 = call double @llvm.ceil.f64(double %ld4)
+ %ceil5 = call double @llvm.ceil.f64(double %ld5)
+ %ceil6 = call double @llvm.ceil.f64(double %ld6)
+ %ceil7 = call double @llvm.ceil.f64(double %ld7)
+ store double %ceil0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+ store double %ceil1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+ store double %ceil2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
+ store double %ceil3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+ store double %ceil4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 8
+ store double %ceil5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
+ store double %ceil6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 8
+ store double %ceil7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+ ret void
+}
+
+define void @floor_2f64() #0 {
+; SSE2-LABEL: @floor_2f64(
+; SSE2-NEXT: [[LD0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
+; SSE2-NEXT: [[LD1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+; SSE2-NEXT: [[FLOOR0:%.*]] = call double @llvm.floor.f64(double [[LD0]])
+; SSE2-NEXT: [[FLOOR1:%.*]] = call double @llvm.floor.f64(double [[LD1]])
+; SSE2-NEXT: store double [[FLOOR0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+; SSE2-NEXT: store double [[FLOOR1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; SSE2-NEXT: ret void
+;
+; SSE41-LABEL: @floor_2f64(
+; SSE41-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; SSE41-NEXT: [[TMP2:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP1]])
+; SSE41-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; SSE41-NEXT: ret void
+;
+; AVX-LABEL: @floor_2f64(
+; AVX-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; AVX-NEXT: [[TMP2:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP1]])
+; AVX-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; AVX-NEXT: ret void
+;
+ %ld0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
+ %ld1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+ %floor0 = call double @llvm.floor.f64(double %ld0)
+ %floor1 = call double @llvm.floor.f64(double %ld1)
+ store double %floor0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+ store double %floor1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+ ret void
+}
+
+define void @floor_4f64() #0 {
+; SSE2-LABEL: @floor_4f64(
+; SSE2-NEXT: [[LD0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
+; SSE2-NEXT: [[LD1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+; SSE2-NEXT: [[LD2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
+; SSE2-NEXT: [[LD3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
+; SSE2-NEXT: [[FLOOR0:%.*]] = call double @llvm.floor.f64(double [[LD0]])
+; SSE2-NEXT: [[FLOOR1:%.*]] = call double @llvm.floor.f64(double [[LD1]])
+; SSE2-NEXT: [[FLOOR2:%.*]] = call double @llvm.floor.f64(double [[LD2]])
+; SSE2-NEXT: [[FLOOR3:%.*]] = call double @llvm.floor.f64(double [[LD3]])
+; SSE2-NEXT: store double [[FLOOR0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+; SSE2-NEXT: store double [[FLOOR1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; SSE2-NEXT: store double [[FLOOR2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
+; SSE2-NEXT: store double [[FLOOR3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+; SSE2-NEXT: ret void
+;
+; SSE41-LABEL: @floor_4f64(
+; SSE41-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; SSE41-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP1]])
+; SSE41-NEXT: [[TMP4:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP2]])
+; SSE41-NEXT: store <2 x double> [[TMP3]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; SSE41-NEXT: store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT: ret void
+;
+; AVX-LABEL: @floor_4f64(
+; AVX-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX-NEXT: [[TMP2:%.*]] = call <4 x double> @llvm.floor.v4f64(<4 x double> [[TMP1]])
+; AVX-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
+; AVX-NEXT: ret void
+;
+ %ld0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
+ %ld1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+ %ld2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
+ %ld3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
+ %floor0 = call double @llvm.floor.f64(double %ld0)
+ %floor1 = call double @llvm.floor.f64(double %ld1)
+ %floor2 = call double @llvm.floor.f64(double %ld2)
+ %floor3 = call double @llvm.floor.f64(double %ld3)
+ store double %floor0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+ store double %floor1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+ store double %floor2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
+ store double %floor3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+ ret void
+}
+
+define void @floor_8f64() #0 {
+; SSE2-LABEL: @floor_8f64(
+; SSE2-NEXT: [[LD0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
+; SSE2-NEXT: [[LD1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+; SSE2-NEXT: [[LD2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
+; SSE2-NEXT: [[LD3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
+; SSE2-NEXT: [[LD4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
+; SSE2-NEXT: [[LD5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
+; SSE2-NEXT: [[LD6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
+; SSE2-NEXT: [[LD7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+; SSE2-NEXT: [[FLOOR0:%.*]] = call double @llvm.floor.f64(double [[LD0]])
+; SSE2-NEXT: [[FLOOR1:%.*]] = call double @llvm.floor.f64(double [[LD1]])
+; SSE2-NEXT: [[FLOOR2:%.*]] = call double @llvm.floor.f64(double [[LD2]])
+; SSE2-NEXT: [[FLOOR3:%.*]] = call double @llvm.floor.f64(double [[LD3]])
+; SSE2-NEXT: [[FLOOR4:%.*]] = call double @llvm.floor.f64(double [[LD4]])
+; SSE2-NEXT: [[FLOOR5:%.*]] = call double @llvm.floor.f64(double [[LD5]])
+; SSE2-NEXT: [[FLOOR6:%.*]] = call double @llvm.floor.f64(double [[LD6]])
+; SSE2-NEXT: [[FLOOR7:%.*]] = call double @llvm.floor.f64(double [[LD7]])
+; SSE2-NEXT: store double [[FLOOR0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+; SSE2-NEXT: store double [[FLOOR1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; SSE2-NEXT: store double [[FLOOR2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
+; SSE2-NEXT: store double [[FLOOR3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+; SSE2-NEXT: store double [[FLOOR4]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 8
+; SSE2-NEXT: store double [[FLOOR5]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
+; SSE2-NEXT: store double [[FLOOR6]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 8
+; SSE2-NEXT: store double [[FLOOR7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+; SSE2-NEXT: ret void
+;
+; SSE41-LABEL: @floor_8f64(
+; SSE41-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; SSE41-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <2 x double>*), align 8
+; SSE41-NEXT: [[TMP4:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6) to <2 x double>*), align 8
+; SSE41-NEXT: [[TMP5:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP1]])
+; SSE41-NEXT: [[TMP6:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP2]])
+; SSE41-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP3]])
+; SSE41-NEXT: [[TMP8:%.*]] = call <2 x double> @llvm.floor.v2f64(<2 x double> [[TMP4]])
+; SSE41-NEXT: store <2 x double> [[TMP5]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; SSE41-NEXT: store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT: store <2 x double> [[TMP7]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 8
+; SSE41-NEXT: store <2 x double> [[TMP8]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 8
+; SSE41-NEXT: ret void
+;
+; AVX1-LABEL: @floor_8f64(
+; AVX1-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX1-NEXT: [[TMP2:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX1-NEXT: [[TMP3:%.*]] = call <4 x double> @llvm.floor.v4f64(<4 x double> [[TMP1]])
+; AVX1-NEXT: [[TMP4:%.*]] = call <4 x double> @llvm.floor.v4f64(<4 x double> [[TMP2]])
+; AVX1-NEXT: store <4 x double> [[TMP3]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
+; AVX1-NEXT: store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX1-NEXT: ret void
+;
+; AVX2-LABEL: @floor_8f64(
+; AVX2-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX2-NEXT: [[TMP2:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX2-NEXT: [[TMP3:%.*]] = call <4 x double> @llvm.floor.v4f64(<4 x double> [[TMP1]])
+; AVX2-NEXT: [[TMP4:%.*]] = call <4 x double> @llvm.floor.v4f64(<4 x double> [[TMP2]])
+; AVX2-NEXT: store <4 x double> [[TMP3]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
+; AVX2-NEXT: store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX2-NEXT: ret void
+;
+; AVX512-LABEL: @floor_8f64(
+; AVX512-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
+; AVX512-NEXT: [[TMP2:%.*]] = call <8 x double> @llvm.floor.v8f64(<8 x double> [[TMP1]])
+; AVX512-NEXT: store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 8
+; AVX512-NEXT: ret void
+;
+ %ld0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
+ %ld1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+ %ld2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
+ %ld3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
+ %ld4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
+ %ld5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
+ %ld6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
+ %ld7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+ %floor0 = call double @llvm.floor.f64(double %ld0)
+ %floor1 = call double @llvm.floor.f64(double %ld1)
+ %floor2 = call double @llvm.floor.f64(double %ld2)
+ %floor3 = call double @llvm.floor.f64(double %ld3)
+ %floor4 = call double @llvm.floor.f64(double %ld4)
+ %floor5 = call double @llvm.floor.f64(double %ld5)
+ %floor6 = call double @llvm.floor.f64(double %ld6)
+ %floor7 = call double @llvm.floor.f64(double %ld7)
+ store double %floor0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+ store double %floor1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+ store double %floor2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
+ store double %floor3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+ store double %floor4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 8
+ store double %floor5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
+ store double %floor6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 8
+ store double %floor7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+ ret void
+}
+
+; Checks SLP vectorization of two adjacent scalar @llvm.nearbyint.f64 calls
+; (loads from @src64, stores to @dst64): SSE2 keeps them scalar, while SSE41
+; and AVX expect a single <2 x double> @llvm.nearbyint.v2f64.
+; NOTE(review): CHECK lines appear autogenerated (update_test_checks.py
+; style) - regenerate rather than hand-edit if expectations change.
+define void @nearbyint_2f64() #0 {
+; SSE2-LABEL: @nearbyint_2f64(
+; SSE2-NEXT: [[LD0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
+; SSE2-NEXT: [[LD1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+; SSE2-NEXT: [[NEARBYINT0:%.*]] = call double @llvm.nearbyint.f64(double [[LD0]])
+; SSE2-NEXT: [[NEARBYINT1:%.*]] = call double @llvm.nearbyint.f64(double [[LD1]])
+; SSE2-NEXT: store double [[NEARBYINT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+; SSE2-NEXT: store double [[NEARBYINT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; SSE2-NEXT: ret void
+;
+; SSE41-LABEL: @nearbyint_2f64(
+; SSE41-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; SSE41-NEXT: [[TMP2:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[TMP1]])
+; SSE41-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; SSE41-NEXT: ret void
+;
+; AVX-LABEL: @nearbyint_2f64(
+; AVX-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; AVX-NEXT: [[TMP2:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[TMP1]])
+; AVX-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; AVX-NEXT: ret void
+;
+; Scalar input IR fed to the vectorizer.
+ %ld0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
+ %ld1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+ %nearbyint0 = call double @llvm.nearbyint.f64(double %ld0)
+ %nearbyint1 = call double @llvm.nearbyint.f64(double %ld1)
+ store double %nearbyint0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+ store double %nearbyint1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+ ret void
+}
+
+; Four-lane nearbyint: SSE2 stays scalar, SSE41 splits into two v2f64 ops
+; (second half addressed at element 2), AVX expects one v4f64 op.
+define void @nearbyint_4f64() #0 {
+; SSE2-LABEL: @nearbyint_4f64(
+; SSE2-NEXT: [[LD0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
+; SSE2-NEXT: [[LD1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+; SSE2-NEXT: [[LD2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
+; SSE2-NEXT: [[LD3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
+; SSE2-NEXT: [[NEARBYINT0:%.*]] = call double @llvm.nearbyint.f64(double [[LD0]])
+; SSE2-NEXT: [[NEARBYINT1:%.*]] = call double @llvm.nearbyint.f64(double [[LD1]])
+; SSE2-NEXT: [[NEARBYINT2:%.*]] = call double @llvm.nearbyint.f64(double [[LD2]])
+; SSE2-NEXT: [[NEARBYINT3:%.*]] = call double @llvm.nearbyint.f64(double [[LD3]])
+; SSE2-NEXT: store double [[NEARBYINT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+; SSE2-NEXT: store double [[NEARBYINT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; SSE2-NEXT: store double [[NEARBYINT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
+; SSE2-NEXT: store double [[NEARBYINT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+; SSE2-NEXT: ret void
+;
+; SSE41-LABEL: @nearbyint_4f64(
+; SSE41-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; SSE41-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[TMP1]])
+; SSE41-NEXT: [[TMP4:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[TMP2]])
+; SSE41-NEXT: store <2 x double> [[TMP3]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; SSE41-NEXT: store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT: ret void
+;
+; AVX-LABEL: @nearbyint_4f64(
+; AVX-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX-NEXT: [[TMP2:%.*]] = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> [[TMP1]])
+; AVX-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
+; AVX-NEXT: ret void
+;
+; Scalar input IR fed to the vectorizer.
+ %ld0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
+ %ld1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+ %ld2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
+ %ld3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
+ %nearbyint0 = call double @llvm.nearbyint.f64(double %ld0)
+ %nearbyint1 = call double @llvm.nearbyint.f64(double %ld1)
+ %nearbyint2 = call double @llvm.nearbyint.f64(double %ld2)
+ %nearbyint3 = call double @llvm.nearbyint.f64(double %ld3)
+ store double %nearbyint0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+ store double %nearbyint1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+ store double %nearbyint2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
+ store double %nearbyint3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+ ret void
+}
+
+; Eight-lane nearbyint across the whole @src64 array: SSE2 scalar, SSE41 four
+; v2f64 chunks, AVX1/AVX2 two v4f64 chunks, AVX512 a single v8f64 op.
+define void @nearbyint_8f64() #0 {
+; SSE2-LABEL: @nearbyint_8f64(
+; SSE2-NEXT: [[LD0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
+; SSE2-NEXT: [[LD1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+; SSE2-NEXT: [[LD2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
+; SSE2-NEXT: [[LD3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
+; SSE2-NEXT: [[LD4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
+; SSE2-NEXT: [[LD5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
+; SSE2-NEXT: [[LD6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
+; SSE2-NEXT: [[LD7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+; SSE2-NEXT: [[NEARBYINT0:%.*]] = call double @llvm.nearbyint.f64(double [[LD0]])
+; SSE2-NEXT: [[NEARBYINT1:%.*]] = call double @llvm.nearbyint.f64(double [[LD1]])
+; SSE2-NEXT: [[NEARBYINT2:%.*]] = call double @llvm.nearbyint.f64(double [[LD2]])
+; SSE2-NEXT: [[NEARBYINT3:%.*]] = call double @llvm.nearbyint.f64(double [[LD3]])
+; SSE2-NEXT: [[NEARBYINT4:%.*]] = call double @llvm.nearbyint.f64(double [[LD4]])
+; SSE2-NEXT: [[NEARBYINT5:%.*]] = call double @llvm.nearbyint.f64(double [[LD5]])
+; SSE2-NEXT: [[NEARBYINT6:%.*]] = call double @llvm.nearbyint.f64(double [[LD6]])
+; SSE2-NEXT: [[NEARBYINT7:%.*]] = call double @llvm.nearbyint.f64(double [[LD7]])
+; SSE2-NEXT: store double [[NEARBYINT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+; SSE2-NEXT: store double [[NEARBYINT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; SSE2-NEXT: store double [[NEARBYINT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
+; SSE2-NEXT: store double [[NEARBYINT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+; SSE2-NEXT: store double [[NEARBYINT4]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 8
+; SSE2-NEXT: store double [[NEARBYINT5]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
+; SSE2-NEXT: store double [[NEARBYINT6]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 8
+; SSE2-NEXT: store double [[NEARBYINT7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+; SSE2-NEXT: ret void
+;
+; SSE41-LABEL: @nearbyint_8f64(
+; SSE41-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; SSE41-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <2 x double>*), align 8
+; SSE41-NEXT: [[TMP4:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6) to <2 x double>*), align 8
+; SSE41-NEXT: [[TMP5:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[TMP1]])
+; SSE41-NEXT: [[TMP6:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[TMP2]])
+; SSE41-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[TMP3]])
+; SSE41-NEXT: [[TMP8:%.*]] = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> [[TMP4]])
+; SSE41-NEXT: store <2 x double> [[TMP5]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; SSE41-NEXT: store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT: store <2 x double> [[TMP7]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 8
+; SSE41-NEXT: store <2 x double> [[TMP8]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 8
+; SSE41-NEXT: ret void
+;
+; AVX1-LABEL: @nearbyint_8f64(
+; AVX1-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX1-NEXT: [[TMP2:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX1-NEXT: [[TMP3:%.*]] = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> [[TMP1]])
+; AVX1-NEXT: [[TMP4:%.*]] = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> [[TMP2]])
+; AVX1-NEXT: store <4 x double> [[TMP3]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
+; AVX1-NEXT: store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX1-NEXT: ret void
+;
+; AVX2-LABEL: @nearbyint_8f64(
+; AVX2-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX2-NEXT: [[TMP2:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX2-NEXT: [[TMP3:%.*]] = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> [[TMP1]])
+; AVX2-NEXT: [[TMP4:%.*]] = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> [[TMP2]])
+; AVX2-NEXT: store <4 x double> [[TMP3]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
+; AVX2-NEXT: store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX2-NEXT: ret void
+;
+; AVX512-LABEL: @nearbyint_8f64(
+; AVX512-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
+; AVX512-NEXT: [[TMP2:%.*]] = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> [[TMP1]])
+; AVX512-NEXT: store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 8
+; AVX512-NEXT: ret void
+;
+; Scalar input IR fed to the vectorizer.
+ %ld0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
+ %ld1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+ %ld2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
+ %ld3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
+ %ld4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
+ %ld5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
+ %ld6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
+ %ld7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+ %nearbyint0 = call double @llvm.nearbyint.f64(double %ld0)
+ %nearbyint1 = call double @llvm.nearbyint.f64(double %ld1)
+ %nearbyint2 = call double @llvm.nearbyint.f64(double %ld2)
+ %nearbyint3 = call double @llvm.nearbyint.f64(double %ld3)
+ %nearbyint4 = call double @llvm.nearbyint.f64(double %ld4)
+ %nearbyint5 = call double @llvm.nearbyint.f64(double %ld5)
+ %nearbyint6 = call double @llvm.nearbyint.f64(double %ld6)
+ %nearbyint7 = call double @llvm.nearbyint.f64(double %ld7)
+ store double %nearbyint0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+ store double %nearbyint1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+ store double %nearbyint2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
+ store double %nearbyint3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+ store double %nearbyint4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 8
+ store double %nearbyint5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
+ store double %nearbyint6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 8
+ store double %nearbyint7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+ ret void
+}
+
+; Same pattern as nearbyint_2f64 but for @llvm.rint.f64: SSE2 scalar,
+; SSE41/AVX a single @llvm.rint.v2f64.
+define void @rint_2f64() #0 {
+; SSE2-LABEL: @rint_2f64(
+; SSE2-NEXT: [[LD0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
+; SSE2-NEXT: [[LD1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+; SSE2-NEXT: [[RINT0:%.*]] = call double @llvm.rint.f64(double [[LD0]])
+; SSE2-NEXT: [[RINT1:%.*]] = call double @llvm.rint.f64(double [[LD1]])
+; SSE2-NEXT: store double [[RINT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+; SSE2-NEXT: store double [[RINT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; SSE2-NEXT: ret void
+;
+; SSE41-LABEL: @rint_2f64(
+; SSE41-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; SSE41-NEXT: [[TMP2:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[TMP1]])
+; SSE41-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; SSE41-NEXT: ret void
+;
+; AVX-LABEL: @rint_2f64(
+; AVX-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; AVX-NEXT: [[TMP2:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[TMP1]])
+; AVX-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; AVX-NEXT: ret void
+;
+; Scalar input IR fed to the vectorizer.
+ %ld0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
+ %ld1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+ %rint0 = call double @llvm.rint.f64(double %ld0)
+ %rint1 = call double @llvm.rint.f64(double %ld1)
+ store double %rint0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+ store double %rint1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+ ret void
+}
+
+; Four-lane rint: SSE2 scalar, SSE41 two v2f64 halves, AVX one v4f64.
+define void @rint_4f64() #0 {
+; SSE2-LABEL: @rint_4f64(
+; SSE2-NEXT: [[LD0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
+; SSE2-NEXT: [[LD1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+; SSE2-NEXT: [[LD2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
+; SSE2-NEXT: [[LD3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
+; SSE2-NEXT: [[RINT0:%.*]] = call double @llvm.rint.f64(double [[LD0]])
+; SSE2-NEXT: [[RINT1:%.*]] = call double @llvm.rint.f64(double [[LD1]])
+; SSE2-NEXT: [[RINT2:%.*]] = call double @llvm.rint.f64(double [[LD2]])
+; SSE2-NEXT: [[RINT3:%.*]] = call double @llvm.rint.f64(double [[LD3]])
+; SSE2-NEXT: store double [[RINT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+; SSE2-NEXT: store double [[RINT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; SSE2-NEXT: store double [[RINT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
+; SSE2-NEXT: store double [[RINT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+; SSE2-NEXT: ret void
+;
+; SSE41-LABEL: @rint_4f64(
+; SSE41-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; SSE41-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[TMP1]])
+; SSE41-NEXT: [[TMP4:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[TMP2]])
+; SSE41-NEXT: store <2 x double> [[TMP3]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; SSE41-NEXT: store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT: ret void
+;
+; AVX-LABEL: @rint_4f64(
+; AVX-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX-NEXT: [[TMP2:%.*]] = call <4 x double> @llvm.rint.v4f64(<4 x double> [[TMP1]])
+; AVX-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
+; AVX-NEXT: ret void
+;
+; Scalar input IR fed to the vectorizer.
+ %ld0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
+ %ld1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+ %ld2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
+ %ld3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
+ %rint0 = call double @llvm.rint.f64(double %ld0)
+ %rint1 = call double @llvm.rint.f64(double %ld1)
+ %rint2 = call double @llvm.rint.f64(double %ld2)
+ %rint3 = call double @llvm.rint.f64(double %ld3)
+ store double %rint0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+ store double %rint1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+ store double %rint2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
+ store double %rint3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+ ret void
+}
+
+; Eight-lane rint: SSE2 scalar, SSE41 four v2f64 chunks, AVX1/AVX2 two v4f64
+; chunks, AVX512 a single v8f64 op.
+define void @rint_8f64() #0 {
+; SSE2-LABEL: @rint_8f64(
+; SSE2-NEXT: [[LD0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
+; SSE2-NEXT: [[LD1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+; SSE2-NEXT: [[LD2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
+; SSE2-NEXT: [[LD3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
+; SSE2-NEXT: [[LD4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
+; SSE2-NEXT: [[LD5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
+; SSE2-NEXT: [[LD6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
+; SSE2-NEXT: [[LD7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+; SSE2-NEXT: [[RINT0:%.*]] = call double @llvm.rint.f64(double [[LD0]])
+; SSE2-NEXT: [[RINT1:%.*]] = call double @llvm.rint.f64(double [[LD1]])
+; SSE2-NEXT: [[RINT2:%.*]] = call double @llvm.rint.f64(double [[LD2]])
+; SSE2-NEXT: [[RINT3:%.*]] = call double @llvm.rint.f64(double [[LD3]])
+; SSE2-NEXT: [[RINT4:%.*]] = call double @llvm.rint.f64(double [[LD4]])
+; SSE2-NEXT: [[RINT5:%.*]] = call double @llvm.rint.f64(double [[LD5]])
+; SSE2-NEXT: [[RINT6:%.*]] = call double @llvm.rint.f64(double [[LD6]])
+; SSE2-NEXT: [[RINT7:%.*]] = call double @llvm.rint.f64(double [[LD7]])
+; SSE2-NEXT: store double [[RINT0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+; SSE2-NEXT: store double [[RINT1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; SSE2-NEXT: store double [[RINT2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
+; SSE2-NEXT: store double [[RINT3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+; SSE2-NEXT: store double [[RINT4]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 8
+; SSE2-NEXT: store double [[RINT5]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
+; SSE2-NEXT: store double [[RINT6]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 8
+; SSE2-NEXT: store double [[RINT7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+; SSE2-NEXT: ret void
+;
+; SSE41-LABEL: @rint_8f64(
+; SSE41-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; SSE41-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <2 x double>*), align 8
+; SSE41-NEXT: [[TMP4:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6) to <2 x double>*), align 8
+; SSE41-NEXT: [[TMP5:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[TMP1]])
+; SSE41-NEXT: [[TMP6:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[TMP2]])
+; SSE41-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[TMP3]])
+; SSE41-NEXT: [[TMP8:%.*]] = call <2 x double> @llvm.rint.v2f64(<2 x double> [[TMP4]])
+; SSE41-NEXT: store <2 x double> [[TMP5]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; SSE41-NEXT: store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT: store <2 x double> [[TMP7]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 8
+; SSE41-NEXT: store <2 x double> [[TMP8]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 8
+; SSE41-NEXT: ret void
+;
+; AVX1-LABEL: @rint_8f64(
+; AVX1-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX1-NEXT: [[TMP2:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX1-NEXT: [[TMP3:%.*]] = call <4 x double> @llvm.rint.v4f64(<4 x double> [[TMP1]])
+; AVX1-NEXT: [[TMP4:%.*]] = call <4 x double> @llvm.rint.v4f64(<4 x double> [[TMP2]])
+; AVX1-NEXT: store <4 x double> [[TMP3]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
+; AVX1-NEXT: store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX1-NEXT: ret void
+;
+; AVX2-LABEL: @rint_8f64(
+; AVX2-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX2-NEXT: [[TMP2:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX2-NEXT: [[TMP3:%.*]] = call <4 x double> @llvm.rint.v4f64(<4 x double> [[TMP1]])
+; AVX2-NEXT: [[TMP4:%.*]] = call <4 x double> @llvm.rint.v4f64(<4 x double> [[TMP2]])
+; AVX2-NEXT: store <4 x double> [[TMP3]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
+; AVX2-NEXT: store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX2-NEXT: ret void
+;
+; AVX512-LABEL: @rint_8f64(
+; AVX512-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
+; AVX512-NEXT: [[TMP2:%.*]] = call <8 x double> @llvm.rint.v8f64(<8 x double> [[TMP1]])
+; AVX512-NEXT: store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 8
+; AVX512-NEXT: ret void
+;
+; Scalar input IR fed to the vectorizer.
+ %ld0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
+ %ld1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+ %ld2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
+ %ld3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
+ %ld4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
+ %ld5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
+ %ld6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
+ %ld7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+ %rint0 = call double @llvm.rint.f64(double %ld0)
+ %rint1 = call double @llvm.rint.f64(double %ld1)
+ %rint2 = call double @llvm.rint.f64(double %ld2)
+ %rint3 = call double @llvm.rint.f64(double %ld3)
+ %rint4 = call double @llvm.rint.f64(double %ld4)
+ %rint5 = call double @llvm.rint.f64(double %ld5)
+ %rint6 = call double @llvm.rint.f64(double %ld6)
+ %rint7 = call double @llvm.rint.f64(double %ld7)
+ store double %rint0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+ store double %rint1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+ store double %rint2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
+ store double %rint3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+ store double %rint4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 8
+ store double %rint5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
+ store double %rint6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 8
+ store double %rint7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+ ret void
+}
+
+; Same pattern as nearbyint_2f64/rint_2f64 but for @llvm.trunc.f64: SSE2
+; scalar, SSE41/AVX a single @llvm.trunc.v2f64.
+define void @trunc_2f64() #0 {
+; SSE2-LABEL: @trunc_2f64(
+; SSE2-NEXT: [[LD0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
+; SSE2-NEXT: [[LD1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+; SSE2-NEXT: [[TRUNC0:%.*]] = call double @llvm.trunc.f64(double [[LD0]])
+; SSE2-NEXT: [[TRUNC1:%.*]] = call double @llvm.trunc.f64(double [[LD1]])
+; SSE2-NEXT: store double [[TRUNC0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+; SSE2-NEXT: store double [[TRUNC1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; SSE2-NEXT: ret void
+;
+; SSE41-LABEL: @trunc_2f64(
+; SSE41-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; SSE41-NEXT: [[TMP2:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[TMP1]])
+; SSE41-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; SSE41-NEXT: ret void
+;
+; AVX-LABEL: @trunc_2f64(
+; AVX-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; AVX-NEXT: [[TMP2:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[TMP1]])
+; AVX-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; AVX-NEXT: ret void
+;
+; Scalar input IR fed to the vectorizer.
+ %ld0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
+ %ld1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+ %trunc0 = call double @llvm.trunc.f64(double %ld0)
+ %trunc1 = call double @llvm.trunc.f64(double %ld1)
+ store double %trunc0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+ store double %trunc1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+ ret void
+}
+
+; Verify SLP vectorization of four scalar @llvm.trunc.f64 calls
+; (load/trunc/store of @src64[0..3] into @dst64[0..3]), per target features:
+;   SSE2  - stays scalar (four llvm.trunc.f64 calls remain).
+;   SSE41 - two <2 x double> llvm.trunc.v2f64 calls.
+;   AVX   - one <4 x double> llvm.trunc.v4f64 call.
+; NOTE(review): prefixes presumably map to -mattr RUN lines at the top of the
+; file (outside this hunk) - confirm against the RUN header.
+define void @trunc_4f64() #0 {
+; SSE2-LABEL: @trunc_4f64(
+; SSE2-NEXT:    [[LD0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
+; SSE2-NEXT:    [[LD1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+; SSE2-NEXT:    [[LD2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
+; SSE2-NEXT:    [[LD3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
+; SSE2-NEXT:    [[TRUNC0:%.*]] = call double @llvm.trunc.f64(double [[LD0]])
+; SSE2-NEXT:    [[TRUNC1:%.*]] = call double @llvm.trunc.f64(double [[LD1]])
+; SSE2-NEXT:    [[TRUNC2:%.*]] = call double @llvm.trunc.f64(double [[LD2]])
+; SSE2-NEXT:    [[TRUNC3:%.*]] = call double @llvm.trunc.f64(double [[LD3]])
+; SSE2-NEXT:    store double [[TRUNC0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+; SSE2-NEXT:    store double [[TRUNC1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; SSE2-NEXT:    store double [[TRUNC2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
+; SSE2-NEXT:    store double [[TRUNC3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+; SSE2-NEXT:    ret void
+;
+; SSE41-LABEL: @trunc_4f64(
+; SSE41-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; SSE41-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT:    [[TMP3:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[TMP1]])
+; SSE41-NEXT:    [[TMP4:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[TMP2]])
+; SSE41-NEXT:    store <2 x double> [[TMP3]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT:    ret void
+;
+; AVX-LABEL: @trunc_4f64(
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX-NEXT:    [[TMP2:%.*]] = call <4 x double> @llvm.trunc.v4f64(<4 x double> [[TMP1]])
+; AVX-NEXT:    store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
+; AVX-NEXT:    ret void
+;
+; Input IR: 4 independent scalar load/trunc/store chains (autogenerated CHECK
+; lines above; regenerate with update_test_checks.py rather than hand-editing).
+  %ld0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
+  %ld1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+  %ld2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
+  %ld3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
+  %trunc0 = call double @llvm.trunc.f64(double %ld0)
+  %trunc1 = call double @llvm.trunc.f64(double %ld1)
+  %trunc2 = call double @llvm.trunc.f64(double %ld2)
+  %trunc3 = call double @llvm.trunc.f64(double %ld3)
+  store double %trunc0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+  store double %trunc1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+  store double %trunc2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
+  store double %trunc3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+  ret void
+}
+
+; Verify SLP vectorization of eight scalar @llvm.trunc.f64 calls
+; (@src64[0..7] -> @dst64[0..7]), per target features:
+;   SSE2   - stays scalar (eight llvm.trunc.f64 calls remain).
+;   SSE41  - four <2 x double> llvm.trunc.v2f64 calls.
+;   AVX1/2 - two <4 x double> llvm.trunc.v4f64 calls.
+;   AVX512 - one <8 x double> llvm.trunc.v8f64 call.
+; NOTE(review): prefixes presumably map to -mattr RUN lines at the top of the
+; file (outside this hunk) - confirm against the RUN header.
+define void @trunc_8f64() #0 {
+; SSE2-LABEL: @trunc_8f64(
+; SSE2-NEXT:    [[LD0:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
+; SSE2-NEXT:    [[LD1:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+; SSE2-NEXT:    [[LD2:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
+; SSE2-NEXT:    [[LD3:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
+; SSE2-NEXT:    [[LD4:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
+; SSE2-NEXT:    [[LD5:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
+; SSE2-NEXT:    [[LD6:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
+; SSE2-NEXT:    [[LD7:%.*]] = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+; SSE2-NEXT:    [[TRUNC0:%.*]] = call double @llvm.trunc.f64(double [[LD0]])
+; SSE2-NEXT:    [[TRUNC1:%.*]] = call double @llvm.trunc.f64(double [[LD1]])
+; SSE2-NEXT:    [[TRUNC2:%.*]] = call double @llvm.trunc.f64(double [[LD2]])
+; SSE2-NEXT:    [[TRUNC3:%.*]] = call double @llvm.trunc.f64(double [[LD3]])
+; SSE2-NEXT:    [[TRUNC4:%.*]] = call double @llvm.trunc.f64(double [[LD4]])
+; SSE2-NEXT:    [[TRUNC5:%.*]] = call double @llvm.trunc.f64(double [[LD5]])
+; SSE2-NEXT:    [[TRUNC6:%.*]] = call double @llvm.trunc.f64(double [[LD6]])
+; SSE2-NEXT:    [[TRUNC7:%.*]] = call double @llvm.trunc.f64(double [[LD7]])
+; SSE2-NEXT:    store double [[TRUNC0]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+; SSE2-NEXT:    store double [[TRUNC1]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+; SSE2-NEXT:    store double [[TRUNC2]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
+; SSE2-NEXT:    store double [[TRUNC3]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+; SSE2-NEXT:    store double [[TRUNC4]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 8
+; SSE2-NEXT:    store double [[TRUNC5]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
+; SSE2-NEXT:    store double [[TRUNC6]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 8
+; SSE2-NEXT:    store double [[TRUNC7]], double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+; SSE2-NEXT:    ret void
+;
+; SSE41-LABEL: @trunc_8f64(
+; SSE41-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; SSE41-NEXT:    [[TMP2:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT:    [[TMP3:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <2 x double>*), align 8
+; SSE41-NEXT:    [[TMP4:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6) to <2 x double>*), align 8
+; SSE41-NEXT:    [[TMP5:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[TMP1]])
+; SSE41-NEXT:    [[TMP6:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[TMP2]])
+; SSE41-NEXT:    [[TMP7:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[TMP3]])
+; SSE41-NEXT:    [[TMP8:%.*]] = call <2 x double> @llvm.trunc.v2f64(<2 x double> [[TMP4]])
+; SSE41-NEXT:    store <2 x double> [[TMP5]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP7]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 8
+; SSE41-NEXT:    store <2 x double> [[TMP8]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 8
+; SSE41-NEXT:    ret void
+;
+; AVX1-LABEL: @trunc_8f64(
+; AVX1-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX1-NEXT:    [[TMP2:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX1-NEXT:    [[TMP3:%.*]] = call <4 x double> @llvm.trunc.v4f64(<4 x double> [[TMP1]])
+; AVX1-NEXT:    [[TMP4:%.*]] = call <4 x double> @llvm.trunc.v4f64(<4 x double> [[TMP2]])
+; AVX1-NEXT:    store <4 x double> [[TMP3]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
+; AVX1-NEXT:    store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX1-NEXT:    ret void
+;
+; AVX2-LABEL: @trunc_8f64(
+; AVX2-NEXT:    [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX2-NEXT:    [[TMP2:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX2-NEXT:    [[TMP3:%.*]] = call <4 x double> @llvm.trunc.v4f64(<4 x double> [[TMP1]])
+; AVX2-NEXT:    [[TMP4:%.*]] = call <4 x double> @llvm.trunc.v4f64(<4 x double> [[TMP2]])
+; AVX2-NEXT:    store <4 x double> [[TMP3]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
+; AVX2-NEXT:    store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 8
+; AVX2-NEXT:    ret void
+;
+; AVX512-LABEL: @trunc_8f64(
+; AVX512-NEXT:    [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 8
+; AVX512-NEXT:    [[TMP2:%.*]] = call <8 x double> @llvm.trunc.v8f64(<8 x double> [[TMP1]])
+; AVX512-NEXT:    store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 8
+; AVX512-NEXT:    ret void
+;
+; Input IR: 8 independent scalar load/trunc/store chains (autogenerated CHECK
+; lines above; regenerate with update_test_checks.py rather than hand-editing).
+  %ld0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
+  %ld1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+  %ld2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
+  %ld3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
+  %ld4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 8
+  %ld5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 8
+  %ld6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 8
+  %ld7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 8
+  %trunc0 = call double @llvm.trunc.f64(double %ld0)
+  %trunc1 = call double @llvm.trunc.f64(double %ld1)
+  %trunc2 = call double @llvm.trunc.f64(double %ld2)
+  %trunc3 = call double @llvm.trunc.f64(double %ld3)
+  %trunc4 = call double @llvm.trunc.f64(double %ld4)
+  %trunc5 = call double @llvm.trunc.f64(double %ld5)
+  %trunc6 = call double @llvm.trunc.f64(double %ld6)
+  %trunc7 = call double @llvm.trunc.f64(double %ld7)
+  store double %trunc0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+  store double %trunc1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+  store double %trunc2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
+  store double %trunc3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+  store double %trunc4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 8
+  store double %trunc5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 8
+  store double %trunc6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 8
+  store double %trunc7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 8
+  ret void
+}
+
+; Verify SLP vectorization of four scalar @llvm.ceil.f32 calls
+; (@src32[0..3] -> @dst32[0..3]), per target features:
+;   SSE2      - stays scalar (four llvm.ceil.f32 calls remain).
+;   SSE41/AVX - one <4 x float> llvm.ceil.v4f32 call (only 4 lanes, so AVX
+;               gets the same 128-bit form as SSE41).
+; NOTE(review): prefixes presumably map to -mattr RUN lines at the top of the
+; file (outside this hunk) - confirm against the RUN header.
+define void @ceil_4f32() #0 {
+; SSE2-LABEL: @ceil_4f32(
+; SSE2-NEXT:    [[LD0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
+; SSE2-NEXT:    [[LD1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
+; SSE2-NEXT:    [[LD2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
+; SSE2-NEXT:    [[LD3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+; SSE2-NEXT:    [[CEIL0:%.*]] = call float @llvm.ceil.f32(float [[LD0]])
+; SSE2-NEXT:    [[CEIL1:%.*]] = call float @llvm.ceil.f32(float [[LD1]])
+; SSE2-NEXT:    [[CEIL2:%.*]] = call float @llvm.ceil.f32(float [[LD2]])
+; SSE2-NEXT:    [[CEIL3:%.*]] = call float @llvm.ceil.f32(float [[LD3]])
+; SSE2-NEXT:    store float [[CEIL0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
+; SSE2-NEXT:    store float [[CEIL1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+; SSE2-NEXT:    store float [[CEIL2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
+; SSE2-NEXT:    store float [[CEIL3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+; SSE2-NEXT:    ret void
+;
+; SSE41-LABEL: @ceil_4f32(
+; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE41-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP1]])
+; SSE41-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; SSE41-NEXT:    ret void
+;
+; AVX-LABEL: @ceil_4f32(
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; AVX-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP1]])
+; AVX-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; AVX-NEXT:    ret void
+;
+; Input IR: 4 independent scalar load/ceil/store chains (autogenerated CHECK
+; lines above; regenerate with update_test_checks.py rather than hand-editing).
+  %ld0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
+  %ld1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
+  %ld2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
+  %ld3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+  %ceil0 = call float @llvm.ceil.f32(float %ld0)
+  %ceil1 = call float @llvm.ceil.f32(float %ld1)
+  %ceil2 = call float @llvm.ceil.f32(float %ld2)
+  %ceil3 = call float @llvm.ceil.f32(float %ld3)
+  store float %ceil0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
+  store float %ceil1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+  store float %ceil2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
+  store float %ceil3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+  ret void
+}
+
+; Verify SLP vectorization of eight scalar @llvm.ceil.f32 calls
+; (@src32[0..7] -> @dst32[0..7]), per target features:
+;   SSE2  - stays scalar (eight llvm.ceil.f32 calls remain).
+;   SSE41 - two <4 x float> llvm.ceil.v4f32 calls.
+;   AVX   - one <8 x float> llvm.ceil.v8f32 call.
+; NOTE(review): prefixes presumably map to -mattr RUN lines at the top of the
+; file (outside this hunk) - confirm against the RUN header.
+define void @ceil_8f32() #0 {
+; SSE2-LABEL: @ceil_8f32(
+; SSE2-NEXT:    [[LD0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
+; SSE2-NEXT:    [[LD1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
+; SSE2-NEXT:    [[LD2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
+; SSE2-NEXT:    [[LD3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+; SSE2-NEXT:    [[LD4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
+; SSE2-NEXT:    [[LD5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
+; SSE2-NEXT:    [[LD6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
+; SSE2-NEXT:    [[LD7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+; SSE2-NEXT:    [[CEIL0:%.*]] = call float @llvm.ceil.f32(float [[LD0]])
+; SSE2-NEXT:    [[CEIL1:%.*]] = call float @llvm.ceil.f32(float [[LD1]])
+; SSE2-NEXT:    [[CEIL2:%.*]] = call float @llvm.ceil.f32(float [[LD2]])
+; SSE2-NEXT:    [[CEIL3:%.*]] = call float @llvm.ceil.f32(float [[LD3]])
+; SSE2-NEXT:    [[CEIL4:%.*]] = call float @llvm.ceil.f32(float [[LD4]])
+; SSE2-NEXT:    [[CEIL5:%.*]] = call float @llvm.ceil.f32(float [[LD5]])
+; SSE2-NEXT:    [[CEIL6:%.*]] = call float @llvm.ceil.f32(float [[LD6]])
+; SSE2-NEXT:    [[CEIL7:%.*]] = call float @llvm.ceil.f32(float [[LD7]])
+; SSE2-NEXT:    store float [[CEIL0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
+; SSE2-NEXT:    store float [[CEIL1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+; SSE2-NEXT:    store float [[CEIL2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
+; SSE2-NEXT:    store float [[CEIL3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+; SSE2-NEXT:    store float [[CEIL4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
+; SSE2-NEXT:    store float [[CEIL5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
+; SSE2-NEXT:    store float [[CEIL6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
+; SSE2-NEXT:    store float [[CEIL7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+; SSE2-NEXT:    ret void
+;
+; SSE41-LABEL: @ceil_8f32(
+; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE41-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT:    [[TMP3:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP1]])
+; SSE41-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP2]])
+; SSE41-NEXT:    store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT:    ret void
+;
+; AVX-LABEL: @ceil_8f32(
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.ceil.v8f32(<8 x float> [[TMP1]])
+; AVX-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
+; AVX-NEXT:    ret void
+;
+; Input IR: 8 independent scalar load/ceil/store chains (autogenerated CHECK
+; lines above; regenerate with update_test_checks.py rather than hand-editing).
+  %ld0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
+  %ld1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
+  %ld2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
+  %ld3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+  %ld4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
+  %ld5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
+  %ld6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
+  %ld7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+  %ceil0 = call float @llvm.ceil.f32(float %ld0)
+  %ceil1 = call float @llvm.ceil.f32(float %ld1)
+  %ceil2 = call float @llvm.ceil.f32(float %ld2)
+  %ceil3 = call float @llvm.ceil.f32(float %ld3)
+  %ceil4 = call float @llvm.ceil.f32(float %ld4)
+  %ceil5 = call float @llvm.ceil.f32(float %ld5)
+  %ceil6 = call float @llvm.ceil.f32(float %ld6)
+  %ceil7 = call float @llvm.ceil.f32(float %ld7)
+  store float %ceil0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
+  store float %ceil1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+  store float %ceil2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
+  store float %ceil3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+  store float %ceil4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
+  store float %ceil5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
+  store float %ceil6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
+  store float %ceil7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+  ret void
+}
+
+; Verify SLP vectorization of sixteen scalar @llvm.ceil.f32 calls
+; (@src32[0..15] -> @dst32[0..15]), per target features:
+;   SSE2   - stays scalar (sixteen llvm.ceil.f32 calls remain).
+;   SSE41  - four <4 x float> llvm.ceil.v4f32 calls.
+;   AVX1/2 - two <8 x float> llvm.ceil.v8f32 calls.
+;   AVX512 - one <16 x float> llvm.ceil.v16f32 call.
+; NOTE(review): prefixes presumably map to -mattr RUN lines at the top of the
+; file (outside this hunk) - confirm against the RUN header. The extra spaces
+; in the scalar body below are deliberate column padding - keep them.
+define void @ceil_16f32() #0 {
+; SSE2-LABEL: @ceil_16f32(
+; SSE2-NEXT:    [[LD0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
+; SSE2-NEXT:    [[LD1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
+; SSE2-NEXT:    [[LD2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
+; SSE2-NEXT:    [[LD3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+; SSE2-NEXT:    [[LD4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
+; SSE2-NEXT:    [[LD5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
+; SSE2-NEXT:    [[LD6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
+; SSE2-NEXT:    [[LD7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+; SSE2-NEXT:    [[LD8:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8), align 4
+; SSE2-NEXT:    [[LD9:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 9), align 4
+; SSE2-NEXT:    [[LD10:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 10), align 4
+; SSE2-NEXT:    [[LD11:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 11), align 4
+; SSE2-NEXT:    [[LD12:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12), align 4
+; SSE2-NEXT:    [[LD13:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 13), align 4
+; SSE2-NEXT:    [[LD14:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 14), align 4
+; SSE2-NEXT:    [[LD15:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 15), align 4
+; SSE2-NEXT:    [[CEIL0:%.*]] = call float @llvm.ceil.f32(float [[LD0]])
+; SSE2-NEXT:    [[CEIL1:%.*]] = call float @llvm.ceil.f32(float [[LD1]])
+; SSE2-NEXT:    [[CEIL2:%.*]] = call float @llvm.ceil.f32(float [[LD2]])
+; SSE2-NEXT:    [[CEIL3:%.*]] = call float @llvm.ceil.f32(float [[LD3]])
+; SSE2-NEXT:    [[CEIL4:%.*]] = call float @llvm.ceil.f32(float [[LD4]])
+; SSE2-NEXT:    [[CEIL5:%.*]] = call float @llvm.ceil.f32(float [[LD5]])
+; SSE2-NEXT:    [[CEIL6:%.*]] = call float @llvm.ceil.f32(float [[LD6]])
+; SSE2-NEXT:    [[CEIL7:%.*]] = call float @llvm.ceil.f32(float [[LD7]])
+; SSE2-NEXT:    [[CEIL8:%.*]] = call float @llvm.ceil.f32(float [[LD8]])
+; SSE2-NEXT:    [[CEIL9:%.*]] = call float @llvm.ceil.f32(float [[LD9]])
+; SSE2-NEXT:    [[CEIL10:%.*]] = call float @llvm.ceil.f32(float [[LD10]])
+; SSE2-NEXT:    [[CEIL11:%.*]] = call float @llvm.ceil.f32(float [[LD11]])
+; SSE2-NEXT:    [[CEIL12:%.*]] = call float @llvm.ceil.f32(float [[LD12]])
+; SSE2-NEXT:    [[CEIL13:%.*]] = call float @llvm.ceil.f32(float [[LD13]])
+; SSE2-NEXT:    [[CEIL14:%.*]] = call float @llvm.ceil.f32(float [[LD14]])
+; SSE2-NEXT:    [[CEIL15:%.*]] = call float @llvm.ceil.f32(float [[LD15]])
+; SSE2-NEXT:    store float [[CEIL0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
+; SSE2-NEXT:    store float [[CEIL1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+; SSE2-NEXT:    store float [[CEIL2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
+; SSE2-NEXT:    store float [[CEIL3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+; SSE2-NEXT:    store float [[CEIL4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
+; SSE2-NEXT:    store float [[CEIL5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
+; SSE2-NEXT:    store float [[CEIL6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
+; SSE2-NEXT:    store float [[CEIL7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+; SSE2-NEXT:    store float [[CEIL8]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8), align 4
+; SSE2-NEXT:    store float [[CEIL9]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9), align 4
+; SSE2-NEXT:    store float [[CEIL10]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 4
+; SSE2-NEXT:    store float [[CEIL11]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
+; SSE2-NEXT:    store float [[CEIL12]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 4
+; SSE2-NEXT:    store float [[CEIL13]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
+; SSE2-NEXT:    store float [[CEIL14]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 4
+; SSE2-NEXT:    store float [[CEIL15]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+; SSE2-NEXT:    ret void
+;
+; SSE41-LABEL: @ceil_16f32(
+; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE41-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <4 x float>*), align 4
+; SSE41-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12) to <4 x float>*), align 4
+; SSE41-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP1]])
+; SSE41-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP2]])
+; SSE41-NEXT:    [[TMP7:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP3]])
+; SSE41-NEXT:    [[TMP8:%.*]] = call <4 x float> @llvm.ceil.v4f32(<4 x float> [[TMP4]])
+; SSE41-NEXT:    store <4 x float> [[TMP5]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP7]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 4
+; SSE41-NEXT:    ret void
+;
+; AVX1-LABEL: @ceil_16f32(
+; AVX1-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX1-NEXT:    [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX1-NEXT:    [[TMP3:%.*]] = call <8 x float> @llvm.ceil.v8f32(<8 x float> [[TMP1]])
+; AVX1-NEXT:    [[TMP4:%.*]] = call <8 x float> @llvm.ceil.v8f32(<8 x float> [[TMP2]])
+; AVX1-NEXT:    store <8 x float> [[TMP3]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
+; AVX1-NEXT:    store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX1-NEXT:    ret void
+;
+; AVX2-LABEL: @ceil_16f32(
+; AVX2-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX2-NEXT:    [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX2-NEXT:    [[TMP3:%.*]] = call <8 x float> @llvm.ceil.v8f32(<8 x float> [[TMP1]])
+; AVX2-NEXT:    [[TMP4:%.*]] = call <8 x float> @llvm.ceil.v8f32(<8 x float> [[TMP2]])
+; AVX2-NEXT:    store <8 x float> [[TMP3]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
+; AVX2-NEXT:    store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX2-NEXT:    ret void
+;
+; AVX512-LABEL: @ceil_16f32(
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x float>, <16 x float>* bitcast ([16 x float]* @src32 to <16 x float>*), align 4
+; AVX512-NEXT:    [[TMP2:%.*]] = call <16 x float> @llvm.ceil.v16f32(<16 x float> [[TMP1]])
+; AVX512-NEXT:    store <16 x float> [[TMP2]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 4
+; AVX512-NEXT:    ret void
+;
+; Input IR: 16 independent scalar load/ceil/store chains (autogenerated CHECK
+; lines above; regenerate with update_test_checks.py rather than hand-editing).
+  %ld0  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0 ), align 4
+  %ld1  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1 ), align 4
+  %ld2  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2 ), align 4
+  %ld3  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3 ), align 4
+  %ld4  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4 ), align 4
+  %ld5  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5 ), align 4
+  %ld6  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6 ), align 4
+  %ld7  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7 ), align 4
+  %ld8  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8 ), align 4
+  %ld9  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 9 ), align 4
+  %ld10 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 10), align 4
+  %ld11 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 11), align 4
+  %ld12 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12), align 4
+  %ld13 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 13), align 4
+  %ld14 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 14), align 4
+  %ld15 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 15), align 4
+  %ceil0  = call float @llvm.ceil.f32(float %ld0 )
+  %ceil1  = call float @llvm.ceil.f32(float %ld1 )
+  %ceil2  = call float @llvm.ceil.f32(float %ld2 )
+  %ceil3  = call float @llvm.ceil.f32(float %ld3 )
+  %ceil4  = call float @llvm.ceil.f32(float %ld4 )
+  %ceil5  = call float @llvm.ceil.f32(float %ld5 )
+  %ceil6  = call float @llvm.ceil.f32(float %ld6 )
+  %ceil7  = call float @llvm.ceil.f32(float %ld7 )
+  %ceil8  = call float @llvm.ceil.f32(float %ld8 )
+  %ceil9  = call float @llvm.ceil.f32(float %ld9 )
+  %ceil10 = call float @llvm.ceil.f32(float %ld10)
+  %ceil11 = call float @llvm.ceil.f32(float %ld11)
+  %ceil12 = call float @llvm.ceil.f32(float %ld12)
+  %ceil13 = call float @llvm.ceil.f32(float %ld13)
+  %ceil14 = call float @llvm.ceil.f32(float %ld14)
+  %ceil15 = call float @llvm.ceil.f32(float %ld15)
+  store float %ceil0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0 ), align 4
+  store float %ceil1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1 ), align 4
+  store float %ceil2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2 ), align 4
+  store float %ceil3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3 ), align 4
+  store float %ceil4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4 ), align 4
+  store float %ceil5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5 ), align 4
+  store float %ceil6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6 ), align 4
+  store float %ceil7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7 ), align 4
+  store float %ceil8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8 ), align 4
+  store float %ceil9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9 ), align 4
+  store float %ceil10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 4
+  store float %ceil11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
+  store float %ceil12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 4
+  store float %ceil13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
+  store float %ceil14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 4
+  store float %ceil15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+  ret void
+}
+
+; floor_4f32: apply scalar @llvm.floor.f32 to 4 consecutive elements of
+; @src32, storing into @dst32. SSE2 has no packed rounding instruction, so
+; the checks expect the calls to remain scalar; SSE4.1 and AVX expect a
+; single <4 x float> @llvm.floor.v4f32 over one vector load/store pair.
+define void @floor_4f32() #0 {
+; SSE2-LABEL: @floor_4f32(
+; SSE2-NEXT:    [[LD0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
+; SSE2-NEXT:    [[LD1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
+; SSE2-NEXT:    [[LD2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
+; SSE2-NEXT:    [[LD3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+; SSE2-NEXT:    [[FLOOR0:%.*]] = call float @llvm.floor.f32(float [[LD0]])
+; SSE2-NEXT:    [[FLOOR1:%.*]] = call float @llvm.floor.f32(float [[LD1]])
+; SSE2-NEXT:    [[FLOOR2:%.*]] = call float @llvm.floor.f32(float [[LD2]])
+; SSE2-NEXT:    [[FLOOR3:%.*]] = call float @llvm.floor.f32(float [[LD3]])
+; SSE2-NEXT:    store float [[FLOOR0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
+; SSE2-NEXT:    store float [[FLOOR1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+; SSE2-NEXT:    store float [[FLOOR2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
+; SSE2-NEXT:    store float [[FLOOR3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+; SSE2-NEXT:    ret void
+;
+; SSE41-LABEL: @floor_4f32(
+; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE41-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP1]])
+; SSE41-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; SSE41-NEXT:    ret void
+;
+; AVX-LABEL: @floor_4f32(
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; AVX-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP1]])
+; AVX-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; AVX-NEXT:    ret void
+;
+; Scalar input IR: 4 consecutive load/floor/store chains for the SLP
+; vectorizer to combine (or leave alone, depending on target cost model).
+  %ld0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
+  %ld1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
+  %ld2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
+  %ld3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+  %floor0 = call float @llvm.floor.f32(float %ld0)
+  %floor1 = call float @llvm.floor.f32(float %ld1)
+  %floor2 = call float @llvm.floor.f32(float %ld2)
+  %floor3 = call float @llvm.floor.f32(float %ld3)
+  store float %floor0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
+  store float %floor1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+  store float %floor2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
+  store float %floor3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+  ret void
+}
+
+; floor_8f32: scalar @llvm.floor.f32 over 8 consecutive elements. Expected:
+; SSE2 stays scalar (no roundps); SSE4.1 splits into two <4 x float>
+; @llvm.floor.v4f32 halves; AVX uses one <8 x float> @llvm.floor.v8f32.
+define void @floor_8f32() #0 {
+; SSE2-LABEL: @floor_8f32(
+; SSE2-NEXT:    [[LD0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
+; SSE2-NEXT:    [[LD1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
+; SSE2-NEXT:    [[LD2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
+; SSE2-NEXT:    [[LD3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+; SSE2-NEXT:    [[LD4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
+; SSE2-NEXT:    [[LD5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
+; SSE2-NEXT:    [[LD6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
+; SSE2-NEXT:    [[LD7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+; SSE2-NEXT:    [[FLOOR0:%.*]] = call float @llvm.floor.f32(float [[LD0]])
+; SSE2-NEXT:    [[FLOOR1:%.*]] = call float @llvm.floor.f32(float [[LD1]])
+; SSE2-NEXT:    [[FLOOR2:%.*]] = call float @llvm.floor.f32(float [[LD2]])
+; SSE2-NEXT:    [[FLOOR3:%.*]] = call float @llvm.floor.f32(float [[LD3]])
+; SSE2-NEXT:    [[FLOOR4:%.*]] = call float @llvm.floor.f32(float [[LD4]])
+; SSE2-NEXT:    [[FLOOR5:%.*]] = call float @llvm.floor.f32(float [[LD5]])
+; SSE2-NEXT:    [[FLOOR6:%.*]] = call float @llvm.floor.f32(float [[LD6]])
+; SSE2-NEXT:    [[FLOOR7:%.*]] = call float @llvm.floor.f32(float [[LD7]])
+; SSE2-NEXT:    store float [[FLOOR0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
+; SSE2-NEXT:    store float [[FLOOR1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+; SSE2-NEXT:    store float [[FLOOR2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
+; SSE2-NEXT:    store float [[FLOOR3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+; SSE2-NEXT:    store float [[FLOOR4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
+; SSE2-NEXT:    store float [[FLOOR5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
+; SSE2-NEXT:    store float [[FLOOR6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
+; SSE2-NEXT:    store float [[FLOOR7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+; SSE2-NEXT:    ret void
+;
+; SSE41-LABEL: @floor_8f32(
+; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE41-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT:    [[TMP3:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP1]])
+; SSE41-NEXT:    [[TMP4:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP2]])
+; SSE41-NEXT:    store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT:    ret void
+;
+; AVX-LABEL: @floor_8f32(
+; AVX-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX-NEXT:    [[TMP2:%.*]] = call <8 x float> @llvm.floor.v8f32(<8 x float> [[TMP1]])
+; AVX-NEXT:    store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
+; AVX-NEXT:    ret void
+;
+; Scalar input IR: 8 consecutive load/floor/store chains.
+  %ld0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
+  %ld1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
+  %ld2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
+  %ld3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+  %ld4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
+  %ld5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
+  %ld6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
+  %ld7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+  %floor0 = call float @llvm.floor.f32(float %ld0)
+  %floor1 = call float @llvm.floor.f32(float %ld1)
+  %floor2 = call float @llvm.floor.f32(float %ld2)
+  %floor3 = call float @llvm.floor.f32(float %ld3)
+  %floor4 = call float @llvm.floor.f32(float %ld4)
+  %floor5 = call float @llvm.floor.f32(float %ld5)
+  %floor6 = call float @llvm.floor.f32(float %ld6)
+  %floor7 = call float @llvm.floor.f32(float %ld7)
+  store float %floor0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
+  store float %floor1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+  store float %floor2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
+  store float %floor3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+  store float %floor4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
+  store float %floor5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
+  store float %floor6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
+  store float %floor7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+  ret void
+}
+
+; floor_16f32: scalar @llvm.floor.f32 over all 16 elements. Expected per
+; target: SSE2 stays scalar; SSE4.1 -> 4 x @llvm.floor.v4f32; AVX1/AVX2 ->
+; 2 x @llvm.floor.v8f32; AVX512 -> a single @llvm.floor.v16f32.
+define void @floor_16f32() #0 {
+; SSE2-LABEL: @floor_16f32(
+; SSE2-NEXT:    [[LD0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
+; SSE2-NEXT:    [[LD1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
+; SSE2-NEXT:    [[LD2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
+; SSE2-NEXT:    [[LD3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+; SSE2-NEXT:    [[LD4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
+; SSE2-NEXT:    [[LD5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
+; SSE2-NEXT:    [[LD6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
+; SSE2-NEXT:    [[LD7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+; SSE2-NEXT:    [[LD8:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8), align 4
+; SSE2-NEXT:    [[LD9:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 9), align 4
+; SSE2-NEXT:    [[LD10:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 10), align 4
+; SSE2-NEXT:    [[LD11:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 11), align 4
+; SSE2-NEXT:    [[LD12:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12), align 4
+; SSE2-NEXT:    [[LD13:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 13), align 4
+; SSE2-NEXT:    [[LD14:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 14), align 4
+; SSE2-NEXT:    [[LD15:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 15), align 4
+; SSE2-NEXT:    [[FLOOR0:%.*]] = call float @llvm.floor.f32(float [[LD0]])
+; SSE2-NEXT:    [[FLOOR1:%.*]] = call float @llvm.floor.f32(float [[LD1]])
+; SSE2-NEXT:    [[FLOOR2:%.*]] = call float @llvm.floor.f32(float [[LD2]])
+; SSE2-NEXT:    [[FLOOR3:%.*]] = call float @llvm.floor.f32(float [[LD3]])
+; SSE2-NEXT:    [[FLOOR4:%.*]] = call float @llvm.floor.f32(float [[LD4]])
+; SSE2-NEXT:    [[FLOOR5:%.*]] = call float @llvm.floor.f32(float [[LD5]])
+; SSE2-NEXT:    [[FLOOR6:%.*]] = call float @llvm.floor.f32(float [[LD6]])
+; SSE2-NEXT:    [[FLOOR7:%.*]] = call float @llvm.floor.f32(float [[LD7]])
+; SSE2-NEXT:    [[FLOOR8:%.*]] = call float @llvm.floor.f32(float [[LD8]])
+; SSE2-NEXT:    [[FLOOR9:%.*]] = call float @llvm.floor.f32(float [[LD9]])
+; SSE2-NEXT:    [[FLOOR10:%.*]] = call float @llvm.floor.f32(float [[LD10]])
+; SSE2-NEXT:    [[FLOOR11:%.*]] = call float @llvm.floor.f32(float [[LD11]])
+; SSE2-NEXT:    [[FLOOR12:%.*]] = call float @llvm.floor.f32(float [[LD12]])
+; SSE2-NEXT:    [[FLOOR13:%.*]] = call float @llvm.floor.f32(float [[LD13]])
+; SSE2-NEXT:    [[FLOOR14:%.*]] = call float @llvm.floor.f32(float [[LD14]])
+; SSE2-NEXT:    [[FLOOR15:%.*]] = call float @llvm.floor.f32(float [[LD15]])
+; SSE2-NEXT:    store float [[FLOOR0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
+; SSE2-NEXT:    store float [[FLOOR1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+; SSE2-NEXT:    store float [[FLOOR2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
+; SSE2-NEXT:    store float [[FLOOR3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+; SSE2-NEXT:    store float [[FLOOR4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
+; SSE2-NEXT:    store float [[FLOOR5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
+; SSE2-NEXT:    store float [[FLOOR6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
+; SSE2-NEXT:    store float [[FLOOR7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+; SSE2-NEXT:    store float [[FLOOR8]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8), align 4
+; SSE2-NEXT:    store float [[FLOOR9]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9), align 4
+; SSE2-NEXT:    store float [[FLOOR10]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 4
+; SSE2-NEXT:    store float [[FLOOR11]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
+; SSE2-NEXT:    store float [[FLOOR12]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 4
+; SSE2-NEXT:    store float [[FLOOR13]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
+; SSE2-NEXT:    store float [[FLOOR14]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 4
+; SSE2-NEXT:    store float [[FLOOR15]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+; SSE2-NEXT:    ret void
+;
+; SSE41-LABEL: @floor_16f32(
+; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE41-NEXT:    [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT:    [[TMP3:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <4 x float>*), align 4
+; SSE41-NEXT:    [[TMP4:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12) to <4 x float>*), align 4
+; SSE41-NEXT:    [[TMP5:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP1]])
+; SSE41-NEXT:    [[TMP6:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP2]])
+; SSE41-NEXT:    [[TMP7:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP3]])
+; SSE41-NEXT:    [[TMP8:%.*]] = call <4 x float> @llvm.floor.v4f32(<4 x float> [[TMP4]])
+; SSE41-NEXT:    store <4 x float> [[TMP5]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP7]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 4
+; SSE41-NEXT:    store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 4
+; SSE41-NEXT:    ret void
+;
+; AVX1-LABEL: @floor_16f32(
+; AVX1-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX1-NEXT:    [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX1-NEXT:    [[TMP3:%.*]] = call <8 x float> @llvm.floor.v8f32(<8 x float> [[TMP1]])
+; AVX1-NEXT:    [[TMP4:%.*]] = call <8 x float> @llvm.floor.v8f32(<8 x float> [[TMP2]])
+; AVX1-NEXT:    store <8 x float> [[TMP3]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
+; AVX1-NEXT:    store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX1-NEXT:    ret void
+;
+; AVX2-LABEL: @floor_16f32(
+; AVX2-NEXT:    [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX2-NEXT:    [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX2-NEXT:    [[TMP3:%.*]] = call <8 x float> @llvm.floor.v8f32(<8 x float> [[TMP1]])
+; AVX2-NEXT:    [[TMP4:%.*]] = call <8 x float> @llvm.floor.v8f32(<8 x float> [[TMP2]])
+; AVX2-NEXT:    store <8 x float> [[TMP3]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
+; AVX2-NEXT:    store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX2-NEXT:    ret void
+;
+; AVX512-LABEL: @floor_16f32(
+; AVX512-NEXT:    [[TMP1:%.*]] = load <16 x float>, <16 x float>* bitcast ([16 x float]* @src32 to <16 x float>*), align 4
+; AVX512-NEXT:    [[TMP2:%.*]] = call <16 x float> @llvm.floor.v16f32(<16 x float> [[TMP1]])
+; AVX512-NEXT:    store <16 x float> [[TMP2]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 4
+; AVX512-NEXT:    ret void
+;
+; Scalar input IR: 16 consecutive load/floor/store chains (names padded so
+; columns line up; the extra spaces are intentional and must be preserved).
+  %ld0  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0 ), align 4
+  %ld1  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1 ), align 4
+  %ld2  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2 ), align 4
+  %ld3  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3 ), align 4
+  %ld4  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4 ), align 4
+  %ld5  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5 ), align 4
+  %ld6  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6 ), align 4
+  %ld7  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7 ), align 4
+  %ld8  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8 ), align 4
+  %ld9  = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 9 ), align 4
+  %ld10 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 10), align 4
+  %ld11 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 11), align 4
+  %ld12 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12), align 4
+  %ld13 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 13), align 4
+  %ld14 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 14), align 4
+  %ld15 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 15), align 4
+  %floor0  = call float @llvm.floor.f32(float %ld0 )
+  %floor1  = call float @llvm.floor.f32(float %ld1 )
+  %floor2  = call float @llvm.floor.f32(float %ld2 )
+  %floor3  = call float @llvm.floor.f32(float %ld3 )
+  %floor4  = call float @llvm.floor.f32(float %ld4 )
+  %floor5  = call float @llvm.floor.f32(float %ld5 )
+  %floor6  = call float @llvm.floor.f32(float %ld6 )
+  %floor7  = call float @llvm.floor.f32(float %ld7 )
+  %floor8  = call float @llvm.floor.f32(float %ld8 )
+  %floor9  = call float @llvm.floor.f32(float %ld9 )
+  %floor10 = call float @llvm.floor.f32(float %ld10)
+  %floor11 = call float @llvm.floor.f32(float %ld11)
+  %floor12 = call float @llvm.floor.f32(float %ld12)
+  %floor13 = call float @llvm.floor.f32(float %ld13)
+  %floor14 = call float @llvm.floor.f32(float %ld14)
+  %floor15 = call float @llvm.floor.f32(float %ld15)
+  store float %floor0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0 ), align 4
+  store float %floor1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1 ), align 4
+  store float %floor2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2 ), align 4
+  store float %floor3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3 ), align 4
+  store float %floor4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4 ), align 4
+  store float %floor5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5 ), align 4
+  store float %floor6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6 ), align 4
+  store float %floor7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7 ), align 4
+  store float %floor8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8 ), align 4
+  store float %floor9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9 ), align 4
+  store float %floor10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 4
+  store float %floor11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
+  store float %floor12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 4
+  store float %floor13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
+  store float %floor14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 4
+  store float %floor15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+  ret void
+}
+
+; nearbyint_4f32: scalar @llvm.nearbyint.f32 over 4 consecutive elements.
+; Same vectorization expectations as floor_4f32: SSE2 stays scalar,
+; SSE4.1 and AVX use a single <4 x float> @llvm.nearbyint.v4f32.
+define void @nearbyint_4f32() #0 {
+; SSE2-LABEL: @nearbyint_4f32(
+; SSE2-NEXT:    [[LD0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
+; SSE2-NEXT:    [[LD1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
+; SSE2-NEXT:    [[LD2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
+; SSE2-NEXT:    [[LD3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+; SSE2-NEXT:    [[NEARBYINT0:%.*]] = call float @llvm.nearbyint.f32(float [[LD0]])
+; SSE2-NEXT:    [[NEARBYINT1:%.*]] = call float @llvm.nearbyint.f32(float [[LD1]])
+; SSE2-NEXT:    [[NEARBYINT2:%.*]] = call float @llvm.nearbyint.f32(float [[LD2]])
+; SSE2-NEXT:    [[NEARBYINT3:%.*]] = call float @llvm.nearbyint.f32(float [[LD3]])
+; SSE2-NEXT:    store float [[NEARBYINT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
+; SSE2-NEXT:    store float [[NEARBYINT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+; SSE2-NEXT:    store float [[NEARBYINT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
+; SSE2-NEXT:    store float [[NEARBYINT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+; SSE2-NEXT:    ret void
+;
+; SSE41-LABEL: @nearbyint_4f32(
+; SSE41-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE41-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[TMP1]])
+; SSE41-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; SSE41-NEXT:    ret void
+;
+; AVX-LABEL: @nearbyint_4f32(
+; AVX-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; AVX-NEXT:    [[TMP2:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[TMP1]])
+; AVX-NEXT:    store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; AVX-NEXT:    ret void
+;
+; Scalar input IR: 4 consecutive load/nearbyint/store chains.
+  %ld0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
+  %ld1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
+  %ld2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
+  %ld3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+  %nearbyint0 = call float @llvm.nearbyint.f32(float %ld0)
+  %nearbyint1 = call float @llvm.nearbyint.f32(float %ld1)
+  %nearbyint2 = call float @llvm.nearbyint.f32(float %ld2)
+  %nearbyint3 = call float @llvm.nearbyint.f32(float %ld3)
+  store float %nearbyint0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
+  store float %nearbyint1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+  store float %nearbyint2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
+  store float %nearbyint3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+  ret void
+}
+
+define void @nearbyint_8f32() #0 {
+; SSE2-LABEL: @nearbyint_8f32(
+; SSE2-NEXT: [[LD0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
+; SSE2-NEXT: [[LD1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
+; SSE2-NEXT: [[LD2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
+; SSE2-NEXT: [[LD3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+; SSE2-NEXT: [[LD4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
+; SSE2-NEXT: [[LD5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
+; SSE2-NEXT: [[LD6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
+; SSE2-NEXT: [[LD7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+; SSE2-NEXT: [[NEARBYINT0:%.*]] = call float @llvm.nearbyint.f32(float [[LD0]])
+; SSE2-NEXT: [[NEARBYINT1:%.*]] = call float @llvm.nearbyint.f32(float [[LD1]])
+; SSE2-NEXT: [[NEARBYINT2:%.*]] = call float @llvm.nearbyint.f32(float [[LD2]])
+; SSE2-NEXT: [[NEARBYINT3:%.*]] = call float @llvm.nearbyint.f32(float [[LD3]])
+; SSE2-NEXT: [[NEARBYINT4:%.*]] = call float @llvm.nearbyint.f32(float [[LD4]])
+; SSE2-NEXT: [[NEARBYINT5:%.*]] = call float @llvm.nearbyint.f32(float [[LD5]])
+; SSE2-NEXT: [[NEARBYINT6:%.*]] = call float @llvm.nearbyint.f32(float [[LD6]])
+; SSE2-NEXT: [[NEARBYINT7:%.*]] = call float @llvm.nearbyint.f32(float [[LD7]])
+; SSE2-NEXT: store float [[NEARBYINT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
+; SSE2-NEXT: store float [[NEARBYINT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+; SSE2-NEXT: store float [[NEARBYINT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
+; SSE2-NEXT: store float [[NEARBYINT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+; SSE2-NEXT: store float [[NEARBYINT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
+; SSE2-NEXT: store float [[NEARBYINT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
+; SSE2-NEXT: store float [[NEARBYINT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
+; SSE2-NEXT: store float [[NEARBYINT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+; SSE2-NEXT: ret void
+;
+; SSE41-LABEL: @nearbyint_8f32(
+; SSE41-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE41-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[TMP1]])
+; SSE41-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[TMP2]])
+; SSE41-NEXT: store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; SSE41-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT: ret void
+;
+; AVX-LABEL: @nearbyint_8f32(
+; AVX-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> [[TMP1]])
+; AVX-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
+; AVX-NEXT: ret void
+;
+ %ld0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
+ %ld1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
+ %ld2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
+ %ld3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+ %ld4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
+ %ld5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
+ %ld6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
+ %ld7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+ %nearbyint0 = call float @llvm.nearbyint.f32(float %ld0)
+ %nearbyint1 = call float @llvm.nearbyint.f32(float %ld1)
+ %nearbyint2 = call float @llvm.nearbyint.f32(float %ld2)
+ %nearbyint3 = call float @llvm.nearbyint.f32(float %ld3)
+ %nearbyint4 = call float @llvm.nearbyint.f32(float %ld4)
+ %nearbyint5 = call float @llvm.nearbyint.f32(float %ld5)
+ %nearbyint6 = call float @llvm.nearbyint.f32(float %ld6)
+ %nearbyint7 = call float @llvm.nearbyint.f32(float %ld7)
+ store float %nearbyint0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
+ store float %nearbyint1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+ store float %nearbyint2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
+ store float %nearbyint3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+ store float %nearbyint4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
+ store float %nearbyint5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
+ store float %nearbyint6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
+ store float %nearbyint7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+ ret void
+}
+
+; Applies llvm.nearbyint.f32 to 16 consecutive floats loaded from @src32 and
+; stores the results to @dst32.  The generated FileCheck lines below encode
+; the expected vectorization per target: SSE2 stays fully scalar, SSE41 uses
+; four @llvm.nearbyint.v4f32 calls, AVX1/AVX2 use two @llvm.nearbyint.v8f32
+; calls, and AVX512 collapses everything into one @llvm.nearbyint.v16f32 call.
+; NOTE(review): check lines look auto-generated (update_test_checks.py style);
+; regenerate rather than hand-edit them.
+define void @nearbyint_16f32() #0 {
+; SSE2-LABEL: @nearbyint_16f32(
+; SSE2-NEXT: [[LD0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
+; SSE2-NEXT: [[LD1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
+; SSE2-NEXT: [[LD2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
+; SSE2-NEXT: [[LD3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+; SSE2-NEXT: [[LD4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
+; SSE2-NEXT: [[LD5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
+; SSE2-NEXT: [[LD6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
+; SSE2-NEXT: [[LD7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+; SSE2-NEXT: [[LD8:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8), align 4
+; SSE2-NEXT: [[LD9:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 9), align 4
+; SSE2-NEXT: [[LD10:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 10), align 4
+; SSE2-NEXT: [[LD11:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 11), align 4
+; SSE2-NEXT: [[LD12:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12), align 4
+; SSE2-NEXT: [[LD13:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 13), align 4
+; SSE2-NEXT: [[LD14:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 14), align 4
+; SSE2-NEXT: [[LD15:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 15), align 4
+; SSE2-NEXT: [[NEARBYINT0:%.*]] = call float @llvm.nearbyint.f32(float [[LD0]])
+; SSE2-NEXT: [[NEARBYINT1:%.*]] = call float @llvm.nearbyint.f32(float [[LD1]])
+; SSE2-NEXT: [[NEARBYINT2:%.*]] = call float @llvm.nearbyint.f32(float [[LD2]])
+; SSE2-NEXT: [[NEARBYINT3:%.*]] = call float @llvm.nearbyint.f32(float [[LD3]])
+; SSE2-NEXT: [[NEARBYINT4:%.*]] = call float @llvm.nearbyint.f32(float [[LD4]])
+; SSE2-NEXT: [[NEARBYINT5:%.*]] = call float @llvm.nearbyint.f32(float [[LD5]])
+; SSE2-NEXT: [[NEARBYINT6:%.*]] = call float @llvm.nearbyint.f32(float [[LD6]])
+; SSE2-NEXT: [[NEARBYINT7:%.*]] = call float @llvm.nearbyint.f32(float [[LD7]])
+; SSE2-NEXT: [[NEARBYINT8:%.*]] = call float @llvm.nearbyint.f32(float [[LD8]])
+; SSE2-NEXT: [[NEARBYINT9:%.*]] = call float @llvm.nearbyint.f32(float [[LD9]])
+; SSE2-NEXT: [[NEARBYINT10:%.*]] = call float @llvm.nearbyint.f32(float [[LD10]])
+; SSE2-NEXT: [[NEARBYINT11:%.*]] = call float @llvm.nearbyint.f32(float [[LD11]])
+; SSE2-NEXT: [[NEARBYINT12:%.*]] = call float @llvm.nearbyint.f32(float [[LD12]])
+; SSE2-NEXT: [[NEARBYINT13:%.*]] = call float @llvm.nearbyint.f32(float [[LD13]])
+; SSE2-NEXT: [[NEARBYINT14:%.*]] = call float @llvm.nearbyint.f32(float [[LD14]])
+; SSE2-NEXT: [[NEARBYINT15:%.*]] = call float @llvm.nearbyint.f32(float [[LD15]])
+; SSE2-NEXT: store float [[NEARBYINT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
+; SSE2-NEXT: store float [[NEARBYINT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+; SSE2-NEXT: store float [[NEARBYINT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
+; SSE2-NEXT: store float [[NEARBYINT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+; SSE2-NEXT: store float [[NEARBYINT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
+; SSE2-NEXT: store float [[NEARBYINT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
+; SSE2-NEXT: store float [[NEARBYINT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
+; SSE2-NEXT: store float [[NEARBYINT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+; SSE2-NEXT: store float [[NEARBYINT8]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8), align 4
+; SSE2-NEXT: store float [[NEARBYINT9]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9), align 4
+; SSE2-NEXT: store float [[NEARBYINT10]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 4
+; SSE2-NEXT: store float [[NEARBYINT11]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
+; SSE2-NEXT: store float [[NEARBYINT12]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 4
+; SSE2-NEXT: store float [[NEARBYINT13]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
+; SSE2-NEXT: store float [[NEARBYINT14]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 4
+; SSE2-NEXT: store float [[NEARBYINT15]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+; SSE2-NEXT: ret void
+;
+; SSE41-LABEL: @nearbyint_16f32(
+; SSE41-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE41-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <4 x float>*), align 4
+; SSE41-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12) to <4 x float>*), align 4
+; SSE41-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[TMP1]])
+; SSE41-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[TMP2]])
+; SSE41-NEXT: [[TMP7:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[TMP3]])
+; SSE41-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> [[TMP4]])
+; SSE41-NEXT: store <4 x float> [[TMP5]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; SSE41-NEXT: store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT: store <4 x float> [[TMP7]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 4
+; SSE41-NEXT: store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 4
+; SSE41-NEXT: ret void
+;
+; AVX1-LABEL: @nearbyint_16f32(
+; AVX1-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX1-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX1-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> [[TMP1]])
+; AVX1-NEXT: [[TMP4:%.*]] = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> [[TMP2]])
+; AVX1-NEXT: store <8 x float> [[TMP3]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
+; AVX1-NEXT: store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX1-NEXT: ret void
+;
+; AVX2-LABEL: @nearbyint_16f32(
+; AVX2-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX2-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX2-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> [[TMP1]])
+; AVX2-NEXT: [[TMP4:%.*]] = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> [[TMP2]])
+; AVX2-NEXT: store <8 x float> [[TMP3]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
+; AVX2-NEXT: store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX2-NEXT: ret void
+;
+; AVX512-LABEL: @nearbyint_16f32(
+; AVX512-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* bitcast ([16 x float]* @src32 to <16 x float>*), align 4
+; AVX512-NEXT: [[TMP2:%.*]] = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> [[TMP1]])
+; AVX512-NEXT: store <16 x float> [[TMP2]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 4
+; AVX512-NEXT: ret void
+;
+; Input IR: 16 scalar load / nearbyint / store triples over adjacent elements
+; (trailing spaces in the operands below keep the columns aligned).
+  %ld0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0 ), align 4
+  %ld1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1 ), align 4
+  %ld2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2 ), align 4
+  %ld3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3 ), align 4
+  %ld4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4 ), align 4
+  %ld5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5 ), align 4
+  %ld6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6 ), align 4
+  %ld7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7 ), align 4
+  %ld8 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8 ), align 4
+  %ld9 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 9 ), align 4
+  %ld10 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 10), align 4
+  %ld11 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 11), align 4
+  %ld12 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12), align 4
+  %ld13 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 13), align 4
+  %ld14 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 14), align 4
+  %ld15 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 15), align 4
+  %nearbyint0 = call float @llvm.nearbyint.f32(float %ld0 )
+  %nearbyint1 = call float @llvm.nearbyint.f32(float %ld1 )
+  %nearbyint2 = call float @llvm.nearbyint.f32(float %ld2 )
+  %nearbyint3 = call float @llvm.nearbyint.f32(float %ld3 )
+  %nearbyint4 = call float @llvm.nearbyint.f32(float %ld4 )
+  %nearbyint5 = call float @llvm.nearbyint.f32(float %ld5 )
+  %nearbyint6 = call float @llvm.nearbyint.f32(float %ld6 )
+  %nearbyint7 = call float @llvm.nearbyint.f32(float %ld7 )
+  %nearbyint8 = call float @llvm.nearbyint.f32(float %ld8 )
+  %nearbyint9 = call float @llvm.nearbyint.f32(float %ld9 )
+  %nearbyint10 = call float @llvm.nearbyint.f32(float %ld10)
+  %nearbyint11 = call float @llvm.nearbyint.f32(float %ld11)
+  %nearbyint12 = call float @llvm.nearbyint.f32(float %ld12)
+  %nearbyint13 = call float @llvm.nearbyint.f32(float %ld13)
+  %nearbyint14 = call float @llvm.nearbyint.f32(float %ld14)
+  %nearbyint15 = call float @llvm.nearbyint.f32(float %ld15)
+  store float %nearbyint0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0 ), align 4
+  store float %nearbyint1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1 ), align 4
+  store float %nearbyint2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2 ), align 4
+  store float %nearbyint3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3 ), align 4
+  store float %nearbyint4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4 ), align 4
+  store float %nearbyint5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5 ), align 4
+  store float %nearbyint6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6 ), align 4
+  store float %nearbyint7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7 ), align 4
+  store float %nearbyint8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8 ), align 4
+  store float %nearbyint9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9 ), align 4
+  store float %nearbyint10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 4
+  store float %nearbyint11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
+  store float %nearbyint12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 4
+  store float %nearbyint13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
+  store float %nearbyint14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 4
+  store float %nearbyint15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+  ret void
+}
+
+; Applies llvm.rint.f32 to 4 consecutive floats from @src32 into @dst32.
+; Expected results per target: SSE2 remains scalar; SSE41 and AVX both
+; vectorize into a single @llvm.rint.v4f32 call on a <4 x float> load/store.
+define void @rint_4f32() #0 {
+; SSE2-LABEL: @rint_4f32(
+; SSE2-NEXT: [[LD0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
+; SSE2-NEXT: [[LD1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
+; SSE2-NEXT: [[LD2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
+; SSE2-NEXT: [[LD3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+; SSE2-NEXT: [[RINT0:%.*]] = call float @llvm.rint.f32(float [[LD0]])
+; SSE2-NEXT: [[RINT1:%.*]] = call float @llvm.rint.f32(float [[LD1]])
+; SSE2-NEXT: [[RINT2:%.*]] = call float @llvm.rint.f32(float [[LD2]])
+; SSE2-NEXT: [[RINT3:%.*]] = call float @llvm.rint.f32(float [[LD3]])
+; SSE2-NEXT: store float [[RINT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
+; SSE2-NEXT: store float [[RINT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+; SSE2-NEXT: store float [[RINT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
+; SSE2-NEXT: store float [[RINT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+; SSE2-NEXT: ret void
+;
+; SSE41-LABEL: @rint_4f32(
+; SSE41-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE41-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[TMP1]])
+; SSE41-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; SSE41-NEXT: ret void
+;
+; AVX-LABEL: @rint_4f32(
+; AVX-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; AVX-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[TMP1]])
+; AVX-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; AVX-NEXT: ret void
+;
+  %ld0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
+  %ld1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
+  %ld2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
+  %ld3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+  %rint0 = call float @llvm.rint.f32(float %ld0)
+  %rint1 = call float @llvm.rint.f32(float %ld1)
+  %rint2 = call float @llvm.rint.f32(float %ld2)
+  %rint3 = call float @llvm.rint.f32(float %ld3)
+  store float %rint0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
+  store float %rint1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+  store float %rint2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
+  store float %rint3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+  ret void
+}
+
+; Applies llvm.rint.f32 to 8 consecutive floats from @src32 into @dst32.
+; Expected results per target: SSE2 remains scalar; SSE41 uses two
+; @llvm.rint.v4f32 calls; AVX uses a single @llvm.rint.v8f32 call.
+define void @rint_8f32() #0 {
+; SSE2-LABEL: @rint_8f32(
+; SSE2-NEXT: [[LD0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
+; SSE2-NEXT: [[LD1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
+; SSE2-NEXT: [[LD2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
+; SSE2-NEXT: [[LD3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+; SSE2-NEXT: [[LD4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
+; SSE2-NEXT: [[LD5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
+; SSE2-NEXT: [[LD6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
+; SSE2-NEXT: [[LD7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+; SSE2-NEXT: [[RINT0:%.*]] = call float @llvm.rint.f32(float [[LD0]])
+; SSE2-NEXT: [[RINT1:%.*]] = call float @llvm.rint.f32(float [[LD1]])
+; SSE2-NEXT: [[RINT2:%.*]] = call float @llvm.rint.f32(float [[LD2]])
+; SSE2-NEXT: [[RINT3:%.*]] = call float @llvm.rint.f32(float [[LD3]])
+; SSE2-NEXT: [[RINT4:%.*]] = call float @llvm.rint.f32(float [[LD4]])
+; SSE2-NEXT: [[RINT5:%.*]] = call float @llvm.rint.f32(float [[LD5]])
+; SSE2-NEXT: [[RINT6:%.*]] = call float @llvm.rint.f32(float [[LD6]])
+; SSE2-NEXT: [[RINT7:%.*]] = call float @llvm.rint.f32(float [[LD7]])
+; SSE2-NEXT: store float [[RINT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
+; SSE2-NEXT: store float [[RINT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+; SSE2-NEXT: store float [[RINT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
+; SSE2-NEXT: store float [[RINT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+; SSE2-NEXT: store float [[RINT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
+; SSE2-NEXT: store float [[RINT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
+; SSE2-NEXT: store float [[RINT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
+; SSE2-NEXT: store float [[RINT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+; SSE2-NEXT: ret void
+;
+; SSE41-LABEL: @rint_8f32(
+; SSE41-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE41-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[TMP1]])
+; SSE41-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[TMP2]])
+; SSE41-NEXT: store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; SSE41-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT: ret void
+;
+; AVX-LABEL: @rint_8f32(
+; AVX-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.rint.v8f32(<8 x float> [[TMP1]])
+; AVX-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
+; AVX-NEXT: ret void
+;
+  %ld0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
+  %ld1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
+  %ld2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
+  %ld3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+  %ld4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
+  %ld5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
+  %ld6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
+  %ld7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+  %rint0 = call float @llvm.rint.f32(float %ld0)
+  %rint1 = call float @llvm.rint.f32(float %ld1)
+  %rint2 = call float @llvm.rint.f32(float %ld2)
+  %rint3 = call float @llvm.rint.f32(float %ld3)
+  %rint4 = call float @llvm.rint.f32(float %ld4)
+  %rint5 = call float @llvm.rint.f32(float %ld5)
+  %rint6 = call float @llvm.rint.f32(float %ld6)
+  %rint7 = call float @llvm.rint.f32(float %ld7)
+  store float %rint0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
+  store float %rint1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+  store float %rint2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
+  store float %rint3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+  store float %rint4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
+  store float %rint5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
+  store float %rint6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
+  store float %rint7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+  ret void
+}
+
+define void @rint_16f32() #0 {
+; SSE2-LABEL: @rint_16f32(
+; SSE2-NEXT: [[LD0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
+; SSE2-NEXT: [[LD1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
+; SSE2-NEXT: [[LD2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
+; SSE2-NEXT: [[LD3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+; SSE2-NEXT: [[LD4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
+; SSE2-NEXT: [[LD5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
+; SSE2-NEXT: [[LD6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
+; SSE2-NEXT: [[LD7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+; SSE2-NEXT: [[LD8:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8), align 4
+; SSE2-NEXT: [[LD9:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 9), align 4
+; SSE2-NEXT: [[LD10:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 10), align 4
+; SSE2-NEXT: [[LD11:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 11), align 4
+; SSE2-NEXT: [[LD12:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12), align 4
+; SSE2-NEXT: [[LD13:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 13), align 4
+; SSE2-NEXT: [[LD14:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 14), align 4
+; SSE2-NEXT: [[LD15:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 15), align 4
+; SSE2-NEXT: [[RINT0:%.*]] = call float @llvm.rint.f32(float [[LD0]])
+; SSE2-NEXT: [[RINT1:%.*]] = call float @llvm.rint.f32(float [[LD1]])
+; SSE2-NEXT: [[RINT2:%.*]] = call float @llvm.rint.f32(float [[LD2]])
+; SSE2-NEXT: [[RINT3:%.*]] = call float @llvm.rint.f32(float [[LD3]])
+; SSE2-NEXT: [[RINT4:%.*]] = call float @llvm.rint.f32(float [[LD4]])
+; SSE2-NEXT: [[RINT5:%.*]] = call float @llvm.rint.f32(float [[LD5]])
+; SSE2-NEXT: [[RINT6:%.*]] = call float @llvm.rint.f32(float [[LD6]])
+; SSE2-NEXT: [[RINT7:%.*]] = call float @llvm.rint.f32(float [[LD7]])
+; SSE2-NEXT: [[RINT8:%.*]] = call float @llvm.rint.f32(float [[LD8]])
+; SSE2-NEXT: [[RINT9:%.*]] = call float @llvm.rint.f32(float [[LD9]])
+; SSE2-NEXT: [[RINT10:%.*]] = call float @llvm.rint.f32(float [[LD10]])
+; SSE2-NEXT: [[RINT11:%.*]] = call float @llvm.rint.f32(float [[LD11]])
+; SSE2-NEXT: [[RINT12:%.*]] = call float @llvm.rint.f32(float [[LD12]])
+; SSE2-NEXT: [[RINT13:%.*]] = call float @llvm.rint.f32(float [[LD13]])
+; SSE2-NEXT: [[RINT14:%.*]] = call float @llvm.rint.f32(float [[LD14]])
+; SSE2-NEXT: [[RINT15:%.*]] = call float @llvm.rint.f32(float [[LD15]])
+; SSE2-NEXT: store float [[RINT0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
+; SSE2-NEXT: store float [[RINT1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+; SSE2-NEXT: store float [[RINT2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
+; SSE2-NEXT: store float [[RINT3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+; SSE2-NEXT: store float [[RINT4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
+; SSE2-NEXT: store float [[RINT5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
+; SSE2-NEXT: store float [[RINT6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
+; SSE2-NEXT: store float [[RINT7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+; SSE2-NEXT: store float [[RINT8]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8), align 4
+; SSE2-NEXT: store float [[RINT9]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9), align 4
+; SSE2-NEXT: store float [[RINT10]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 4
+; SSE2-NEXT: store float [[RINT11]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
+; SSE2-NEXT: store float [[RINT12]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 4
+; SSE2-NEXT: store float [[RINT13]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
+; SSE2-NEXT: store float [[RINT14]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 4
+; SSE2-NEXT: store float [[RINT15]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+; SSE2-NEXT: ret void
+;
+; SSE41-LABEL: @rint_16f32(
+; SSE41-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE41-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <4 x float>*), align 4
+; SSE41-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12) to <4 x float>*), align 4
+; SSE41-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[TMP1]])
+; SSE41-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[TMP2]])
+; SSE41-NEXT: [[TMP7:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[TMP3]])
+; SSE41-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.rint.v4f32(<4 x float> [[TMP4]])
+; SSE41-NEXT: store <4 x float> [[TMP5]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; SSE41-NEXT: store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT: store <4 x float> [[TMP7]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 4
+; SSE41-NEXT: store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 4
+; SSE41-NEXT: ret void
+;
+; AVX1-LABEL: @rint_16f32(
+; AVX1-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX1-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX1-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.rint.v8f32(<8 x float> [[TMP1]])
+; AVX1-NEXT: [[TMP4:%.*]] = call <8 x float> @llvm.rint.v8f32(<8 x float> [[TMP2]])
+; AVX1-NEXT: store <8 x float> [[TMP3]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
+; AVX1-NEXT: store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX1-NEXT: ret void
+;
+; AVX2-LABEL: @rint_16f32(
+; AVX2-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX2-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX2-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.rint.v8f32(<8 x float> [[TMP1]])
+; AVX2-NEXT: [[TMP4:%.*]] = call <8 x float> @llvm.rint.v8f32(<8 x float> [[TMP2]])
+; AVX2-NEXT: store <8 x float> [[TMP3]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
+; AVX2-NEXT: store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX2-NEXT: ret void
+;
+; AVX512-LABEL: @rint_16f32(
+; AVX512-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* bitcast ([16 x float]* @src32 to <16 x float>*), align 4
+; AVX512-NEXT: [[TMP2:%.*]] = call <16 x float> @llvm.rint.v16f32(<16 x float> [[TMP1]])
+; AVX512-NEXT: store <16 x float> [[TMP2]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 4
+; AVX512-NEXT: ret void
+;
+ %ld0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0 ), align 4
+ %ld1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1 ), align 4
+ %ld2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2 ), align 4
+ %ld3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3 ), align 4
+ %ld4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4 ), align 4
+ %ld5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5 ), align 4
+ %ld6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6 ), align 4
+ %ld7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7 ), align 4
+ %ld8 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8 ), align 4
+ %ld9 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 9 ), align 4
+ %ld10 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 10), align 4
+ %ld11 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 11), align 4
+ %ld12 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12), align 4
+ %ld13 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 13), align 4
+ %ld14 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 14), align 4
+ %ld15 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 15), align 4
+ %rint0 = call float @llvm.rint.f32(float %ld0 )
+ %rint1 = call float @llvm.rint.f32(float %ld1 )
+ %rint2 = call float @llvm.rint.f32(float %ld2 )
+ %rint3 = call float @llvm.rint.f32(float %ld3 )
+ %rint4 = call float @llvm.rint.f32(float %ld4 )
+ %rint5 = call float @llvm.rint.f32(float %ld5 )
+ %rint6 = call float @llvm.rint.f32(float %ld6 )
+ %rint7 = call float @llvm.rint.f32(float %ld7 )
+ %rint8 = call float @llvm.rint.f32(float %ld8 )
+ %rint9 = call float @llvm.rint.f32(float %ld9 )
+ %rint10 = call float @llvm.rint.f32(float %ld10)
+ %rint11 = call float @llvm.rint.f32(float %ld11)
+ %rint12 = call float @llvm.rint.f32(float %ld12)
+ %rint13 = call float @llvm.rint.f32(float %ld13)
+ %rint14 = call float @llvm.rint.f32(float %ld14)
+ %rint15 = call float @llvm.rint.f32(float %ld15)
+ store float %rint0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0 ), align 4
+ store float %rint1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1 ), align 4
+ store float %rint2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2 ), align 4
+ store float %rint3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3 ), align 4
+ store float %rint4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4 ), align 4
+ store float %rint5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5 ), align 4
+ store float %rint6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6 ), align 4
+ store float %rint7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7 ), align 4
+ store float %rint8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8 ), align 4
+ store float %rint9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9 ), align 4
+ store float %rint10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 4
+ store float %rint11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
+ store float %rint12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 4
+ store float %rint13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
+ store float %rint14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 4
+ store float %rint15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+ ret void
+}
+
+define void @trunc_4f32() #0 {
+; SSE2-LABEL: @trunc_4f32(
+; SSE2-NEXT: [[LD0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
+; SSE2-NEXT: [[LD1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
+; SSE2-NEXT: [[LD2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
+; SSE2-NEXT: [[LD3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+; SSE2-NEXT: [[TRUNC0:%.*]] = call float @llvm.trunc.f32(float [[LD0]])
+; SSE2-NEXT: [[TRUNC1:%.*]] = call float @llvm.trunc.f32(float [[LD1]])
+; SSE2-NEXT: [[TRUNC2:%.*]] = call float @llvm.trunc.f32(float [[LD2]])
+; SSE2-NEXT: [[TRUNC3:%.*]] = call float @llvm.trunc.f32(float [[LD3]])
+; SSE2-NEXT: store float [[TRUNC0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
+; SSE2-NEXT: store float [[TRUNC1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+; SSE2-NEXT: store float [[TRUNC2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
+; SSE2-NEXT: store float [[TRUNC3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+; SSE2-NEXT: ret void
+;
+; SSE41-LABEL: @trunc_4f32(
+; SSE41-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE41-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP1]])
+; SSE41-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; SSE41-NEXT: ret void
+;
+; AVX-LABEL: @trunc_4f32(
+; AVX-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; AVX-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP1]])
+; AVX-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; AVX-NEXT: ret void
+;
+ %ld0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
+ %ld1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
+ %ld2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
+ %ld3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+ %trunc0 = call float @llvm.trunc.f32(float %ld0)
+ %trunc1 = call float @llvm.trunc.f32(float %ld1)
+ %trunc2 = call float @llvm.trunc.f32(float %ld2)
+ %trunc3 = call float @llvm.trunc.f32(float %ld3)
+ store float %trunc0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
+ store float %trunc1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+ store float %trunc2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
+ store float %trunc3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+ ret void
+}
+
+define void @trunc_8f32() #0 {
+; SSE2-LABEL: @trunc_8f32(
+; SSE2-NEXT: [[LD0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
+; SSE2-NEXT: [[LD1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
+; SSE2-NEXT: [[LD2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
+; SSE2-NEXT: [[LD3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+; SSE2-NEXT: [[LD4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
+; SSE2-NEXT: [[LD5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
+; SSE2-NEXT: [[LD6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
+; SSE2-NEXT: [[LD7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+; SSE2-NEXT: [[TRUNC0:%.*]] = call float @llvm.trunc.f32(float [[LD0]])
+; SSE2-NEXT: [[TRUNC1:%.*]] = call float @llvm.trunc.f32(float [[LD1]])
+; SSE2-NEXT: [[TRUNC2:%.*]] = call float @llvm.trunc.f32(float [[LD2]])
+; SSE2-NEXT: [[TRUNC3:%.*]] = call float @llvm.trunc.f32(float [[LD3]])
+; SSE2-NEXT: [[TRUNC4:%.*]] = call float @llvm.trunc.f32(float [[LD4]])
+; SSE2-NEXT: [[TRUNC5:%.*]] = call float @llvm.trunc.f32(float [[LD5]])
+; SSE2-NEXT: [[TRUNC6:%.*]] = call float @llvm.trunc.f32(float [[LD6]])
+; SSE2-NEXT: [[TRUNC7:%.*]] = call float @llvm.trunc.f32(float [[LD7]])
+; SSE2-NEXT: store float [[TRUNC0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
+; SSE2-NEXT: store float [[TRUNC1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+; SSE2-NEXT: store float [[TRUNC2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
+; SSE2-NEXT: store float [[TRUNC3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+; SSE2-NEXT: store float [[TRUNC4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
+; SSE2-NEXT: store float [[TRUNC5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
+; SSE2-NEXT: store float [[TRUNC6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
+; SSE2-NEXT: store float [[TRUNC7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+; SSE2-NEXT: ret void
+;
+; SSE41-LABEL: @trunc_8f32(
+; SSE41-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE41-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP1]])
+; SSE41-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP2]])
+; SSE41-NEXT: store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; SSE41-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT: ret void
+;
+; AVX-LABEL: @trunc_8f32(
+; AVX-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.trunc.v8f32(<8 x float> [[TMP1]])
+; AVX-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
+; AVX-NEXT: ret void
+;
+ %ld0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
+ %ld1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
+ %ld2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
+ %ld3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+ %ld4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
+ %ld5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
+ %ld6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
+ %ld7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+ %trunc0 = call float @llvm.trunc.f32(float %ld0)
+ %trunc1 = call float @llvm.trunc.f32(float %ld1)
+ %trunc2 = call float @llvm.trunc.f32(float %ld2)
+ %trunc3 = call float @llvm.trunc.f32(float %ld3)
+ %trunc4 = call float @llvm.trunc.f32(float %ld4)
+ %trunc5 = call float @llvm.trunc.f32(float %ld5)
+ %trunc6 = call float @llvm.trunc.f32(float %ld6)
+ %trunc7 = call float @llvm.trunc.f32(float %ld7)
+ store float %trunc0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
+ store float %trunc1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+ store float %trunc2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
+ store float %trunc3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+ store float %trunc4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
+ store float %trunc5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
+ store float %trunc6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
+ store float %trunc7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+ ret void
+}
+
+define void @trunc_16f32() #0 {
+; SSE2-LABEL: @trunc_16f32(
+; SSE2-NEXT: [[LD0:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
+; SSE2-NEXT: [[LD1:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
+; SSE2-NEXT: [[LD2:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
+; SSE2-NEXT: [[LD3:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+; SSE2-NEXT: [[LD4:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
+; SSE2-NEXT: [[LD5:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
+; SSE2-NEXT: [[LD6:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
+; SSE2-NEXT: [[LD7:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+; SSE2-NEXT: [[LD8:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8), align 4
+; SSE2-NEXT: [[LD9:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 9), align 4
+; SSE2-NEXT: [[LD10:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 10), align 4
+; SSE2-NEXT: [[LD11:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 11), align 4
+; SSE2-NEXT: [[LD12:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12), align 4
+; SSE2-NEXT: [[LD13:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 13), align 4
+; SSE2-NEXT: [[LD14:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 14), align 4
+; SSE2-NEXT: [[LD15:%.*]] = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 15), align 4
+; SSE2-NEXT: [[TRUNC0:%.*]] = call float @llvm.trunc.f32(float [[LD0]])
+; SSE2-NEXT: [[TRUNC1:%.*]] = call float @llvm.trunc.f32(float [[LD1]])
+; SSE2-NEXT: [[TRUNC2:%.*]] = call float @llvm.trunc.f32(float [[LD2]])
+; SSE2-NEXT: [[TRUNC3:%.*]] = call float @llvm.trunc.f32(float [[LD3]])
+; SSE2-NEXT: [[TRUNC4:%.*]] = call float @llvm.trunc.f32(float [[LD4]])
+; SSE2-NEXT: [[TRUNC5:%.*]] = call float @llvm.trunc.f32(float [[LD5]])
+; SSE2-NEXT: [[TRUNC6:%.*]] = call float @llvm.trunc.f32(float [[LD6]])
+; SSE2-NEXT: [[TRUNC7:%.*]] = call float @llvm.trunc.f32(float [[LD7]])
+; SSE2-NEXT: [[TRUNC8:%.*]] = call float @llvm.trunc.f32(float [[LD8]])
+; SSE2-NEXT: [[TRUNC9:%.*]] = call float @llvm.trunc.f32(float [[LD9]])
+; SSE2-NEXT: [[TRUNC10:%.*]] = call float @llvm.trunc.f32(float [[LD10]])
+; SSE2-NEXT: [[TRUNC11:%.*]] = call float @llvm.trunc.f32(float [[LD11]])
+; SSE2-NEXT: [[TRUNC12:%.*]] = call float @llvm.trunc.f32(float [[LD12]])
+; SSE2-NEXT: [[TRUNC13:%.*]] = call float @llvm.trunc.f32(float [[LD13]])
+; SSE2-NEXT: [[TRUNC14:%.*]] = call float @llvm.trunc.f32(float [[LD14]])
+; SSE2-NEXT: [[TRUNC15:%.*]] = call float @llvm.trunc.f32(float [[LD15]])
+; SSE2-NEXT: store float [[TRUNC0]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
+; SSE2-NEXT: store float [[TRUNC1]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+; SSE2-NEXT: store float [[TRUNC2]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
+; SSE2-NEXT: store float [[TRUNC3]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+; SSE2-NEXT: store float [[TRUNC4]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
+; SSE2-NEXT: store float [[TRUNC5]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
+; SSE2-NEXT: store float [[TRUNC6]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
+; SSE2-NEXT: store float [[TRUNC7]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+; SSE2-NEXT: store float [[TRUNC8]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8), align 4
+; SSE2-NEXT: store float [[TRUNC9]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9), align 4
+; SSE2-NEXT: store float [[TRUNC10]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 4
+; SSE2-NEXT: store float [[TRUNC11]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
+; SSE2-NEXT: store float [[TRUNC12]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 4
+; SSE2-NEXT: store float [[TRUNC13]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
+; SSE2-NEXT: store float [[TRUNC14]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 4
+; SSE2-NEXT: store float [[TRUNC15]], float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+; SSE2-NEXT: ret void
+;
+; SSE41-LABEL: @trunc_16f32(
+; SSE41-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE41-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <4 x float>*), align 4
+; SSE41-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12) to <4 x float>*), align 4
+; SSE41-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP1]])
+; SSE41-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP2]])
+; SSE41-NEXT: [[TMP7:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP3]])
+; SSE41-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.trunc.v4f32(<4 x float> [[TMP4]])
+; SSE41-NEXT: store <4 x float> [[TMP5]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; SSE41-NEXT: store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE41-NEXT: store <4 x float> [[TMP7]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 4
+; SSE41-NEXT: store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 4
+; SSE41-NEXT: ret void
+;
+; AVX1-LABEL: @trunc_16f32(
+; AVX1-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX1-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX1-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.trunc.v8f32(<8 x float> [[TMP1]])
+; AVX1-NEXT: [[TMP4:%.*]] = call <8 x float> @llvm.trunc.v8f32(<8 x float> [[TMP2]])
+; AVX1-NEXT: store <8 x float> [[TMP3]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
+; AVX1-NEXT: store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX1-NEXT: ret void
+;
+; AVX2-LABEL: @trunc_16f32(
+; AVX2-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX2-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX2-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.trunc.v8f32(<8 x float> [[TMP1]])
+; AVX2-NEXT: [[TMP4:%.*]] = call <8 x float> @llvm.trunc.v8f32(<8 x float> [[TMP2]])
+; AVX2-NEXT: store <8 x float> [[TMP3]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
+; AVX2-NEXT: store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX2-NEXT: ret void
+;
+; AVX512-LABEL: @trunc_16f32(
+; AVX512-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* bitcast ([16 x float]* @src32 to <16 x float>*), align 4
+; AVX512-NEXT: [[TMP2:%.*]] = call <16 x float> @llvm.trunc.v16f32(<16 x float> [[TMP1]])
+; AVX512-NEXT: store <16 x float> [[TMP2]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 4
+; AVX512-NEXT: ret void
+;
+ %ld0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0 ), align 4
+ %ld1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1 ), align 4
+ %ld2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2 ), align 4
+ %ld3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3 ), align 4
+ %ld4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4 ), align 4
+ %ld5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5 ), align 4
+ %ld6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6 ), align 4
+ %ld7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7 ), align 4
+ %ld8 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8 ), align 4
+ %ld9 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 9 ), align 4
+ %ld10 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 10), align 4
+ %ld11 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 11), align 4
+ %ld12 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12), align 4
+ %ld13 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 13), align 4
+ %ld14 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 14), align 4
+ %ld15 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 15), align 4
+ %trunc0 = call float @llvm.trunc.f32(float %ld0 )
+ %trunc1 = call float @llvm.trunc.f32(float %ld1 )
+ %trunc2 = call float @llvm.trunc.f32(float %ld2 )
+ %trunc3 = call float @llvm.trunc.f32(float %ld3 )
+ %trunc4 = call float @llvm.trunc.f32(float %ld4 )
+ %trunc5 = call float @llvm.trunc.f32(float %ld5 )
+ %trunc6 = call float @llvm.trunc.f32(float %ld6 )
+ %trunc7 = call float @llvm.trunc.f32(float %ld7 )
+ %trunc8 = call float @llvm.trunc.f32(float %ld8 )
+ %trunc9 = call float @llvm.trunc.f32(float %ld9 )
+ %trunc10 = call float @llvm.trunc.f32(float %ld10)
+ %trunc11 = call float @llvm.trunc.f32(float %ld11)
+ %trunc12 = call float @llvm.trunc.f32(float %ld12)
+ %trunc13 = call float @llvm.trunc.f32(float %ld13)
+ %trunc14 = call float @llvm.trunc.f32(float %ld14)
+ %trunc15 = call float @llvm.trunc.f32(float %ld15)
+ store float %trunc0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0 ), align 4
+ store float %trunc1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1 ), align 4
+ store float %trunc2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2 ), align 4
+ store float %trunc3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3 ), align 4
+ store float %trunc4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4 ), align 4
+ store float %trunc5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5 ), align 4
+ store float %trunc6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6 ), align 4
+ store float %trunc7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7 ), align 4
+ store float %trunc8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8 ), align 4
+ store float %trunc9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9 ), align 4
+ store float %trunc10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 4
+ store float %trunc11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
+ store float %trunc12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 4
+ store float %trunc13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
+ store float %trunc14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 4
+ store float %trunc15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+ ret void
+}
+
+attributes #0 = { nounwind }
+
diff --git a/test/Transforms/SLPVectorizer/X86/funclet.ll b/test/Transforms/SLPVectorizer/X86/funclet.ll
new file mode 100644
index 000000000000..11d0bd940902
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/funclet.ll
@@ -0,0 +1,48 @@
+; RUN: opt -S -slp-vectorizer < %s | FileCheck %s
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686-pc-windows-msvc18.0.0"
+
+define void @test1(double* %a, double* %b, double* %c) #0 personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ invoke void @_CxxThrowException(i8* null, i8* null)
+ to label %unreachable unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %entry
+ %0 = catchswitch within none [label %catch] unwind to caller
+
+catch: ; preds = %catch.dispatch
+ %1 = catchpad within %0 [i8* null, i32 64, i8* null]
+ %i0 = load double, double* %a, align 8
+ %i1 = load double, double* %b, align 8
+ %mul = fmul double %i0, %i1
+ %call = tail call double @floor(double %mul) #1 [ "funclet"(token %1) ]
+ %arrayidx3 = getelementptr inbounds double, double* %a, i64 1
+ %i3 = load double, double* %arrayidx3, align 8
+ %arrayidx4 = getelementptr inbounds double, double* %b, i64 1
+ %i4 = load double, double* %arrayidx4, align 8
+ %mul5 = fmul double %i3, %i4
+ %call5 = tail call double @floor(double %mul5) #1 [ "funclet"(token %1) ]
+ store double %call, double* %c, align 8
+ %arrayidx5 = getelementptr inbounds double, double* %c, i64 1
+ store double %call5, double* %arrayidx5, align 8
+ catchret from %1 to label %try.cont
+
+try.cont: ; preds = %for.cond.cleanup
+ ret void
+
+unreachable: ; preds = %entry
+ unreachable
+}
+
+; CHECK-LABEL: define void @test1(
+; CHECK: %[[cpad:.*]] = catchpad within {{.*}} [i8* null, i32 64, i8* null]
+; CHECK: call <2 x double> @llvm.floor.v2f64(<2 x double> {{.*}}) [ "funclet"(token %[[cpad]]) ]
+
+declare x86_stdcallcc void @_CxxThrowException(i8*, i8*)
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare double @floor(double) #1
+
+attributes #0 = { "target-features"="+sse2" }
+attributes #1 = { nounwind readnone }
diff --git a/test/Transforms/SLPVectorizer/X86/gep.ll b/test/Transforms/SLPVectorizer/X86/gep.ll
index d10f2b6015d4..60b0a114c516 100644
--- a/test/Transforms/SLPVectorizer/X86/gep.ll
+++ b/test/Transforms/SLPVectorizer/X86/gep.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -basicaa -slp-vectorizer -S |FileCheck %s
+; RUN: opt < %s -aa-pipeline=basic-aa -passes=slp-vectorizer -S |FileCheck %s
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-unknown"
diff --git a/test/Transforms/SLPVectorizer/X86/gep_mismatch.ll b/test/Transforms/SLPVectorizer/X86/gep_mismatch.ll
new file mode 100644
index 000000000000..1cd28a909f79
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/gep_mismatch.ll
@@ -0,0 +1,22 @@
+; RUN: opt < %s -S -slp-vectorizer
+
+; This code has GEPs with different index types, which should not
+; matter for the SLPVectorizer.
+
+target triple = "x86_64--linux"
+
+define void @foo() {
+entry:
+ br label %bb1
+
+bb1:
+ %ls1.ph = phi float* [ %_tmp1, %bb1 ], [ undef, %entry ]
+ %ls2.ph = phi float* [ %_tmp2, %bb1 ], [ undef, %entry ]
+ store float undef, float* %ls1.ph
+ %_tmp1 = getelementptr float, float* %ls1.ph, i32 1
+ %_tmp2 = getelementptr float, float* %ls2.ph, i64 4
+ br i1 false, label %bb1, label %bb2
+
+bb2:
+ ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/insertvalue.ll b/test/Transforms/SLPVectorizer/X86/insertvalue.ll
new file mode 100644
index 000000000000..5884ee7a2675
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/insertvalue.ll
@@ -0,0 +1,189 @@
+; RUN: opt < %s -basicaa -slp-vectorizer -S -mtriple=x86_64-unknown-linux-gnu -mcpu=corei7-avx | FileCheck %s
+
+; CHECK-LABEL: julia_2xdouble
+; CHECK: load <2 x double>
+; CHECK: load <2 x double>
+; CHECK: fmul <2 x double>
+; CHECK: fadd <2 x double>
+define void @julia_2xdouble([2 x double]* sret, [2 x double]*, [2 x double]*, [2 x double]*) {
+top:
+ %px0 = getelementptr inbounds [2 x double], [2 x double]* %2, i64 0, i64 0
+ %x0 = load double, double* %px0, align 4
+ %py0 = getelementptr inbounds [2 x double], [2 x double]* %3, i64 0, i64 0
+ %y0 = load double, double* %py0, align 4
+ %m0 = fmul double %x0, %y0
+ %px1 = getelementptr inbounds [2 x double], [2 x double]* %2, i64 0, i64 1
+ %x1 = load double, double* %px1, align 4
+ %py1 = getelementptr inbounds [2 x double], [2 x double]* %3, i64 0, i64 1
+ %y1 = load double, double* %py1, align 4
+ %m1 = fmul double %x1, %y1
+ %pz0 = getelementptr inbounds [2 x double], [2 x double]* %1, i64 0, i64 0
+ %z0 = load double, double* %pz0, align 4
+ %a0 = fadd double %m0, %z0
+ %i0 = insertvalue [2 x double] undef, double %a0, 0
+ %pz1 = getelementptr inbounds [2 x double], [2 x double]* %1, i64 0, i64 1
+ %z1 = load double, double* %pz1, align 4
+ %a1 = fadd double %m1, %z1
+ %i1 = insertvalue [2 x double] %i0, double %a1, 1
+ store [2 x double] %i1, [2 x double]* %0, align 4
+ ret void
+}
+
+; CHECK-LABEL: julia_4xfloat
+; CHECK: load <4 x float>
+; CHECK: load <4 x float>
+; CHECK: fmul <4 x float>
+; CHECK: fadd <4 x float>
+define void @julia_4xfloat([4 x float]* sret, [4 x float]*, [4 x float]*, [4 x float]*) {
+top:
+ %px0 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 0
+ %x0 = load float, float* %px0, align 4
+ %py0 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 0
+ %y0 = load float, float* %py0, align 4
+ %m0 = fmul float %x0, %y0
+ %px1 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 1
+ %x1 = load float, float* %px1, align 4
+ %py1 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 1
+ %y1 = load float, float* %py1, align 4
+ %m1 = fmul float %x1, %y1
+ %px2 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 2
+ %x2 = load float, float* %px2, align 4
+ %py2 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 2
+ %y2 = load float, float* %py2, align 4
+ %m2 = fmul float %x2, %y2
+ %px3 = getelementptr inbounds [4 x float], [4 x float]* %2, i64 0, i64 3
+ %x3 = load float, float* %px3, align 4
+ %py3 = getelementptr inbounds [4 x float], [4 x float]* %3, i64 0, i64 3
+ %y3 = load float, float* %py3, align 4
+ %m3 = fmul float %x3, %y3
+ %pz0 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 0
+ %z0 = load float, float* %pz0, align 4
+ %a0 = fadd float %m0, %z0
+ %i0 = insertvalue [4 x float] undef, float %a0, 0
+ %pz1 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 1
+ %z1 = load float, float* %pz1, align 4
+ %a1 = fadd float %m1, %z1
+ %i1 = insertvalue [4 x float] %i0, float %a1, 1
+ %pz2 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 2
+ %z2 = load float, float* %pz2, align 4
+ %a2 = fadd float %m2, %z2
+ %i2 = insertvalue [4 x float] %i1, float %a2, 2
+ %pz3 = getelementptr inbounds [4 x float], [4 x float]* %1, i64 0, i64 3
+ %z3 = load float, float* %pz3, align 4
+ %a3 = fadd float %m3, %z3
+ %i3 = insertvalue [4 x float] %i2, float %a3, 3
+ store [4 x float] %i3, [4 x float]* %0, align 4
+ ret void
+}
+
+; CHECK-LABEL: julia_load_array_of_float
+; CHECK: fsub <4 x float>
+define void @julia_load_array_of_float([4 x float]* %a, [4 x float]* %b, [4 x float]* %c) {
+top:
+ %a_arr = load [4 x float], [4 x float]* %a, align 4
+ %a0 = extractvalue [4 x float] %a_arr, 0
+ %a2 = extractvalue [4 x float] %a_arr, 2
+ %a1 = extractvalue [4 x float] %a_arr, 1
+ %b_arr = load [4 x float], [4 x float]* %b, align 4
+ %b0 = extractvalue [4 x float] %b_arr, 0
+ %b2 = extractvalue [4 x float] %b_arr, 2
+ %b1 = extractvalue [4 x float] %b_arr, 1
+ %a3 = extractvalue [4 x float] %a_arr, 3
+ %c1 = fsub float %a1, %b1
+ %b3 = extractvalue [4 x float] %b_arr, 3
+ %c0 = fsub float %a0, %b0
+ %c2 = fsub float %a2, %b2
+ %c_arr0 = insertvalue [4 x float] undef, float %c0, 0
+ %c_arr1 = insertvalue [4 x float] %c_arr0, float %c1, 1
+ %c3 = fsub float %a3, %b3
+ %c_arr2 = insertvalue [4 x float] %c_arr1, float %c2, 2
+ %c_arr3 = insertvalue [4 x float] %c_arr2, float %c3, 3
+ store [4 x float] %c_arr3, [4 x float]* %c, align 4
+ ret void
+}
+
+; CHECK-LABEL: julia_load_array_of_i32
+; CHECK: load <4 x i32>
+; CHECK: load <4 x i32>
+; CHECK: sub <4 x i32>
+define void @julia_load_array_of_i32([4 x i32]* %a, [4 x i32]* %b, [4 x i32]* %c) {
+top:
+ %a_arr = load [4 x i32], [4 x i32]* %a, align 4
+ %a0 = extractvalue [4 x i32] %a_arr, 0
+ %a2 = extractvalue [4 x i32] %a_arr, 2
+ %a1 = extractvalue [4 x i32] %a_arr, 1
+ %b_arr = load [4 x i32], [4 x i32]* %b, align 4
+ %b0 = extractvalue [4 x i32] %b_arr, 0
+ %b2 = extractvalue [4 x i32] %b_arr, 2
+ %b1 = extractvalue [4 x i32] %b_arr, 1
+ %a3 = extractvalue [4 x i32] %a_arr, 3
+ %c1 = sub i32 %a1, %b1
+ %b3 = extractvalue [4 x i32] %b_arr, 3
+ %c0 = sub i32 %a0, %b0
+ %c2 = sub i32 %a2, %b2
+ %c_arr0 = insertvalue [4 x i32] undef, i32 %c0, 0
+ %c_arr1 = insertvalue [4 x i32] %c_arr0, i32 %c1, 1
+ %c3 = sub i32 %a3, %b3
+ %c_arr2 = insertvalue [4 x i32] %c_arr1, i32 %c2, 2
+ %c_arr3 = insertvalue [4 x i32] %c_arr2, i32 %c3, 3
+ store [4 x i32] %c_arr3, [4 x i32]* %c, align 4
+ ret void
+}
+
+; Almost identical to previous test, but for a type that should NOT be vectorized.
+;
+; CHECK-LABEL: julia_load_array_of_i16
+; CHECK-NOT: i2>
+define void @julia_load_array_of_i16([4 x i16]* %a, [4 x i16]* %b, [4 x i16]* %c) {
+top:
+ %a_arr = load [4 x i16], [4 x i16]* %a, align 4
+ %a0 = extractvalue [4 x i16] %a_arr, 0
+ %a2 = extractvalue [4 x i16] %a_arr, 2
+ %a1 = extractvalue [4 x i16] %a_arr, 1
+ %b_arr = load [4 x i16], [4 x i16]* %b, align 4
+ %b0 = extractvalue [4 x i16] %b_arr, 0
+ %b2 = extractvalue [4 x i16] %b_arr, 2
+ %b1 = extractvalue [4 x i16] %b_arr, 1
+ %a3 = extractvalue [4 x i16] %a_arr, 3
+ %c1 = sub i16 %a1, %b1
+ %b3 = extractvalue [4 x i16] %b_arr, 3
+ %c0 = sub i16 %a0, %b0
+ %c2 = sub i16 %a2, %b2
+ %c_arr0 = insertvalue [4 x i16] undef, i16 %c0, 0
+ %c_arr1 = insertvalue [4 x i16] %c_arr0, i16 %c1, 1
+ %c3 = sub i16 %a3, %b3
+ %c_arr2 = insertvalue [4 x i16] %c_arr1, i16 %c2, 2
+ %c_arr3 = insertvalue [4 x i16] %c_arr2, i16 %c3, 3
+ store [4 x i16] %c_arr3, [4 x i16]* %c, align 4
+ ret void
+}
+
+%pseudovec = type { float, float, float, float }
+
+; CHECK-LABEL: julia_load_struct_of_float
+; CHECK: load <4 x float>
+; CHECK: load <4 x float>
+; CHECK: fsub <4 x float>
+define void @julia_load_struct_of_float(%pseudovec* %a, %pseudovec* %b, %pseudovec* %c) {
+top:
+ %a_struct = load %pseudovec, %pseudovec* %a, align 4
+ %a0 = extractvalue %pseudovec %a_struct, 0
+ %a1 = extractvalue %pseudovec %a_struct, 1
+ %b_struct = load %pseudovec, %pseudovec* %b, align 4
+ %a2 = extractvalue %pseudovec %a_struct, 2
+ %b0 = extractvalue %pseudovec %b_struct, 0
+ %a3 = extractvalue %pseudovec %a_struct, 3
+ %c0 = fsub float %a0, %b0
+ %b1 = extractvalue %pseudovec %b_struct, 1
+ %b2 = extractvalue %pseudovec %b_struct, 2
+ %c1 = fsub float %a1, %b1
+ %c_struct0 = insertvalue %pseudovec undef, float %c0, 0
+ %b3 = extractvalue %pseudovec %b_struct, 3
+ %c3 = fsub float %a3, %b3
+ %c_struct1 = insertvalue %pseudovec %c_struct0, float %c1, 1
+ %c2 = fsub float %a2, %b2
+ %c_struct2 = insertvalue %pseudovec %c_struct1, float %c2, 2
+ %c_struct3 = insertvalue %pseudovec %c_struct2, float %c3, 3
+ store %pseudovec %c_struct3, %pseudovec* %c, align 4
+ ret void
+}
diff --git a/test/Transforms/SLPVectorizer/X86/ordering.ll b/test/Transforms/SLPVectorizer/X86/ordering.ll
index dfe95ac824e3..11f5a3ddc741 100644
--- a/test/Transforms/SLPVectorizer/X86/ordering.ll
+++ b/test/Transforms/SLPVectorizer/X86/ordering.ll
@@ -48,7 +48,7 @@ cond.false57:
; CHECK-LABEL: invoketest
; CHECK-LABEL: cond.end60
-; CHECK-NEXT-NOT: phi <2 x double>
+; CHECK-NOT: phi <2 x double>
; CHECK: insertelement
; CHECK-LABEL: if.then63
diff --git a/test/Transforms/SLPVectorizer/X86/pr27163.ll b/test/Transforms/SLPVectorizer/X86/pr27163.ll
new file mode 100644
index 000000000000..2b8480ef82c6
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/pr27163.ll
@@ -0,0 +1,50 @@
+; RUN: opt -slp-vectorizer -S < %s | FileCheck %s
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc18.0.0"
+
+%struct.B = type { i64, i64 }
+
+define void @test1(%struct.B* %p) personality i32 (...)* @__CxxFrameHandler3 {
+invoke.cont:
+ %gep1 = getelementptr inbounds %struct.B, %struct.B* %p, i64 0, i32 0
+ %gep2 = getelementptr inbounds %struct.B, %struct.B* %p, i64 0, i32 1
+ %load1 = load i64, i64* %gep1, align 8
+ %load2 = load i64, i64* %gep2, align 8
+ store i64 %load1, i64* %gep1, align 8
+ store i64 %load2, i64* %gep2, align 8
+ invoke void @throw()
+ to label %unreachable unwind label %catch.dispatch
+
+catch.dispatch: ; preds = %invoke.cont
+ %cs = catchswitch within none [label %invoke.cont1] unwind label %ehcleanup
+
+invoke.cont1: ; preds = %catch.dispatch
+ %catch = catchpad within %cs [i8* null, i32 64, i8* null]
+ invoke void @throw() [ "funclet"(token %catch) ]
+ to label %unreachable unwind label %ehcleanup
+
+ehcleanup: ; preds = %invoke.cont1, %catch.dispatch
+ %phi = phi i64 [ %load1, %catch.dispatch ], [ 9, %invoke.cont1 ]
+ %cleanup = cleanuppad within none []
+ call void @release(i64 %phi) [ "funclet"(token %cleanup) ]
+ cleanupret from %cleanup unwind to caller
+
+unreachable: ; preds = %invoke.cont1, %invoke.cont
+ unreachable
+}
+
+
+; CHECK-LABEL: define void @test1(
+; CHECK: %[[gep:.*]] = getelementptr inbounds %struct.B, %struct.B* %p, i64 0, i32 0
+; CHECK: %[[bc:.*]] = bitcast i64* %[[gep]] to <2 x i64>*
+; CHECK: %[[ld:.*]] = load <2 x i64>, <2 x i64>* %[[bc]], align 8
+; CHECK: %[[ee:.*]] = extractelement <2 x i64> %[[ld]], i32 0
+
+; CHECK: %[[phi:.*]] = phi i64 [ %[[ee]], {{.*}} ], [ 9, {{.*}} ]
+; CHECK: call void @release(i64 %[[phi]])
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare void @throw()
+
+declare void @release(i64)
diff --git a/test/Transforms/SLPVectorizer/X86/sqrt.ll b/test/Transforms/SLPVectorizer/X86/sqrt.ll
new file mode 100644
index 000000000000..00339d0de24f
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/sqrt.ll
@@ -0,0 +1,274 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -mtriple=x86_64-unknown -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=SSE
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=corei7-avx -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=bdver1 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=core-avx2 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX256
+; RUN: opt < %s -mtriple=x86_64-unknown -mcpu=skylake-avx512 -basicaa -slp-vectorizer -S | FileCheck %s --check-prefix=CHECK --check-prefix=AVX --check-prefix=AVX512
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+@src64 = common global [8 x double] zeroinitializer, align 64
+@src32 = common global [16 x float] zeroinitializer, align 64
+@dst64 = common global [8 x double] zeroinitializer, align 64
+@dst32 = common global [16 x float] zeroinitializer, align 64
+
+declare float @llvm.sqrt.f32(float)
+declare double @llvm.sqrt.f64(double)
+
+;
+; SQRT
+;
+
+define void @fma_2f64() #0 {
+; CHECK-LABEL: @fma_2f64(
+; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; CHECK-NEXT: [[TMP2:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP1]])
+; CHECK-NEXT: store <2 x double> [[TMP2]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; CHECK-NEXT: ret void
+;
+ %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
+ %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+ %sqrt0 = call double @llvm.sqrt.f64(double %a0)
+ %sqrt1 = call double @llvm.sqrt.f64(double %a1)
+ store double %sqrt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+ store double %sqrt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+ ret void
+}
+
+define void @fma_4f64() #0 {
+; SSE-LABEL: @fma_4f64(
+; SSE-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 8
+; SSE-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP1]])
+; SSE-NEXT: [[TMP4:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP2]])
+; SSE-NEXT: store <2 x double> [[TMP3]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 8
+; SSE-NEXT: store <2 x double> [[TMP4]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 8
+; SSE-NEXT: ret void
+;
+; AVX-LABEL: @fma_4f64(
+; AVX-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 8
+; AVX-NEXT: [[TMP2:%.*]] = call <4 x double> @llvm.sqrt.v4f64(<4 x double> [[TMP1]])
+; AVX-NEXT: store <4 x double> [[TMP2]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 8
+; AVX-NEXT: ret void
+;
+ %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 8
+ %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 8
+ %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 8
+ %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 8
+ %sqrt0 = call double @llvm.sqrt.f64(double %a0)
+ %sqrt1 = call double @llvm.sqrt.f64(double %a1)
+ %sqrt2 = call double @llvm.sqrt.f64(double %a2)
+ %sqrt3 = call double @llvm.sqrt.f64(double %a3)
+ store double %sqrt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 8
+ store double %sqrt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 8
+ store double %sqrt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 8
+ store double %sqrt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 8
+ ret void
+}
+
+define void @fma_8f64() #0 {
+; SSE-LABEL: @fma_8f64(
+; SSE-NEXT: [[TMP1:%.*]] = load <2 x double>, <2 x double>* bitcast ([8 x double]* @src64 to <2 x double>*), align 4
+; SSE-NEXT: [[TMP2:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2) to <2 x double>*), align 4
+; SSE-NEXT: [[TMP3:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <2 x double>*), align 4
+; SSE-NEXT: [[TMP4:%.*]] = load <2 x double>, <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6) to <2 x double>*), align 4
+; SSE-NEXT: [[TMP5:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP1]])
+; SSE-NEXT: [[TMP6:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP2]])
+; SSE-NEXT: [[TMP7:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP3]])
+; SSE-NEXT: [[TMP8:%.*]] = call <2 x double> @llvm.sqrt.v2f64(<2 x double> [[TMP4]])
+; SSE-NEXT: store <2 x double> [[TMP5]], <2 x double>* bitcast ([8 x double]* @dst64 to <2 x double>*), align 4
+; SSE-NEXT: store <2 x double> [[TMP6]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2) to <2 x double>*), align 4
+; SSE-NEXT: store <2 x double> [[TMP7]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <2 x double>*), align 4
+; SSE-NEXT: store <2 x double> [[TMP8]], <2 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6) to <2 x double>*), align 4
+; SSE-NEXT: ret void
+;
+; AVX256-LABEL: @fma_8f64(
+; AVX256-NEXT: [[TMP1:%.*]] = load <4 x double>, <4 x double>* bitcast ([8 x double]* @src64 to <4 x double>*), align 4
+; AVX256-NEXT: [[TMP2:%.*]] = load <4 x double>, <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4) to <4 x double>*), align 4
+; AVX256-NEXT: [[TMP3:%.*]] = call <4 x double> @llvm.sqrt.v4f64(<4 x double> [[TMP1]])
+; AVX256-NEXT: [[TMP4:%.*]] = call <4 x double> @llvm.sqrt.v4f64(<4 x double> [[TMP2]])
+; AVX256-NEXT: store <4 x double> [[TMP3]], <4 x double>* bitcast ([8 x double]* @dst64 to <4 x double>*), align 4
+; AVX256-NEXT: store <4 x double> [[TMP4]], <4 x double>* bitcast (double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4) to <4 x double>*), align 4
+; AVX256-NEXT: ret void
+;
+; AVX512-LABEL: @fma_8f64(
+; AVX512-NEXT: [[TMP1:%.*]] = load <8 x double>, <8 x double>* bitcast ([8 x double]* @src64 to <8 x double>*), align 4
+; AVX512-NEXT: [[TMP2:%.*]] = call <8 x double> @llvm.sqrt.v8f64(<8 x double> [[TMP1]])
+; AVX512-NEXT: store <8 x double> [[TMP2]], <8 x double>* bitcast ([8 x double]* @dst64 to <8 x double>*), align 4
+; AVX512-NEXT: ret void
+;
+ %a0 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 0), align 4
+ %a1 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 1), align 4
+ %a2 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 2), align 4
+ %a3 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 3), align 4
+ %a4 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 4), align 4
+ %a5 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 5), align 4
+ %a6 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 6), align 4
+ %a7 = load double, double* getelementptr inbounds ([8 x double], [8 x double]* @src64, i32 0, i64 7), align 4
+ %sqrt0 = call double @llvm.sqrt.f64(double %a0)
+ %sqrt1 = call double @llvm.sqrt.f64(double %a1)
+ %sqrt2 = call double @llvm.sqrt.f64(double %a2)
+ %sqrt3 = call double @llvm.sqrt.f64(double %a3)
+ %sqrt4 = call double @llvm.sqrt.f64(double %a4)
+ %sqrt5 = call double @llvm.sqrt.f64(double %a5)
+ %sqrt6 = call double @llvm.sqrt.f64(double %a6)
+ %sqrt7 = call double @llvm.sqrt.f64(double %a7)
+ store double %sqrt0, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 0), align 4
+ store double %sqrt1, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 1), align 4
+ store double %sqrt2, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 2), align 4
+ store double %sqrt3, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 3), align 4
+ store double %sqrt4, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 4), align 4
+ store double %sqrt5, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 5), align 4
+ store double %sqrt6, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 6), align 4
+ store double %sqrt7, double* getelementptr inbounds ([8 x double], [8 x double]* @dst64, i32 0, i64 7), align 4
+ ret void
+}
+
+define void @fma_4f32() #0 {
+; CHECK-LABEL: @fma_4f32(
+; CHECK-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; CHECK-NEXT: [[TMP2:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP1]])
+; CHECK-NEXT: store <4 x float> [[TMP2]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; CHECK-NEXT: ret void
+;
+ %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
+ %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
+ %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
+ %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+ %sqrt0 = call float @llvm.sqrt.f32(float %a0)
+ %sqrt1 = call float @llvm.sqrt.f32(float %a1)
+ %sqrt2 = call float @llvm.sqrt.f32(float %a2)
+ %sqrt3 = call float @llvm.sqrt.f32(float %a3)
+ store float %sqrt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
+ store float %sqrt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+ store float %sqrt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
+ store float %sqrt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+ ret void
+}
+
+define void @fma_8f32() #0 {
+; SSE-LABEL: @fma_8f32(
+; SSE-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE-NEXT: [[TMP3:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP1]])
+; SSE-NEXT: [[TMP4:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP2]])
+; SSE-NEXT: store <4 x float> [[TMP3]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; SSE-NEXT: store <4 x float> [[TMP4]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE-NEXT: ret void
+;
+; AVX-LABEL: @fma_8f32(
+; AVX-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX-NEXT: [[TMP2:%.*]] = call <8 x float> @llvm.sqrt.v8f32(<8 x float> [[TMP1]])
+; AVX-NEXT: store <8 x float> [[TMP2]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
+; AVX-NEXT: ret void
+;
+ %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
+ %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
+ %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
+ %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+ %a4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
+ %a5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
+ %a6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
+ %a7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+ %sqrt0 = call float @llvm.sqrt.f32(float %a0)
+ %sqrt1 = call float @llvm.sqrt.f32(float %a1)
+ %sqrt2 = call float @llvm.sqrt.f32(float %a2)
+ %sqrt3 = call float @llvm.sqrt.f32(float %a3)
+ %sqrt4 = call float @llvm.sqrt.f32(float %a4)
+ %sqrt5 = call float @llvm.sqrt.f32(float %a5)
+ %sqrt6 = call float @llvm.sqrt.f32(float %a6)
+ %sqrt7 = call float @llvm.sqrt.f32(float %a7)
+ store float %sqrt0, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
+ store float %sqrt1, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+ store float %sqrt2, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
+ store float %sqrt3, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+ store float %sqrt4, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
+ store float %sqrt5, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
+ store float %sqrt6, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
+ store float %sqrt7, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+ ret void
+}
+
+define void @fma_16f32() #0 {
+; SSE-LABEL: @fma_16f32(
+; SSE-NEXT: [[TMP1:%.*]] = load <4 x float>, <4 x float>* bitcast ([16 x float]* @src32 to <4 x float>*), align 4
+; SSE-NEXT: [[TMP2:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE-NEXT: [[TMP3:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <4 x float>*), align 4
+; SSE-NEXT: [[TMP4:%.*]] = load <4 x float>, <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12) to <4 x float>*), align 4
+; SSE-NEXT: [[TMP5:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP1]])
+; SSE-NEXT: [[TMP6:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP2]])
+; SSE-NEXT: [[TMP7:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP3]])
+; SSE-NEXT: [[TMP8:%.*]] = call <4 x float> @llvm.sqrt.v4f32(<4 x float> [[TMP4]])
+; SSE-NEXT: store <4 x float> [[TMP5]], <4 x float>* bitcast ([16 x float]* @dst32 to <4 x float>*), align 4
+; SSE-NEXT: store <4 x float> [[TMP6]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4) to <4 x float>*), align 4
+; SSE-NEXT: store <4 x float> [[TMP7]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <4 x float>*), align 4
+; SSE-NEXT: store <4 x float> [[TMP8]], <4 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12) to <4 x float>*), align 4
+; SSE-NEXT: ret void
+;
+; AVX256-LABEL: @fma_16f32(
+; AVX256-NEXT: [[TMP1:%.*]] = load <8 x float>, <8 x float>* bitcast ([16 x float]* @src32 to <8 x float>*), align 4
+; AVX256-NEXT: [[TMP2:%.*]] = load <8 x float>, <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX256-NEXT: [[TMP3:%.*]] = call <8 x float> @llvm.sqrt.v8f32(<8 x float> [[TMP1]])
+; AVX256-NEXT: [[TMP4:%.*]] = call <8 x float> @llvm.sqrt.v8f32(<8 x float> [[TMP2]])
+; AVX256-NEXT: store <8 x float> [[TMP3]], <8 x float>* bitcast ([16 x float]* @dst32 to <8 x float>*), align 4
+; AVX256-NEXT: store <8 x float> [[TMP4]], <8 x float>* bitcast (float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8) to <8 x float>*), align 4
+; AVX256-NEXT: ret void
+;
+; AVX512-LABEL: @fma_16f32(
+; AVX512-NEXT: [[TMP1:%.*]] = load <16 x float>, <16 x float>* bitcast ([16 x float]* @src32 to <16 x float>*), align 4
+; AVX512-NEXT: [[TMP2:%.*]] = call <16 x float> @llvm.sqrt.v16f32(<16 x float> [[TMP1]])
+; AVX512-NEXT: store <16 x float> [[TMP2]], <16 x float>* bitcast ([16 x float]* @dst32 to <16 x float>*), align 4
+; AVX512-NEXT: ret void
+;
+ %a0 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 0), align 4
+ %a1 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 1), align 4
+ %a2 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 2), align 4
+ %a3 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 3), align 4
+ %a4 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 4), align 4
+ %a5 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 5), align 4
+ %a6 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 6), align 4
+ %a7 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 7), align 4
+ %a8 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 8), align 4
+ %a9 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 9), align 4
+ %a10 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 10), align 4
+ %a11 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 11), align 4
+ %a12 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 12), align 4
+ %a13 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 13), align 4
+ %a14 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 14), align 4
+ %a15 = load float, float* getelementptr inbounds ([16 x float], [16 x float]* @src32, i32 0, i64 15), align 4
+ %sqrt0 = call float @llvm.sqrt.f32(float %a0 )
+ %sqrt1 = call float @llvm.sqrt.f32(float %a1 )
+ %sqrt2 = call float @llvm.sqrt.f32(float %a2 )
+ %sqrt3 = call float @llvm.sqrt.f32(float %a3 )
+ %sqrt4 = call float @llvm.sqrt.f32(float %a4 )
+ %sqrt5 = call float @llvm.sqrt.f32(float %a5 )
+ %sqrt6 = call float @llvm.sqrt.f32(float %a6 )
+ %sqrt7 = call float @llvm.sqrt.f32(float %a7 )
+ %sqrt8 = call float @llvm.sqrt.f32(float %a8 )
+ %sqrt9 = call float @llvm.sqrt.f32(float %a9 )
+ %sqrt10 = call float @llvm.sqrt.f32(float %a10)
+ %sqrt11 = call float @llvm.sqrt.f32(float %a11)
+ %sqrt12 = call float @llvm.sqrt.f32(float %a12)
+ %sqrt13 = call float @llvm.sqrt.f32(float %a13)
+ %sqrt14 = call float @llvm.sqrt.f32(float %a14)
+ %sqrt15 = call float @llvm.sqrt.f32(float %a15)
+ store float %sqrt0 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 0), align 4
+ store float %sqrt1 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 1), align 4
+ store float %sqrt2 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 2), align 4
+ store float %sqrt3 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 3), align 4
+ store float %sqrt4 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 4), align 4
+ store float %sqrt5 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 5), align 4
+ store float %sqrt6 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 6), align 4
+ store float %sqrt7 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 7), align 4
+ store float %sqrt8 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 8), align 4
+ store float %sqrt9 , float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 9), align 4
+ store float %sqrt10, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 10), align 4
+ store float %sqrt11, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 11), align 4
+ store float %sqrt12, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 12), align 4
+ store float %sqrt13, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 13), align 4
+ store float %sqrt14, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 14), align 4
+ store float %sqrt15, float* getelementptr inbounds ([16 x float], [16 x float]* @dst32, i32 0, i64 15), align 4
+ ret void
+}
+
+attributes #0 = { nounwind }
diff --git a/test/Transforms/SLPVectorizer/X86/vector_gep.ll b/test/Transforms/SLPVectorizer/X86/vector_gep.ll
new file mode 100644
index 000000000000..595a77f0cfbf
--- /dev/null
+++ b/test/Transforms/SLPVectorizer/X86/vector_gep.ll
@@ -0,0 +1,24 @@
+;RUN: opt < %s -slp-vectorizer -S | FileCheck %s
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; This test checks that SLP vectorizer does not fail on vector GEP.
+; The GEP has scalar and vector parameters and returns vector of pointers.
+
+; Function Attrs: noreturn readonly uwtable
+define void @_Z3fn1v(i32 %x, <16 x i32*>%y) local_unnamed_addr #0 {
+; CHECK-LABEL: _Z3fn1v
+; CHECK: getelementptr i32, <16 x i32*>
+; CHECK: getelementptr i32, <16 x i32*>
+
+entry:
+ %conv42.le = sext i32 %x to i64
+ %conv36109.le = zext i32 2 to i64
+ %VectorGep = getelementptr i32, <16 x i32*> %y, i64 %conv36109.le
+ %VectorGep208 = getelementptr i32, <16 x i32*> %y, i64 %conv42.le
+ unreachable
+}
+
+attributes #0 = { noreturn readonly uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="knl" "target-features"="+adx,+aes,+avx,+avx2,+avx512cd,+avx512er,+avx512f,+avx512pf,+bmi,+bmi2,+cx16,+f16c,+fma,+fsgsbase,+fxsr,+lzcnt,+mmx,+movbe,+pclmul,+popcnt,+prefetchwt1,+rdrnd,+rdseed,+rtm,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave,+xsaveopt" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
diff --git a/test/Transforms/SROA/address-spaces.ll b/test/Transforms/SROA/address-spaces.ll
index 004695dceddc..119f2252d95e 100644
--- a/test/Transforms/SROA/address-spaces.ll
+++ b/test/Transforms/SROA/address-spaces.ll
@@ -66,3 +66,20 @@ for.end:
ret void
}
+%union.anon = type { i32* }
+
+@g = common global i32 0, align 4
+@l = common addrspace(3) global i32 0, align 4
+
+; Make sure an illegal bitcast isn't introduced
+define void @pr27557() {
+; CHECK-LABEL: @pr27557(
+; CHECK: %[[CAST:.*]] = bitcast i32** {{.*}} to i32 addrspace(3)**
+; CHECK: store i32 addrspace(3)* @l, i32 addrspace(3)** %[[CAST]]
+ %1 = alloca %union.anon, align 8
+ %2 = bitcast %union.anon* %1 to i32**
+ store i32* @g, i32** %2, align 8
+ %3 = bitcast %union.anon* %1 to i32 addrspace(3)**
+ store i32 addrspace(3)* @l, i32 addrspace(3)** %3, align 8
+ ret void
+}
diff --git a/test/Transforms/SROA/basictest.ll b/test/Transforms/SROA/basictest.ll
index 7b5daa9d7823..968669ad4f62 100644
--- a/test/Transforms/SROA/basictest.ll
+++ b/test/Transforms/SROA/basictest.ll
@@ -1633,3 +1633,39 @@ entry:
%load = load i16, i16* %bc2
ret i16 %load
}
+
+%struct.STest = type { %struct.SPos, %struct.SPos }
+%struct.SPos = type { float, float }
+
+define void @PR25873(%struct.STest* %outData) {
+; CHECK-LABEL: @PR25873(
+; CHECK: store i32 1123418112
+; CHECK: store i32 1139015680
+; CHECK: %[[HIZEXT:.*]] = zext i32 1139015680 to i64
+; CHECK: %[[HISHL:.*]] = shl i64 %[[HIZEXT]], 32
+; CHECK: %[[HIMASK:.*]] = and i64 undef, 4294967295
+; CHECK: %[[HIINSERT:.*]] = or i64 %[[HIMASK]], %[[HISHL]]
+; CHECK: %[[LOZEXT:.*]] = zext i32 1123418112 to i64
+; CHECK: %[[LOMASK:.*]] = and i64 %[[HIINSERT]], -4294967296
+; CHECK: %[[LOINSERT:.*]] = or i64 %[[LOMASK]], %[[LOZEXT]]
+; CHECK: store i64 %[[LOINSERT]]
+entry:
+ %tmpData = alloca %struct.STest, align 8
+ %0 = bitcast %struct.STest* %tmpData to i8*
+ call void @llvm.lifetime.start(i64 16, i8* %0)
+ %x = getelementptr inbounds %struct.STest, %struct.STest* %tmpData, i64 0, i32 0, i32 0
+ store float 1.230000e+02, float* %x, align 8
+ %y = getelementptr inbounds %struct.STest, %struct.STest* %tmpData, i64 0, i32 0, i32 1
+ store float 4.560000e+02, float* %y, align 4
+ %m_posB = getelementptr inbounds %struct.STest, %struct.STest* %tmpData, i64 0, i32 1
+ %1 = bitcast %struct.STest* %tmpData to i64*
+ %2 = bitcast %struct.SPos* %m_posB to i64*
+ %3 = load i64, i64* %1, align 8
+ store i64 %3, i64* %2, align 8
+ %4 = bitcast %struct.STest* %outData to i8*
+ call void @llvm.memcpy.p0i8.p0i8.i64(i8* %4, i8* %0, i64 16, i32 4, i1 false)
+ call void @llvm.lifetime.end(i64 16, i8* %0)
+ ret void
+}
+
+declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/Transforms/SROA/dbg-single-piece.ll b/test/Transforms/SROA/dbg-single-piece.ll
new file mode 100644
index 000000000000..319b7c14e6e0
--- /dev/null
+++ b/test/Transforms/SROA/dbg-single-piece.ll
@@ -0,0 +1,38 @@
+; RUN: opt -sroa %s -S | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+
+%foo = type { [8 x i8], [8 x i8] }
+
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #0
+define void @_ZL18findInsertLocationPN4llvm17MachineBasicBlockENS_9SlotIndexERNS_13LiveIntervalsE() {
+entry:
+ %retval = alloca %foo, align 8
+ call void @llvm.dbg.declare(metadata %foo* %retval, metadata !1, metadata !7), !dbg !8
+; Checks that SROA still inserts a bit_piece expression, even if it produces only one piece
+; (as long as that piece is smaller than the whole thing)
+; CHECK-NOT: call void @llvm.dbg.value
+; CHECK: call void @llvm.dbg.value(metadata %foo* undef, i64 0, {{.*}}, metadata ![[BIT_PIECE:[0-9]+]]), !dbg
+; CHECK-NOT: call void @llvm.dbg.value
+; CHECK: ![[BIT_PIECE]] = !DIExpression(DW_OP_bit_piece, 64, 64)
+ %0 = bitcast %foo* %retval to i8*
+ %1 = getelementptr inbounds i8, i8* %0, i64 8
+ %2 = bitcast i8* %1 to %foo**
+ store %foo* undef, %foo** %2, align 8
+ ret void
+}
+
+attributes #0 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!9}
+!llvm.module.flags = !{!0}
+
+!0 = !{i32 2, !"Debug Info Version", i32 3}
+!1 = !DILocalVariable(name: "I", scope: !2, file: !3, line: 947, type: !4)
+!2 = distinct !DISubprogram(name: "findInsertLocation", linkageName: "_ZL18findInsertLocationPN4llvm17MachineBasicBlockENS_9SlotIndexERNS_13LiveIntervalsE", scope: !3, file: !3, line: 937, isLocal: true, isDefinition: true, scopeLine: 938, flags: DIFlagPrototyped, isOptimized: true, unit: !9)
+!3 = !DIFile(filename: "none", directory: ".")
+!4 = !DICompositeType(tag: DW_TAG_class_type, name: "bundle_iterator<llvm::MachineInstr, llvm::ilist_iterator<llvm::MachineInstr> >", scope: !5, file: !3, line: 163, size: 128, align: 64, elements: !6, templateParams: !6, identifier: "_ZTSN4llvm17MachineBasicBlock15bundle_iteratorINS_12MachineInstrENS_14ilist_iteratorIS2_EEEE")
+!5 = distinct !DICompositeType(tag: DW_TAG_class_type, name: "MachineBasicBlock", file: !3, line: 68, size: 1408, align: 64, identifier: "_ZTSN4llvm17MachineBasicBlockE")
+!6 = !{}
+!7 = !DIExpression()
+!8 = !DILocation(line: 947, column: 35, scope: !2)
+!9 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !3)
diff --git a/test/Transforms/SROA/pr26972.ll b/test/Transforms/SROA/pr26972.ll
new file mode 100644
index 000000000000..a71058c05b98
--- /dev/null
+++ b/test/Transforms/SROA/pr26972.ll
@@ -0,0 +1,17 @@
+; RUN: opt < %s -sroa -S | FileCheck %s
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-linux"
+
+; Make sure we properly handle allocas where the allocated
+; size overflows a uint32_t. This specific constant results in
+; the size in bits being 32 after truncation to a 32-bit int.
+; CHECK-LABEL: fn1
+; CHECK-NEXT: ret void
+define void @fn1() {
+ %a = alloca [1073741825 x i32], align 16
+ %t0 = bitcast [1073741825 x i32]* %a to i8*
+ call void @llvm.lifetime.end(i64 4294967300, i8* %t0)
+ ret void
+}
+
+declare void @llvm.lifetime.end(i64, i8* nocapture)
diff --git a/test/Transforms/SafeStack/AArch64/abi.ll b/test/Transforms/SafeStack/AArch64/abi.ll
index cdec923eb74c..bd6710d160c5 100644
--- a/test/Transforms/SafeStack/AArch64/abi.ll
+++ b/test/Transforms/SafeStack/AArch64/abi.ll
@@ -3,7 +3,7 @@
define void @foo() nounwind uwtable safestack {
entry:
-; CHECK: %[[TP:.*]] = call i8* @llvm.aarch64.thread.pointer()
+; CHECK: %[[TP:.*]] = call i8* @llvm.thread.pointer()
; CHECK: %[[SPA0:.*]] = getelementptr i8, i8* %[[TP]], i32 72
; CHECK: %[[SPA:.*]] = bitcast i8* %[[SPA0]] to i8**
; CHECK: %[[USP:.*]] = load i8*, i8** %[[SPA]]
diff --git a/test/Transforms/SafeStack/AArch64/abi_ssp.ll b/test/Transforms/SafeStack/AArch64/abi_ssp.ll
new file mode 100644
index 000000000000..5d584d0a76b9
--- /dev/null
+++ b/test/Transforms/SafeStack/AArch64/abi_ssp.ll
@@ -0,0 +1,22 @@
+; RUN: opt -safe-stack -S -mtriple=aarch64-linux-android < %s -o - | FileCheck --check-prefix=TLS %s
+
+
+define void @foo() nounwind uwtable safestack sspreq {
+entry:
+; The first @llvm.thread.pointer is for the unsafe stack pointer, skip it.
+; TLS: call i8* @llvm.thread.pointer()
+
+; TLS: %[[TP2:.*]] = call i8* @llvm.thread.pointer()
+; TLS: %[[B:.*]] = getelementptr i8, i8* %[[TP2]], i32 40
+; TLS: %[[C:.*]] = bitcast i8* %[[B]] to i8**
+; TLS: %[[StackGuard:.*]] = load i8*, i8** %[[C]]
+; TLS: store i8* %[[StackGuard]], i8** %[[StackGuardSlot:.*]]
+ %a = alloca i128, align 16
+ call void @Capture(i128* %a)
+
+; TLS: %[[A:.*]] = load i8*, i8** %[[StackGuardSlot]]
+; TLS: icmp ne i8* %[[StackGuard]], %[[A]]
+ ret void
+}
+
+declare void @Capture(i128*)
diff --git a/test/Transforms/SafeStack/ARM/setjmp.ll b/test/Transforms/SafeStack/ARM/setjmp.ll
index 8c57908bbe4b..20e46f8f0e21 100644
--- a/test/Transforms/SafeStack/ARM/setjmp.ll
+++ b/test/Transforms/SafeStack/ARM/setjmp.ll
@@ -6,8 +6,8 @@
define void @f(i32 %b) safestack {
entry:
; CHECK: %[[SPA:.*]] = call i8** @__safestack_pointer_address()
-; CHECK: %[[USDP:.*]] = alloca i8*
; CHECK: %[[USP:.*]] = load i8*, i8** %[[SPA]]
+; CHECK: %[[USDP:.*]] = alloca i8*
; CHECK: store i8* %[[USP]], i8** %[[USDP]]
; CHECK: call i32 @setjmp
@@ -26,6 +26,8 @@ if.then:
br label %if.end
if.end:
+; CHECK: store i8* %[[USP:.*]], i8** %[[SPA:.*]]
+
ret void
}
diff --git a/test/Transforms/SafeStack/X86/abi_ssp.ll b/test/Transforms/SafeStack/X86/abi_ssp.ll
new file mode 100644
index 000000000000..ba4ced5b8820
--- /dev/null
+++ b/test/Transforms/SafeStack/X86/abi_ssp.ll
@@ -0,0 +1,19 @@
+; RUN: opt -safe-stack -S -mtriple=i686-pc-linux-gnu < %s -o - | FileCheck --check-prefix=TLS --check-prefix=TLS32 %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck --check-prefix=TLS --check-prefix=TLS64 %s
+; RUN: opt -safe-stack -S -mtriple=i686-linux-android < %s -o - | FileCheck --check-prefix=TLS --check-prefix=TLS32 %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-linux-android < %s -o - | FileCheck --check-prefix=TLS --check-prefix=TLS64 %s
+
+define void @foo() safestack sspreq {
+entry:
+; TLS32: %[[StackGuard:.*]] = load i8*, i8* addrspace(256)* inttoptr (i32 20 to i8* addrspace(256)*)
+; TLS64: %[[StackGuard:.*]] = load i8*, i8* addrspace(257)* inttoptr (i32 40 to i8* addrspace(257)*)
+; TLS: store i8* %[[StackGuard]], i8** %[[StackGuardSlot:.*]]
+ %a = alloca i8, align 1
+ call void @Capture(i8* %a)
+
+; TLS: %[[A:.*]] = load i8*, i8** %[[StackGuardSlot]]
+; TLS: icmp ne i8* %[[StackGuard]], %[[A]]
+ ret void
+}
+
+declare void @Capture(i8*)
diff --git a/test/Transforms/SafeStack/X86/ssp.ll b/test/Transforms/SafeStack/X86/ssp.ll
new file mode 100644
index 000000000000..0e28878c5477
--- /dev/null
+++ b/test/Transforms/SafeStack/X86/ssp.ll
@@ -0,0 +1,30 @@
+; RUN: opt -safe-stack -S -mtriple=x86_64-unknown < %s -o - | FileCheck %s
+
+define void @foo() safestack sspreq {
+entry:
+; CHECK: %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK: %[[USST:.*]] = getelementptr i8, i8* %[[USP]], i32 -16
+; CHECK: store i8* %[[USST]], i8** @__safestack_unsafe_stack_ptr
+
+; CHECK: %[[A:.*]] = getelementptr i8, i8* %[[USP]], i32 -8
+; CHECK: %[[StackGuardSlot:.*]] = bitcast i8* %[[A]] to i8**
+; CHECK: %[[StackGuard:.*]] = load i8*, i8** @__stack_chk_guard
+; CHECK: store i8* %[[StackGuard]], i8** %[[StackGuardSlot]]
+ %a = alloca i8, align 1
+
+; CHECK: call void @Capture
+ call void @Capture(i8* %a)
+
+; CHECK: %[[B:.*]] = load i8*, i8** %[[StackGuardSlot]]
+; CHECK: %[[COND:.*]] = icmp ne i8* %[[StackGuard]], %[[B]]
+; CHECK: br i1 %[[COND]], {{.*}} !prof
+
+; CHECK: call void @__stack_chk_fail()
+; CHECK-NEXT: unreachable
+
+; CHECK: store i8* %[[USP]], i8** @__safestack_unsafe_stack_ptr
+; CHECK-NEXT: ret void
+ ret void
+}
+
+declare void @Capture(i8*)
diff --git a/test/Transforms/SafeStack/array-aligned.ll b/test/Transforms/SafeStack/array-aligned.ll
index 4676903ec772..26558e4fa812 100644
--- a/test/Transforms/SafeStack/array-aligned.ll
+++ b/test/Transforms/SafeStack/array-aligned.ll
@@ -13,16 +13,15 @@ entry:
; CHECK: store i8* %[[USST]], i8** @__safestack_unsafe_stack_ptr
- ; CHECK: %[[AADDR:.*]] = alloca i8*, align 8
%a.addr = alloca i8*, align 8
-
- ; CHECK: %[[BUFPTR:.*]] = getelementptr i8, i8* %[[USP]], i32 -16
- ; CHECK: %[[BUFPTR2:.*]] = bitcast i8* %[[BUFPTR]] to [16 x i8]*
%buf = alloca [16 x i8], align 16
+ ; CHECK: %[[AADDR:.*]] = alloca i8*, align 8
; CHECK: store i8* {{.*}}, i8** %[[AADDR]], align 8
store i8* %a, i8** %a.addr, align 8
+ ; CHECK: %[[BUFPTR:.*]] = getelementptr i8, i8* %[[USP]], i32 -16
+ ; CHECK: %[[BUFPTR2:.*]] = bitcast i8* %[[BUFPTR]] to [16 x i8]*
; CHECK: %[[GEP:.*]] = getelementptr inbounds [16 x i8], [16 x i8]* %[[BUFPTR2]], i32 0, i32 0
%gep = getelementptr inbounds [16 x i8], [16 x i8]* %buf, i32 0, i32 0
diff --git a/test/Transforms/SafeStack/array.ll b/test/Transforms/SafeStack/array.ll
index 564213e6d58f..7dcf7fa50d94 100644
--- a/test/Transforms/SafeStack/array.ll
+++ b/test/Transforms/SafeStack/array.ll
@@ -17,16 +17,15 @@ entry:
; CHECK: store i8* %[[USST]], i8** @__safestack_unsafe_stack_ptr
- ; CHECK: %[[AADDR:.*]] = alloca i8*, align 8
%a.addr = alloca i8*, align 8
-
- ; CHECK: %[[BUFPTR:.*]] = getelementptr i8, i8* %[[USP]], i32 -4
- ; CHECK: %[[BUFPTR2:.*]] = bitcast i8* %[[BUFPTR]] to [4 x i8]*
%buf = alloca [4 x i8], align 1
+ ; CHECK: %[[AADDR:.*]] = alloca i8*, align 8
; CHECK: store i8* {{.*}}, i8** %[[AADDR]], align 8
store i8* %a, i8** %a.addr, align 8
+ ; CHECK: %[[BUFPTR:.*]] = getelementptr i8, i8* %[[USP]], i32 -4
+ ; CHECK: %[[BUFPTR2:.*]] = bitcast i8* %[[BUFPTR]] to [4 x i8]*
; CHECK: %[[GEP:.*]] = getelementptr inbounds [4 x i8], [4 x i8]* %[[BUFPTR2]], i32 0, i32 0
%gep = getelementptr inbounds [4 x i8], [4 x i8]* %buf, i32 0, i32 0
diff --git a/test/Transforms/SafeStack/coloring.ll b/test/Transforms/SafeStack/coloring.ll
new file mode 100644
index 000000000000..3ed9ccb43f39
--- /dev/null
+++ b/test/Transforms/SafeStack/coloring.ll
@@ -0,0 +1,44 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+define void @f() safestack {
+entry:
+; CHECK: %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK: %[[USST:.*]] = getelementptr i8, i8* %[[USP]], i32 -16
+
+ %x = alloca i32, align 4
+ %x1 = alloca i32, align 4
+ %x2 = alloca i32, align 4
+ %0 = bitcast i32* %x to i8*
+ call void @llvm.lifetime.start(i64 4, i8* %0)
+
+; CHECK: %[[A1:.*]] = getelementptr i8, i8* %[[USP]], i32 -4
+; CHECK: %[[A2:.*]] = bitcast i8* %[[A1]] to i32*
+; CHECK: call void @capture(i32* nonnull %[[A2]])
+
+ call void @capture(i32* nonnull %x)
+ call void @llvm.lifetime.end(i64 4, i8* %0)
+ %1 = bitcast i32* %x1 to i8*
+ call void @llvm.lifetime.start(i64 4, i8* %1)
+
+; CHECK: %[[B1:.*]] = getelementptr i8, i8* %[[USP]], i32 -4
+; CHECK: %[[B2:.*]] = bitcast i8* %[[B1]] to i32*
+; CHECK: call void @capture(i32* nonnull %[[B2]])
+
+ call void @capture(i32* nonnull %x1)
+ call void @llvm.lifetime.end(i64 4, i8* %1)
+ %2 = bitcast i32* %x2 to i8*
+ call void @llvm.lifetime.start(i64 4, i8* %2)
+
+; CHECK: %[[C1:.*]] = getelementptr i8, i8* %[[USP]], i32 -4
+; CHECK: %[[C2:.*]] = bitcast i8* %[[C1]] to i32*
+; CHECK: call void @capture(i32* nonnull %[[C2]])
+
+ call void @capture(i32* nonnull %x2)
+ call void @llvm.lifetime.end(i64 4, i8* %2)
+ ret void
+}
+
+declare void @llvm.lifetime.start(i64, i8* nocapture)
+declare void @llvm.lifetime.end(i64, i8* nocapture)
+declare void @capture(i32*)
diff --git a/test/Transforms/SafeStack/coloring2.ll b/test/Transforms/SafeStack/coloring2.ll
new file mode 100644
index 000000000000..54ed812cfe28
--- /dev/null
+++ b/test/Transforms/SafeStack/coloring2.ll
@@ -0,0 +1,482 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+; x and y share the stack slot.
+define void @f() safestack {
+; CHECK-LABEL: define void @f
+entry:
+; CHECK: %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -16
+
+ %x = alloca i32, align 4
+ %y = alloca i32, align 4
+ %z = alloca i32, align 4
+ %x0 = bitcast i32* %x to i8*
+ %y0 = bitcast i32* %y to i8*
+ %z0 = bitcast i32* %z to i8*
+
+ call void @llvm.lifetime.start(i64 -1, i8* %z0)
+ call void @llvm.lifetime.start(i64 -1, i8* %x0)
+
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -4
+ call void @capture32(i32* %x)
+ call void @llvm.lifetime.end(i64 -1, i8* %x0)
+ call void @llvm.lifetime.start(i64 -1, i8* %y0)
+
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -4
+ call void @capture32(i32* %y)
+ call void @llvm.lifetime.end(i64 -1, i8* %y0)
+
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -8
+ call void @capture32(i32* %z)
+ call void @llvm.lifetime.end(i64 -1, i8* %z0)
+
+ ret void
+}
+
+define void @no_markers() safestack {
+; CHECK-LABEL: define void @no_markers(
+entry:
+; CHECK: %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -16
+
+ %x = alloca i32, align 4
+ %y = alloca i32, align 4
+ %x0 = bitcast i32* %x to i8*
+
+ call void @llvm.lifetime.start(i64 -1, i8* %x0)
+
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -4
+ call void @capture32(i32* %x)
+ call void @llvm.lifetime.end(i64 -1, i8* %x0)
+
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -8
+ call void @capture32(i32* %y)
+
+ ret void
+}
+
+; x and y can't share memory, but they can split z's storage.
+define void @g() safestack {
+; CHECK-LABEL: define void @g
+entry:
+; CHECK: %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -16
+
+ %x = alloca i32, align 4
+ %y = alloca i32, align 4
+ %z = alloca i64, align 4
+ %x0 = bitcast i32* %x to i8*
+ %y0 = bitcast i32* %y to i8*
+ %z0 = bitcast i64* %z to i8*
+
+ call void @llvm.lifetime.start(i64 -1, i8* %x0)
+ call void @llvm.lifetime.start(i64 -1, i8* %y0)
+
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -4
+ call void @capture32(i32* %x)
+ call void @llvm.lifetime.end(i64 -1, i8* %x0)
+
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -8
+ call void @capture32(i32* %y)
+ call void @llvm.lifetime.end(i64 -1, i8* %y0)
+ call void @llvm.lifetime.start(i64 -1, i8* %z0)
+
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -8
+ call void @capture64(i64* %z)
+ call void @llvm.lifetime.end(i64 -1, i8* %z0)
+
+ ret void
+}
+
+; Both y and z fit in x's alignment gap.
+define void @h() safestack {
+; CHECK-LABEL: define void @h
+entry:
+; CHECK: %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -16
+
+ %x = alloca i32, align 16
+ %z = alloca i64, align 4
+ %y = alloca i32, align 4
+ %x0 = bitcast i32* %x to i8*
+ %y0 = bitcast i32* %y to i8*
+ %z0 = bitcast i64* %z to i8*
+
+ call void @llvm.lifetime.start(i64 -1, i8* %x0)
+ call void @llvm.lifetime.start(i64 -1, i8* %y0)
+ call void @llvm.lifetime.start(i64 -1, i8* %z0)
+
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -16
+ call void @capture32(i32* %x)
+
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -12
+ call void @capture32(i32* %y)
+
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -8
+ call void @capture64(i64* %z)
+
+ call void @llvm.lifetime.end(i64 -1, i8* %x0)
+ call void @llvm.lifetime.end(i64 -1, i8* %y0)
+ call void @llvm.lifetime.end(i64 -1, i8* %z0)
+
+ ret void
+}
+
+; void f(bool a, bool b) {
+; long x1, x2; capture64(&x1); capture64(&x2);
+; if (a) {
+; long y; capture64(&y);
+; if (b) {
+; long y1; capture64(&y1);
+; } else {
+; long y2; capture64(&y2);
+; }
+; } else {
+; long z; capture64(&z);
+; if (b) {
+; long z1; capture64(&z1);
+; } else {
+; long z2; capture64(&z2);
+; }
+; }
+; }
+; Everything fits in 4 x 64-bit slots.
+define void @i(i1 zeroext %a, i1 zeroext %b) safestack {
+; CHECK-LABEL: define void @i
+entry:
+; CHECK: %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK-NEXT: getelementptr i8, i8* %[[USP]], i32 -32
+ %x1 = alloca i64, align 8
+ %x2 = alloca i64, align 8
+ %y = alloca i64, align 8
+ %y1 = alloca i64, align 8
+ %y2 = alloca i64, align 8
+ %z = alloca i64, align 8
+ %z1 = alloca i64, align 8
+ %z2 = alloca i64, align 8
+ %0 = bitcast i64* %x1 to i8*
+ call void @llvm.lifetime.start(i64 -1, i8* %0)
+ %1 = bitcast i64* %x2 to i8*
+ call void @llvm.lifetime.start(i64 -1, i8* %1)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -8
+; CHECK: call void @capture64(
+ call void @capture64(i64* nonnull %x1)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -16
+; CHECK: call void @capture64(
+ call void @capture64(i64* nonnull %x2)
+ br i1 %a, label %if.then, label %if.else4
+
+if.then: ; preds = %entry
+ %2 = bitcast i64* %y to i8*
+ call void @llvm.lifetime.start(i64 -1, i8* %2)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -24
+; CHECK: call void @capture64(
+ call void @capture64(i64* nonnull %y)
+ br i1 %b, label %if.then3, label %if.else
+
+if.then3: ; preds = %if.then
+ %3 = bitcast i64* %y1 to i8*
+ call void @llvm.lifetime.start(i64 -1, i8* %3)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -32
+; CHECK: call void @capture64(
+ call void @capture64(i64* nonnull %y1)
+ call void @llvm.lifetime.end(i64 -1, i8* %3)
+ br label %if.end
+
+if.else: ; preds = %if.then
+ %4 = bitcast i64* %y2 to i8*
+ call void @llvm.lifetime.start(i64 -1, i8* %4)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -32
+; CHECK: call void @capture64(
+ call void @capture64(i64* nonnull %y2)
+ call void @llvm.lifetime.end(i64 -1, i8* %4)
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then3
+ call void @llvm.lifetime.end(i64 -1, i8* %2)
+ br label %if.end9
+
+if.else4: ; preds = %entry
+ %5 = bitcast i64* %z to i8*
+ call void @llvm.lifetime.start(i64 -1, i8* %5)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -24
+; CHECK: call void @capture64(
+ call void @capture64(i64* nonnull %z)
+ br i1 %b, label %if.then6, label %if.else7
+
+if.then6: ; preds = %if.else4
+ %6 = bitcast i64* %z1 to i8*
+ call void @llvm.lifetime.start(i64 -1, i8* %6)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -32
+; CHECK: call void @capture64(
+ call void @capture64(i64* nonnull %z1)
+ call void @llvm.lifetime.end(i64 -1, i8* %6)
+ br label %if.end8
+
+if.else7: ; preds = %if.else4
+ %7 = bitcast i64* %z2 to i8*
+ call void @llvm.lifetime.start(i64 -1, i8* %7)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -32
+; CHECK: call void @capture64(
+ call void @capture64(i64* nonnull %z2)
+ call void @llvm.lifetime.end(i64 -1, i8* %7)
+ br label %if.end8
+
+if.end8: ; preds = %if.else7, %if.then6
+ call void @llvm.lifetime.end(i64 -1, i8* %5)
+ br label %if.end9
+
+if.end9: ; preds = %if.end8, %if.end
+ call void @llvm.lifetime.end(i64 -1, i8* %1)
+ call void @llvm.lifetime.end(i64 -1, i8* %0)
+ ret void
+}
+
+; lifetime for x ends in 2 different BBs
+define void @no_merge1(i1 %d) safestack {
+; CHECK-LABEL: define void @no_merge1(
+entry:
+; CHECK: %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK-NEXT: getelementptr i8, i8* %[[USP]], i32 -16
+ %x = alloca i32, align 4
+ %y = alloca i32, align 4
+ %x0 = bitcast i32* %x to i8*
+ %y0 = bitcast i32* %y to i8*
+ call void @llvm.lifetime.start(i64 -1, i8* %x0)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -4
+; CHECK: call void @capture32(
+ call void @capture32(i32* %x)
+ br i1 %d, label %bb2, label %bb3
+bb2:
+ call void @llvm.lifetime.start(i64 -1, i8* %y0)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -8
+; CHECK: call void @capture32(
+ call void @capture32(i32* %y)
+ call void @llvm.lifetime.end(i64 -1, i8* %y0)
+ call void @llvm.lifetime.end(i64 -1, i8* %x0)
+ ret void
+bb3:
+ call void @llvm.lifetime.end(i64 -1, i8* %x0)
+ ret void
+}
+
+define void @merge1(i1 %d) safestack {
+; CHECK-LABEL: define void @merge1(
+entry:
+; CHECK: %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK-NEXT: getelementptr i8, i8* %[[USP]], i32 -16
+ %x = alloca i32, align 4
+ %y = alloca i32, align 4
+ %x0 = bitcast i32* %x to i8*
+ %y0 = bitcast i32* %y to i8*
+ call void @llvm.lifetime.start(i64 -1, i8* %x0)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -4
+; CHECK: call void @capture32(
+ call void @capture32(i32* %x)
+ call void @llvm.lifetime.end(i64 -1, i8* %x0)
+ br i1 %d, label %bb2, label %bb3
+bb2:
+ call void @llvm.lifetime.start(i64 -1, i8* %y0)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -4
+; CHECK: call void @capture32(
+ call void @capture32(i32* %y)
+ call void @llvm.lifetime.end(i64 -1, i8* %y0)
+ ret void
+bb3:
+ ret void
+}
+
+; Missing lifetime.end
+define void @merge2_noend(i1 %d) safestack {
+; CHECK-LABEL: define void @merge2_noend(
+entry:
+; CHECK: %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK-NEXT: getelementptr i8, i8* %[[USP]], i32 -16
+ %x = alloca i32, align 4
+ %y = alloca i32, align 4
+ %x0 = bitcast i32* %x to i8*
+ %y0 = bitcast i32* %y to i8*
+ call void @llvm.lifetime.start(i64 -1, i8* %x0)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -4
+; CHECK: call void @capture32(
+ call void @capture32(i32* %x)
+ call void @llvm.lifetime.end(i64 -1, i8* %x0)
+ br i1 %d, label %bb2, label %bb3
+bb2:
+ call void @llvm.lifetime.start(i64 -1, i8* %y0)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -4
+; CHECK: call void @capture32(
+ call void @capture32(i32* %y)
+ ret void
+bb3:
+ ret void
+}
+
+; Missing lifetime.end
+define void @merge3_noend(i1 %d) safestack {
+; CHECK-LABEL: define void @merge3_noend(
+entry:
+; CHECK: %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK-NEXT: getelementptr i8, i8* %[[USP]], i32 -16
+ %x = alloca i32, align 4
+ %y = alloca i32, align 4
+ %x0 = bitcast i32* %x to i8*
+ %y0 = bitcast i32* %y to i8*
+ call void @llvm.lifetime.start(i64 -1, i8* %x0)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -4
+; CHECK: call void @capture32(
+ call void @capture32(i32* %x)
+ br i1 %d, label %bb2, label %bb3
+bb2:
+ call void @llvm.lifetime.end(i64 -1, i8* %x0)
+ call void @llvm.lifetime.start(i64 -1, i8* %y0)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -4
+; CHECK: call void @capture32(
+ call void @capture32(i32* %y)
+ ret void
+bb3:
+ ret void
+}
+
+; Missing lifetime.start
+define void @nomerge4_nostart(i1 %d) safestack {
+; CHECK-LABEL: define void @nomerge4_nostart(
+entry:
+; CHECK: %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK-NEXT: getelementptr i8, i8* %[[USP]], i32 -16
+ %x = alloca i32, align 4
+ %y = alloca i32, align 4
+ %x0 = bitcast i32* %x to i8*
+ %y0 = bitcast i32* %y to i8*
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -4
+; CHECK: call void @capture32(
+ call void @capture32(i32* %x)
+ call void @llvm.lifetime.end(i64 -1, i8* %x0)
+ br i1 %d, label %bb2, label %bb3
+bb2:
+ call void @llvm.lifetime.start(i64 -1, i8* %y0)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -8
+; CHECK: call void @capture32(
+ call void @capture32(i32* %y)
+ ret void
+bb3:
+ ret void
+}
+
+define void @array_merge() safestack {
+; CHECK-LABEL: define void @array_merge(
+entry:
+; CHECK: %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK-NEXT: getelementptr i8, i8* %[[USP]], i32 -800
+ %A.i1 = alloca [100 x i32], align 4
+ %B.i2 = alloca [100 x i32], align 4
+ %A.i = alloca [100 x i32], align 4
+ %B.i = alloca [100 x i32], align 4
+ %0 = bitcast [100 x i32]* %A.i to i8*
+ call void @llvm.lifetime.start(i64 -1, i8* %0)
+ %1 = bitcast [100 x i32]* %B.i to i8*
+ call void @llvm.lifetime.start(i64 -1, i8* %1)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -400
+; CHECK: call void @capture100x32(
+ call void @capture100x32([100 x i32]* %A.i)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -800
+; CHECK: call void @capture100x32(
+ call void @capture100x32([100 x i32]* %B.i)
+ call void @llvm.lifetime.end(i64 -1, i8* %0)
+ call void @llvm.lifetime.end(i64 -1, i8* %1)
+ %2 = bitcast [100 x i32]* %A.i1 to i8*
+ call void @llvm.lifetime.start(i64 -1, i8* %2)
+ %3 = bitcast [100 x i32]* %B.i2 to i8*
+ call void @llvm.lifetime.start(i64 -1, i8* %3)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -400
+; CHECK: call void @capture100x32(
+ call void @capture100x32([100 x i32]* %A.i1)
+; CHECK: getelementptr i8, i8* %[[USP]], i32 -800
+; CHECK: call void @capture100x32(
+ call void @capture100x32([100 x i32]* %B.i2)
+ call void @llvm.lifetime.end(i64 -1, i8* %2)
+ call void @llvm.lifetime.end(i64 -1, i8* %3)
+ ret void
+}
+
+define void @myCall_pr15707() safestack {
+; CHECK-LABEL: define void @myCall_pr15707(
+entry:
+; CHECK: %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK-NEXT: getelementptr i8, i8* %[[USP]], i32 -200000
+ %buf1 = alloca i8, i32 100000, align 16
+ %buf2 = alloca i8, i32 100000, align 16
+
+ call void @llvm.lifetime.start(i64 -1, i8* %buf1)
+ call void @llvm.lifetime.end(i64 -1, i8* %buf1)
+
+ call void @llvm.lifetime.start(i64 -1, i8* %buf1)
+ call void @llvm.lifetime.start(i64 -1, i8* %buf2)
+ call void @capture8(i8* %buf1)
+ call void @capture8(i8* %buf2)
+ ret void
+}
+
+; Check that we don't assert and crash even when there are allocas
+; outside the declared lifetime regions.
+define void @bad_range() safestack {
+; CHECK-LABEL: define void @bad_range(
+entry:
+; CHECK: %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; A.i and B.i unsafe, not merged
+; CHECK-NEXT: getelementptr i8, i8* %[[USP]], i32 -800
+; A.i1 and B.i2 safe
+; CHECK: = alloca [100 x i32], align 4
+; CHECK: = alloca [100 x i32], align 4
+
+ %A.i1 = alloca [100 x i32], align 4
+ %B.i2 = alloca [100 x i32], align 4
+ %A.i = alloca [100 x i32], align 4
+ %B.i = alloca [100 x i32], align 4
+ %0 = bitcast [100 x i32]* %A.i to i8*
+ call void @llvm.lifetime.start(i64 -1, i8* %0) nounwind
+ %1 = bitcast [100 x i32]* %B.i to i8*
+ call void @llvm.lifetime.start(i64 -1, i8* %1) nounwind
+ call void @capture100x32([100 x i32]* %A.i)
+ call void @capture100x32([100 x i32]* %B.i)
+ call void @llvm.lifetime.end(i64 -1, i8* %0) nounwind
+ call void @llvm.lifetime.end(i64 -1, i8* %1) nounwind
+ br label %block2
+
+block2:
+ ; I am used outside the marked lifetime.
+ call void @capture100x32([100 x i32]* %A.i)
+ call void @capture100x32([100 x i32]* %B.i)
+ ret void
+}
+
+%struct.Klass = type { i32, i32 }
+
+define i32 @shady_range(i32 %argc, i8** nocapture %argv) safestack {
+; CHECK-LABEL: define i32 @shady_range(
+entry:
+; CHECK: %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK-NEXT: getelementptr i8, i8* %[[USP]], i32 -64
+ %a.i = alloca [4 x %struct.Klass], align 16
+ %b.i = alloca [4 x %struct.Klass], align 16
+ %a8 = bitcast [4 x %struct.Klass]* %a.i to i8*
+ %b8 = bitcast [4 x %struct.Klass]* %b.i to i8*
+ ; I am used outside the lifetime zone below:
+ %z2 = getelementptr inbounds [4 x %struct.Klass], [4 x %struct.Klass]* %a.i, i64 0, i64 0, i32 0
+ call void @llvm.lifetime.start(i64 -1, i8* %a8)
+ call void @llvm.lifetime.start(i64 -1, i8* %b8)
+ call void @capture8(i8* %a8)
+ call void @capture8(i8* %b8)
+ %z3 = load i32, i32* %z2, align 16
+ call void @llvm.lifetime.end(i64 -1, i8* %a8)
+ call void @llvm.lifetime.end(i64 -1, i8* %b8)
+ ret i32 %z3
+}
+
+declare void @llvm.lifetime.start(i64, i8* nocapture)
+declare void @llvm.lifetime.end(i64, i8* nocapture)
+declare void @capture8(i8*)
+declare void @capture32(i32*)
+declare void @capture64(i64*)
+declare void @capture100x32([100 x i32]*)
diff --git a/test/Transforms/SafeStack/debug-loc-dynamic.ll b/test/Transforms/SafeStack/debug-loc-dynamic.ll
new file mode 100644
index 000000000000..280d010774e1
--- /dev/null
+++ b/test/Transforms/SafeStack/debug-loc-dynamic.ll
@@ -0,0 +1,57 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+
+; Test llvm.dbg.value for dynamic allocas moved onto the unsafe stack.
+; In the dynamic alloca case, the dbg.value does not change with the exception
+; of the alloca pointer in the first argument being replaced with the new stack
+; top address.
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define void @f(i32 %n) safestack !dbg !6 {
+entry:
+ tail call void @llvm.dbg.value(metadata i32 %n, i64 0, metadata !11, metadata !14), !dbg !15
+ %0 = zext i32 %n to i64, !dbg !16
+
+; CHECK: store i8* %[[VLA:.*]], i8** @__safestack_unsafe_stack_ptr
+; CHECK: tail call void @llvm.dbg.value(metadata i8* %[[VLA]], i64 0, metadata ![[TYPE:.*]], metadata ![[EXPR:.*]])
+; CHECK: call void @capture({{.*}} %[[VLA]])
+
+ %vla = alloca i8, i64 %0, align 16, !dbg !16
+ tail call void @llvm.dbg.value(metadata i8* %vla, i64 0, metadata !12, metadata !17), !dbg !18
+ call void @capture(i8* nonnull %vla), !dbg !19
+ ret void, !dbg !20
+}
+
+declare void @capture(i8*)
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata)
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.9.0 (trunk 272832) (llvm/trunk 272831)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "../llvm/1.cc", directory: "/code/build-llvm")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{!"clang version 3.9.0 (trunk 272832) (llvm/trunk 272831)"}
+!6 = distinct !DISubprogram(name: "f", linkageName: "_Z1fi", scope: !1, file: !1, line: 2, type: !7, isLocal: false, isDefinition: true, scopeLine: 2, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !10)
+!7 = !DISubroutineType(types: !8)
+!8 = !{null, !9}
+!9 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!10 = !{!11, !12}
+!11 = !DILocalVariable(name: "n", arg: 1, scope: !6, file: !1, line: 2, type: !9)
+
+; CHECK-DAG: ![[TYPE]] = !DILocalVariable(name: "x",
+!12 = !DILocalVariable(name: "x", scope: !6, file: !1, line: 3, type: !13)
+!13 = !DIBasicType(name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
+!14 = !DIExpression()
+!15 = !DILocation(line: 2, column: 12, scope: !6)
+!16 = !DILocation(line: 3, column: 3, scope: !6)
+
+; CHECK-DAG: ![[EXPR]] = !DIExpression(DW_OP_deref)
+!17 = !DIExpression(DW_OP_deref)
+!18 = !DILocation(line: 3, column: 8, scope: !6)
+!19 = !DILocation(line: 4, column: 3, scope: !6)
+!20 = !DILocation(line: 5, column: 1, scope: !6)
diff --git a/test/Transforms/SafeStack/debug-loc.ll b/test/Transforms/SafeStack/debug-loc.ll
index e72d0e9d2ff2..fc0b6f911f7e 100644
--- a/test/Transforms/SafeStack/debug-loc.ll
+++ b/test/Transforms/SafeStack/debug-loc.ll
@@ -55,28 +55,27 @@ attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-
!llvm.module.flags = !{!15, !16}
!llvm.ident = !{!17}
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 254019) (llvm/trunk 254036)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !3, subprograms: !11)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 254019) (llvm/trunk 254036)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !3)
!1 = !DIFile(filename: "../llvm/2.cc", directory: "/code/build-llvm")
!2 = !{}
!3 = !{!4}
!4 = !DICompositeType(tag: DW_TAG_structure_type, name: "S", file: !1, line: 4, size: 800, align: 8, elements: !5, identifier: "_ZTS1S")
!5 = !{!6}
-!6 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !"_ZTS1S", file: !1, line: 5, baseType: !7, size: 800, align: 8)
+!6 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !4, file: !1, line: 5, baseType: !7, size: 800, align: 8)
!7 = !DICompositeType(tag: DW_TAG_array_type, baseType: !8, size: 800, align: 8, elements: !9)
!8 = !DIBasicType(name: "char", size: 8, align: 8, encoding: DW_ATE_signed_char)
!9 = !{!10}
!10 = !DISubrange(count: 100)
-!11 = !{!12}
-!12 = distinct !DISubprogram(name: "f", linkageName: "_Z1f1S", scope: !1, file: !1, line: 10, type: !13, isLocal: false, isDefinition: true, scopeLine: 10, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!12 = distinct !DISubprogram(name: "f", linkageName: "_Z1f1S", scope: !1, file: !1, line: 10, type: !13, isLocal: false, isDefinition: true, scopeLine: 10, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
!13 = !DISubroutineType(types: !14)
-!14 = !{null, !"_ZTS1S"}
+!14 = !{null, !4}
!15 = !{i32 2, !"Dwarf Version", i32 4}
!16 = !{i32 2, !"Debug Info Version", i32 3}
!17 = !{!"clang version 3.8.0 (trunk 254019) (llvm/trunk 254036)"}
-!18 = !DILocalVariable(name: "zzz", arg: 1, scope: !12, file: !1, line: 10, type: !"_ZTS1S")
+!18 = !DILocalVariable(name: "zzz", arg: 1, scope: !12, file: !1, line: 10, type: !4)
!19 = !DIExpression()
!20 = !DILocation(line: 10, column: 10, scope: !12)
-!21 = !DILocalVariable(name: "xxx", scope: !12, file: !1, line: 11, type: !"_ZTS1S")
+!21 = !DILocalVariable(name: "xxx", scope: !12, file: !1, line: 11, type: !4)
!22 = !DILocation(line: 11, column: 5, scope: !12)
!23 = !DILocation(line: 12, column: 3, scope: !12)
!24 = !DILocation(line: 13, column: 3, scope: !12)
diff --git a/test/Transforms/SafeStack/debug-loc2.ll b/test/Transforms/SafeStack/debug-loc2.ll
new file mode 100644
index 000000000000..35e9b7711d2f
--- /dev/null
+++ b/test/Transforms/SafeStack/debug-loc2.ll
@@ -0,0 +1,98 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+
+; Test llvm.dbg.value for the local variables moved onto the unsafe stack.
+; SafeStack rewrites them relative to the unsafe stack pointer (base address of
+; the unsafe stack frame).
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: noinline safestack uwtable
+define void @f() #0 !dbg !6 {
+entry:
+; CHECK: %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+ %x1 = alloca i32, align 4
+ %x2 = alloca i32, align 4
+ %0 = bitcast i32* %x1 to i8*, !dbg !13
+ %1 = bitcast i32* %x2 to i8*, !dbg !14
+
+; Unhandled dbg.value: expression does not start with DW_OP_deref
+; CHECK: call void @llvm.dbg.value(metadata ![[EMPTY:.*]], i64 0, metadata !{{.*}}, metadata !{{.*}})
+ tail call void @llvm.dbg.value(metadata i32* %x1, i64 0, metadata !10, metadata !23), !dbg !16
+
+; Unhandled dbg.value: expression does not start with DW_OP_deref
+; CHECK: call void @llvm.dbg.value(metadata ![[EMPTY]], i64 0, metadata !{{.*}}, metadata !{{.*}})
+ tail call void @llvm.dbg.value(metadata i32* %x1, i64 0, metadata !10, metadata !24), !dbg !16
+
+; Supported dbg.value: rewritten based on the [[USP]] value.
+; CHECK: call void @llvm.dbg.value(metadata i8* %[[USP]], i64 0, metadata ![[X1:.*]], metadata ![[X1_EXPR:.*]])
+ tail call void @llvm.dbg.value(metadata i32* %x1, i64 0, metadata !10, metadata !15), !dbg !16
+ call void @capture(i32* nonnull %x1), !dbg !17
+
+; An extra non-dbg.value metadata use of %x2. Replaced with an empty metadata.
+; CHECK: call void @llvm.random.metadata.use(metadata ![[EMPTY]])
+ call void @llvm.random.metadata.use(metadata i32* %x2)
+
+; CHECK: call void @llvm.dbg.value(metadata i8* %[[USP]], i64 0, metadata ![[X2:.*]], metadata ![[X2_EXPR:.*]])
+ call void @llvm.dbg.value(metadata i32* %x2, i64 0, metadata !12, metadata !15), !dbg !18
+ call void @capture(i32* nonnull %x2), !dbg !19
+ ret void, !dbg !20
+}
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.start(i64, i8* nocapture) #1
+
+declare void @capture(i32*) #2
+
+; Function Attrs: argmemonly nounwind
+declare void @llvm.lifetime.end(i64, i8* nocapture) #1
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #3
+
+declare void @llvm.random.metadata.use(metadata)
+
+attributes #0 = { noinline safestack uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { argmemonly nounwind }
+attributes #2 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { nounwind readnone }
+attributes #4 = { nounwind }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.9.0 (trunk 271022) (llvm/trunk 271027)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
+!1 = !DIFile(filename: "../llvm/2.cc", directory: "/code/build-llvm")
+
+; CHECK-DAG: ![[EMPTY]] = !{}
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{!"clang version 3.9.0 (trunk 271022) (llvm/trunk 271027)"}
+!6 = distinct !DISubprogram(name: "f", linkageName: "_Z1fv", scope: !1, file: !1, line: 4, type: !7, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !9)
+!7 = !DISubroutineType(types: !8)
+!8 = !{null}
+!9 = !{!10, !12}
+
+; CHECK-DAG: ![[X1]] = !DILocalVariable(name: "x1",
+!10 = !DILocalVariable(name: "x1", scope: !6, file: !1, line: 5, type: !11)
+!11 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+
+; CHECK-DAG: ![[X2]] = !DILocalVariable(name: "x2",
+!12 = !DILocalVariable(name: "x2", scope: !6, file: !1, line: 6, type: !11)
+!13 = !DILocation(line: 5, column: 3, scope: !6)
+!14 = !DILocation(line: 6, column: 3, scope: !6)
+
+; CHECK-DAG: ![[X1_EXPR]] = !DIExpression(DW_OP_deref, DW_OP_minus, 4)
+; CHECK-DAG: ![[X2_EXPR]] = !DIExpression(DW_OP_deref, DW_OP_minus, 8)
+!15 = !DIExpression(DW_OP_deref)
+!16 = !DILocation(line: 5, column: 7, scope: !6)
+!17 = !DILocation(line: 8, column: 3, scope: !6)
+!18 = !DILocation(line: 6, column: 7, scope: !6)
+!19 = !DILocation(line: 9, column: 3, scope: !6)
+!20 = !DILocation(line: 10, column: 1, scope: !6)
+!21 = !DILocation(line: 10, column: 1, scope: !22)
+!22 = !DILexicalBlockFile(scope: !6, file: !1, discriminator: 1)
+!23 = !DIExpression()
+!24 = !DIExpression(DW_OP_minus, 42)
diff --git a/test/Transforms/SafeStack/dynamic-alloca.ll b/test/Transforms/SafeStack/dynamic-alloca.ll
index bfec66f82a2f..b0571f72f1aa 100644
--- a/test/Transforms/SafeStack/dynamic-alloca.ll
+++ b/test/Transforms/SafeStack/dynamic-alloca.ll
@@ -8,7 +8,7 @@
; Requires protector.
define void @foo(i32 %n) nounwind uwtable safestack {
entry:
- ; CHECK: __safestack_unsafe_stack_ptr
+ ; CHECK: %[[SP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
%n.addr = alloca i32, align 4
%a = alloca i32*, align 8
store i32 %n, i32* %n.addr, align 4
@@ -17,5 +17,6 @@ entry:
%1 = alloca i8, i64 %conv
%2 = bitcast i8* %1 to i32*
store i32* %2, i32** %a, align 8
+ ; CHECK: store i8* %[[SP:.*]], i8** @__safestack_unsafe_stack_ptr
ret void
}
diff --git a/test/Transforms/SafeStack/phi.ll b/test/Transforms/SafeStack/phi.ll
new file mode 100644
index 000000000000..3ee56aa0f566
--- /dev/null
+++ b/test/Transforms/SafeStack/phi.ll
@@ -0,0 +1,35 @@
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+
+define void @f(i1 %d1, i1 %d2) safestack {
+entry:
+; CHECK-LABEL: define void @f(
+; CHECK: %[[USP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+; CHECK-NEXT: getelementptr i8, i8* %[[USP]], i32 -16
+; CHECK: br i1 %d1, label %[[BB0:.*]], label %[[BB1:.*]]
+ %a = alloca i32, align 8
+ %b = alloca i32, align 8
+ br i1 %d1, label %bb0, label %bb1
+
+bb0:
+; CHECK: [[BB0]]:
+; CHECK: %[[Ai8:.*]] = getelementptr i8, i8* %unsafe_stack_ptr, i32
+; CHECK: %[[AUNSAFE:.*]] = bitcast i8* %[[Ai8]] to i32*
+; CHECK: br i1
+ br i1 %d2, label %bb2, label %bb2
+
+bb1:
+; CHECK: [[BB1]]:
+; CHECK: %[[Bi8:.*]] = getelementptr i8, i8* %unsafe_stack_ptr, i32
+; CHECK: %[[BUNSAFE:.*]] = bitcast i8* %[[Bi8]] to i32*
+; CHECK: br label
+ br label %bb2
+
+bb2:
+; CHECK: phi i32* [ %[[AUNSAFE]], %[[BB0]] ], [ %[[AUNSAFE]], %[[BB0]] ], [ %[[BUNSAFE]], %[[BB1]] ]
+ %c = phi i32* [ %a, %bb0 ], [ %a, %bb0 ], [ %b, %bb1 ]
+ call void @capture(i32* %c)
+ ret void
+}
+
+declare void @capture(i32*)
diff --git a/test/Transforms/SafeStack/setjmp2.ll b/test/Transforms/SafeStack/setjmp2.ll
index bb15d7e03ace..dc83c4824207 100644
--- a/test/Transforms/SafeStack/setjmp2.ll
+++ b/test/Transforms/SafeStack/setjmp2.ll
@@ -12,8 +12,8 @@
; CHECK: @foo(i32 %[[ARG:.*]])
define i32 @foo(i32 %size) nounwind uwtable safestack {
entry:
- ; CHECK: %[[DYNPTR:.*]] = alloca i8*
- ; CHECK-NEXT: %[[SP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+ ; CHECK: %[[SP:.*]] = load i8*, i8** @__safestack_unsafe_stack_ptr
+ ; CHECK-NEXT: %[[DYNPTR:.*]] = alloca i8*
; CHECK-NEXT: store i8* %[[SP]], i8** %[[DYNPTR]]
; CHECK-NEXT: %[[ZEXT:.*]] = zext i32 %[[ARG]] to i64
@@ -35,6 +35,7 @@ entry:
; CHECK: call void @funcall(i32* %[[ALLOCA]])
call void @funcall(i32* %a)
+ ; CHECK-NEXT: store i8* %[[SP:.*]], i8** @__safestack_unsafe_stack_ptr
ret i32 0
}
diff --git a/test/Transforms/SafeStack/sink-to-use.ll b/test/Transforms/SafeStack/sink-to-use.ll
new file mode 100644
index 000000000000..e208ce1da9d4
--- /dev/null
+++ b/test/Transforms/SafeStack/sink-to-use.ll
@@ -0,0 +1,22 @@
+; Test that unsafe alloca address calculation is done immediately before each use.
+; RUN: opt -safe-stack -S -mtriple=x86_64-pc-linux-gnu < %s -o - | FileCheck %s
+; RUN: opt -safe-stack -S -mtriple=i386-pc-linux-gnu < %s -o - | FileCheck %s
+
+define void @f() safestack {
+entry:
+ %x0 = alloca i32, align 4
+ %x1 = alloca i32, align 4
+
+; CHECK: %[[A:.*]] = getelementptr i8, i8* %{{.*}}, i32 -4
+; CHECK: %[[X0:.*]] = bitcast i8* %[[A]] to i32*
+; CHECK: call void @use(i32* %[[X0]])
+ call void @use(i32* %x0)
+
+; CHECK: %[[B:.*]] = getelementptr i8, i8* %{{.*}}, i32 -8
+; CHECK: %[[X1:.*]] = bitcast i8* %[[B]] to i32*
+; CHECK: call void @use(i32* %[[X1]])
+ call void @use(i32* %x1)
+ ret void
+}
+
+declare void @use(i32*)
diff --git a/test/Transforms/SafeStack/struct.ll b/test/Transforms/SafeStack/struct.ll
index 12a0085a2cc3..b64803d160c6 100644
--- a/test/Transforms/SafeStack/struct.ll
+++ b/test/Transforms/SafeStack/struct.ll
@@ -15,16 +15,15 @@ entry:
; CHECK: store i8* %[[USST]], i8** @__safestack_unsafe_stack_ptr
- ; CHECK: %[[AADDR:.*]] = alloca i8*, align 8
%a.addr = alloca i8*, align 8
-
- ; CHECK: %[[BUFPTR:.*]] = getelementptr i8, i8* %[[USP]], i32 -16
- ; CHECK: %[[BUFPTR2:.*]] = bitcast i8* %[[BUFPTR]] to %struct.foo*
%buf = alloca %struct.foo, align 1
+ ; CHECK: %[[AADDR:.*]] = alloca i8*, align 8
; CHECK: store i8* {{.*}}, i8** %[[AADDR]], align 8
store i8* %a, i8** %a.addr, align 8
+ ; CHECK: %[[BUFPTR:.*]] = getelementptr i8, i8* %[[USP]], i32 -16
+ ; CHECK: %[[BUFPTR2:.*]] = bitcast i8* %[[BUFPTR]] to %struct.foo*
; CHECK: %[[GEP:.*]] = getelementptr inbounds %struct.foo, %struct.foo* %[[BUFPTR2]], i32 0, i32 0, i32 0
%gep = getelementptr inbounds %struct.foo, %struct.foo* %buf, i32 0, i32 0, i32 0
diff --git a/test/Transforms/SampleProfile/Inputs/fnptr.binprof b/test/Transforms/SampleProfile/Inputs/fnptr.binprof
index a074f53db945..420fd8f86d00 100644
--- a/test/Transforms/SampleProfile/Inputs/fnptr.binprof
+++ b/test/Transforms/SampleProfile/Inputs/fnptr.binprof
Binary files differ
diff --git a/test/Transforms/SampleProfile/Inputs/inline-act.prof b/test/Transforms/SampleProfile/Inputs/inline-act.prof
new file mode 100644
index 000000000000..655739f37888
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/inline-act.prof
@@ -0,0 +1,3 @@
+_Z3bari:100:0
+ 1: _Z3fooi:100
+ 2: 100
diff --git a/test/Transforms/SampleProfile/Inputs/inline-combine.prof b/test/Transforms/SampleProfile/Inputs/inline-combine.prof
new file mode 100644
index 000000000000..8d1c0b8103ce
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/inline-combine.prof
@@ -0,0 +1,2 @@
+foo:1000:1000
+ 1: bar:1000
diff --git a/test/Transforms/SampleProfile/Inputs/propagate.prof b/test/Transforms/SampleProfile/Inputs/propagate.prof
index ee9c6d62dfd1..f298752d03cb 100644
--- a/test/Transforms/SampleProfile/Inputs/propagate.prof
+++ b/test/Transforms/SampleProfile/Inputs/propagate.prof
@@ -1,17 +1,22 @@
-_Z3fooiil:58139:0
+_Z3fooiil:33168:0
0: 0
1: 0
2: 0
- 4: 1
- 5: 10
- 6: 2
- 7: 5
- 8: 3
- 9: 0
- 10: 0
- 11: 6339
- 12: 16191
- 13: 8141
- 16: 1
+ 4: 0
+ 4.1: 302
+ 4.2: 315
+ 5: 302
+ 6: 200
+ 7: 308
+ 8: 227
+ 9: 227
+ 10: 227
+ 11: 83
+ 11.1: 7553
+ 11.2: 7479
+ 12: 7479
+ 13: 7479
+ 16: 305
18: 0
19: 0
+ 65533: 308
diff --git a/test/Transforms/SampleProfile/Inputs/summary.prof b/test/Transforms/SampleProfile/Inputs/summary.prof
new file mode 100644
index 000000000000..49a5b57d5e64
--- /dev/null
+++ b/test/Transforms/SampleProfile/Inputs/summary.prof
@@ -0,0 +1,4 @@
+bar:100:3
+ 1: 100
+foo:200:1
+ 1: 200
diff --git a/test/Transforms/SampleProfile/branch.ll b/test/Transforms/SampleProfile/branch.ll
index 1700749f0be9..ac68fd857bd2 100644
--- a/test/Transforms/SampleProfile/branch.ll
+++ b/test/Transforms/SampleProfile/branch.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/branch.prof | opt -analyze -branch-prob | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/branch.prof | opt -analyze -branch-prob | FileCheck %s
; Original C++ code for this test case:
;
@@ -157,13 +158,12 @@ attributes #4 = { nounwind readonly }
!llvm.module.flags = !{!13, !14}
!llvm.ident = !{!15}
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 248211) (llvm/trunk 248217)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, retainedTypes: !3, subprograms: !5)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 248211) (llvm/trunk 248217)", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2, retainedTypes: !3)
!1 = !DIFile(filename: "test.cc", directory: "/ssd/llvm_commit")
!2 = !{}
!3 = !{!4}
!4 = !DIBasicType(name: "double", size: 64, align: 64, encoding: DW_ATE_float)
-!5 = !{!6}
-!6 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 4, type: !7, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!6 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 4, type: !7, isLocal: false, isDefinition: true, scopeLine: 4, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
!7 = !DISubroutineType(types: !8)
!8 = !{!9, !9, !10}
!9 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
diff --git a/test/Transforms/SampleProfile/calls.ll b/test/Transforms/SampleProfile/calls.ll
index 53ea9297d7d0..0105019c73ea 100644
--- a/test/Transforms/SampleProfile/calls.ll
+++ b/test/Transforms/SampleProfile/calls.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/calls.prof | opt -analyze -branch-prob | FileCheck %s
+; RUN: opt < %s -instcombine -sample-profile -sample-profile-file=%S/Inputs/calls.prof | opt -analyze -branch-prob | FileCheck %s
+; RUN: opt < %s -passes="function(instcombine),sample-profile" -sample-profile-file=%S/Inputs/calls.prof | opt -analyze -branch-prob | FileCheck %s
; Original C++ test case
;
@@ -16,11 +17,6 @@
; return 0;
; }
;
-; Note that this test is missing the llvm.dbg.cu annotation. This emulates
-; the effect of the user having only used -fprofile-sample-use without
-; -gmlt when invoking the driver. In those cases, we need to track source
-; location information but we do not have to generate debug info in the
-; final binary.
@.str = private unnamed_addr constant [11 x i8] c"sum is %d\0A\00", align 1
; Function Attrs: nounwind uwtable
@@ -63,8 +59,8 @@ while.body: ; preds = %while.cond
; both branches out of while.body had the same weight. In reality,
; the edge while.body->if.then is taken most of the time.
;
-; CHECK: edge while.body -> if.then probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
; CHECK: edge while.body -> if.else probability is 0x00000000 / 0x80000000 = 0.00%
+; CHECK: edge while.body -> if.then probability is 0x80000000 / 0x80000000 = 100.00% [HOT edge]
if.then: ; preds = %while.body
@@ -89,17 +85,17 @@ while.end: ; preds = %while.cond
declare i32 @printf(i8*, ...) #2
+!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!8, !9}
!llvm.ident = !{!10}
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "calls.cc", directory: ".")
!2 = !{}
-!3 = !{!4, !7}
-!4 = distinct !DISubprogram(name: "sum", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !5, type: !6, variables: !2)
+!4 = distinct !DISubprogram(name: "sum", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 3, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "calls.cc", directory: ".")
!6 = !DISubroutineType(types: !2)
-!7 = distinct !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !1, scope: !5, type: !6, variables: !2)
+!7 = distinct !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !5, type: !6, variables: !2)
!8 = !{i32 2, !"Dwarf Version", i32 4}
!9 = !{i32 1, !"Debug Info Version", i32 3}
!10 = !{!"clang version 3.5 "}
diff --git a/test/Transforms/SampleProfile/cov-zero-samples.ll b/test/Transforms/SampleProfile/cov-zero-samples.ll
index d81e6438ee01..dc3e6de8c3c9 100644
--- a/test/Transforms/SampleProfile/cov-zero-samples.ll
+++ b/test/Transforms/SampleProfile/cov-zero-samples.ll
@@ -1,6 +1,7 @@
-; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/cov-zero-samples.prof -sample-profile-check-record-coverage=100 -pass-remarks=sample-profile -o /dev/null 2>&1 | FileCheck %s
+; RUN: opt < %s -instcombine -sample-profile -sample-profile-file=%S/Inputs/cov-zero-samples.prof -sample-profile-check-record-coverage=100 -pass-remarks=sample-profile -o /dev/null 2>&1 | FileCheck %s
+; RUN: opt < %s -passes="function(instcombine),sample-profile" -sample-profile-file=%S/Inputs/cov-zero-samples.prof -sample-profile-check-record-coverage=100 -pass-remarks=sample-profile -o /dev/null 2>&1 | FileCheck %s
;
-; CHECK: remark: cov-zero-samples.cc:9:25: Applied 404065 samples from profile (offset: 2.1)
+; CHECK: remark: cov-zero-samples.cc:9:29: Applied 404065 samples from profile (offset: 2.1)
; CHECK: remark: cov-zero-samples.cc:10:9: Applied 443089 samples from profile (offset: 3)
; CHECK: remark: cov-zero-samples.cc:10:36: Applied 0 samples from profile (offset: 3.1)
; CHECK: remark: cov-zero-samples.cc:11:12: Applied 404066 samples from profile (offset: 4)
@@ -86,15 +87,14 @@ declare i32 @printf(i8*, ...)
!llvm.module.flags = !{!15, !16}
!llvm.ident = !{!17}
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 253667) (llvm/trunk 253670)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3, globals: !11)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 253667) (llvm/trunk 253670)", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2, globals: !11)
!1 = !DIFile(filename: "cov-zero-samples.cc", directory: ".")
!2 = !{}
-!3 = !{!4, !8}
-!4 = distinct !DISubprogram(name: "never_called", linkageName: "_Z12never_calledi", scope: !1, file: !1, line: 5, type: !5, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!4 = distinct !DISubprogram(name: "never_called", linkageName: "_Z12never_calledi", scope: !1, file: !1, line: 5, type: !5, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
!5 = !DISubroutineType(types: !6)
!6 = !{!7, !7}
!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!8 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !9, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!8 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !9, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
!9 = !DISubroutineType(types: !10)
!10 = !{!7}
!11 = !{!12}
diff --git a/test/Transforms/SampleProfile/coverage-warning.ll b/test/Transforms/SampleProfile/coverage-warning.ll
index 14a2710b0810..4b8349d0af97 100644
--- a/test/Transforms/SampleProfile/coverage-warning.ll
+++ b/test/Transforms/SampleProfile/coverage-warning.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/coverage-warning.prof -sample-profile-check-record-coverage=90 -sample-profile-check-sample-coverage=100 -o /dev/null 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/coverage-warning.prof -sample-profile-check-record-coverage=90 -sample-profile-check-sample-coverage=100 -o /dev/null 2>&1 | FileCheck %s
define i32 @foo(i32 %i) !dbg !4 {
; The profile has samples for line locations that are no longer present.
; Coverage does not reach 90%, so we should get this warning:
@@ -30,11 +31,10 @@ return: ; preds = %if.end, %if.then
!llvm.module.flags = !{!6, !7}
!llvm.ident = !{!8}
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 251524) (llvm/trunk 251531)", isOptimized: false, runtimeVersion: 0, emissionKind: 2, enums: !2, subprograms: !3)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 251524) (llvm/trunk 251531)", isOptimized: false, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2)
!1 = !DIFile(filename: "coverage-warning.c", directory: ".")
!2 = !{}
-!3 = !{!4}
-!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
!5 = !DISubroutineType(types: !2)
!6 = !{i32 2, !"Dwarf Version", i32 4}
!7 = !{i32 2, !"Debug Info Version", i32 3}
diff --git a/test/Transforms/SampleProfile/discriminator.ll b/test/Transforms/SampleProfile/discriminator.ll
index 0915fc884f82..862c2ee43c53 100644
--- a/test/Transforms/SampleProfile/discriminator.ll
+++ b/test/Transforms/SampleProfile/discriminator.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/discriminator.prof | opt -analyze -branch-prob | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/discriminator.prof | opt -analyze -branch-prob | FileCheck %s
; Original code
;
@@ -66,11 +67,10 @@ while.end: ; preds = %while.cond
!llvm.module.flags = !{!7, !8}
!llvm.ident = !{!9}
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "discriminator.c", directory: ".")
!2 = !{}
-!3 = !{!4}
-!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 1, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "discriminator.c", directory: ".")
!6 = !DISubroutineType(types: !2)
!7 = !{i32 2, !"Dwarf Version", i32 4}
diff --git a/test/Transforms/SampleProfile/entry_counts.ll b/test/Transforms/SampleProfile/entry_counts.ll
index 50cd575295a9..1f7aceb8abb1 100644
--- a/test/Transforms/SampleProfile/entry_counts.ll
+++ b/test/Transforms/SampleProfile/entry_counts.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/entry_counts.prof -S | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/entry_counts.prof -S | FileCheck %s
; According to the profile, function empty() was called 13,293 times.
; CHECK: {{.*}} = !{!"function_entry_count", i64 13293}
@@ -8,15 +9,21 @@ entry:
ret void, !dbg !9
}
+; This function does not have profile, check if function_entry_count is 0
+; CHECK: {{.*}} = !{!"function_entry_count", i64 0}
+define void @no_profile() {
+entry:
+ ret void
+}
+
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!6, !7}
!llvm.ident = !{!8}
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 237249) (llvm/trunk 237261)", isOptimized: false, runtimeVersion: 0, emissionKind: 2, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.7.0 (trunk 237249) (llvm/trunk 237261)", isOptimized: false, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "entry_counts.c", directory: ".")
!2 = !{}
-!3 = !{!4}
-!4 = distinct !DISubprogram(name: "empty", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, variables: !2)
+!4 = distinct !DISubprogram(name: "empty", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, isOptimized: false, unit: !0, variables: !2)
!5 = !DISubroutineType(types: !2)
!6 = !{i32 2, !"Dwarf Version", i32 4}
!7 = !{i32 2, !"Debug Info Version", i32 3}
diff --git a/test/Transforms/SampleProfile/fnptr.ll b/test/Transforms/SampleProfile/fnptr.ll
index 7b07ca9679bb..b41fec7aed1a 100644
--- a/test/Transforms/SampleProfile/fnptr.ll
+++ b/test/Transforms/SampleProfile/fnptr.ll
@@ -5,12 +5,15 @@
; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/fnptr.prof | opt -analyze -branch-prob | FileCheck %s
; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/fnptr.binprof | opt -analyze -branch-prob | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/fnptr.prof | opt -analyze -branch-prob | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/fnptr.binprof | opt -analyze -branch-prob | FileCheck %s
+
; CHECK: edge for.body3 -> if.then probability is 0x1a4f3959 / 0x80000000 = 20.55%
; CHECK: edge for.body3 -> if.else probability is 0x65b0c6a7 / 0x80000000 = 79.45%
-; CHECK: edge for.inc -> for.inc12 probability is 0x33d4a4c1 / 0x80000000 = 40.49%
-; CHECK: edge for.inc -> for.body3 probability is 0x4c2b5b3f / 0x80000000 = 59.51%
-; CHECK: edge for.inc12 -> for.end14 probability is 0x3f06d04e / 0x80000000 = 49.24%
-; CHECK: edge for.inc12 -> for.cond1.preheader probability is 0x40f92fb2 / 0x80000000 = 50.76%
+; CHECK: edge for.inc -> for.inc12 probability is 0x20dc8dc9 / 0x80000000 = 25.67%
+; CHECK: edge for.inc -> for.body3 probability is 0x5f237237 / 0x80000000 = 74.33%
+; CHECK: edge for.inc12 -> for.end14 probability is 0x00000000 / 0x80000000 = 0.00%
+; CHECK: edge for.inc12 -> for.cond1.preheader probability is 0x80000000 / 0x80000000 = 100.00%
; Original C++ test case.
;
@@ -126,21 +129,22 @@ declare i32 @printf(i8* nocapture readonly, ...) #1
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
+!llvm.dbg.cu = !{!26}
!0 = !{i32 2, !"Debug Info Version", i32 3}
!1 = !{!"clang version 3.6.0 "}
!2 = !DILocation(line: 9, column: 3, scope: !3)
-!3 = distinct !DISubprogram(name: "foo", line: 8, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 8, file: !4, scope: !5, type: !6, variables: !7)
+!3 = distinct !DISubprogram(name: "foo", line: 8, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, unit: !26, scopeLine: 8, file: !4, scope: !5, type: !6, variables: !7)
!4 = !DIFile(filename: "fnptr.cc", directory: ".")
!5 = !DIFile(filename: "fnptr.cc", directory: ".")
!6 = !DISubroutineType(types: !7)
!7 = !{}
!8 = !DILocation(line: 9, column: 14, scope: !3)
!9 = !DILocation(line: 13, column: 3, scope: !10)
-!10 = distinct !DISubprogram(name: "bar", line: 12, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 12, file: !4, scope: !5, type: !6, variables: !7)
+!10 = distinct !DISubprogram(name: "bar", line: 12, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, unit: !26, scopeLine: 12, file: !4, scope: !5, type: !6, variables: !7)
!11 = !DILocation(line: 13, column: 14, scope: !10)
!12 = !DILocation(line: 19, column: 3, scope: !13)
-!13 = distinct !DISubprogram(name: "main", line: 16, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 16, file: !4, scope: !5, type: !6, variables: !7)
+!13 = distinct !DISubprogram(name: "main", line: 16, isLocal: false, isDefinition: true, flags: DIFlagPrototyped, isOptimized: true, unit: !26, scopeLine: 16, file: !4, scope: !5, type: !6, variables: !7)
!14 = !DILocation(line: 20, column: 5, scope: !13)
!15 = !DILocation(line: 21, column: 15, scope: !13)
!16 = !DILocation(line: 22, column: 11, scope: !13)
@@ -153,3 +157,4 @@ declare i32 @printf(i8* nocapture readonly, ...) #1
!23 = !{!"branch_weights", i32 0, i32 534}
!24 = !DILocation(line: 27, column: 3, scope: !13)
!25 = !DILocation(line: 28, column: 3, scope: !13)
+!26 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: FullDebug, file: !4)
diff --git a/test/Transforms/SampleProfile/gcc-simple.ll b/test/Transforms/SampleProfile/gcc-simple.ll
index 1ae927158c11..cbd105ebc3b4 100644
--- a/test/Transforms/SampleProfile/gcc-simple.ll
+++ b/test/Transforms/SampleProfile/gcc-simple.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/gcc-simple.afdo -S | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/gcc-simple.afdo -S | FileCheck %s
; XFAIL: powerpc64-, s390x, mips-, mips64-, sparc
; Original code:
;
@@ -144,16 +145,15 @@ attributes #3 = { nounwind }
!llvm.module.flags = !{!13, !14}
!llvm.ident = !{!15}
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 247554) (llvm/trunk 247557)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 247554) (llvm/trunk 247557)", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2)
!1 = !DIFile(filename: "discriminator.cc", directory: "/usr/local/google/home/dnovillo/llvm/test/autofdo")
!2 = !{}
-!3 = !{!4, !9}
-!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fool", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fool", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
!5 = !DISubroutineType(types: !6)
!6 = !{!7, !8}
!7 = !DIBasicType(name: "long long int", size: 64, align: 64, encoding: DW_ATE_signed)
!8 = !DIBasicType(name: "long int", size: 64, align: 64, encoding: DW_ATE_signed)
-!9 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !10, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!9 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !10, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
!10 = !DISubroutineType(types: !11)
!11 = !{!12}
!12 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
diff --git a/test/Transforms/SampleProfile/inline-act.ll b/test/Transforms/SampleProfile/inline-act.ll
new file mode 100644
index 000000000000..6383dfbddf95
--- /dev/null
+++ b/test/Transforms/SampleProfile/inline-act.ll
@@ -0,0 +1,72 @@
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline-act.prof
+
+; Sample profile should have non-empty ACT passed to inliner
+
+; int t;
+; bool foo(int value) {
+; switch(value) {
+; case 0:
+; case 1:
+; case 3:
+; return true;
+; default:
+; return false;
+; }
+; }
+; void bar(int i) {
+; if (foo(i))
+; t *= 2;
+; }
+
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@t = global i32 0, align 4
+
+; Function Attrs: nounwind uwtable
+define zeroext i1 @_Z3fooi(i32) #0 {
+ %switch.tableidx = sub i32 %0, 0
+ %2 = icmp ult i32 %switch.tableidx, 4
+ br i1 %2, label %switch.lookup, label %3
+
+switch.lookup: ; preds = %1
+ %switch.cast = trunc i32 %switch.tableidx to i4
+ %switch.shiftamt = mul i4 %switch.cast, 1
+ %switch.downshift = lshr i4 -5, %switch.shiftamt
+ %switch.masked = trunc i4 %switch.downshift to i1
+ ret i1 %switch.masked
+
+; <label>:3: ; preds = %1
+ ret i1 false
+}
+
+; Function Attrs: nounwind uwtable
+define void @_Z3bari(i32) #0 !dbg !9 {
+ %2 = call zeroext i1 @_Z3fooi(i32 %0), !dbg !10
+ br i1 %2, label %3, label %6, !dbg !10
+
+; <label>:3: ; preds = %1
+ %4 = load i32, i32* @t, align 4
+ %5 = shl nsw i32 %4, 1
+ store i32 %5, i32* @t, align 4
+ br label %6
+
+; <label>:6: ; preds = %3, %1
+ ret void
+}
+
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3}
+!llvm.ident = !{!4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.9.0 (trunk 272227) (llvm/trunk 272226)", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2)
+!1 = !DIFile(filename: "test.cc", directory: "./")
+!2 = !{}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{!"clang version 3.9.0 (trunk 272227) (llvm/trunk 272226)"}
+!6 = !DISubroutineType(types: !2)
+!9 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 14, type: !6, isLocal: false, isDefinition: true, scopeLine: 14, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !2)
+!10 = !DILocation(line: 15, column: 7, scope: !9)
+!11 = !DILocation(line: 16, column: 7, scope: !9)
diff --git a/test/Transforms/SampleProfile/inline-combine.ll b/test/Transforms/SampleProfile/inline-combine.ll
new file mode 100644
index 000000000000..bcc1770bfae1
--- /dev/null
+++ b/test/Transforms/SampleProfile/inline-combine.ll
@@ -0,0 +1,47 @@
+; RUN: opt < %s -instcombine -sample-profile -sample-profile-file=%S/Inputs/inline-combine.prof -S | FileCheck %s
+; RUN: opt < %s -passes="function(instcombine),sample-profile" -sample-profile-file=%S/Inputs/inline-combine.prof -S | FileCheck %s
+
+%"class.llvm::FoldingSetNodeID" = type { %"class.llvm::SmallVector" }
+%"class.llvm::SmallVector" = type { %"class.llvm::SmallVectorImpl.base", %"struct.llvm::SmallVectorStorage" }
+%"class.llvm::SmallVectorImpl.base" = type { %"class.llvm::SmallVectorTemplateBase.base" }
+%"class.llvm::SmallVectorTemplateBase.base" = type { %"class.llvm::SmallVectorTemplateCommon.base" }
+%"class.llvm::SmallVectorTemplateCommon.base" = type <{ %"class.llvm::SmallVectorBase", %"struct.llvm::AlignedCharArrayUnion" }>
+%"class.llvm::SmallVectorBase" = type { i8*, i8*, i8* }
+%"struct.llvm::AlignedCharArrayUnion" = type { %"struct.llvm::AlignedCharArray" }
+%"struct.llvm::AlignedCharArray" = type { [4 x i8] }
+%"struct.llvm::SmallVectorStorage" = type { [31 x %"struct.llvm::AlignedCharArrayUnion"] }
+%"class.llvm::SmallVectorImpl" = type { %"class.llvm::SmallVectorTemplateBase.base", [4 x i8] }
+
+$foo = comdat any
+
+$bar = comdat any
+
+define void @foo(%"class.llvm::FoldingSetNodeID"* %this) comdat align 2 !dbg !3 {
+ %1 = alloca %"class.llvm::FoldingSetNodeID"*, align 8
+ store %"class.llvm::FoldingSetNodeID"* %this, %"class.llvm::FoldingSetNodeID"** %1, align 8
+ %2 = load %"class.llvm::FoldingSetNodeID"*, %"class.llvm::FoldingSetNodeID"** %1, align 8
+ %3 = getelementptr inbounds %"class.llvm::FoldingSetNodeID", %"class.llvm::FoldingSetNodeID"* %2, i32 0, i32 0
+; the call should have been inlined after sample-profile pass
+; CHECK-NOT: call
+ call void bitcast (void (%"class.llvm::SmallVectorImpl"*)* @bar to void (%"class.llvm::SmallVector"*)*)(%"class.llvm::SmallVector"* %3), !dbg !7
+ ret void
+}
+
+define void @bar(%"class.llvm::SmallVectorImpl"* %this) comdat align 2 !dbg !8 {
+ ret void
+}
+
+!llvm.module.flags = !{!0, !1}
+!llvm.ident = !{!2}
+!llvm.dbg.cu = !{!9}
+
+!0 = !{i32 2, !"Dwarf Version", i32 4}
+!1 = !{i32 1, !"Debug Info Version", i32 3}
+!2 = !{!"clang version 3.5 "}
+!3 = distinct !DISubprogram(name: "foo", scope: !4, file: !4, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !9, variables: !6)
+!4 = !DIFile(filename: "test.cc", directory: ".")
+!5 = !DISubroutineType(types: !6)
+!6 = !{}
+!7 = !DILocation(line: 4, scope: !3)
+!8 = distinct !DISubprogram(name: "bar", scope: !4, file: !4, line: 7, type: !5, isLocal: false, isDefinition: true, scopeLine: 7, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !9, variables: !6)
+!9 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: FullDebug, file: !4)
diff --git a/test/Transforms/SampleProfile/inline-coverage.ll b/test/Transforms/SampleProfile/inline-coverage.ll
index 7248540b4f7c..c88e7f865fa2 100644
--- a/test/Transforms/SampleProfile/inline-coverage.ll
+++ b/test/Transforms/SampleProfile/inline-coverage.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline-coverage.prof -sample-profile-check-record-coverage=100 -sample-profile-check-sample-coverage=110 -pass-remarks=sample-profile -o /dev/null 2>&1 | FileCheck %s
+; RUN: opt < %s -instcombine -sample-profile -sample-profile-file=%S/Inputs/inline-coverage.prof -sample-profile-check-record-coverage=100 -sample-profile-check-sample-coverage=110 -pass-remarks=sample-profile -o /dev/null 2>&1 | FileCheck %s
+; RUN: opt < %s -passes="function(instcombine),sample-profile" -sample-profile-file=%S/Inputs/inline-coverage.prof -sample-profile-check-record-coverage=100 -sample-profile-check-sample-coverage=110 -pass-remarks=sample-profile -o /dev/null 2>&1 | FileCheck %s
;
; Original code:
;
@@ -16,7 +17,7 @@
; 12 }
;
; CHECK: remark: coverage.cc:10:12: inlined hot callee '_Z3fool' with 172746 samples into 'main'
-; CHECK: remark: coverage.cc:9:19: Applied 23478 samples from profile (offset: 2.1)
+; CHECK: remark: coverage.cc:9:21: Applied 23478 samples from profile (offset: 2.1)
; CHECK: remark: coverage.cc:10:16: Applied 23478 samples from profile (offset: 3)
; CHECK: remark: coverage.cc:4:10: Applied 31878 samples from profile (offset: 1)
; CHECK: remark: coverage.cc:11:10: Applied 0 samples from profile (offset: 4)
@@ -90,16 +91,15 @@ for.end: ; preds = %for.cond
!llvm.module.flags = !{!13, !14}
!llvm.ident = !{!15}
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 251738) (llvm/trunk 251737)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 251738) (llvm/trunk 251737)", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2)
!1 = !DIFile(filename: "coverage.cc", directory: ".")
!2 = !{}
-!3 = !{!4, !9}
-!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fool", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fool", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
!5 = !DISubroutineType(types: !6)
!6 = !{!7, !8}
!7 = !DIBasicType(name: "long long int", size: 64, align: 64, encoding: DW_ATE_signed)
!8 = !DIBasicType(name: "long int", size: 64, align: 64, encoding: DW_ATE_signed)
-!9 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !10, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!9 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 7, type: !10, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
!10 = !DISubroutineType(types: !11)
!11 = !{!12}
!12 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
diff --git a/test/Transforms/SampleProfile/inline-hint.ll b/test/Transforms/SampleProfile/inline-hint.ll
deleted file mode 100644
index 16c4e64ec5bb..000000000000
--- a/test/Transforms/SampleProfile/inline-hint.ll
+++ /dev/null
@@ -1,38 +0,0 @@
-; RUN: opt %s -sample-profile -sample-profile-file=%S/Inputs/inline-hint.prof -pass-remarks=sample-profile -o /dev/null 2>&1 | FileCheck %s
-;
-; CHECK: Applied cold hint to globally cold function '_Z7cold_fnRxi' with 0.1
-define void @_Z7cold_fnRxi() !dbg !4 {
-entry:
- ret void, !dbg !29
-}
-
-; CHECK: Applied inline hint to globally hot function '_Z6hot_fnRxi' with 70.0
-define void @_Z6hot_fnRxi() #0 !dbg !10 {
-entry:
- ret void, !dbg !38
-}
-
-!llvm.module.flags = !{!17, !18}
-!llvm.ident = !{!19}
-
-!1 = !DIFile(filename: "inline-hint.cc", directory: ".")
-!2 = !{}
-!3 = !{!4, !10, !11, !14}
-!4 = distinct !DISubprogram(name: "cold_fn", linkageName: "_Z7cold_fnRxi", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
-!5 = !DISubroutineType(types: !6)
-!6 = !{null, !7, !9}
-!7 = !DIDerivedType(tag: DW_TAG_reference_type, baseType: !8, size: 64, align: 64)
-!8 = !DIBasicType(name: "long long int", size: 64, align: 64, encoding: DW_ATE_signed)
-!9 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!10 = distinct !DISubprogram(name: "hot_fn", linkageName: "_Z6hot_fnRxi", scope: !1, file: !1, line: 7, type: !5, isLocal: false, isDefinition: true, scopeLine: 7, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
-!11 = distinct !DISubprogram(name: "compute", linkageName: "_Z7computex", scope: !1, file: !1, line: 11, type: !12, isLocal: false, isDefinition: true, scopeLine: 11, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
-!12 = !DISubroutineType(types: !13)
-!13 = !{!8, !8}
-!14 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 21, type: !15, isLocal: false, isDefinition: true, scopeLine: 21, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
-!15 = !DISubroutineType(types: !16)
-!16 = !{!9}
-!17 = !{i32 2, !"Dwarf Version", i32 4}
-!18 = !{i32 2, !"Debug Info Version", i32 3}
-!19 = !{!"clang version 3.8.0 (trunk 254067) (llvm/trunk 254079)"}
-!29 = !DILocation(line: 5, column: 1, scope: !4)
-!38 = !DILocation(line: 9, column: 1, scope: !10)
diff --git a/test/Transforms/SampleProfile/inline.ll b/test/Transforms/SampleProfile/inline.ll
index 590a20f9d1d1..ed353834137b 100644
--- a/test/Transforms/SampleProfile/inline.ll
+++ b/test/Transforms/SampleProfile/inline.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/inline.prof -sample-profile-inline-hot-threshold=1 -S | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/inline.prof -sample-profile-inline-hot-threshold=1 -S | FileCheck %s
; Original C++ test case
;
@@ -77,17 +78,17 @@ while.end: ; preds = %while.cond
declare i32 @printf(i8*, ...) #2
+!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!8, !9}
!llvm.ident = !{!10}
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: NoDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "calls.cc", directory: ".")
!2 = !{}
-!3 = !{!4, !7}
-!4 = distinct !DISubprogram(name: "sum", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !5, type: !6, variables: !2)
+!4 = distinct !DISubprogram(name: "sum", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 3, file: !1, scope: !5, type: !6, variables: !2)
!5 = !DIFile(filename: "calls.cc", directory: ".")
!6 = !DISubroutineType(types: !2)
-!7 = distinct !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 7, file: !1, scope: !5, type: !6, variables: !2)
+!7 = distinct !DISubprogram(name: "main", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !0, scopeLine: 7, file: !1, scope: !5, type: !6, variables: !2)
!8 = !{i32 2, !"Dwarf Version", i32 4}
!9 = !{i32 1, !"Debug Info Version", i32 3}
!10 = !{!"clang version 3.5 "}
diff --git a/test/Transforms/SampleProfile/nolocinfo.ll b/test/Transforms/SampleProfile/nolocinfo.ll
index 08bca20984dd..bd6a10b33f2c 100644
--- a/test/Transforms/SampleProfile/nolocinfo.ll
+++ b/test/Transforms/SampleProfile/nolocinfo.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/nolocinfo.prof -S -pass-remarks=sample-profile 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/nolocinfo.prof -S -pass-remarks=sample-profile 2>&1 | FileCheck %s
define i32 @foo(i32 %i) !dbg !4 {
entry:
@@ -23,11 +24,10 @@ if.end:
!llvm.module.flags = !{!8, !9}
!llvm.ident = !{!10}
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 251335) (llvm/trunk 251344)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang version 3.8.0 (trunk 251335) (llvm/trunk 251344)", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2)
!1 = !DIFile(filename: "nolocinfo.c", directory: ".")
!2 = !{}
-!3 = !{!4}
-!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", scope: !1, file: !1, line: 1, type: !5, isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
!5 = !DISubroutineType(types: !6)
!6 = !{!7, !7}
!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
diff --git a/test/Transforms/SampleProfile/offset.ll b/test/Transforms/SampleProfile/offset.ll
index 499b2826402d..c8d52408e864 100644
--- a/test/Transforms/SampleProfile/offset.ll
+++ b/test/Transforms/SampleProfile/offset.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/offset.prof | opt -analyze -branch-prob | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/offset.prof | opt -analyze -branch-prob | FileCheck %s
; Original C++ code for this test case:
;
@@ -54,11 +55,10 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!8, !9}
!llvm.ident = !{!10}
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 250750)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 250750)", isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug, enums: !2)
!1 = !DIFile(filename: "a.cc", directory: "/tmp")
!2 = !{}
-!3 = !{!4}
-!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !1, file: !1, line: 5, type: !5, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: false, variables: !2)
+!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooi", scope: !1, file: !1, line: 5, type: !5, isLocal: false, isDefinition: true, scopeLine: 5, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
!5 = !DISubroutineType(types: !6)
!6 = !{!7, !7}
!7 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
diff --git a/test/Transforms/SampleProfile/propagate.ll b/test/Transforms/SampleProfile/propagate.ll
index eef7b162eb7a..a5796695ca04 100644
--- a/test/Transforms/SampleProfile/propagate.ll
+++ b/test/Transforms/SampleProfile/propagate.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/propagate.prof | opt -analyze -branch-prob | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/propagate.prof | opt -analyze -branch-prob | FileCheck %s
; Original C++ code for this test case:
;
@@ -15,7 +16,7 @@
; y++;
; x += 3;
; } else {
-; for (unsigned j = 0; j < i; j++) {
+; for (unsigned j = 0; j < 100; j++) {
; x += j;
; y -= 3;
; }
@@ -28,216 +29,289 @@
; int main() {
; int x = 5678;
; int y = 1234;
-; long N = 999999;
+; long N = 9999999;
; printf("foo(%d, %d, %ld) = %ld\n", x, y, N, foo(x, y, N));
; return 0;
; }
; ModuleID = 'propagate.cc'
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"
@.str = private unnamed_addr constant [24 x i8] c"foo(%d, %d, %ld) = %ld\0A\00", align 1
; Function Attrs: nounwind uwtable
-define i64 @_Z3fooiil(i32 %x, i32 %y, i64 %N) #0 !dbg !4 {
+define i64 @_Z3fooiil(i32 %x, i32 %y, i64 %N) #0 !dbg !6 {
entry:
%retval = alloca i64, align 8
%x.addr = alloca i32, align 4
%y.addr = alloca i32, align 4
%N.addr = alloca i64, align 8
%i = alloca i64, align 8
- %j = alloca i32, align 4
+ %j = alloca i64, align 8
store i32 %x, i32* %x.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %x.addr, metadata !11, metadata !12), !dbg !13
store i32 %y, i32* %y.addr, align 4
+ call void @llvm.dbg.declare(metadata i32* %y.addr, metadata !14, metadata !12), !dbg !15
store i64 %N, i64* %N.addr, align 8
- %0 = load i32, i32* %x.addr, align 4, !dbg !11
- %1 = load i32, i32* %y.addr, align 4, !dbg !11
- %cmp = icmp slt i32 %0, %1, !dbg !11
- br i1 %cmp, label %if.then, label %if.else, !dbg !11
+ call void @llvm.dbg.declare(metadata i64* %N.addr, metadata !16, metadata !12), !dbg !17
+ %0 = load i32, i32* %x.addr, align 4, !dbg !18
+ %1 = load i32, i32* %y.addr, align 4, !dbg !20
+ %cmp = icmp slt i32 %0, %1, !dbg !21
+ br i1 %cmp, label %if.then, label %if.else, !dbg !22
if.then: ; preds = %entry
- %2 = load i32, i32* %y.addr, align 4, !dbg !13
- %3 = load i32, i32* %x.addr, align 4, !dbg !13
- %sub = sub nsw i32 %2, %3, !dbg !13
- %conv = sext i32 %sub to i64, !dbg !13
- store i64 %conv, i64* %retval, !dbg !13
- br label %return, !dbg !13
+ %2 = load i32, i32* %y.addr, align 4, !dbg !23
+ %3 = load i32, i32* %x.addr, align 4, !dbg !25
+ %sub = sub nsw i32 %2, %3, !dbg !26
+ %conv = sext i32 %sub to i64, !dbg !23
+ store i64 %conv, i64* %retval, align 8, !dbg !27
+ br label %return, !dbg !27
if.else: ; preds = %entry
- store i64 0, i64* %i, align 8, !dbg !15
- br label %for.cond, !dbg !15
+ call void @llvm.dbg.declare(metadata i64* %i, metadata !28, metadata !12), !dbg !31
+ store i64 0, i64* %i, align 8, !dbg !31
+ br label %for.cond, !dbg !32
-for.cond: ; preds = %for.inc16, %if.else
- %4 = load i64, i64* %i, align 8, !dbg !15
- %5 = load i64, i64* %N.addr, align 8, !dbg !15
- %cmp1 = icmp slt i64 %4, %5, !dbg !15
- br i1 %cmp1, label %for.body, label %for.end18, !dbg !15
-; CHECK: edge for.cond -> for.body probability is 0x745d1746 / 0x80000000 = 90.91% [HOT edge]
-; CHECK: edge for.cond -> for.end18 probability is 0x0ba2e8ba / 0x80000000 = 9.09%
+for.cond: ; preds = %for.inc17, %if.else
+ %4 = load i64, i64* %i, align 8, !dbg !33
+ %5 = load i64, i64* %N.addr, align 8, !dbg !36
+ %cmp1 = icmp slt i64 %4, %5, !dbg !37
+ br i1 %cmp1, label %for.body, label %for.end19, !dbg !38
for.body: ; preds = %for.cond
- %6 = load i64, i64* %i, align 8, !dbg !18
- %7 = load i64, i64* %N.addr, align 8, !dbg !18
- %div = sdiv i64 %7, 3, !dbg !18
- %cmp2 = icmp sgt i64 %6, %div, !dbg !18
- br i1 %cmp2, label %if.then3, label %if.end, !dbg !18
-; CHECK: edge for.body -> if.then3 probability is 0x1999999a / 0x80000000 = 20.00%
-; CHECK: edge for.body -> if.end probability is 0x66666666 / 0x80000000 = 80.00%
+ %6 = load i64, i64* %i, align 8, !dbg !39
+ %7 = load i64, i64* %N.addr, align 8, !dbg !42
+ %div = sdiv i64 %7, 3, !dbg !43
+ %cmp2 = icmp sgt i64 %6, %div, !dbg !44
+ br i1 %cmp2, label %if.then3, label %if.end, !dbg !45
+; CHECK: edge for.body -> if.then3 probability is 0x51451451 / 0x80000000 = 63.49%
+; CHECK: edge for.body -> if.end probability is 0x2ebaebaf / 0x80000000 = 36.51%
if.then3: ; preds = %for.body
- %8 = load i32, i32* %x.addr, align 4, !dbg !21
- %dec = add nsw i32 %8, -1, !dbg !21
- store i32 %dec, i32* %x.addr, align 4, !dbg !21
- br label %if.end, !dbg !21
+ %8 = load i32, i32* %x.addr, align 4, !dbg !46
+ %dec = add nsw i32 %8, -1, !dbg !46
+ store i32 %dec, i32* %x.addr, align 4, !dbg !46
+ br label %if.end, !dbg !47
if.end: ; preds = %if.then3, %for.body
- %9 = load i64, i64* %i, align 8, !dbg !22
- %10 = load i64, i64* %N.addr, align 8, !dbg !22
- %div4 = sdiv i64 %10, 4, !dbg !22
- %cmp5 = icmp sgt i64 %9, %div4, !dbg !22
- br i1 %cmp5, label %if.then6, label %if.else7, !dbg !22
-; CHECK: edge if.end -> if.then6 probability is 0x000f801f / 0x80000000 = 0.05%
-; CHECK: edge if.end -> if.else7 probability is 0x7ff07fe1 / 0x80000000 = 99.95% [HOT edge]
+ %9 = load i64, i64* %i, align 8, !dbg !48
+ %10 = load i64, i64* %N.addr, align 8, !dbg !50
+ %div4 = sdiv i64 %10, 4, !dbg !51
+ %cmp5 = icmp sgt i64 %9, %div4, !dbg !52
+ br i1 %cmp5, label %if.then6, label %if.else7, !dbg !53
+; CHECK: edge if.end -> if.then6 probability is 0x5dbaa1dc / 0x80000000 = 73.23%
+; CHECK: edge if.end -> if.else7 probability is 0x22455e24 / 0x80000000 = 26.77%
if.then6: ; preds = %if.end
- %11 = load i32, i32* %y.addr, align 4, !dbg !24
- %inc = add nsw i32 %11, 1, !dbg !24
- store i32 %inc, i32* %y.addr, align 4, !dbg !24
- %12 = load i32, i32* %x.addr, align 4, !dbg !26
- %add = add nsw i32 %12, 3, !dbg !26
- store i32 %add, i32* %x.addr, align 4, !dbg !26
- br label %if.end15, !dbg !27
+ %11 = load i32, i32* %y.addr, align 4, !dbg !54
+ %inc = add nsw i32 %11, 1, !dbg !54
+ store i32 %inc, i32* %y.addr, align 4, !dbg !54
+ %12 = load i32, i32* %x.addr, align 4, !dbg !56
+ %add = add nsw i32 %12, 3, !dbg !56
+ store i32 %add, i32* %x.addr, align 4, !dbg !56
+ br label %if.end16, !dbg !57
if.else7: ; preds = %if.end
- store i32 0, i32* %j, align 4, !dbg !28
- br label %for.cond8, !dbg !28
+ call void @llvm.dbg.declare(metadata i64* %j, metadata !58, metadata !12), !dbg !62
+ store i64 0, i64* %j, align 8, !dbg !62
+ br label %for.cond8, !dbg !63
for.cond8: ; preds = %for.inc, %if.else7
- %13 = load i32, i32* %j, align 4, !dbg !28
- %conv9 = zext i32 %13 to i64, !dbg !28
- %14 = load i64, i64* %i, align 8, !dbg !28
- %cmp10 = icmp slt i64 %conv9, %14, !dbg !28
- br i1 %cmp10, label %for.body11, label %for.end, !dbg !28
-; CHECK: edge for.cond8 -> for.body11 probability is 0x5bfc7472 / 0x80000000 = 71.86%
-; CHECK: edge for.cond8 -> for.end probability is 0x24038b8e / 0x80000000 = 28.14%
-
-for.body11: ; preds = %for.cond8
- %15 = load i32, i32* %j, align 4, !dbg !31
- %16 = load i32, i32* %x.addr, align 4, !dbg !31
- %add12 = add i32 %16, %15, !dbg !31
- store i32 %add12, i32* %x.addr, align 4, !dbg !31
- %17 = load i32, i32* %y.addr, align 4, !dbg !33
- %sub13 = sub nsw i32 %17, 3, !dbg !33
- store i32 %sub13, i32* %y.addr, align 4, !dbg !33
- br label %for.inc, !dbg !34
-
-for.inc: ; preds = %for.body11
- %18 = load i32, i32* %j, align 4, !dbg !28
- %inc14 = add i32 %18, 1, !dbg !28
- store i32 %inc14, i32* %j, align 4, !dbg !28
- br label %for.cond8, !dbg !28
+ %13 = load i64, i64* %j, align 8, !dbg !64
+ %cmp9 = icmp slt i64 %13, 100, !dbg !67
+ br i1 %cmp9, label %for.body10, label %for.end, !dbg !68
+; CHECK: edge for.cond8 -> for.body10 probability is 0x7e985735 / 0x80000000 = 98.90% [HOT edge]
+; CHECK: edge for.cond8 -> for.end probability is 0x0167a8cb / 0x80000000 = 1.10%
+
+
+for.body10: ; preds = %for.cond8
+ %14 = load i64, i64* %j, align 8, !dbg !69
+ %15 = load i32, i32* %x.addr, align 4, !dbg !71
+ %conv11 = sext i32 %15 to i64, !dbg !71
+ %add12 = add nsw i64 %conv11, %14, !dbg !71
+ %conv13 = trunc i64 %add12 to i32, !dbg !71
+ store i32 %conv13, i32* %x.addr, align 4, !dbg !71
+ %16 = load i32, i32* %y.addr, align 4, !dbg !72
+ %sub14 = sub nsw i32 %16, 3, !dbg !72
+ store i32 %sub14, i32* %y.addr, align 4, !dbg !72
+ br label %for.inc, !dbg !73
+
+for.inc: ; preds = %for.body10
+ %17 = load i64, i64* %j, align 8, !dbg !74
+ %inc15 = add nsw i64 %17, 1, !dbg !74
+ store i64 %inc15, i64* %j, align 8, !dbg !74
+ br label %for.cond8, !dbg !76
for.end: ; preds = %for.cond8
- br label %if.end15
-
-if.end15: ; preds = %for.end, %if.then6
- br label %for.inc16, !dbg !35
-
-for.inc16: ; preds = %if.end15
- %19 = load i64, i64* %i, align 8, !dbg !15
- %inc17 = add nsw i64 %19, 1, !dbg !15
- store i64 %inc17, i64* %i, align 8, !dbg !15
- br label %for.cond, !dbg !15
-
-for.end18: ; preds = %for.cond
- br label %if.end19
-
-if.end19: ; preds = %for.end18
- %20 = load i32, i32* %y.addr, align 4, !dbg !36
- %21 = load i32, i32* %x.addr, align 4, !dbg !36
- %mul = mul nsw i32 %20, %21, !dbg !36
- %conv20 = sext i32 %mul to i64, !dbg !36
- store i64 %conv20, i64* %retval, !dbg !36
- br label %return, !dbg !36
-
-return: ; preds = %if.end19, %if.then
- %22 = load i64, i64* %retval, !dbg !37
- ret i64 %22, !dbg !37
+ br label %if.end16
+
+if.end16: ; preds = %for.end, %if.then6
+ br label %for.inc17, !dbg !77
+
+for.inc17: ; preds = %if.end16
+ %18 = load i64, i64* %i, align 8, !dbg !78
+ %inc18 = add nsw i64 %18, 1, !dbg !78
+ store i64 %inc18, i64* %i, align 8, !dbg !78
+ br label %for.cond, !dbg !80
+
+for.end19: ; preds = %for.cond
+ br label %if.end20
+
+if.end20: ; preds = %for.end19
+ %19 = load i32, i32* %y.addr, align 4, !dbg !81
+ %20 = load i32, i32* %x.addr, align 4, !dbg !82
+ %mul = mul nsw i32 %19, %20, !dbg !83
+ %conv21 = sext i32 %mul to i64, !dbg !81
+ store i64 %conv21, i64* %retval, align 8, !dbg !84
+ br label %return, !dbg !84
+
+return: ; preds = %if.end20, %if.then
+ %21 = load i64, i64* %retval, align 8, !dbg !85
+ ret i64 %21, !dbg !85
}
-; Function Attrs: uwtable
-define i32 @main() #1 !dbg !7 {
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: norecurse uwtable
+define i32 @main() #2 !dbg !86 {
entry:
%retval = alloca i32, align 4
%x = alloca i32, align 4
%y = alloca i32, align 4
%N = alloca i64, align 8
- store i32 0, i32* %retval
- store i32 5678, i32* %x, align 4, !dbg !38
- store i32 1234, i32* %y, align 4, !dbg !39
- store i64 999999, i64* %N, align 8, !dbg !40
- %0 = load i32, i32* %x, align 4, !dbg !41
- %1 = load i32, i32* %y, align 4, !dbg !41
- %2 = load i64, i64* %N, align 8, !dbg !41
- %3 = load i32, i32* %x, align 4, !dbg !41
- %4 = load i32, i32* %y, align 4, !dbg !41
- %5 = load i64, i64* %N, align 8, !dbg !41
- %call = call i64 @_Z3fooiil(i32 %3, i32 %4, i64 %5), !dbg !41
- %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str, i32 0, i32 0), i32 %0, i32 %1, i64 %2, i64 %call), !dbg !41
- ret i32 0, !dbg !42
+ store i32 0, i32* %retval, align 4
+ call void @llvm.dbg.declare(metadata i32* %x, metadata !89, metadata !12), !dbg !90
+ store i32 5678, i32* %x, align 4, !dbg !90
+ call void @llvm.dbg.declare(metadata i32* %y, metadata !91, metadata !12), !dbg !92
+ store i32 1234, i32* %y, align 4, !dbg !92
+ call void @llvm.dbg.declare(metadata i64* %N, metadata !93, metadata !12), !dbg !94
+ store i64 9999999, i64* %N, align 8, !dbg !94
+ %0 = load i32, i32* %x, align 4, !dbg !95
+ %1 = load i32, i32* %y, align 4, !dbg !96
+ %2 = load i64, i64* %N, align 8, !dbg !97
+ %3 = load i32, i32* %x, align 4, !dbg !98
+ %4 = load i32, i32* %y, align 4, !dbg !99
+ %5 = load i64, i64* %N, align 8, !dbg !100
+ %call = call i64 @_Z3fooiil(i32 %3, i32 %4, i64 %5), !dbg !101
+ %call1 = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str, i32 0, i32 0), i32 %0, i32 %1, i64 %2, i64 %call), !dbg !102
+ ret i32 0, !dbg !104
}
-declare i32 @printf(i8*, ...) #2
+declare i32 @printf(i8*, ...) #3
-attributes #0 = { nounwind uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #2 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #0 = { nounwind uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { nounwind readnone }
+attributes #2 = { norecurse uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #3 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2,+x87" "unsafe-fp-math"="false" "use-soft-float"="false" }
!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!8, !9}
-!llvm.ident = !{!10}
+!llvm.module.flags = !{!3, !4}
+!llvm.ident = !{!5}
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, producer: "clang version 3.5 ", isOptimized: false, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.9.0 (trunk 266819)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2)
!1 = !DIFile(filename: "propagate.cc", directory: ".")
!2 = !{}
-!3 = !{!4, !7}
-!4 = distinct !DISubprogram(name: "foo", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 3, file: !1, scope: !5, type: !6, variables: !2)
-!5 = !DIFile(filename: "propagate.cc", directory: ".")
-!6 = !DISubroutineType(types: !{null})
-!7 = distinct !DISubprogram(name: "main", line: 24, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 24, file: !1, scope: !5, type: !6, variables: !2)
-!8 = !{i32 2, !"Dwarf Version", i32 4}
-!9 = !{i32 1, !"Debug Info Version", i32 3}
-!10 = !{!"clang version 3.5 "}
-!11 = !DILocation(line: 4, scope: !12)
-!12 = distinct !DILexicalBlock(line: 4, column: 0, file: !1, scope: !4)
-!13 = !DILocation(line: 5, scope: !14)
-!14 = distinct !DILexicalBlock(line: 4, column: 0, file: !1, scope: !12)
-!15 = !DILocation(line: 7, scope: !16)
-!16 = distinct !DILexicalBlock(line: 7, column: 0, file: !1, scope: !17)
-!17 = distinct !DILexicalBlock(line: 6, column: 0, file: !1, scope: !12)
-!18 = !DILocation(line: 8, scope: !19)
-!19 = distinct !DILexicalBlock(line: 8, column: 0, file: !1, scope: !20)
-!20 = distinct !DILexicalBlock(line: 7, column: 0, file: !1, scope: !16)
-!21 = !DILocation(line: 9, scope: !19)
-!22 = !DILocation(line: 10, scope: !23)
-!23 = distinct !DILexicalBlock(line: 10, column: 0, file: !1, scope: !20)
-!24 = !DILocation(line: 11, scope: !25)
-!25 = distinct !DILexicalBlock(line: 10, column: 0, file: !1, scope: !23)
-!26 = !DILocation(line: 12, scope: !25)
-!27 = !DILocation(line: 13, scope: !25)
-!28 = !DILocation(line: 14, scope: !29)
-!29 = distinct !DILexicalBlock(line: 14, column: 0, file: !1, scope: !30)
-!30 = distinct !DILexicalBlock(line: 13, column: 0, file: !1, scope: !23)
-!31 = !DILocation(line: 15, scope: !32)
-!32 = distinct !DILexicalBlock(line: 14, column: 0, file: !1, scope: !29)
-!33 = !DILocation(line: 16, scope: !32)
-!34 = !DILocation(line: 17, scope: !32)
-!35 = !DILocation(line: 19, scope: !20)
-!36 = !DILocation(line: 21, scope: !4)
-!37 = !DILocation(line: 22, scope: !4)
-!38 = !DILocation(line: 25, scope: !7)
-!39 = !DILocation(line: 26, scope: !7)
-!40 = !DILocation(line: 27, scope: !7)
-!41 = !DILocation(line: 28, scope: !7)
-!42 = !DILocation(line: 29, scope: !7)
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = !{!"clang version 3.9.0 (trunk 266819)"}
+!6 = distinct !DISubprogram(name: "foo", linkageName: "_Z3fooiil", scope: !1, file: !1, line: 3, type: !7, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
+!7 = !DISubroutineType(types: !8)
+!8 = !{!9, !10, !10, !9}
+!9 = !DIBasicType(name: "long int", size: 64, align: 64, encoding: DW_ATE_signed)
+!10 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
+!11 = !DILocalVariable(name: "x", arg: 1, scope: !6, file: !1, line: 3, type: !10)
+!12 = !DIExpression()
+!13 = !DILocation(line: 3, column: 14, scope: !6)
+!14 = !DILocalVariable(name: "y", arg: 2, scope: !6, file: !1, line: 3, type: !10)
+!15 = !DILocation(line: 3, column: 21, scope: !6)
+!16 = !DILocalVariable(name: "N", arg: 3, scope: !6, file: !1, line: 3, type: !9)
+!17 = !DILocation(line: 3, column: 29, scope: !6)
+!18 = !DILocation(line: 4, column: 7, scope: !19)
+!19 = distinct !DILexicalBlock(scope: !6, file: !1, line: 4, column: 7)
+!20 = !DILocation(line: 4, column: 11, scope: !19)
+!21 = !DILocation(line: 4, column: 9, scope: !19)
+!22 = !DILocation(line: 4, column: 7, scope: !6)
+!23 = !DILocation(line: 5, column: 12, scope: !24)
+!24 = distinct !DILexicalBlock(scope: !19, file: !1, line: 4, column: 14)
+!25 = !DILocation(line: 5, column: 16, scope: !24)
+!26 = !DILocation(line: 5, column: 14, scope: !24)
+!27 = !DILocation(line: 5, column: 5, scope: !24)
+!28 = !DILocalVariable(name: "i", scope: !29, file: !1, line: 7, type: !9)
+!29 = distinct !DILexicalBlock(scope: !30, file: !1, line: 7, column: 5)
+!30 = distinct !DILexicalBlock(scope: !19, file: !1, line: 6, column: 10)
+!31 = !DILocation(line: 7, column: 15, scope: !29)
+!32 = !DILocation(line: 7, column: 10, scope: !29)
+!33 = !DILocation(line: 7, column: 22, scope: !34)
+!34 = !DILexicalBlockFile(scope: !35, file: !1, discriminator: 1)
+!35 = distinct !DILexicalBlock(scope: !29, file: !1, line: 7, column: 5)
+!36 = !DILocation(line: 7, column: 26, scope: !34)
+!37 = !DILocation(line: 7, column: 24, scope: !34)
+!38 = !DILocation(line: 7, column: 5, scope: !34)
+!39 = !DILocation(line: 8, column: 11, scope: !40)
+!40 = distinct !DILexicalBlock(scope: !41, file: !1, line: 8, column: 11)
+!41 = distinct !DILexicalBlock(scope: !35, file: !1, line: 7, column: 34)
+!42 = !DILocation(line: 8, column: 15, scope: !40)
+!43 = !DILocation(line: 8, column: 17, scope: !40)
+!44 = !DILocation(line: 8, column: 13, scope: !40)
+!45 = !DILocation(line: 8, column: 11, scope: !41)
+!46 = !DILocation(line: 9, column: 10, scope: !40)
+!47 = !DILocation(line: 9, column: 9, scope: !40)
+!48 = !DILocation(line: 10, column: 11, scope: !49)
+!49 = distinct !DILexicalBlock(scope: !41, file: !1, line: 10, column: 11)
+!50 = !DILocation(line: 10, column: 15, scope: !49)
+!51 = !DILocation(line: 10, column: 17, scope: !49)
+!52 = !DILocation(line: 10, column: 13, scope: !49)
+!53 = !DILocation(line: 10, column: 11, scope: !41)
+!54 = !DILocation(line: 11, column: 10, scope: !55)
+!55 = distinct !DILexicalBlock(scope: !49, file: !1, line: 10, column: 22)
+!56 = !DILocation(line: 12, column: 11, scope: !55)
+!57 = !DILocation(line: 13, column: 7, scope: !55)
+!58 = !DILocalVariable(name: "j", scope: !59, file: !1, line: 14, type: !61)
+!59 = distinct !DILexicalBlock(scope: !60, file: !1, line: 14, column: 9)
+!60 = distinct !DILexicalBlock(scope: !49, file: !1, line: 13, column: 14)
+!61 = !DIBasicType(name: "long long int", size: 64, align: 64, encoding: DW_ATE_signed)
+!62 = !DILocation(line: 14, column: 24, scope: !59)
+!63 = !DILocation(line: 14, column: 14, scope: !59)
+!64 = !DILocation(line: 14, column: 31, scope: !65)
+!65 = !DILexicalBlockFile(scope: !66, file: !1, discriminator: 1)
+!66 = distinct !DILexicalBlock(scope: !59, file: !1, line: 14, column: 9)
+!67 = !DILocation(line: 14, column: 33, scope: !65)
+!68 = !DILocation(line: 14, column: 9, scope: !65)
+!69 = !DILocation(line: 15, column: 16, scope: !70)
+!70 = distinct !DILexicalBlock(scope: !66, file: !1, line: 14, column: 45)
+!71 = !DILocation(line: 15, column: 13, scope: !70)
+!72 = !DILocation(line: 16, column: 13, scope: !70)
+!73 = !DILocation(line: 17, column: 9, scope: !70)
+!74 = !DILocation(line: 14, column: 41, scope: !75)
+!75 = !DILexicalBlockFile(scope: !66, file: !1, discriminator: 2)
+!76 = !DILocation(line: 14, column: 9, scope: !75)
+!77 = !DILocation(line: 19, column: 5, scope: !41)
+!78 = !DILocation(line: 7, column: 30, scope: !79)
+!79 = !DILexicalBlockFile(scope: !35, file: !1, discriminator: 2)
+!80 = !DILocation(line: 7, column: 5, scope: !79)
+!81 = !DILocation(line: 21, column: 10, scope: !6)
+!82 = !DILocation(line: 21, column: 14, scope: !6)
+!83 = !DILocation(line: 21, column: 12, scope: !6)
+!84 = !DILocation(line: 21, column: 3, scope: !6)
+!85 = !DILocation(line: 22, column: 1, scope: !6)
+!86 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 24, type: !87, isLocal: false, isDefinition: true, scopeLine: 24, flags: DIFlagPrototyped, isOptimized: false, unit: !0, variables: !2)
+!87 = !DISubroutineType(types: !88)
+!88 = !{!10}
+!89 = !DILocalVariable(name: "x", scope: !86, file: !1, line: 25, type: !10)
+!90 = !DILocation(line: 25, column: 7, scope: !86)
+!91 = !DILocalVariable(name: "y", scope: !86, file: !1, line: 26, type: !10)
+!92 = !DILocation(line: 26, column: 7, scope: !86)
+!93 = !DILocalVariable(name: "N", scope: !86, file: !1, line: 27, type: !9)
+!94 = !DILocation(line: 27, column: 8, scope: !86)
+!95 = !DILocation(line: 28, column: 38, scope: !86)
+!96 = !DILocation(line: 28, column: 41, scope: !86)
+!97 = !DILocation(line: 28, column: 44, scope: !86)
+!98 = !DILocation(line: 28, column: 51, scope: !86)
+!99 = !DILocation(line: 28, column: 54, scope: !86)
+!100 = !DILocation(line: 28, column: 57, scope: !86)
+!101 = !DILocation(line: 28, column: 47, scope: !86)
+!102 = !DILocation(line: 28, column: 3, scope: !103)
+!103 = !DILexicalBlockFile(scope: !86, file: !1, discriminator: 1)
+!104 = !DILocation(line: 29, column: 3, scope: !86)
diff --git a/test/Transforms/SampleProfile/remarks.ll b/test/Transforms/SampleProfile/remarks.ll
index a0e6a9deb8a8..908e4f8b10b4 100644
--- a/test/Transforms/SampleProfile/remarks.ll
+++ b/test/Transforms/SampleProfile/remarks.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/remarks.prof -S -pass-remarks=sample-profile 2>&1 | FileCheck %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/remarks.prof -S -pass-remarks=sample-profile 2>&1 | FileCheck %s
;
; Original test case.
;
@@ -124,11 +125,10 @@ attributes #4 = { nounwind }
!llvm.module.flags = !{!16, !17}
!llvm.ident = !{!18}
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 251041) (llvm/trunk 251053)", isOptimized: true, runtimeVersion: 0, emissionKind: 1, enums: !2, subprograms: !3)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 251041) (llvm/trunk 251053)", isOptimized: true, runtimeVersion: 0, emissionKind: NoDebug, enums: !2)
!1 = !DIFile(filename: "remarks.cc", directory: ".")
!2 = !{}
-!3 = !{!4, !13}
-!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, variables: !8)
+!4 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !1, file: !1, line: 3, type: !5, isLocal: false, isDefinition: true, scopeLine: 3, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !8)
!5 = !DISubroutineType(types: !6)
!6 = !{!7}
!7 = !DIBasicType(name: "long long int", size: 64, align: 64, encoding: DW_ATE_signed)
@@ -137,7 +137,7 @@ attributes #4 = { nounwind }
!10 = !DILocalVariable(name: "i", scope: !11, file: !1, line: 5, type: !12)
!11 = distinct !DILexicalBlock(scope: !4, file: !1, line: 5, column: 3)
!12 = !DIBasicType(name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!13 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 13, type: !14, isLocal: false, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, isOptimized: true, variables: !2)
+!13 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 13, type: !14, isLocal: false, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, isOptimized: true, unit: !0, variables: !2)
!14 = !DISubroutineType(types: !15)
!15 = !{!12}
!16 = !{i32 2, !"Dwarf Version", i32 4}
diff --git a/test/Transforms/SampleProfile/summary.ll b/test/Transforms/SampleProfile/summary.ll
new file mode 100644
index 000000000000..49b1d2c36333
--- /dev/null
+++ b/test/Transforms/SampleProfile/summary.ll
@@ -0,0 +1,14 @@
+; Test that we annotate entire program's summary to IR.
+; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/summary.prof -S | FileCheck %s
+
+define i32 @bar() #0 !dbg !1 {
+entry:
+ ret i32 1, !dbg !2
+}
+
+; CHECK-DAG: {{![0-9]+}} = !{i32 1, !"ProfileSummary", {{![0-9]+}}}
+; CHECK-DAG: {{![0-9]+}} = !{!"NumFunctions", i64 2}
+; CHECK-DAG: {{![0-9]+}} = !{!"MaxFunctionCount", i64 3}
+
+!1 = distinct !DISubprogram(name: "bar")
+!2 = !DILocation(line: 2, scope: !2)
diff --git a/test/Transforms/SampleProfile/syntax.ll b/test/Transforms/SampleProfile/syntax.ll
index debbc7c87ddb..7114dfa61574 100644
--- a/test/Transforms/SampleProfile/syntax.ll
+++ b/test/Transforms/SampleProfile/syntax.ll
@@ -7,6 +7,15 @@
; RUN: not opt < %s -sample-profile -sample-profile-file=%S/Inputs/bad_samples.prof 2>&1 | FileCheck -check-prefix=BAD-SAMPLES %s
; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/bad_mangle.prof 2>&1 >/dev/null
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/syntax.prof 2>&1 | FileCheck -check-prefix=NO-DEBUG %s
+; RUN: not opt < %s -passes=sample-profile -sample-profile-file=missing.prof 2>&1 | FileCheck -check-prefix=MISSING-FILE %s
+; RUN: not opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/bad_fn_header.prof 2>&1 | FileCheck -check-prefix=BAD-FN-HEADER %s
+; RUN: not opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/bad_sample_line.prof 2>&1 | FileCheck -check-prefix=BAD-SAMPLE-LINE %s
+; RUN: not opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/bad_line_values.prof 2>&1 | FileCheck -check-prefix=BAD-LINE-VALUES %s
+; RUN: not opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/bad_discriminator_value.prof 2>&1 | FileCheck -check-prefix=BAD-DISCRIMINATOR-VALUE %s
+; RUN: not opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/bad_samples.prof 2>&1 | FileCheck -check-prefix=BAD-SAMPLES %s
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/bad_mangle.prof 2>&1 >/dev/null
+
define void @empty() {
entry:
ret void
diff --git a/test/Transforms/ScalarRepl/2003-05-29-ArrayFail.ll b/test/Transforms/ScalarRepl/2003-05-29-ArrayFail.ll
deleted file mode 100644
index 336c0a9dfa6b..000000000000
--- a/test/Transforms/ScalarRepl/2003-05-29-ArrayFail.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: opt < %s -scalarrepl -instcombine -S | not grep alloca
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-
-; Test that an array is not incorrectly deconstructed.
-
-define i32 @test() nounwind {
- %X = alloca [4 x i32] ; <[4 x i32]*> [#uses=1]
- %Y = getelementptr [4 x i32], [4 x i32]* %X, i64 0, i64 0 ; <i32*> [#uses=1]
- ; Must preserve arrayness!
- %Z = getelementptr i32, i32* %Y, i64 1 ; <i32*> [#uses=1]
- %A = load i32, i32* %Z ; <i32> [#uses=1]
- ret i32 %A
-}
diff --git a/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll b/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll
deleted file mode 100644
index c5ca428be4bc..000000000000
--- a/test/Transforms/ScalarRepl/2003-09-12-IncorrectPromote.ll
+++ /dev/null
@@ -1,12 +0,0 @@
-; Scalar replacement was incorrectly promoting this alloca!!
-;
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
-
-define i8* @test() {
- %A = alloca [30 x i8] ; <[30 x i8]*> [#uses=1]
- %B = getelementptr [30 x i8], [30 x i8]* %A, i64 0, i64 0 ; <i8*> [#uses=2]
- %C = getelementptr i8, i8* %B, i64 1 ; <i8*> [#uses=1]
- store i8 0, i8* %B
- ret i8* %C
-}
-; CHECK: alloca [
diff --git a/test/Transforms/ScalarRepl/2003-10-29-ArrayProblem.ll b/test/Transforms/ScalarRepl/2003-10-29-ArrayProblem.ll
deleted file mode 100644
index ce652737bc1e..000000000000
--- a/test/Transforms/ScalarRepl/2003-10-29-ArrayProblem.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | grep "alloca %%T"
-
-%T = type { [80 x i8], i32, i32 }
-declare i32 @.callback_1(i8*)
-
-declare void @.iter_2(i32 (i8*)*, i8*)
-
-define i32 @main() {
- %d = alloca %T ; <{ [80 x i8], i32, i32 }*> [#uses=2]
- %tmp.0 = getelementptr %T, %T* %d, i64 0, i32 2 ; <i32*> [#uses=1]
- store i32 0, i32* %tmp.0
- %tmp.1 = getelementptr %T, %T* %d, i64 0, i32 0, i64 0 ; <i8*> [#uses=1]
- call void @.iter_2( i32 (i8*)* @.callback_1, i8* %tmp.1 )
- ret i32 0
-}
-
diff --git a/test/Transforms/ScalarRepl/2006-11-07-InvalidArrayPromote.ll b/test/Transforms/ScalarRepl/2006-11-07-InvalidArrayPromote.ll
deleted file mode 100644
index 2701fdaea516..000000000000
--- a/test/Transforms/ScalarRepl/2006-11-07-InvalidArrayPromote.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | not grep alloca
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-
-define i32 @func(<4 x float> %v0, <4 x float> %v1) nounwind {
- %vsiidx = alloca [2 x <4 x i32>], align 16 ; <[2 x <4 x i32>]*> [#uses=3]
- %tmp = call <4 x i32> @llvm.x86.sse2.cvttps2dq( <4 x float> %v0 ) ; <<4 x i32>> [#uses=2]
- %tmp.upgrd.1 = bitcast <4 x i32> %tmp to <2 x i64> ; <<2 x i64>> [#uses=0]
- %tmp.upgrd.2 = getelementptr [2 x <4 x i32>], [2 x <4 x i32>]* %vsiidx, i32 0, i32 0 ; <<4 x i32>*> [#uses=1]
- store <4 x i32> %tmp, <4 x i32>* %tmp.upgrd.2
- %tmp10 = call <4 x i32> @llvm.x86.sse2.cvttps2dq( <4 x float> %v1 ) ; <<4 x i32>> [#uses=2]
- %tmp10.upgrd.3 = bitcast <4 x i32> %tmp10 to <2 x i64> ; <<2 x i64>> [#uses=0]
- %tmp14 = getelementptr [2 x <4 x i32>], [2 x <4 x i32>]* %vsiidx, i32 0, i32 1 ; <<4 x i32>*> [#uses=1]
- store <4 x i32> %tmp10, <4 x i32>* %tmp14
- %tmp15 = getelementptr [2 x <4 x i32>], [2 x <4 x i32>]* %vsiidx, i32 0, i32 0, i32 4 ; <i32*> [#uses=1]
- %tmp.upgrd.4 = load i32, i32* %tmp15 ; <i32> [#uses=1]
- ret i32 %tmp.upgrd.4
-}
-
-declare <4 x i32> @llvm.x86.sse2.cvttps2dq(<4 x float>)
-
diff --git a/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll b/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll
deleted file mode 100644
index 966b17939fec..000000000000
--- a/test/Transforms/ScalarRepl/2007-05-29-MemcpyPreserve.ll
+++ /dev/null
@@ -1,24 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | grep memcpy
-; PR1421
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "i686-apple-darwin8"
-
-%struct.LongestMember = type { i8, i32 }
-%struct.MyString = type { i32 }
-%struct.UnionType = type { %struct.LongestMember }
-
-define void @_Z4testP9UnionTypePS0_(%struct.UnionType* %p, %struct.UnionType** %pointerToUnion) {
-entry:
- %tmp = alloca %struct.UnionType, align 8
- %tmp2 = getelementptr %struct.UnionType, %struct.UnionType* %tmp, i32 0, i32 0, i32 0
- %tmp13 = getelementptr %struct.UnionType, %struct.UnionType* %p, i32 0, i32 0, i32 0
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp13, i32 8, i32 0, i1 false)
- %tmp5 = load %struct.UnionType*, %struct.UnionType** %pointerToUnion
- %tmp56 = getelementptr %struct.UnionType, %struct.UnionType* %tmp5, i32 0, i32 0, i32 0
- %tmp7 = getelementptr %struct.UnionType, %struct.UnionType* %tmp, i32 0, i32 0, i32 0
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp56, i8* %tmp7, i32 8, i32 0, i1 false)
- ret void
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/2007-11-03-bigendian_apint.ll b/test/Transforms/ScalarRepl/2007-11-03-bigendian_apint.ll
deleted file mode 100644
index 28f503a210ca..000000000000
--- a/test/Transforms/ScalarRepl/2007-11-03-bigendian_apint.ll
+++ /dev/null
@@ -1,36 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | not grep shr
-
-; FIXME: I think this test is no longer valid.
-; It was working because SROA was aborting when
-; no datalayout was supplied
-; XFAIL: *
-
-
-%struct.S = type { i16 }
-
-define zeroext i1 @f(i16 signext %b) {
-entry:
- %b_addr = alloca i16 ; <i16*> [#uses=2]
- %retval = alloca i32 ; <i32*> [#uses=2]
- %s = alloca %struct.S ; <%struct.S*> [#uses=2]
- %tmp = alloca i32 ; <i32*> [#uses=2]
- %"alloca point" = bitcast i32 0 to i32 ; <i32> [#uses=0]
- store i16 %b, i16* %b_addr
- %tmp1 = getelementptr %struct.S, %struct.S* %s, i32 0, i32 0 ; <i16*> [#uses=1]
- %tmp2 = load i16, i16* %b_addr, align 2 ; <i16> [#uses=1]
- store i16 %tmp2, i16* %tmp1, align 2
- %tmp3 = getelementptr %struct.S, %struct.S* %s, i32 0, i32 0 ; <i16*> [#uses=1]
- %tmp34 = bitcast i16* %tmp3 to [2 x i1]* ; <[2 x i1]*> [#uses=1]
- %tmp5 = getelementptr [2 x i1], [2 x i1]* %tmp34, i32 0, i32 1 ; <i1*> [#uses=1]
- %tmp6 = load i1, i1* %tmp5, align 1 ; <i1> [#uses=1]
- %tmp67 = zext i1 %tmp6 to i32 ; <i32> [#uses=1]
- store i32 %tmp67, i32* %tmp, align 4
- %tmp8 = load i32, i32* %tmp, align 4 ; <i32> [#uses=1]
- store i32 %tmp8, i32* %retval, align 4
- br label %return
-
-return: ; preds = %entry
- %retval9 = load i32, i32* %retval ; <i32> [#uses=1]
- %retval910 = trunc i32 %retval9 to i1 ; <i1> [#uses=1]
- ret i1 %retval910
-}
diff --git a/test/Transforms/ScalarRepl/2008-01-29-PromoteBug.ll b/test/Transforms/ScalarRepl/2008-01-29-PromoteBug.ll
deleted file mode 100644
index 99366b36442b..000000000000
--- a/test/Transforms/ScalarRepl/2008-01-29-PromoteBug.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: opt < %s -scalarrepl -instcombine -S | grep "ret i8 17"
-; rdar://5707076
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
-target triple = "i386-apple-darwin9.1.0"
- %struct.T = type <{ i8, [3 x i8] }>
-
-define i8 @f() {
-entry:
- %s = alloca [1 x %struct.T], align 4 ; <[1 x %struct.T]*> [#uses=2]
- %T3 = bitcast [1 x %struct.T]* %s to i32*
- store i32 -61184, i32* %T3
-
- %tmp16 = getelementptr [1 x %struct.T], [1 x %struct.T]* %s, i32 0, i32 0 ; <%struct.T*> [#uses=1]
- %tmp17 = getelementptr %struct.T, %struct.T* %tmp16, i32 0, i32 1 ; <[3 x i8]*> [#uses=1]
- %tmp1718 = bitcast [3 x i8]* %tmp17 to i32* ; <i32*> [#uses=1]
- %tmp19 = load i32, i32* %tmp1718, align 4 ; <i32> [#uses=1]
- %mask = and i32 %tmp19, 16777215 ; <i32> [#uses=2]
- %mask2324 = trunc i32 %mask to i8 ; <i8> [#uses=1]
- ret i8 %mask2324
-}
-
diff --git a/test/Transforms/ScalarRepl/2008-02-28-SubElementExtractCrash.ll b/test/Transforms/ScalarRepl/2008-02-28-SubElementExtractCrash.ll
deleted file mode 100644
index f37b6529a546..000000000000
--- a/test/Transforms/ScalarRepl/2008-02-28-SubElementExtractCrash.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | not grep alloca
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i686-apple-darwin8"
- %struct..0anon = type { <1 x i64> }
-
-define i32 @main(i32 %argc, i8** %argv) {
-entry:
- %c = alloca %struct..0anon ; <%struct..0anon*> [#uses=2]
- %tmp2 = getelementptr %struct..0anon, %struct..0anon* %c, i32 0, i32 0 ; <<1 x i64>*> [#uses=1]
- store <1 x i64> zeroinitializer, <1 x i64>* %tmp2, align 8
- %tmp7 = getelementptr %struct..0anon, %struct..0anon* %c, i32 0, i32 0 ; <<1 x i64>*> [#uses=1]
- %tmp78 = bitcast <1 x i64>* %tmp7 to [2 x i32]* ; <[2 x i32]*> [#uses=1]
- %tmp9 = getelementptr [2 x i32], [2 x i32]* %tmp78, i32 0, i32 0 ; <i32*> [#uses=1]
- %tmp10 = load i32, i32* %tmp9, align 4 ; <i32> [#uses=0]
- unreachable
-}
diff --git a/test/Transforms/ScalarRepl/2008-06-05-loadstore-agg.ll b/test/Transforms/ScalarRepl/2008-06-05-loadstore-agg.ll
deleted file mode 100644
index d1f331211745..000000000000
--- a/test/Transforms/ScalarRepl/2008-06-05-loadstore-agg.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; This test shows an alloca of a struct and an array that can be reduced to
-; multiple variables easily. However, the alloca is used by a store
-; instruction, which was not possible before aggregrates were first class
-; values. This checks of scalarrepl splits up the struct and array properly.
-
-; RUN: opt < %s -scalarrepl -S | not grep alloca
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-
-define i32 @foo() {
- %target = alloca { i32, i32 } ; <{ i32, i32 }*> [#uses=1]
- ; Build a first class struct to store
- %res1 = insertvalue { i32, i32 } undef, i32 1, 0 ; <{ i32, i32 }> [#uses=1]
- %res2 = insertvalue { i32, i32 } %res1, i32 2, 1 ; <{ i32, i32 }> [#uses=1]
- ; And store it
- store { i32, i32 } %res2, { i32, i32 }* %target
- ; Actually use %target, so it doesn't get removed altogether
- %ptr = getelementptr { i32, i32 }, { i32, i32 }* %target, i32 0, i32 0
- %val = load i32, i32* %ptr
- ret i32 %val
-}
-
-define i32 @bar() {
- %target = alloca [ 2 x i32 ] ; <{ i32, i32 }*> [#uses=1]
- ; Build a first class array to store
- %res1 = insertvalue [ 2 x i32 ] undef, i32 1, 0 ; <{ i32, i32 }> [#uses=1]
- %res2 = insertvalue [ 2 x i32 ] %res1, i32 2, 1 ; <{ i32, i32 }> [#uses=1]
- ; And store it
- store [ 2 x i32 ] %res2, [ 2 x i32 ]* %target
- ; Actually use %target, so it doesn't get removed altogether
- %ptr = getelementptr [ 2 x i32 ], [ 2 x i32 ]* %target, i32 0, i32 0
- %val = load i32, i32* %ptr
- ret i32 %val
-}
diff --git a/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll b/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
deleted file mode 100644
index f597613ef2b7..000000000000
--- a/test/Transforms/ScalarRepl/2008-06-22-LargeArray.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | grep "call.*mem"
-; PR2369
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i386-apple-darwin8"
-
-define void @memtest1(i8* %dst, i8* %src) nounwind {
-entry:
- %temp = alloca [200 x i8]
- %temp1 = bitcast [200 x i8]* %temp to i8*
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* %temp1, i8* %src, i32 200, i32 1, i1 false)
- %temp3 = bitcast [200 x i8]* %temp to i8*
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dst, i8* %temp3, i32 200, i32 1, i1 false)
- ret void
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll b/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll
deleted file mode 100644
index c0ff25f3541c..000000000000
--- a/test/Transforms/ScalarRepl/2008-08-22-out-of-range-array-promote.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | grep "s = alloca .struct.x"
-; PR2423
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i386-apple-darwin8"
-
-%struct.x = type { [1 x i32], i32, i32 }
-
-define i32 @b() nounwind {
-entry:
- %s = alloca %struct.x
- %r = alloca %struct.x
- %0 = call i32 @a(%struct.x* %s) nounwind
- %r1 = bitcast %struct.x* %r to i8*
- %s2 = bitcast %struct.x* %s to i8*
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* %r1, i8* %s2, i32 12, i32 8, i1 false)
- %1 = getelementptr %struct.x, %struct.x* %r, i32 0, i32 0, i32 1
- %2 = load i32, i32* %1, align 4
- ret i32 %2
-}
-
-declare i32 @a(%struct.x*)
-
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll b/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll
deleted file mode 100644
index 16d9108bb24c..000000000000
--- a/test/Transforms/ScalarRepl/2008-09-22-vector-gep.ll
+++ /dev/null
@@ -1,25 +0,0 @@
-; This test checks to see if scalarrepl also works when a gep with all zeroes is
-; used instead of a bitcast to prepare a memmove pointer argument. Previously,
-; this would not work when there was a vector involved in the struct, preventing
-; scalarrepl from removing the alloca below.
-
-; RUN: opt < %s -scalarrepl -S > %t
-; RUN: cat %t | not grep alloca
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-
-%struct.two = type <{ < 2 x i8 >, i16 }>
-
-define void @main(%struct.two* %D, i16 %V) {
-entry:
- %S = alloca %struct.two
- %S.2 = getelementptr %struct.two, %struct.two* %S, i32 0, i32 1
- store i16 %V, i16* %S.2
- ; This gep is effectively a bitcast to i8*, but is sometimes generated
- ; because the type of the first element in %struct.two is i8.
- %tmpS = getelementptr %struct.two, %struct.two* %S, i32 0, i32 0, i32 0
- %tmpD = bitcast %struct.two* %D to i8*
- call void @llvm.memmove.p0i8.p0i8.i32(i8* %tmpD, i8* %tmpS, i32 4, i32 1, i1 false)
- ret void
-}
-
-declare void @llvm.memmove.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/2009-02-02-ScalarPromoteOutOfRange.ll b/test/Transforms/ScalarRepl/2009-02-02-ScalarPromoteOutOfRange.ll
deleted file mode 100644
index f0af1caa4610..000000000000
--- a/test/Transforms/ScalarRepl/2009-02-02-ScalarPromoteOutOfRange.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; RUN: opt < %s -scalarrepl -instcombine -S | grep "ret i32 %x"
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32"
-target triple = "i386-pc-linux-gnu"
-
-%pair = type { [1 x i32], i32 }
-
-define i32 @f(i32 %x, i32 %y) {
- %instance = alloca %pair
- %first = getelementptr %pair, %pair* %instance, i32 0, i32 0
- %cast = bitcast [1 x i32]* %first to i32*
- store i32 %x, i32* %cast
- %second = getelementptr %pair, %pair* %instance, i32 0, i32 1
- store i32 %y, i32* %second
- %v = load i32, i32* %cast
- ret i32 %v
-}
diff --git a/test/Transforms/ScalarRepl/2009-02-05-LoadFCA.ll b/test/Transforms/ScalarRepl/2009-02-05-LoadFCA.ll
deleted file mode 100644
index 56375ffe7933..000000000000
--- a/test/Transforms/ScalarRepl/2009-02-05-LoadFCA.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: opt < %s -scalarrepl -instcombine -inline -instcombine -S | grep "ret i32 42"
-; PR3489
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "x86_64-apple-darwin10.0"
- %struct.anon = type <{ i32, i32, i32 }>
-
-define i32 @f({ i64, i64 }) nounwind {
-entry:
- %tmp = alloca { i64, i64 }, align 8 ; <{ i64, i64 }*> [#uses=2]
- store { i64, i64 } %0, { i64, i64 }* %tmp
- %1 = bitcast { i64, i64 }* %tmp to %struct.anon* ; <%struct.anon*> [#uses=1]
- %2 = load %struct.anon, %struct.anon* %1, align 8 ; <%struct.anon> [#uses=1]
- %tmp3 = extractvalue %struct.anon %2, 0
- ret i32 %tmp3
-}
-
-define i32 @g() {
- %a = call i32 @f({i64,i64} { i64 42, i64 1123123123123123 })
- ret i32 %a
-}
diff --git a/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll b/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll
deleted file mode 100644
index 025578c7f44d..000000000000
--- a/test/Transforms/ScalarRepl/2009-03-04-MemCpyAlign.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; The store into %p should end up with a known alignment of 1, since the memcpy
-; is only known to access it with 1-byte alignment.
-; RUN: opt < %s -scalarrepl -S | grep "store i16 1, .*, align 1"
-; PR3720
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-
- %struct.st = type { i16 }
-
-define void @f(i8* %p) nounwind {
-entry:
- %s = alloca %struct.st, align 4 ; <%struct.st*> [#uses=2]
- %0 = getelementptr %struct.st, %struct.st* %s, i32 0, i32 0 ; <i16*> [#uses=1]
- store i16 1, i16* %0, align 4
- %s1 = bitcast %struct.st* %s to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* %p, i8* %s1, i32 2, i32 1, i1 false)
- ret void
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll b/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll
deleted file mode 100644
index d1cc4244ccfc..000000000000
--- a/test/Transforms/ScalarRepl/2009-12-11-NeonTypes.ll
+++ /dev/null
@@ -1,90 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
-; Radar 7441282
-
-target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
-target triple = "thumbv7-apple-darwin10"
-
-%struct.__neon_int16x8x2_t = type { <8 x i16>, <8 x i16> }
-%struct.int16x8_t = type { <8 x i16> }
-%struct.int16x8x2_t = type { [2 x %struct.int16x8_t] }
-%union..0anon = type { %struct.int16x8x2_t }
-
-define void @test(<8 x i16> %tmp.0, %struct.int16x8x2_t* %dst) nounwind {
-; CHECK-LABEL: @test(
-; CHECK-NOT: alloca
-; CHECK: "alloca point"
-; CHECK: store <8 x i16>
-; CHECK: store <8 x i16>
-
-entry:
- %tmp_addr = alloca %struct.int16x8_t
- %dst_addr = alloca %struct.int16x8x2_t*
- %__rv = alloca %union..0anon
- %__bx = alloca %struct.int16x8_t
- %__ax = alloca %struct.int16x8_t
- %tmp2 = alloca %struct.int16x8x2_t
- %0 = alloca %struct.int16x8x2_t
- %"alloca point" = bitcast i32 0 to i32
- %1 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %tmp_addr, i32 0, i32 0
- store <8 x i16> %tmp.0, <8 x i16>* %1
- store %struct.int16x8x2_t* %dst, %struct.int16x8x2_t** %dst_addr
- %2 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %__ax, i32 0, i32 0
- %3 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %tmp_addr, i32 0, i32 0
- %4 = load <8 x i16>, <8 x i16>* %3, align 16
- store <8 x i16> %4, <8 x i16>* %2, align 16
- %5 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %__bx, i32 0, i32 0
- %6 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %tmp_addr, i32 0, i32 0
- %7 = load <8 x i16>, <8 x i16>* %6, align 16
- store <8 x i16> %7, <8 x i16>* %5, align 16
- %8 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %__ax, i32 0, i32 0
- %9 = load <8 x i16>, <8 x i16>* %8, align 16
- %10 = getelementptr inbounds %struct.int16x8_t, %struct.int16x8_t* %__bx, i32 0, i32 0
- %11 = load <8 x i16>, <8 x i16>* %10, align 16
- %12 = getelementptr inbounds %union..0anon, %union..0anon* %__rv, i32 0, i32 0
- %13 = bitcast %struct.int16x8x2_t* %12 to %struct.__neon_int16x8x2_t*
- %14 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
- %15 = getelementptr inbounds %struct.__neon_int16x8x2_t, %struct.__neon_int16x8x2_t* %13, i32 0, i32 0
- store <8 x i16> %14, <8 x i16>* %15
- %16 = shufflevector <8 x i16> %9, <8 x i16> %11, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
- %17 = getelementptr inbounds %struct.__neon_int16x8x2_t, %struct.__neon_int16x8x2_t* %13, i32 0, i32 1
- store <8 x i16> %16, <8 x i16>* %17
- %18 = getelementptr inbounds %union..0anon, %union..0anon* %__rv, i32 0, i32 0
- %19 = bitcast %struct.int16x8x2_t* %0 to i8*
- %20 = bitcast %struct.int16x8x2_t* %18 to i8*
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* %19, i8* %20, i32 32, i32 16, i1 false)
- %tmp21 = bitcast %struct.int16x8x2_t* %tmp2 to i8*
- %21 = bitcast %struct.int16x8x2_t* %0 to i8*
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp21, i8* %21, i32 32, i32 16, i1 false)
- %22 = load %struct.int16x8x2_t*, %struct.int16x8x2_t** %dst_addr, align 4
- %23 = bitcast %struct.int16x8x2_t* %22 to i8*
- %tmp22 = bitcast %struct.int16x8x2_t* %tmp2 to i8*
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* %23, i8* %tmp22, i32 32, i32 16, i1 false)
- br label %return
-
-return: ; preds = %entry
- ret void
-}
-
-; Radar 7466574
-%struct._NSRange = type { i64 }
-
-define void @test_memcpy_self() nounwind {
-entry:
- %range = alloca %struct._NSRange
- br i1 undef, label %cond.true, label %cond.false
-
-cond.true: ; preds = %entry
- %tmp3 = bitcast %struct._NSRange* %range to i8*
- %tmp4 = bitcast %struct._NSRange* %range to i8*
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp3, i8* %tmp4, i32 8, i32 8, i1 false)
- ret void
-
-cond.false: ; preds = %entry
- ret void
-
-; CHECK-LABEL: @test_memcpy_self(
-; CHECK-NOT: alloca
-; CHECK: br i1
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll b/test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll
deleted file mode 100644
index b926b021caf1..000000000000
--- a/test/Transforms/ScalarRepl/2010-01-18-SelfCopy.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
-; Radar 7552893
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
-
-%struct.test = type { [3 x double] }
-
-define void @test_memcpy_self() nounwind {
-; CHECK-LABEL: @test_memcpy_self(
-; CHECK-NOT: alloca
-; CHECK: ret void
- %1 = alloca %struct.test
- %2 = bitcast %struct.test* %1 to i8*
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* %2, i8* %2, i32 24, i32 4, i1 false)
- ret void
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/2011-05-06-CapturedAlloca.ll b/test/Transforms/ScalarRepl/2011-05-06-CapturedAlloca.ll
deleted file mode 100644
index 997d03b059e9..000000000000
--- a/test/Transforms/ScalarRepl/2011-05-06-CapturedAlloca.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; RUN: opt < %s -instcombine -S | FileCheck %s
-; PR9820
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-unknown-linux-gnu"
-
-@func_1.l_10 = internal unnamed_addr constant [4 x i32] [i32 1, i32 0, i32 0, i32 0], align 16
-
-define i32* @noop(i32* %p_29) nounwind readnone {
-entry:
- ret i32* %p_29
-}
-
-define i32 @main() nounwind {
-entry:
- %l_10 = alloca [4 x i32], align 16
- %tmp = bitcast [4 x i32]* %l_10 to i8*
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* bitcast ([4 x i32]* @func_1.l_10 to i8*), i64 16, i32 16, i1 false)
-; CHECK: call void @llvm.memcpy
- %arrayidx = getelementptr inbounds [4 x i32], [4 x i32]* %l_10, i64 0, i64 0
- %call = call i32* @noop(i32* %arrayidx)
- store i32 0, i32* %call
- ret i32 0
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll b/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll
deleted file mode 100644
index dee27f8e3068..000000000000
--- a/test/Transforms/ScalarRepl/2011-06-08-VectorExtractValue.ll
+++ /dev/null
@@ -1,75 +0,0 @@
-; RUN: opt < %s -S -scalarrepl | FileCheck %s
-; RUN: opt < %s -S -scalarrepl-ssa | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-macosx10.7.0"
-
-%0 = type { <2 x float>, float }
-%struct.PointC3 = type { %struct.array }
-%struct.Point_3 = type { %struct.PointC3 }
-%struct.array = type { [3 x float], [4 x i8] }
-
-; CHECK: main
-; CHECK-NOT: alloca
-; CHECK: extractelement <2 x float> zeroinitializer, i32 0
-
-define void @main() uwtable ssp {
-entry:
- %ref.tmp2 = alloca %0, align 16
- %tmpcast = bitcast %0* %ref.tmp2 to %struct.Point_3*
- %0 = getelementptr %0, %0* %ref.tmp2, i64 0, i32 0
- store <2 x float> zeroinitializer, <2 x float>* %0, align 16
- %1 = getelementptr inbounds %struct.Point_3, %struct.Point_3* %tmpcast, i64 0, i32 0
- %base.i.i.i = getelementptr inbounds %struct.PointC3, %struct.PointC3* %1, i64 0, i32 0
- %arrayidx.i.i.i.i = getelementptr inbounds %struct.array, %struct.array* %base.i.i.i, i64 0, i32 0, i64 0
- %tmp5.i.i = load float, float* %arrayidx.i.i.i.i, align 4
- ret void
-}
-
-; CHECK: test1
-; CHECK-NOT: alloca
-; CHECK: extractelement <2 x float> zeroinitializer, i32 0
-
-define void @test1() uwtable ssp {
-entry:
- %ref.tmp2 = alloca {<2 x float>, float}, align 16
- %tmpcast = bitcast {<2 x float>, float}* %ref.tmp2 to float*
- %0 = getelementptr {<2 x float>, float}, {<2 x float>, float}* %ref.tmp2, i64 0, i32 0
- store <2 x float> zeroinitializer, <2 x float>* %0, align 16
- %tmp5.i.i = load float, float* %tmpcast, align 4
- ret void
-}
-
-; CHECK: test2
-; CHECK-NOT: alloca
-; CHECK: %[[A:[a-z0-9]*]] = extractelement <2 x float> zeroinitializer, i32 0
-; CHECK: fadd float %[[A]], 1.000000e+00
-; CHECK-NOT: insertelement
-; CHECK-NOT: extractelement
-
-define float @test2() uwtable ssp {
-entry:
- %ref.tmp2 = alloca {<2 x float>, float}, align 16
- %tmpcast = bitcast {<2 x float>, float}* %ref.tmp2 to float*
- %tmpcast2 = getelementptr {<2 x float>, float}, {<2 x float>, float}* %ref.tmp2, i64 0, i32 1
- %0 = getelementptr {<2 x float>, float}, {<2 x float>, float}* %ref.tmp2, i64 0, i32 0
- store <2 x float> zeroinitializer, <2 x float>* %0, align 16
- store float 1.0, float* %tmpcast2, align 4
- %r1 = load float, float* %tmpcast, align 4
- %r2 = load float, float* %tmpcast2, align 4
- %r = fadd float %r1, %r2
- ret float %r
-}
-
-; CHECK: test3
-; CHECK: %[[A:[a-z0-9]*]] = extractelement <2 x float> <float 2.000000e+00, float 3.000000e+00>, i32 1
-; CHECK: ret float %[[A]]
-
-define float @test3() {
-entry:
- %ai = alloca { <2 x float>, <2 x float> }, align 8
- store { <2 x float>, <2 x float> } {<2 x float> <float 0.0, float 1.0>, <2 x float> <float 2.0, float 3.0>}, { <2 x float>, <2 x float> }* %ai, align 8
- %tmpcast = bitcast { <2 x float>, <2 x float> }* %ai to [4 x float]*
- %arrayidx = getelementptr inbounds [4 x float], [4 x float]* %tmpcast, i64 0, i64 3
- %f = load float, float* %arrayidx, align 4
- ret float %f
-}
diff --git a/test/Transforms/ScalarRepl/2011-06-17-VectorPartialMemset.ll b/test/Transforms/ScalarRepl/2011-06-17-VectorPartialMemset.ll
deleted file mode 100644
index af6d1f36faea..000000000000
--- a/test/Transforms/ScalarRepl/2011-06-17-VectorPartialMemset.ll
+++ /dev/null
@@ -1,37 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:32:64-v128:32:128-a0:0:32-n32"
-target triple = "thumbv7-apple-darwin10"
-
-; CHECK: f
-; CHECK-NOT: alloca
-; CHECK: %[[A:[a-z0-9]*]] = and i128 undef, -16777216
-; CHECK: %[[B:[a-z0-9]*]] = bitcast i128 %[[A]] to <4 x float>
-; CHECK: %[[C:[a-z0-9]*]] = extractelement <4 x float> %[[B]], i32 0
-; CHECK: ret float %[[C]]
-
-define float @f() nounwind ssp {
-entry:
- %a = alloca <4 x float>, align 16
- %p = bitcast <4 x float>* %a to i8*
- call void @llvm.memset.p0i8.i32(i8* %p, i8 0, i32 3, i32 16, i1 false)
- %vec = load <4 x float>, <4 x float>* %a, align 8
- %val = extractelement <4 x float> %vec, i32 0
- ret float %val
-}
-
-; CHECK: g
-; CHECK-NOT: alloca
-; CHECK: and i128
-
-define void @g() nounwind ssp {
-entry:
- %a = alloca { <4 x float> }, align 16
- %p = bitcast { <4 x float> }* %a to i8*
- call void @llvm.memset.p0i8.i32(i8* %p, i8 0, i32 16, i32 16, i1 false)
- %q = bitcast { <4 x float> }* %a to [2 x <2 x float>]*
- %arrayidx = getelementptr inbounds [2 x <2 x float>], [2 x <2 x float>]* %q, i32 0, i32 0
- store <2 x float> undef, <2 x float>* %arrayidx, align 8
- ret void
-}
-
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/2011-09-22-PHISpeculateInvoke.ll b/test/Transforms/ScalarRepl/2011-09-22-PHISpeculateInvoke.ll
deleted file mode 100644
index bff6566d1781..000000000000
--- a/test/Transforms/ScalarRepl/2011-09-22-PHISpeculateInvoke.ll
+++ /dev/null
@@ -1,40 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
-; PR10987
-
-; Make sure scalarrepl doesn't move a load across an invoke which could
-; modify the loaded value.
-; (The PHI could theoretically be transformed by splitting the critical
-; edge, but scalarrepl doesn't modify the CFG, at least at the moment.)
-
-declare void @extern_fn(i32*)
-declare i32 @extern_fn2(i32)
-declare i32 @__gcc_personality_v0(i32, i64, i8*, i8*)
-
-define void @odd_fn(i1) noinline personality i32 (i32, i64, i8*, i8*)* @__gcc_personality_v0 {
- %retptr1 = alloca i32
- %retptr2 = alloca i32
- br i1 %0, label %then, label %else
-
-then: ; preds = %2
- invoke void @extern_fn(i32* %retptr1)
- to label %join unwind label %unwind
-
-else: ; preds = %2
- store i32 3, i32* %retptr2
- br label %join
-
-join: ; preds = %then, %else
- %storemerge.in = phi i32* [ %retptr2, %else ], [ %retptr1, %then ]
- %storemerge = load i32, i32* %storemerge.in
- %x3 = call i32 @extern_fn2(i32 %storemerge)
- ret void
-
-unwind: ; preds = %then
- %info = landingpad { i8*, i32 }
- cleanup
- call void @extern_fn(i32* null)
- unreachable
-}
-
-; CHECK-LABEL: define void @odd_fn(
-; CHECK: %storemerge.in = phi i32* [ %retptr2, %else ], [ %retptr1, %then ]
diff --git a/test/Transforms/ScalarRepl/2011-10-11-VectorMemset.ll b/test/Transforms/ScalarRepl/2011-10-11-VectorMemset.ll
deleted file mode 100644
index 9e3123149297..000000000000
--- a/test/Transforms/ScalarRepl/2011-10-11-VectorMemset.ll
+++ /dev/null
@@ -1,22 +0,0 @@
-; RUN: opt < %s -S -scalarrepl | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-darwin11.0.1"
-
-; CHECK: test
-; CHECK-NOT: alloca
-
-define void @test() nounwind {
-entry:
- %a156286 = alloca [4 x <4 x float>], align 16
- br i1 undef, label %cif_done, label %for_test158.preheader
-
-for_test158.preheader: ; preds = %entry
- %a156286305 = bitcast [4 x <4 x float>]* %a156286 to i8*
- call void @llvm.memset.p0i8.i64(i8* %a156286305, i8 -1, i64 64, i32 16, i1 false)
- unreachable
-
-cif_done: ; preds = %entry
- ret void
-}
-
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/2011-10-22-VectorCrash.ll b/test/Transforms/ScalarRepl/2011-10-22-VectorCrash.ll
deleted file mode 100644
index c9c1a148a484..000000000000
--- a/test/Transforms/ScalarRepl/2011-10-22-VectorCrash.ll
+++ /dev/null
@@ -1,19 +0,0 @@
-; RUN: opt < %s -S -scalarrepl | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:32:64-v128:32:128-a0:0:32-n32-S32"
-target triple = "thumbv7-apple-ios5.0.0"
-
-%union.anon = type { <4 x float> }
-
-; CHECK-LABEL: @test(
-; CHECK-NOT: alloca
-
-define void @test() nounwind {
-entry:
- %u = alloca %union.anon, align 16
- %u164 = bitcast %union.anon* %u to [4 x i32]*
- %arrayidx165 = getelementptr inbounds [4 x i32], [4 x i32]* %u164, i32 0, i32 0
- store i32 undef, i32* %arrayidx165, align 4
- %v186 = bitcast %union.anon* %u to <4 x float>*
- store <4 x float> undef, <4 x float>* %v186, align 16
- ret void
-}
diff --git a/test/Transforms/ScalarRepl/2011-11-11-EmptyStruct.ll b/test/Transforms/ScalarRepl/2011-11-11-EmptyStruct.ll
deleted file mode 100644
index 51d1d146a90b..000000000000
--- a/test/Transforms/ScalarRepl/2011-11-11-EmptyStruct.ll
+++ /dev/null
@@ -1,26 +0,0 @@
-; RUN: opt < %s -S -scalarrepl | FileCheck %s
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-
-%struct.S = type { [2 x %struct.anon], double }
-%struct.anon = type {}
-
-; CHECK: @test()
-; CHECK-NOT: alloca
-; CHECK: ret double 1.0
-
-define double @test() nounwind uwtable ssp {
-entry:
- %retval = alloca %struct.S, align 8
- %ret = alloca %struct.S, align 8
- %b = getelementptr inbounds %struct.S, %struct.S* %ret, i32 0, i32 1
- store double 1.000000e+00, double* %b, align 8
- %0 = bitcast %struct.S* %retval to i8*
- %1 = bitcast %struct.S* %ret to i8*
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* %1, i64 8, i32 8, i1 false)
- %2 = bitcast %struct.S* %retval to double*
- %3 = load double, double* %2, align 1
- ret double %3
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/AggregatePromote.ll b/test/Transforms/ScalarRepl/AggregatePromote.ll
deleted file mode 100644
index f6dfdf55346a..000000000000
--- a/test/Transforms/ScalarRepl/AggregatePromote.ll
+++ /dev/null
@@ -1,51 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | \
-; RUN: not grep alloca
-
-target datalayout = "E-p:32:32"
-target triple = "powerpc-apple-darwin8.0.0"
-
-define i64 @test1(i64 %X) {
- %A = alloca i64 ; <i64*> [#uses=3]
- store i64 %X, i64* %A
- %B = bitcast i64* %A to i32* ; <i32*> [#uses=1]
- %C = bitcast i32* %B to i8* ; <i8*> [#uses=1]
- store i8 0, i8* %C
- %Y = load i64, i64* %A ; <i64> [#uses=1]
- ret i64 %Y
-}
-
-define i8 @test2(i64 %X) {
- %X_addr = alloca i64 ; <i64*> [#uses=2]
- store i64 %X, i64* %X_addr
- %tmp.0 = bitcast i64* %X_addr to i32* ; <i32*> [#uses=1]
- %tmp.1 = getelementptr i32, i32* %tmp.0, i32 1 ; <i32*> [#uses=1]
- %tmp.2 = bitcast i32* %tmp.1 to i8* ; <i8*> [#uses=1]
- %tmp.3 = getelementptr i8, i8* %tmp.2, i32 3 ; <i8*> [#uses=1]
- %tmp.2.upgrd.1 = load i8, i8* %tmp.3 ; <i8> [#uses=1]
- ret i8 %tmp.2.upgrd.1
-}
-
-define i16 @crafty(i64 %X) {
- %a = alloca { i64 } ; <{ i64 }*> [#uses=2]
- %tmp.0 = getelementptr { i64 }, { i64 }* %a, i32 0, i32 0 ; <i64*> [#uses=1]
- store i64 %X, i64* %tmp.0
- %tmp.3 = bitcast { i64 }* %a to [4 x i16]* ; <[4 x i16]*> [#uses=2]
- %tmp.4 = getelementptr [4 x i16], [4 x i16]* %tmp.3, i32 0, i32 3 ; <i16*> [#uses=1]
- %tmp.5 = load i16, i16* %tmp.4 ; <i16> [#uses=1]
- %tmp.8 = getelementptr [4 x i16], [4 x i16]* %tmp.3, i32 0, i32 2 ; <i16*> [#uses=1]
- %tmp.9 = load i16, i16* %tmp.8 ; <i16> [#uses=1]
- %tmp.10 = or i16 %tmp.9, %tmp.5 ; <i16> [#uses=1]
- ret i16 %tmp.10
-}
-
-define i16 @crafty2(i64 %X) {
- %a = alloca i64 ; <i64*> [#uses=2]
- store i64 %X, i64* %a
- %tmp.3 = bitcast i64* %a to [4 x i16]* ; <[4 x i16]*> [#uses=2]
- %tmp.4 = getelementptr [4 x i16], [4 x i16]* %tmp.3, i32 0, i32 3 ; <i16*> [#uses=1]
- %tmp.5 = load i16, i16* %tmp.4 ; <i16> [#uses=1]
- %tmp.8 = getelementptr [4 x i16], [4 x i16]* %tmp.3, i32 0, i32 2 ; <i16*> [#uses=1]
- %tmp.9 = load i16, i16* %tmp.8 ; <i16> [#uses=1]
- %tmp.10 = or i16 %tmp.9, %tmp.5 ; <i16> [#uses=1]
- ret i16 %tmp.10
-}
diff --git a/test/Transforms/ScalarRepl/DifferingTypes.ll b/test/Transforms/ScalarRepl/DifferingTypes.ll
deleted file mode 100644
index 3860f6cd757f..000000000000
--- a/test/Transforms/ScalarRepl/DifferingTypes.ll
+++ /dev/null
@@ -1,16 +0,0 @@
-; This is a feature test. Hopefully one day this will be implemented. The
-; generated code should perform the appropriate masking operations required
-; depending on the endianness of the target...
-; RUN: opt < %s -scalarrepl -S | \
-; RUN: not grep alloca
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-
-define i32 @testfunc(i32 %i, i8 %j) {
- %I = alloca i32 ; <i32*> [#uses=3]
- store i32 %i, i32* %I
- %P = bitcast i32* %I to i8* ; <i8*> [#uses=1]
- store i8 %j, i8* %P
- %t = load i32, i32* %I ; <i32> [#uses=1]
- ret i32 %t
-}
-
diff --git a/test/Transforms/ScalarRepl/address-space.ll b/test/Transforms/ScalarRepl/address-space.ll
deleted file mode 100644
index b8b90efefc3b..000000000000
--- a/test/Transforms/ScalarRepl/address-space.ll
+++ /dev/null
@@ -1,35 +0,0 @@
-; RUN: opt -S -scalarrepl < %s | FileCheck %s
-; PR7437 - Make sure SROA preserves address space of memcpy when
-; hacking on it.
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-darwin10"
-
-%struct.anon = type { [1 x float] }
-
-; CHECK-LABEL: define void @Test(
-; CHECK: load float, float addrspace(2)*
-; CHECK-NEXT: fsub float
-; CHECK: store float {{.*}}, float addrspace(2)*
-define void @Test(%struct.anon addrspace(2)* %pPtr) nounwind {
-entry:
- %s = alloca %struct.anon, align 4 ; <%struct.anon*> [#uses=3]
- %arrayidx = getelementptr inbounds %struct.anon, %struct.anon addrspace(2)* %pPtr, i64 0 ; <%struct.anon addrspace(2)*> [#uses=1]
- %tmp1 = bitcast %struct.anon* %s to i8* ; <i8*> [#uses=1]
- %tmp2 = bitcast %struct.anon addrspace(2)* %arrayidx to i8 addrspace(2)* ; <i8 addrspace(2)*> [#uses=1]
- call void @llvm.memcpy.p0i8.p2i8.i64(i8* %tmp1, i8 addrspace(2)* %tmp2, i64 4, i32 4, i1 false)
- %tmp3 = getelementptr inbounds %struct.anon, %struct.anon* %s, i32 0, i32 0 ; <[1 x float]*> [#uses=1]
- %arrayidx4 = getelementptr inbounds [1 x float], [1 x float]* %tmp3, i32 0, i64 0 ; <float*> [#uses=2]
- %tmp5 = load float, float* %arrayidx4 ; <float> [#uses=1]
- %sub = fsub float %tmp5, 5.000000e+00 ; <float> [#uses=1]
- store float %sub, float* %arrayidx4
- %arrayidx7 = getelementptr inbounds %struct.anon, %struct.anon addrspace(2)* %pPtr, i64 0 ; <%struct.anon addrspace(2)*> [#uses=1]
- %tmp8 = bitcast %struct.anon addrspace(2)* %arrayidx7 to i8 addrspace(2)* ; <i8 addrspace(2)*> [#uses=1]
- %tmp9 = bitcast %struct.anon* %s to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.p2i8.p0i8.i64(i8 addrspace(2)* %tmp8, i8* %tmp9, i64 4, i32 4, i1 false)
- ret void
-}
-
-declare void @llvm.memcpy.p0i8.p2i8.i64(i8* nocapture, i8 addrspace(2)* nocapture, i64, i32, i1) nounwind
-
-declare void @llvm.memcpy.p2i8.p0i8.i64(i8 addrspace(2)* nocapture, i8* nocapture, i64, i32, i1) nounwind
-
diff --git a/test/Transforms/ScalarRepl/arraytest.ll b/test/Transforms/ScalarRepl/arraytest.ll
deleted file mode 100644
index 486e725fa6a5..000000000000
--- a/test/Transforms/ScalarRepl/arraytest.ll
+++ /dev/null
@@ -1,11 +0,0 @@
-; RUN: opt < %s -scalarrepl -mem2reg -S | not grep alloca
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-
-define i32 @test() {
- %X = alloca [4 x i32] ; <[4 x i32]*> [#uses=1]
- %Y = getelementptr [4 x i32], [4 x i32]* %X, i64 0, i64 0 ; <i32*> [#uses=2]
- store i32 0, i32* %Y
- %Z = load i32, i32* %Y ; <i32> [#uses=1]
- ret i32 %Z
-}
-
diff --git a/test/Transforms/ScalarRepl/badarray.ll b/test/Transforms/ScalarRepl/badarray.ll
deleted file mode 100644
index 6f5bc95b43ba..000000000000
--- a/test/Transforms/ScalarRepl/badarray.ll
+++ /dev/null
@@ -1,57 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
-target triple = "i386-pc-linux-gnu"
-
-
-; PR3466
-; Off end of array, don't transform.
-define i32 @test1() {
-; CHECK-LABEL: @test1(
-; CHECK-NOT: = alloca
- %X = alloca [4 x i32]
- %Y = getelementptr [4 x i32], [4 x i32]* %X, i64 0, i64 6 ; <i32*> [#uses=2]
- store i32 0, i32* %Y
- %Z = load i32, i32* %Y ; <i32> [#uses=1]
- ret i32 %Z
-}
-
-
-; Off end of array, don't transform.
-define i32 @test2() nounwind {
-entry:
-; CHECK-LABEL: @test2(
-; CHECK-NOT: = alloca
- %yx2.i = alloca float, align 4 ; <float*> [#uses=1]
- %yx26.i = bitcast float* %yx2.i to i64* ; <i64*> [#uses=1]
- %0 = load i64, i64* %yx26.i, align 8 ; <i64> [#uses=0]
- unreachable
-}
-
-%base = type { i32, [0 x i8] }
-%padded = type { %base, [1 x i32] }
-
-; PR5436
-define void @test3() {
-entry:
-; CHECK-LABEL: @test3(
-; CHECK-NOT: = alloca
-; CHECK: store i64
- %var_1 = alloca %padded, align 8 ; <%padded*> [#uses=3]
- %0 = getelementptr inbounds %padded, %padded* %var_1, i32 0, i32 0 ; <%base*> [#uses=2]
-
- %p2 = getelementptr inbounds %base, %base* %0, i32 0, i32 1, i32 0 ; <i8*> [#uses=1]
- store i8 72, i8* %p2, align 1
-
- ; 72 -> a[0].
-
- %callret = call %padded *@test3f() ; <i32> [#uses=2]
- %callretcast = bitcast %padded* %callret to i8* ; <i8*> [#uses=1]
- %var_11 = bitcast %padded* %var_1 to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* %callretcast, i8* %var_11, i32 8, i32 4, i1 false)
- ret void
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
-
-declare %padded* @test3f()
diff --git a/test/Transforms/ScalarRepl/basictest.ll b/test/Transforms/ScalarRepl/basictest.ll
deleted file mode 100644
index 35d4d3ba86fe..000000000000
--- a/test/Transforms/ScalarRepl/basictest.ll
+++ /dev/null
@@ -1,30 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-
-define i32 @test1() {
- %X = alloca { i32, float } ; <{ i32, float }*> [#uses=1]
- %Y = getelementptr { i32, float }, { i32, float }* %X, i64 0, i32 0 ; <i32*> [#uses=2]
- store i32 0, i32* %Y
- %Z = load i32, i32* %Y ; <i32> [#uses=1]
- ret i32 %Z
-; CHECK-LABEL: @test1(
-; CHECK-NOT: alloca
-; CHECK: ret i32 0
-}
-
-; PR8980
-define i64 @test2(i64 %X) {
- %A = alloca [8 x i8]
- %B = bitcast [8 x i8]* %A to i64*
-
- store i64 %X, i64* %B
- br label %L2
-
-L2:
- %Z = load i64, i64* %B ; <i32> [#uses=1]
- ret i64 %Z
-; CHECK-LABEL: @test2(
-; CHECK-NOT: alloca
-; CHECK: ret i64 %X
-}
-
diff --git a/test/Transforms/ScalarRepl/bitfield-sroa.ll b/test/Transforms/ScalarRepl/bitfield-sroa.ll
deleted file mode 100644
index 52986b0a49d7..000000000000
--- a/test/Transforms/ScalarRepl/bitfield-sroa.ll
+++ /dev/null
@@ -1,17 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | not grep alloca
-; rdar://6532315
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-%t = type { { i32, i16, i8, i8 } }
-
-define i8 @foo(i64 %A) {
- %ALL = alloca %t, align 8
- %tmp59172 = bitcast %t* %ALL to i64*
- store i64 %A, i64* %tmp59172, align 8
- %C = getelementptr %t, %t* %ALL, i32 0, i32 0, i32 1
- %D = bitcast i16* %C to i32*
- %E = load i32, i32* %D, align 4
- %F = bitcast %t* %ALL to i8*
- %G = load i8, i8* %F, align 8
- ret i8 %G
-}
-
diff --git a/test/Transforms/ScalarRepl/copy-aggregate.ll b/test/Transforms/ScalarRepl/copy-aggregate.ll
deleted file mode 100644
index 97977dbf11f1..000000000000
--- a/test/Transforms/ScalarRepl/copy-aggregate.ll
+++ /dev/null
@@ -1,107 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
-; PR3290
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-
-;; Store of integer to whole alloca struct.
-define i32 @test1(i64 %V) nounwind {
-; CHECK: test1
-; CHECK-NOT: alloca
- %X = alloca {{i32, i32}}
- %Y = bitcast {{i32,i32}}* %X to i64*
- store i64 %V, i64* %Y
-
- %A = getelementptr {{i32,i32}}, {{i32,i32}}* %X, i32 0, i32 0, i32 0
- %B = getelementptr {{i32,i32}}, {{i32,i32}}* %X, i32 0, i32 0, i32 1
- %a = load i32, i32* %A
- %b = load i32, i32* %B
- %c = add i32 %a, %b
- ret i32 %c
-}
-
-;; Store of integer to whole struct/array alloca.
-define float @test2(i128 %V) nounwind {
-; CHECK: test2
-; CHECK-NOT: alloca
- %X = alloca {[4 x float]}
- %Y = bitcast {[4 x float]}* %X to i128*
- store i128 %V, i128* %Y
-
- %A = getelementptr {[4 x float]}, {[4 x float]}* %X, i32 0, i32 0, i32 0
- %B = getelementptr {[4 x float]}, {[4 x float]}* %X, i32 0, i32 0, i32 3
- %a = load float, float* %A
- %b = load float, float* %B
- %c = fadd float %a, %b
- ret float %c
-}
-
-;; Load of whole alloca struct as integer
-define i64 @test3(i32 %a, i32 %b) nounwind {
-; CHECK: test3
-; CHECK-NOT: alloca
- %X = alloca {{i32, i32}}
-
- %A = getelementptr {{i32,i32}}, {{i32,i32}}* %X, i32 0, i32 0, i32 0
- %B = getelementptr {{i32,i32}}, {{i32,i32}}* %X, i32 0, i32 0, i32 1
- store i32 %a, i32* %A
- store i32 %b, i32* %B
-
- %Y = bitcast {{i32,i32}}* %X to i64*
- %Z = load i64, i64* %Y
- ret i64 %Z
-}
-
-;; load of integer from whole struct/array alloca.
-define i128 @test4(float %a, float %b) nounwind {
-; CHECK: test4
-; CHECK-NOT: alloca
- %X = alloca {[4 x float]}
- %A = getelementptr {[4 x float]}, {[4 x float]}* %X, i32 0, i32 0, i32 0
- %B = getelementptr {[4 x float]}, {[4 x float]}* %X, i32 0, i32 0, i32 3
- store float %a, float* %A
- store float %b, float* %B
-
- %Y = bitcast {[4 x float]}* %X to i128*
- %V = load i128, i128* %Y
- ret i128 %V
-}
-
-;; If the elements of a struct or array alloca contain padding, SROA can still
-;; split up the alloca as long as there is no padding between the elements.
-%padded = type { i16, i8 }
-define void @test5([4 x %padded]* %p, [4 x %padded]* %q) {
-entry:
-; CHECK: test5
-; CHECK-NOT: i128
- %var = alloca [4 x %padded], align 4
- %vari8 = bitcast [4 x %padded]* %var to i8*
- %pi8 = bitcast [4 x %padded]* %p to i8*
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* %vari8, i8* %pi8, i32 16, i32 4, i1 false)
- %qi8 = bitcast [4 x %padded]* %q to i8*
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* %qi8, i8* %vari8, i32 16, i32 4, i1 false)
- ret void
-}
-
-;; Check that an array alloca can be split up when it is also accessed with
-;; a load or store as a homogeneous structure with the same element type and
-;; number of elements as the array.
-%homogeneous = type { <8 x i16>, <8 x i16>, <8 x i16> }
-%wrapped_array = type { [3 x <8 x i16>] }
-define void @test6(i8* %p, %wrapped_array* %arr) {
-entry:
-; CHECK: test6
-; CHECK: store <8 x i16>
-; CHECK: store <8 x i16>
-; CHECK: store <8 x i16>
- %var = alloca %wrapped_array, align 16
- %res = call %homogeneous @test6callee(i8* %p)
- %varcast = bitcast %wrapped_array* %var to %homogeneous*
- store %homogeneous %res, %homogeneous* %varcast
- %tmp1 = bitcast %wrapped_array* %arr to i8*
- %tmp2 = bitcast %wrapped_array* %var to i8*
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp1, i8* %tmp2, i32 48, i32 16, i1 false)
- ret void
-}
-
-declare %homogeneous @test6callee(i8* nocapture) nounwind
-
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/crash.ll b/test/Transforms/ScalarRepl/crash.ll
deleted file mode 100644
index 72e9f090fb68..000000000000
--- a/test/Transforms/ScalarRepl/crash.ll
+++ /dev/null
@@ -1,286 +0,0 @@
-; RUN: opt -scalarrepl -disable-output < %s
-; RUN: opt -scalarrepl-ssa -disable-output < %s
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-darwin10.0.0"
-
-; PR9017
-define void @test1() nounwind readnone ssp {
-entry:
- %l_72 = alloca i32*, align 8
- unreachable
-
-for.cond: ; preds = %for.cond
- %tmp1.i = load i32*, i32** %l_72, align 8
- store i32* %tmp1.i, i32** %l_72, align 8
- br label %for.cond
-
-if.end: ; No predecessors!
- ret void
-}
-
-
-define void @test2() {
- %E = alloca { { i32, float, double, i64 }, { i32, float, double, i64 } } ; <{ { i32, float, double, i64 }, { i32, float, double, i64 } }*> [#uses=1]
- %tmp.151 = getelementptr { { i32, float, double, i64 }, { i32, float, double, i64 } }, { { i32, float, double, i64 }, { i32, float, double, i64 } }* %E, i64 0, i32 1, i32 3 ; <i64*> [#uses=0]
- ret void
-}
-
-define i32 @test3() {
- %X = alloca { [4 x i32] } ; <{ [4 x i32] }*> [#uses=1]
- %Y = getelementptr { [4 x i32] }, { [4 x i32] }* %X, i64 0, i32 0, i64 2 ; <i32*> [#uses=2]
- store i32 4, i32* %Y
- %Z = load i32, i32* %Y ; <i32> [#uses=1]
- ret i32 %Z
-}
-
-
-%struct.rtx_def = type { [2 x i8], i32, [1 x %union.rtunion_def] }
-%union.rtunion_def = type { i32 }
-
-define void @test4() {
-entry:
- %c_addr.i = alloca i8 ; <i8*> [#uses=1]
- switch i32 0, label %return [
- i32 36, label %label.7
- i32 34, label %label.7
- i32 41, label %label.5
- ]
-label.5: ; preds = %entry
- ret void
-label.7: ; preds = %entry, %entry
- br i1 false, label %then.4, label %switchexit.0
-then.4: ; preds = %label.7
- %tmp.0.i = bitcast i8* %c_addr.i to i32* ; <i32*> [#uses=1]
- store i32 44, i32* %tmp.0.i
- ret void
-switchexit.0: ; preds = %label.7
- ret void
-return: ; preds = %entry
- ret void
-}
-
-
-define void @test5() {
-entry:
- %source_ptr = alloca i8*, align 4 ; <i8**> [#uses=2]
- br i1 false, label %bb1357, label %cond_next583
-cond_next583: ; preds = %entry
- ret void
-bb1357: ; preds = %entry
- br i1 false, label %bb1365, label %bb27055
-bb1365: ; preds = %bb1357
- switch i32 0, label %cond_next10377 [
- i32 0, label %bb4679
- i32 1, label %bb4679
- i32 2, label %bb4679
- i32 3, label %bb4679
- i32 4, label %bb5115
- i32 5, label %bb6651
- i32 6, label %bb7147
- i32 7, label %bb8683
- i32 8, label %bb9131
- i32 9, label %bb9875
- i32 10, label %bb4679
- i32 11, label %bb4859
- i32 12, label %bb4679
- i32 16, label %bb10249
- ]
-bb4679: ; preds = %bb1365, %bb1365, %bb1365, %bb1365, %bb1365, %bb1365
- ret void
-bb4859: ; preds = %bb1365
- ret void
-bb5115: ; preds = %bb1365
- ret void
-bb6651: ; preds = %bb1365
- ret void
-bb7147: ; preds = %bb1365
- ret void
-bb8683: ; preds = %bb1365
- ret void
-bb9131: ; preds = %bb1365
- ret void
-bb9875: ; preds = %bb1365
- %source_ptr9884 = bitcast i8** %source_ptr to i8** ; <i8**> [#uses=1]
- %tmp9885 = load i8*, i8** %source_ptr9884 ; <i8*> [#uses=0]
- ret void
-bb10249: ; preds = %bb1365
- %source_ptr10257 = bitcast i8** %source_ptr to i16** ; <i16**> [#uses=1]
- %tmp10258 = load i16*, i16** %source_ptr10257 ; <i16*> [#uses=0]
- ret void
-cond_next10377: ; preds = %bb1365
- ret void
-bb27055: ; preds = %bb1357
- ret void
-}
-
-
- %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>" = type { %"struct.__gnu_cxx::bitmap_allocator<char>::_Alloc_block"* }
- %"struct.__gnu_cxx::bitmap_allocator<char>" = type { i8 }
- %"struct.__gnu_cxx::bitmap_allocator<char>::_Alloc_block" = type { [8 x i8] }
-
-; PR1045
-define void @test6() {
-entry:
- %this_addr.i = alloca %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"* ; <%"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"**> [#uses=3]
- %tmp = alloca %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>", align 4 ; <%"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"*> [#uses=1]
- store %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"* %tmp, %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"** %this_addr.i
- %tmp.i = load %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"*, %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"** %this_addr.i ; <%"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"*> [#uses=1]
- %tmp.i.upgrd.1 = bitcast %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"* %tmp.i to %"struct.__gnu_cxx::bitmap_allocator<char>"* ; <%"struct.__gnu_cxx::bitmap_allocator<char>"*> [#uses=0]
- %tmp1.i = load %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"*, %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"** %this_addr.i ; <%"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"*> [#uses=1]
- %tmp.i.upgrd.2 = getelementptr %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>", %"struct.__gnu_cxx::balloc::_Inclusive_between<__gnu_cxx::bitmap_allocator<char>::_Alloc_block*>"* %tmp1.i, i32 0, i32 0 ; <%"struct.__gnu_cxx::bitmap_allocator<char>::_Alloc_block"**> [#uses=0]
- unreachable
-}
-
- %struct.CGPoint = type { float, float }
- %struct.aal_big_range_t = type { i32, i32 } %struct.aal_callback_t = type { i8* (i8*, i32)*, void (i8*, i8*)* } %struct.aal_edge_pool_t = type { %struct.aal_edge_pool_t*, i32, i32, [0 x %struct.aal_edge_t] } %struct.aal_edge_t = type { %struct.CGPoint, %struct.CGPoint, i32 }
- %struct.aal_range_t = type { i16, i16 }
- %struct.aal_span_pool_t = type { %struct.aal_span_pool_t*, [341 x %struct.aal_span_t] }
- %struct.aal_span_t = type { %struct.aal_span_t*, %struct.aal_big_range_t }
- %struct.aal_spanarray_t = type { [2 x %struct.aal_range_t] }
- %struct.aal_spanbucket_t = type { i16, [2 x i8], %struct.anon }
- %struct.aal_state_t = type { %struct.CGPoint, %struct.CGPoint, %struct.CGPoint, i32, float, float, float, float, %struct.CGPoint, %struct.CGPoint, float, float, float, float, i32, i32, i32, i32, float, float, i8*, i32, i32, %struct.aal_edge_pool_t*, %struct.aal_edge_pool_t*, i8*, %struct.aal_callback_t*, i32, %struct.aal_span_t*, %struct.aal_span_t*, %struct.aal_span_t*, %struct.aal_span_pool_t*, i8, float, i8, i32 }
- %struct.anon = type { %struct.aal_spanarray_t }
-
-
-
-define fastcc void @test7() {
-entry:
- %SB = alloca %struct.aal_spanbucket_t, align 4 ; <%struct.aal_spanbucket_t*> [#uses=2]
- br i1 false, label %cond_true, label %cond_next79
-
-cond_true: ; preds = %entry
- br i1 false, label %cond_next, label %cond_next114.i
-
-cond_next114.i: ; preds = %cond_true
- ret void
-
-cond_next: ; preds = %cond_true
- %SB19 = bitcast %struct.aal_spanbucket_t* %SB to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* %SB19, i8* null, i32 12, i32 0, i1 false)
- br i1 false, label %cond_next34, label %cond_next79
-
-cond_next34: ; preds = %cond_next
- %i.2.reload22 = load i32, i32* null ; <i32> [#uses=1]
- %tmp51 = getelementptr %struct.aal_spanbucket_t, %struct.aal_spanbucket_t* %SB, i32 0, i32 2, i32 0, i32 0, i32 %i.2.reload22, i32 1
- ; <i16*> [#uses=0]
- ret void
-
-cond_next79: ; preds = %cond_next, %entry
- ret void
-}
-
-
- %struct.c37304a__vrec = type { i8, %struct.c37304a__vrec___disc___XVN }
- %struct.c37304a__vrec___disc___XVN = type {
-%struct.c37304a__vrec___disc___XVN___O }
- %struct.c37304a__vrec___disc___XVN___O = type { }
-
-; PR3304
-define void @test8() {
-entry:
- %v = alloca %struct.c37304a__vrec
- %0 = getelementptr %struct.c37304a__vrec, %struct.c37304a__vrec* %v, i32 0, i32 0
- store i8 8, i8* %0, align 1
- unreachable
-}
-
-
-
-; rdar://6808691 - ZeroLengthMemSet
- %0 = type <{ i32, i16, i8, i8, i64, i64, i16, [0 x i16] }>
-
-define i32 @test9() {
-entry:
- %.compoundliteral = alloca %0
- %tmp228 = getelementptr %0, %0* %.compoundliteral, i32 0, i32 7
- %tmp229 = bitcast [0 x i16]* %tmp228 to i8*
- call void @llvm.memset.p0i8.i64(i8* %tmp229, i8 0, i64 0, i32 2, i1 false)
- unreachable
-}
-
-declare void @llvm.memset.i64(i8* nocapture, i8, i64, i32) nounwind
-
-
-; PR4146 - i1 handling
-%wrapper = type { i1 }
-define void @test10() {
-entry:
- %w = alloca %wrapper, align 8 ; <%wrapper*> [#uses=1]
- %0 = getelementptr %wrapper, %wrapper* %w, i64 0, i32 0 ; <i1*>
- store i1 true, i1* %0
- ret void
-}
-
-
- %struct.singlebool = type <{ i8 }>
-; PR4286
-define zeroext i8 @test11() nounwind {
-entry:
- %a = alloca %struct.singlebool, align 1 ; <%struct.singlebool*> [#uses=2]
- %storetmp.i = bitcast %struct.singlebool* %a to i1* ; <i1*> [#uses=1]
- store i1 true, i1* %storetmp.i
- %tmp = getelementptr %struct.singlebool, %struct.singlebool* %a, i64 0, i32 0 ; <i8*> [#uses=1]
- %tmp1 = load i8, i8* %tmp ; <i8> [#uses=1]
- ret i8 %tmp1
-}
-
-
- %struct.Item = type { [4 x i16], %struct.rule* }
- %struct.rule = type { [4 x i16], i32, i32, i32, %struct.nonterminal*, %struct.pattern*, i8 }
- %struct.nonterminal = type { i8*, i32, i32, i32, %struct.plankMap*, %struct.rule* }
- %struct.plankMap = type { %struct.list*, i32, %struct.stateMap* }
- %struct.list = type { i8*, %struct.list* }
- %struct.stateMap = type { i8*, %struct.plank*, i32, i16* }
- %struct.plank = type { i8*, %struct.list*, i32 }
- %struct.pattern = type { %struct.nonterminal*, %struct.operator*, [2 x %struct.nonterminal*] }
- %struct.operator = type { i8*, i8, i32, i32, i32, i32, %struct.table* }
- %struct.table = type { %struct.operator*, %struct.list*, i16*, [2 x %struct.dimension*], %struct.item_set** }
- %struct.dimension = type { i16*, %struct.Index_Map, %struct.mapping*, i32, %struct.plankMap* }
- %struct.Index_Map = type { i32, %struct.item_set** }
- %struct.item_set = type { i32, i32, %struct.operator*, [2 x %struct.item_set*], %struct.item_set*, i16*, %struct.Item*, %struct.Item* }
- %struct.mapping = type { %struct.list**, i32, i32, i32, %struct.item_set** }
-
-; VLAs.
-define void @test12() {
-bb4.i:
- %malloccall = tail call i8* @malloc(i32 0)
- %0 = bitcast i8* %malloccall to [0 x %struct.Item]*
- %.sub.i.c.i = getelementptr [0 x %struct.Item], [0 x %struct.Item]* %0, i32 0, i32 0 ; <%struct.Item*> [#uses=0]
- unreachable
-}
-declare noalias i8* @malloc(i32)
-
-; PR8680
-define void @test13() nounwind {
-entry:
- %memtmp = alloca i32, align 4
- %0 = bitcast i32* %memtmp to void ()*
- call void %0() nounwind
- ret void
-}
-
-; rdar://11861001 - The dynamic GEP here was incorrectly making all accesses
-; to the alloca think they were also dynamic. Inserts and extracts created to
-; access the vector were all being based from the dynamic access, even in BBs
-; not dominated by the GEP.
-define fastcc void @test() optsize inlinehint ssp align 2 {
-entry:
- %alloc.0.0 = alloca <4 x float>, align 16
- %bitcast = bitcast <4 x float>* %alloc.0.0 to [4 x float]*
- %idx3 = getelementptr inbounds [4 x float], [4 x float]* %bitcast, i32 0, i32 3
- store float 0.000000e+00, float* %idx3, align 4
- br label %for.body10
-
-for.body10: ; preds = %for.body10, %entry
- %loopidx = phi i32 [ 0, %entry ], [ undef, %for.body10 ]
- %unusedidx = getelementptr inbounds <4 x float>, <4 x float>* %alloc.0.0, i32 0, i32 %loopidx
- br i1 undef, label %for.end, label %for.body10
-
-for.end: ; preds = %for.body10
- store <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float 0.000000e+00>, <4 x float>* %alloc.0.0, align 16
- ret void
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/debuginfo-preserved.ll b/test/Transforms/ScalarRepl/debuginfo-preserved.ll
deleted file mode 100644
index 4daa610ccdcb..000000000000
--- a/test/Transforms/ScalarRepl/debuginfo-preserved.ll
+++ /dev/null
@@ -1,65 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
-; RUN: opt < %s -scalarrepl-ssa -S | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-macosx10.6.0"
-
-; CHECK: f
-; CHECK-NOT: llvm.dbg.declare
-; CHECK: llvm.dbg.value
-; CHECK: llvm.dbg.value
-; CHECK: llvm.dbg.value
-; CHECK: llvm.dbg.value
-; CHECK: llvm.dbg.value
-
-define i32 @f(i32 %a, i32 %b) nounwind ssp !dbg !1 {
-entry:
- %a.addr = alloca i32, align 4
- %b.addr = alloca i32, align 4
- %c = alloca i32, align 4
- store i32 %a, i32* %a.addr, align 4
- call void @llvm.dbg.declare(metadata i32* %a.addr, metadata !6, metadata !DIExpression()), !dbg !7
- store i32 %b, i32* %b.addr, align 4
- call void @llvm.dbg.declare(metadata i32* %b.addr, metadata !8, metadata !DIExpression()), !dbg !9
- call void @llvm.dbg.declare(metadata i32* %c, metadata !10, metadata !DIExpression()), !dbg !12
- %tmp = load i32, i32* %a.addr, align 4, !dbg !13
- store i32 %tmp, i32* %c, align 4, !dbg !13
- %tmp1 = load i32, i32* %a.addr, align 4, !dbg !14
- %tmp2 = load i32, i32* %b.addr, align 4, !dbg !14
- %add = add nsw i32 %tmp1, %tmp2, !dbg !14
- store i32 %add, i32* %a.addr, align 4, !dbg !14
- %tmp3 = load i32, i32* %c, align 4, !dbg !15
- %tmp4 = load i32, i32* %b.addr, align 4, !dbg !15
- %sub = sub nsw i32 %tmp3, %tmp4, !dbg !15
- store i32 %sub, i32* %b.addr, align 4, !dbg !15
- %tmp5 = load i32, i32* %a.addr, align 4, !dbg !16
- %tmp6 = load i32, i32* %b.addr, align 4, !dbg !16
- %add7 = add nsw i32 %tmp5, %tmp6, !dbg !16
- ret i32 %add7, !dbg !16
-}
-
-declare void @llvm.dbg.declare(metadata, metadata, metadata) nounwind readnone
-
-!llvm.dbg.cu = !{!0}
-!llvm.module.flags = !{!20}
-
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.0 (trunk 131941)", isOptimized: false, emissionKind: 0, file: !18, enums: !19, retainedTypes: !19, subprograms: !17)
-!1 = distinct !DISubprogram(name: "f", line: 1, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, scopeLine: 1, file: !18, scope: !2, type: !3)
-!2 = !DIFile(filename: "/d/j/debug-test.c", directory: "/Volumes/Data/b")
-!3 = !DISubroutineType(types: !4)
-!4 = !{!5}
-!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!6 = !DILocalVariable(name: "a", line: 1, arg: 1, scope: !1, file: !2, type: !5)
-!7 = !DILocation(line: 1, column: 11, scope: !1)
-!8 = !DILocalVariable(name: "b", line: 1, arg: 2, scope: !1, file: !2, type: !5)
-!9 = !DILocation(line: 1, column: 18, scope: !1)
-!10 = !DILocalVariable(name: "c", line: 2, scope: !11, file: !2, type: !5)
-!11 = distinct !DILexicalBlock(line: 1, column: 21, file: !18, scope: !1)
-!12 = !DILocation(line: 2, column: 9, scope: !11)
-!13 = !DILocation(line: 2, column: 14, scope: !11)
-!14 = !DILocation(line: 3, column: 5, scope: !11)
-!15 = !DILocation(line: 4, column: 5, scope: !11)
-!16 = !DILocation(line: 5, column: 5, scope: !11)
-!17 = !{!1}
-!18 = !DIFile(filename: "/d/j/debug-test.c", directory: "/Volumes/Data/b")
-!19 = !{}
-!20 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/Transforms/ScalarRepl/inline-vector.ll b/test/Transforms/ScalarRepl/inline-vector.ll
deleted file mode 100644
index 85f37414e656..000000000000
--- a/test/Transforms/ScalarRepl/inline-vector.ll
+++ /dev/null
@@ -1,53 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
-; RUN: opt < %s -scalarrepl-ssa -S | FileCheck %s
-target datalayout = "e-p:32:32:32-i1:8:32-i8:8:32-i16:16:32-i32:32:32-i64:32:32-f32:32:32-f64:32:32-v64:64:64-v128:128:128-a0:0:32-n32"
-target triple = "thumbv7-apple-darwin10.0.0"
-
-%struct.Vector4 = type { float, float, float, float }
-@f.vector = internal constant %struct.Vector4 { float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00 }, align 16
-
-; CHECK-LABEL: define void @f(
-; CHECK-NOT: alloca
-; CHECK: phi <4 x float>
-
-define void @f() nounwind ssp {
-entry:
- %i = alloca i32, align 4
- %vector = alloca %struct.Vector4, align 16
- %agg.tmp = alloca %struct.Vector4, align 16
- %tmp = bitcast %struct.Vector4* %vector to i8*
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp, i8* bitcast (%struct.Vector4* @f.vector to i8*), i32 16, i32 16, i1 false)
- br label %for.cond
-
-for.cond: ; preds = %for.body, %entry
- %storemerge = phi i32 [ 0, %entry ], [ %inc, %for.body ]
- store i32 %storemerge, i32* %i, align 4
- %cmp = icmp slt i32 %storemerge, 1000000
- br i1 %cmp, label %for.body, label %for.end
-
-for.body: ; preds = %for.cond
- %tmp2 = bitcast %struct.Vector4* %agg.tmp to i8*
- %tmp3 = bitcast %struct.Vector4* %vector to i8*
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* %tmp2, i8* %tmp3, i32 16, i32 16, i1 false)
- %0 = bitcast %struct.Vector4* %agg.tmp to [2 x i64]*
- %1 = load [2 x i64], [2 x i64]* %0, align 16
- %tmp2.i = extractvalue [2 x i64] %1, 0
- %tmp3.i = zext i64 %tmp2.i to i128
- %tmp10.i = bitcast i128 %tmp3.i to <4 x float>
- %sub.i.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %tmp10.i
- %2 = bitcast %struct.Vector4* %vector to <4 x float>*
- store <4 x float> %sub.i.i, <4 x float>* %2, align 16
- %tmp4 = load i32, i32* %i, align 4
- %inc = add nsw i32 %tmp4, 1
- br label %for.cond
-
-for.end: ; preds = %for.cond
- %x = getelementptr inbounds %struct.Vector4, %struct.Vector4* %vector, i32 0, i32 0
- %tmp5 = load float, float* %x, align 16
- %conv = fpext float %tmp5 to double
- %call = call i32 (...) @printf(double %conv) nounwind
- ret void
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
-declare i32 @printf(...)
diff --git a/test/Transforms/ScalarRepl/lifetime.ll b/test/Transforms/ScalarRepl/lifetime.ll
deleted file mode 100644
index c0ddfb58bbdf..000000000000
--- a/test/Transforms/ScalarRepl/lifetime.ll
+++ /dev/null
@@ -1,139 +0,0 @@
-; RUN: opt -scalarrepl -S < %s | FileCheck %s
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-unknown-linux-gnu"
-
-declare void @llvm.lifetime.start(i64, i8*)
-declare void @llvm.lifetime.end(i64, i8*)
-
-%t1 = type {i32, i32, i32}
-
-define void @test1() {
-; CHECK-LABEL: @test1(
- %A = alloca %t1
- %A1 = getelementptr %t1, %t1* %A, i32 0, i32 0
- %A2 = getelementptr %t1, %t1* %A, i32 0, i32 1
- %A3 = getelementptr %t1, %t1* %A, i32 0, i32 2
- %B = bitcast i32* %A1 to i8*
- store i32 0, i32* %A1
- call void @llvm.lifetime.start(i64 -1, i8* %B)
- ret void
-; CHECK-NEXT: ret void
-}
-
-define void @test2() {
-; CHECK-LABEL: @test2(
- %A = alloca %t1
- %A1 = getelementptr %t1, %t1* %A, i32 0, i32 0
- %A2 = getelementptr %t1, %t1* %A, i32 0, i32 1
- %A3 = getelementptr %t1, %t1* %A, i32 0, i32 2
- %B = bitcast i32* %A2 to i8*
- store i32 0, i32* %A2
- call void @llvm.lifetime.start(i64 -1, i8* %B)
- %C = load i32, i32* %A2
- ret void
-; CHECK: ret void
-}
-
-define void @test3() {
-; CHECK-LABEL: @test3(
- %A = alloca %t1
- %A1 = getelementptr %t1, %t1* %A, i32 0, i32 0
- %A2 = getelementptr %t1, %t1* %A, i32 0, i32 1
- %A3 = getelementptr %t1, %t1* %A, i32 0, i32 2
- %B = bitcast i32* %A2 to i8*
- store i32 0, i32* %A2
- call void @llvm.lifetime.start(i64 6, i8* %B)
- %C = load i32, i32* %A2
- ret void
-; CHECK-NEXT: ret void
-}
-
-define void @test4() {
-; CHECK-LABEL: @test4(
- %A = alloca %t1
- %A1 = getelementptr %t1, %t1* %A, i32 0, i32 0
- %A2 = getelementptr %t1, %t1* %A, i32 0, i32 1
- %A3 = getelementptr %t1, %t1* %A, i32 0, i32 2
- %B = bitcast i32* %A2 to i8*
- store i32 0, i32* %A2
- call void @llvm.lifetime.start(i64 1, i8* %B)
- %C = load i32, i32* %A2
- ret void
-; CHECK-NEXT: ret void
-}
-
-%t2 = type {i32, [4 x i8], i32}
-
-define void @test5() {
-; CHECK-LABEL: @test5(
- %A = alloca %t2
-; CHECK: alloca{{.*}}i8
-; CHECK: alloca{{.*}}i8
-; CHECK: alloca{{.*}}i8
-
- %A21 = getelementptr %t2, %t2* %A, i32 0, i32 1, i32 0
- %A22 = getelementptr %t2, %t2* %A, i32 0, i32 1, i32 1
- %A23 = getelementptr %t2, %t2* %A, i32 0, i32 1, i32 2
- %A24 = getelementptr %t2, %t2* %A, i32 0, i32 1, i32 3
-; CHECK-NOT: store i8 1
- store i8 1, i8* %A21
- store i8 2, i8* %A22
- store i8 3, i8* %A23
- store i8 4, i8* %A24
-
- %A1 = getelementptr %t2, %t2* %A, i32 0, i32 0
- %A2 = getelementptr %t2, %t2* %A, i32 0, i32 1, i32 1
- %A3 = getelementptr %t2, %t2* %A, i32 0, i32 2
- store i8 0, i8* %A2
- call void @llvm.lifetime.start(i64 5, i8* %A2)
-; CHECK: llvm.lifetime{{.*}}i64 1
-; CHECK: llvm.lifetime{{.*}}i64 1
-; CHECK: llvm.lifetime{{.*}}i64 1
- %C = load i8, i8* %A2
- ret void
-}
-
-%t3 = type {[4 x i16], [4 x i8]}
-
-define void @test6() {
-; CHECK-LABEL: @test6(
- %A = alloca %t3
-; CHECK: alloca i8
-; CHECK: alloca i8
-; CHECK: alloca i8
-
- %A11 = getelementptr %t3, %t3* %A, i32 0, i32 0, i32 0
- %A12 = getelementptr %t3, %t3* %A, i32 0, i32 0, i32 1
- %A13 = getelementptr %t3, %t3* %A, i32 0, i32 0, i32 2
- %A14 = getelementptr %t3, %t3* %A, i32 0, i32 0, i32 3
- store i16 11, i16* %A11
- store i16 12, i16* %A12
- store i16 13, i16* %A13
- store i16 14, i16* %A14
-; CHECK-NOT: store i16 11
-; CHECK-NOT: store i16 12
-; CHECK-NOT: store i16 13
-; CHECK-NOT: store i16 14
-
- %A21 = getelementptr %t3, %t3* %A, i32 0, i32 1, i32 0
- %A22 = getelementptr %t3, %t3* %A, i32 0, i32 1, i32 1
- %A23 = getelementptr %t3, %t3* %A, i32 0, i32 1, i32 2
- %A24 = getelementptr %t3, %t3* %A, i32 0, i32 1, i32 3
- store i8 21, i8* %A21
- store i8 22, i8* %A22
- store i8 23, i8* %A23
- store i8 24, i8* %A24
-; CHECK: store i8 21
-; CHECK: store i8 22
-; CHECK: store i8 23
-; CHECK-NOT: store i8 24
-
- %B = bitcast i16* %A13 to i8*
- call void @llvm.lifetime.start(i64 7, i8* %B)
-; CHECK: lifetime.start{{.*}}i64 1
-; CHECK: lifetime.start{{.*}}i64 1
-; CHECK: lifetime.start{{.*}}i64 1
-
- ret void
-}
diff --git a/test/Transforms/ScalarRepl/load-store-aggregate.ll b/test/Transforms/ScalarRepl/load-store-aggregate.ll
deleted file mode 100644
index 88299f3679c2..000000000000
--- a/test/Transforms/ScalarRepl/load-store-aggregate.ll
+++ /dev/null
@@ -1,31 +0,0 @@
-; This testcase shows that scalarrepl is able to replace struct alloca's which
-; are directly loaded from or stored to (using the first class aggregates
-; feature).
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-
-; RUN: opt < %s -scalarrepl -S > %t
-; RUN: cat %t | not grep alloca
-
-%struct.foo = type { i32, i32 }
-
-define i32 @test(%struct.foo* %P) {
-entry:
- %L = alloca %struct.foo, align 8 ; <%struct.foo*> [#uses=2]
- %V = load %struct.foo, %struct.foo* %P
- store %struct.foo %V, %struct.foo* %L
-
- %tmp4 = getelementptr %struct.foo, %struct.foo* %L, i32 0, i32 0 ; <i32*> [#uses=1]
- %tmp5 = load i32, i32* %tmp4 ; <i32> [#uses=1]
- ret i32 %tmp5
-}
-
-define %struct.foo @test2(i32 %A, i32 %B) {
-entry:
- %L = alloca %struct.foo, align 8 ; <%struct.foo*> [#uses=2]
- %L.0 = getelementptr %struct.foo, %struct.foo* %L, i32 0, i32 0
- store i32 %A, i32* %L.0
- %L.1 = getelementptr %struct.foo, %struct.foo* %L, i32 0, i32 1
- store i32 %B, i32* %L.1
- %V = load %struct.foo, %struct.foo* %L
- ret %struct.foo %V
-}
diff --git a/test/Transforms/ScalarRepl/memcpy-align.ll b/test/Transforms/ScalarRepl/memcpy-align.ll
deleted file mode 100644
index 29a1bb8a4b5a..000000000000
--- a/test/Transforms/ScalarRepl/memcpy-align.ll
+++ /dev/null
@@ -1,32 +0,0 @@
-; RUN: opt -scalarrepl -S < %s | FileCheck %s
-; PR6832
-target datalayout =
-"e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-n32"
-target triple = "arm-u-u"
-
-%0 = type { %struct.anon, %struct.anon }
-%struct.anon = type { [4 x i8] }
-
-@c = external global %0 ; <%0*> [#uses=1]
-
-define void @good() nounwind {
-entry:
- %x0 = alloca %struct.anon, align 4 ; <%struct.anon*> [#uses=2]
- %tmp = bitcast %struct.anon* %x0 to i8* ; <i8*> [#uses=1]
- call void @llvm.memset.p0i8.i32(i8* %tmp, i8 0, i32 4, i32 4, i1 false)
- %tmp1 = bitcast %struct.anon* %x0 to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* getelementptr inbounds (%0, %0* @c, i32
-0, i32 0, i32 0, i32 0), i8* %tmp1, i32 4, i32 4, i1 false)
- ret void
-
-; CHECK: store i8 0, i8*{{.*}}, align 4
-; CHECK: store i8 0, i8*{{.*}}, align 1
-; CHECK: store i8 0, i8*{{.*}}, align 2
-; CHECK: store i8 0, i8*{{.*}}, align 1
-}
-
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
-
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32,
-i1) nounwind
-
diff --git a/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll b/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll
deleted file mode 100644
index e8088c121fbc..000000000000
--- a/test/Transforms/ScalarRepl/memset-aggregate-byte-leader.ll
+++ /dev/null
@@ -1,23 +0,0 @@
-; PR1226
-; RUN: opt < %s -scalarrepl -S | \
-; RUN: not grep "call void @llvm.memcpy.p0i8.p0i8.i32"
-; RUN: opt < %s -scalarrepl -S | grep getelementptr
-; END.
-
-target datalayout = "E-p:32:32"
-target triple = "powerpc-apple-darwin8.8.0"
- %struct.foo = type { i8, i8 }
-
-
-define i32 @test1(%struct.foo* %P) {
-entry:
- %L = alloca %struct.foo, align 2 ; <%struct.foo*> [#uses=1]
- %L2 = getelementptr %struct.foo, %struct.foo* %L, i32 0, i32 0 ; <i8*> [#uses=2]
- %tmp13 = getelementptr %struct.foo, %struct.foo* %P, i32 0, i32 0 ; <i8*> [#uses=1]
- call void @llvm.memcpy.p0i8.p0i8.i32( i8* %L2, i8* %tmp13, i32 2, i32 1, i1 false)
- %tmp5 = load i8, i8* %L2 ; <i8> [#uses=1]
- %tmp56 = sext i8 %tmp5 to i32 ; <i32> [#uses=1]
- ret i32 %tmp56
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1)
diff --git a/test/Transforms/ScalarRepl/memset-aggregate.ll b/test/Transforms/ScalarRepl/memset-aggregate.ll
deleted file mode 100644
index 98e2dddefe7f..000000000000
--- a/test/Transforms/ScalarRepl/memset-aggregate.ll
+++ /dev/null
@@ -1,67 +0,0 @@
-; PR1226
-; RUN: opt < %s -scalarrepl -S | grep "ret i32 16843009"
-; RUN: opt < %s -scalarrepl -S | not grep alloca
-; RUN: opt < %s -scalarrepl -instcombine -S | grep "ret i16 514"
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "i686-apple-darwin8"
- %struct.bar = type { %struct.foo, i64, double }
- %struct.foo = type { i32, i32 }
-
-
-define i32 @test1(%struct.foo* %P) {
-entry:
- %L = alloca %struct.foo, align 8 ; <%struct.foo*> [#uses=2]
- %L2 = bitcast %struct.foo* %L to i8* ; <i8*> [#uses=1]
- %tmp13 = bitcast %struct.foo* %P to i8* ; <i8*> [#uses=1]
- call void @llvm.memcpy.p0i8.p0i8.i32(i8* %L2, i8* %tmp13, i32 8, i32 4, i1 false)
- %tmp4 = getelementptr %struct.foo, %struct.foo* %L, i32 0, i32 0 ; <i32*> [#uses=1]
- %tmp5 = load i32, i32* %tmp4 ; <i32> [#uses=1]
- ret i32 %tmp5
-}
-
-
-define i32 @test2() {
-entry:
- %L = alloca [4 x %struct.foo], align 16 ; <[4 x %struct.foo]*> [#uses=2]
- %L12 = bitcast [4 x %struct.foo]* %L to i8* ; <i8*> [#uses=1]
- call void @llvm.memset.p0i8.i32(i8* %L12, i8 0, i32 32, i32 16, i1 false)
- %tmp4 = getelementptr [4 x %struct.foo], [4 x %struct.foo]* %L, i32 0, i32 0, i32 0 ; <i32*> [#uses=1]
- %tmp5 = load i32, i32* %tmp4 ; <i32> [#uses=1]
- ret i32 %tmp5
-}
-
-
-define i32 @test3() {
-entry:
- %B = alloca %struct.bar, align 16 ; <%struct.bar*> [#uses=4]
- %B1 = bitcast %struct.bar* %B to i8* ; <i8*> [#uses=1]
- call void @llvm.memset.p0i8.i32(i8* %B1, i8 1, i32 24, i32 16, i1 false)
- %tmp3 = getelementptr %struct.bar, %struct.bar* %B, i32 0, i32 0, i32 0 ; <i32*> [#uses=1]
- store i32 1, i32* %tmp3
- %tmp4 = getelementptr %struct.bar, %struct.bar* %B, i32 0, i32 2 ; <double*> [#uses=1]
- store double 1.000000e+01, double* %tmp4
- %tmp6 = getelementptr %struct.bar, %struct.bar* %B, i32 0, i32 0, i32 1 ; <i32*> [#uses=1]
- %tmp7 = load i32, i32* %tmp6 ; <i32> [#uses=1]
- ret i32 %tmp7
-}
-
-
- %struct.f = type { i32, i32, i32, i32, i32, i32 }
-
-define i16 @test4() nounwind {
-entry:
- %A = alloca %struct.f, align 8 ; <%struct.f*> [#uses=3]
- %0 = getelementptr %struct.f, %struct.f* %A, i32 0, i32 0 ; <i32*> [#uses=1]
- store i32 1, i32* %0, align 8
- %1 = getelementptr %struct.f, %struct.f* %A, i32 0, i32 1 ; <i32*> [#uses=1]
- %2 = bitcast i32* %1 to i8* ; <i8*> [#uses=1]
- call void @llvm.memset.p0i8.i32(i8* %2, i8 2, i32 12, i32 4, i1 false)
- %3 = getelementptr %struct.f, %struct.f* %A, i32 0, i32 2 ; <i32*> [#uses=1]
- %4 = load i32, i32* %3, align 8 ; <i32> [#uses=1]
- %retval12 = trunc i32 %4 to i16 ; <i16> [#uses=1]
- ret i16 %retval12
-}
-declare void @llvm.memcpy.p0i8.p0i8.i32(i8* nocapture, i8* nocapture, i32, i32, i1) nounwind
-
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/negative-memset.ll b/test/Transforms/ScalarRepl/negative-memset.ll
deleted file mode 100644
index 458d9610cd73..000000000000
--- a/test/Transforms/ScalarRepl/negative-memset.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; PR12202
-; RUN: opt < %s -scalarrepl -S
-; Ensure that we do not hang or crash when feeding a negative value to memset
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f80:128:128-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32-S32"
-target triple = "i686-pc-win32"
-
-define i32 @test() nounwind {
-entry:
- %retval = alloca i32, align 4
- %buff = alloca [1 x i8], align 1
- store i32 0, i32* %retval
- %0 = bitcast [1 x i8]* %buff to i8*
- call void @llvm.memset.p0i8.i32(i8* %0, i8 0, i32 1, i32 1, i1 false)
- %arraydecay = getelementptr inbounds [1 x i8], [1 x i8]* %buff, i32 0, i32 0
- call void @llvm.memset.p0i8.i32(i8* %arraydecay, i8 -1, i32 -8, i32 1, i1 false) ; Negative 8!
- ret i32 0
-}
-
-declare void @llvm.memset.p0i8.i32(i8* nocapture, i8, i32, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/nonzero-first-index.ll b/test/Transforms/ScalarRepl/nonzero-first-index.ll
deleted file mode 100644
index da757b08d454..000000000000
--- a/test/Transforms/ScalarRepl/nonzero-first-index.ll
+++ /dev/null
@@ -1,53 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:32:32-n8:16:32"
-target triple = "i386-pc-linux-gnu"
-
-%nested = type { i32, [4 x i32] }
-
-; Check that a GEP with a non-zero first index does not prevent SROA as long
-; as the resulting offset corresponds to an element in the alloca.
-define i32 @test1() {
-; CHECK-LABEL: @test1(
-; CHECK-NOT: = i160
-; CHECK: ret i32 undef
- %A = alloca %nested
- %B = getelementptr %nested, %nested* %A, i32 0, i32 1, i32 0
- %C = getelementptr i32, i32* %B, i32 2
- %D = load i32, i32* %C
- ret i32 %D
-}
-
-; But, if the offset is out of range, then it should not be transformed.
-define i32 @test2() {
-; CHECK-LABEL: @test2(
-; CHECK: i160
- %A = alloca %nested
- %B = getelementptr %nested, %nested* %A, i32 0, i32 1, i32 0
- %C = getelementptr i32, i32* %B, i32 4
- %D = load i32, i32* %C
- ret i32 %D
-}
-
-; Try it with a bitcast and single GEP....
-define i32 @test3() {
-; CHECK-LABEL: @test3(
-; CHECK-NOT: = i160
-; CHECK: ret i32 undef
- %A = alloca %nested
- %B = bitcast %nested* %A to i32*
- %C = getelementptr i32, i32* %B, i32 2
- %D = load i32, i32* %C
- ret i32 %D
-}
-
-; ...and again make sure that out-of-range accesses are not transformed.
-define i32 @test4() {
-; CHECK-LABEL: @test4(
-; CHECK: i160
- %A = alloca %nested
- %B = bitcast %nested* %A to i32*
- %C = getelementptr i32, i32* %B, i32 -1
- %D = load i32, i32* %C
- ret i32 %D
-}
diff --git a/test/Transforms/ScalarRepl/not-a-vector.ll b/test/Transforms/ScalarRepl/not-a-vector.ll
deleted file mode 100644
index 04c1f93617b9..000000000000
--- a/test/Transforms/ScalarRepl/not-a-vector.ll
+++ /dev/null
@@ -1,20 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | not grep alloca
-; RUN: opt < %s -scalarrepl -S | not grep "7 x double"
-; RUN: opt < %s -scalarrepl -instcombine -S | grep "ret double %B"
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-
-define double @test(double %A, double %B) {
- %ARR = alloca [7 x i64]
- %C = bitcast [7 x i64]* %ARR to double*
- store double %A, double* %C
-
- %D = getelementptr [7 x i64], [7 x i64]* %ARR, i32 0, i32 4
- %E = bitcast i64* %D to double*
- store double %B, double* %E
-
- %F = getelementptr double, double* %C, i32 4
- %G = load double, double* %F
- ret double %G
-}
-
-
diff --git a/test/Transforms/ScalarRepl/only-memcpy-uses.ll b/test/Transforms/ScalarRepl/only-memcpy-uses.ll
deleted file mode 100644
index d0ed20b26bca..000000000000
--- a/test/Transforms/ScalarRepl/only-memcpy-uses.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-darwin10.0.0"
-
-%struct.S = type { [12 x i32] }
-
-; CHECK-LABEL: @bar4(
-define void @bar4(%struct.S* byval %s) nounwind ssp {
-entry:
-; CHECK: alloca
-; CHECK-NOT: load
-; CHECK: memcpy
- %t = alloca %struct.S, align 4
- %agg.tmp = alloca %struct.S, align 4
- %tmp = bitcast %struct.S* %t to i8*
- %tmp1 = bitcast %struct.S* %s to i8*
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp, i8* %tmp1, i64 48, i32 4, i1 false)
- %tmp2 = bitcast %struct.S* %agg.tmp to i8*
- %tmp3 = bitcast %struct.S* %t to i8*
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp2, i8* %tmp3, i64 48, i32 4, i1 false)
- %call = call i32 (...) @bazz(%struct.S* byval %agg.tmp)
- ret void
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
-
-declare i32 @bazz(...)
diff --git a/test/Transforms/ScalarRepl/phi-cycle.ll b/test/Transforms/ScalarRepl/phi-cycle.ll
deleted file mode 100644
index a44f08180643..000000000000
--- a/test/Transforms/ScalarRepl/phi-cycle.ll
+++ /dev/null
@@ -1,80 +0,0 @@
-; RUN: opt -S -scalarrepl-ssa < %s | FileCheck %s
-; rdar://10589171
-
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
-%struct.foo = type { i32, i32 }
-
-@.str = private unnamed_addr constant [6 x i8] c"x=%d\0A\00", align 1
-
-define i32 @main(i32 %argc, i8** nocapture %argv) nounwind uwtable {
-entry:
- %f = alloca %struct.foo, align 4
- %x.i = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 0
- store i32 1, i32* %x.i, align 4
- %y.i = getelementptr inbounds %struct.foo, %struct.foo* %f, i64 0, i32 1
- br label %while.cond.i
-
-; CHECK: while.cond.i:
-; CHECK-NEXT: %tmp = phi i32 [ 1, %entry ], [ %tmp2, %while.cond.backedge.i ]
-; CHECK-NEXT: %pos.0.i = phi i32 [ 1, %entry ], [ %xtmp.i, %while.cond.backedge.i ]
-; CHECK-NEXT: %left.0.i = phi i32 [ 1, %entry ], [ %dec.i, %while.cond.backedge.i ]
-; CHECK-NOT: phi
-while.cond.i: ; preds = %while.cond.backedge.i, %entry
- %tmp = phi i32 [ 1, %entry ], [ %tmp2, %while.cond.backedge.i ]
- %pos.0.i = phi i32 [ 1, %entry ], [ %xtmp.i, %while.cond.backedge.i ]
- %left.0.i = phi i32 [ 1, %entry ], [ %dec.i, %while.cond.backedge.i ]
- %cmp.i = icmp sgt i32 %left.0.i, 0
- br i1 %cmp.i, label %while.body.i, label %while.cond.i.func.exit_crit_edge
-
-while.cond.i.func.exit_crit_edge: ; preds = %while.cond.i
- br label %func.exit
-
-while.body.i: ; preds = %while.cond.i
- %dec.i = add nsw i32 %left.0.i, -1
- switch i32 1, label %while.body.i.func.exit_crit_edge [
- i32 0, label %while.cond.backedge.i
- i32 1, label %sw.bb.i
- ]
-
-while.body.i.func.exit_crit_edge: ; preds = %while.body.i
- br label %func.exit
-
-sw.bb.i: ; preds = %while.body.i
- %cmp2.i = icmp eq i32 %tmp, 1
- br i1 %cmp2.i, label %if.then.i, label %if.end.i
-
-if.then.i: ; preds = %sw.bb.i
- store i32 %pos.0.i, i32* %x.i, align 4
- br label %if.end.i
-
-; CHECK: if.end.i:
-; CHECK-NEXT: %tmp1 = phi i32 [ %pos.0.i, %if.then.i ], [ %tmp, %sw.bb.i ]
-; CHECK-NOT: phi
-if.end.i: ; preds = %if.then.i, %sw.bb.i
- %tmp1 = phi i32 [ %pos.0.i, %if.then.i ], [ %tmp, %sw.bb.i ]
- store i32 %tmp1, i32* %y.i, align 4
- br label %while.cond.backedge.i
-
-; CHECK: while.cond.backedge.i:
-; CHECK-NEXT: %tmp2 = phi i32 [ %tmp1, %if.end.i ], [ %tmp, %while.body.i ]
-; CHECK-NOT: phi
-while.cond.backedge.i: ; preds = %if.end.i, %while.body.i
- %tmp2 = phi i32 [ %tmp1, %if.end.i ], [ %tmp, %while.body.i ]
- %xtmp.i = add i32 %pos.0.i, 1
- br label %while.cond.i
-
-; CHECK: func.exit:
-; CHECK-NOT: load
-; CHECK: %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0), i32 %tmp) [[NUW:#[0-9]+]]
-func.exit: ; preds = %while.body.i.func.exit_crit_edge, %while.cond.i.func.exit_crit_edge
- %tmp3 = load i32, i32* %x.i, align 4
- %call = call i32 (i8*, ...) @printf(i8* getelementptr inbounds ([6 x i8], [6 x i8]* @.str, i64 0, i64 0), i32 %tmp3) nounwind
- ret i32 0
-}
-
-declare i32 @printf(i8* nocapture, ...) nounwind
-
-; CHECK: attributes #0 = { nounwind uwtable }
-; CHECK: attributes [[NUW]] = { nounwind }
diff --git a/test/Transforms/ScalarRepl/phi-select.ll b/test/Transforms/ScalarRepl/phi-select.ll
deleted file mode 100644
index a6c7135a4925..000000000000
--- a/test/Transforms/ScalarRepl/phi-select.ll
+++ /dev/null
@@ -1,153 +0,0 @@
-; RUN: opt -scalarrepl -S < %s | FileCheck %s
-; Test promotion of allocas that have phis and select users.
-target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
-target triple = "x86_64-apple-darwin10.2"
-
-%struct.X = type { i32 }
-%PairTy = type {i32, i32}
-
-; CHECK-LABEL: @test1(
-; CHECK: %a.0 = alloca i32
-; CHECK: %b.0 = alloca i32
-define i32 @test1(i32 %x) nounwind readnone ssp {
-entry:
- %a = alloca %struct.X, align 8 ; <%struct.X*> [#uses=2]
- %b = alloca %struct.X, align 8 ; <%struct.X*> [#uses=2]
- %0 = getelementptr inbounds %struct.X, %struct.X* %a, i64 0, i32 0 ; <i32*> [#uses=1]
- store i32 1, i32* %0, align 8
- %1 = getelementptr inbounds %struct.X, %struct.X* %b, i64 0, i32 0 ; <i32*> [#uses=1]
- store i32 2, i32* %1, align 8
- %2 = icmp eq i32 %x, 0 ; <i1> [#uses=1]
- %p.0 = select i1 %2, %struct.X* %b, %struct.X* %a ; <%struct.X*> [#uses=1]
- %3 = getelementptr inbounds %struct.X, %struct.X* %p.0, i64 0, i32 0 ; <i32*> [#uses=1]
- %4 = load i32, i32* %3, align 8 ; <i32> [#uses=1]
- ret i32 %4
-}
-
-; CHECK-LABEL: @test2(
-; CHECK: %X.ld = phi i32 [ 1, %entry ], [ 2, %T ]
-; CHECK-NEXT: ret i32 %X.ld
-define i32 @test2(i1 %c) {
-entry:
- %A = alloca {i32, i32}
- %B = getelementptr {i32, i32}, {i32, i32}* %A, i32 0, i32 0
- store i32 1, i32* %B
- br i1 %c, label %T, label %F
-T:
- %C = getelementptr {i32, i32}, {i32, i32}* %A, i32 0, i32 1
- store i32 2, i32* %C
- br label %F
-F:
- %X = phi i32* [%B, %entry], [%C, %T]
- %Q = load i32, i32* %X
- ret i32 %Q
-}
-
-; CHECK-LABEL: @test3(
-; CHECK-NEXT: %Q = select i1 %c, i32 1, i32 2
-; CHECK-NEXT: ret i32 %Q
-; rdar://8904039
-define i32 @test3(i1 %c) {
- %A = alloca {i32, i32}
- %B = getelementptr {i32, i32}, {i32, i32}* %A, i32 0, i32 0
- store i32 1, i32* %B
- %C = getelementptr {i32, i32}, {i32, i32}* %A, i32 0, i32 1
- store i32 2, i32* %C
-
- %X = select i1 %c, i32* %B, i32* %C
- %Q = load i32, i32* %X
- ret i32 %Q
-}
-
-;; We can't scalarize this, a use of the select is not an element access.
-define i64 @test4(i1 %c) {
-entry:
- %A = alloca %PairTy
- ; CHECK-LABEL: @test4(
- ; CHECK: %A = alloca %PairTy
- %B = getelementptr %PairTy, %PairTy* %A, i32 0, i32 0
- store i32 1, i32* %B
- %C = getelementptr %PairTy, %PairTy* %A, i32 0, i32 1
- store i32 2, i32* %B
-
- %X = select i1 %c, i32* %B, i32* %C
- %Y = bitcast i32* %X to i64*
- %Q = load i64, i64* %Y
- ret i64 %Q
-}
-
-
-;;
-;; Tests for promoting allocas used by selects.
-;; rdar://7339113
-;;
-
-define i32 @test5(i32 *%P) nounwind readnone ssp {
-entry:
- %b = alloca i32, align 8
- store i32 2, i32* %b, align 8
-
- ;; Select on constant condition should be folded.
- %p.0 = select i1 false, i32* %b, i32* %P
- store i32 123, i32* %p.0
-
- %r = load i32, i32* %b, align 8
- ret i32 %r
-
-; CHECK-LABEL: @test5(
-; CHECK: store i32 123, i32* %P
-; CHECK: ret i32 2
-}
-
-define i32 @test6(i32 %x, i1 %c) nounwind readnone ssp {
- %a = alloca i32, align 8
- %b = alloca i32, align 8
- store i32 1, i32* %a, align 8
- store i32 2, i32* %b, align 8
- %p.0 = select i1 %c, i32* %b, i32* %a
- %r = load i32, i32* %p.0, align 8
- ret i32 %r
-; CHECK-LABEL: @test6(
-; CHECK-NEXT: %r = select i1 %c, i32 2, i32 1
-; CHECK-NEXT: ret i32 %r
-}
-
-; Verify that the loads happen where the loads are, not where the select is.
-define i32 @test7(i32 %x, i1 %c) nounwind readnone ssp {
- %a = alloca i32, align 8
- %b = alloca i32, align 8
- store i32 1, i32* %a
- store i32 2, i32* %b
- %p.0 = select i1 %c, i32* %b, i32* %a
-
- store i32 0, i32* %a
-
- %r = load i32, i32* %p.0, align 8
- ret i32 %r
-; CHECK-LABEL: @test7(
-; CHECK-NOT: alloca i32
-; CHECK: %r = select i1 %c, i32 2, i32 0
-; CHECK: ret i32 %r
-}
-
-;; Promote allocs that are PHI'd together by moving the loads.
-define i32 @test8(i32 %x) nounwind readnone ssp {
-; CHECK-LABEL: @test8(
-; CHECK-NOT: load i32
-; CHECK-NOT: store i32
-; CHECK: %p.0.ld = phi i32 [ 2, %entry ], [ 1, %T ]
-; CHECK-NEXT: ret i32 %p.0.ld
-entry:
- %a = alloca i32, align 8
- %b = alloca i32, align 8
- store i32 1, i32* %a, align 8
- store i32 2, i32* %b, align 8
- %c = icmp eq i32 %x, 0
- br i1 %c, label %T, label %Cont
-T:
- br label %Cont
-Cont:
- %p.0 = phi i32* [%b, %entry],[%a, %T]
- %r = load i32, i32* %p.0, align 8
- ret i32 %r
-}
diff --git a/test/Transforms/ScalarRepl/phinodepromote.ll b/test/Transforms/ScalarRepl/phinodepromote.ll
deleted file mode 100644
index c3af62485db1..000000000000
--- a/test/Transforms/ScalarRepl/phinodepromote.ll
+++ /dev/null
@@ -1,34 +0,0 @@
-; RUN: opt < %s -simplifycfg -instcombine -mem2reg -S | not grep alloca
-;
-; This tests to see if mem2reg can promote alloca instructions whose addresses
-; are used by PHI nodes that are immediately loaded. The LLVM C++ front-end
-; often generates code that looks like this (when it codegen's ?: exprs as
-; lvalues), so handling this simple extension is quite useful.
-;
-; This testcase is what the following program looks like when it reaches
-; instcombine:
-;
-; template<typename T>
-; const T& max(const T& a1, const T& a2) { return a1 < a2 ? a1 : a2; }
-; int main() { return max(0, 1); }
-;
-; This test checks to make sure the combination of instcombine and mem2reg
-; perform the transformation.
-
-define i32 @main() {
-entry:
- %mem_tmp.0 = alloca i32 ; <i32*> [#uses=3]
- %mem_tmp.1 = alloca i32 ; <i32*> [#uses=3]
- store i32 0, i32* %mem_tmp.0
- store i32 1, i32* %mem_tmp.1
- %tmp.1.i = load i32, i32* %mem_tmp.1 ; <i32> [#uses=1]
- %tmp.3.i = load i32, i32* %mem_tmp.0 ; <i32> [#uses=1]
- %tmp.4.i = icmp sle i32 %tmp.1.i, %tmp.3.i ; <i1> [#uses=1]
- br i1 %tmp.4.i, label %cond_true.i, label %cond_continue.i
-cond_true.i: ; preds = %entry
- br label %cond_continue.i
-cond_continue.i: ; preds = %cond_true.i, %entry
- %mem_tmp.i.0 = phi i32* [ %mem_tmp.1, %cond_true.i ], [ %mem_tmp.0, %entry ] ; <i32*> [#uses=1]
- %tmp.3 = load i32, i32* %mem_tmp.i.0 ; <i32> [#uses=1]
- ret i32 %tmp.3
-}
diff --git a/test/Transforms/ScalarRepl/select_promote.ll b/test/Transforms/ScalarRepl/select_promote.ll
deleted file mode 100644
index b4ef8c46fa09..000000000000
--- a/test/Transforms/ScalarRepl/select_promote.ll
+++ /dev/null
@@ -1,18 +0,0 @@
-; Test promotion of loads that use the result of a select instruction. This
-; should be simplified by the instcombine pass.
-
-; RUN: opt < %s -instcombine -mem2reg -S | not grep alloca
-
-define i32 @main() {
- %mem_tmp.0 = alloca i32 ; <i32*> [#uses=3]
- %mem_tmp.1 = alloca i32 ; <i32*> [#uses=3]
- store i32 0, i32* %mem_tmp.0
- store i32 1, i32* %mem_tmp.1
- %tmp.1.i = load i32, i32* %mem_tmp.1 ; <i32> [#uses=1]
- %tmp.3.i = load i32, i32* %mem_tmp.0 ; <i32> [#uses=1]
- %tmp.4.i = icmp sle i32 %tmp.1.i, %tmp.3.i ; <i1> [#uses=1]
- %mem_tmp.i.0 = select i1 %tmp.4.i, i32* %mem_tmp.1, i32* %mem_tmp.0 ; <i32*> [#uses=1]
- %tmp.3 = load i32, i32* %mem_tmp.i.0 ; <i32> [#uses=1]
- ret i32 %tmp.3
-}
-
diff --git a/test/Transforms/ScalarRepl/sroa-fca.ll b/test/Transforms/ScalarRepl/sroa-fca.ll
deleted file mode 100644
index c6e7c23ab066..000000000000
--- a/test/Transforms/ScalarRepl/sroa-fca.ll
+++ /dev/null
@@ -1,21 +0,0 @@
-; RUN: opt < %s -scalarrepl | llvm-dis
-; Make sure that SROA "scalar conversion" can handle first class aggregates.
-
-define i64 @test({i32, i32} %A) {
- %X = alloca i64
- %Y = bitcast i64* %X to {i32,i32}*
- store {i32,i32} %A, {i32,i32}* %Y
-
- %Q = load i64, i64* %X
- ret i64 %Q
-}
-
-define {i32,i32} @test2(i64 %A) {
- %X = alloca i64
- %Y = bitcast i64* %X to {i32,i32}*
- store i64 %A, i64* %X
-
- %Q = load {i32,i32}, {i32,i32}* %Y
- ret {i32,i32} %Q
-}
-
diff --git a/test/Transforms/ScalarRepl/sroa_two.ll b/test/Transforms/ScalarRepl/sroa_two.ll
deleted file mode 100644
index f2285ef21a49..000000000000
--- a/test/Transforms/ScalarRepl/sroa_two.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: opt < %s -scalarrepl | llvm-dis
-
-define i32 @test(i32 %X) {
- %Arr = alloca [2 x i32] ; <[2 x i32]*> [#uses=3]
- %tmp.0 = getelementptr [2 x i32], [2 x i32]* %Arr, i32 0, i32 0 ; <i32*> [#uses=1]
- store i32 1, i32* %tmp.0
- %tmp.1 = getelementptr [2 x i32], [2 x i32]* %Arr, i32 0, i32 1 ; <i32*> [#uses=1]
- store i32 2, i32* %tmp.1
- %tmp.3 = getelementptr [2 x i32], [2 x i32]* %Arr, i32 0, i32 %X ; <i32*> [#uses=1]
- %tmp.4 = load i32, i32* %tmp.3 ; <i32> [#uses=1]
- ret i32 %tmp.4
-}
-
diff --git a/test/Transforms/ScalarRepl/union-fp-int.ll b/test/Transforms/ScalarRepl/union-fp-int.ll
deleted file mode 100644
index fa64b60685f4..000000000000
--- a/test/Transforms/ScalarRepl/union-fp-int.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | \
-; RUN: not grep alloca
-; RUN: opt < %s -scalarrepl -S | \
-; RUN: grep "bitcast.*float.*i32"
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-
-define i32 @test(float %X) {
- %X_addr = alloca float ; <float*> [#uses=2]
- store float %X, float* %X_addr
- %X_addr.upgrd.1 = bitcast float* %X_addr to i32* ; <i32*> [#uses=1]
- %tmp = load i32, i32* %X_addr.upgrd.1 ; <i32> [#uses=1]
- ret i32 %tmp
-}
-
diff --git a/test/Transforms/ScalarRepl/union-packed.ll b/test/Transforms/ScalarRepl/union-packed.ll
deleted file mode 100644
index 741de76d581b..000000000000
--- a/test/Transforms/ScalarRepl/union-packed.ll
+++ /dev/null
@@ -1,14 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | \
-; RUN: not grep alloca
-; RUN: opt < %s -scalarrepl -S | \
-; RUN: grep bitcast
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-
-define <4 x i32> @test(<4 x float> %X) {
- %X_addr = alloca <4 x float> ; <<4 x float>*> [#uses=2]
- store <4 x float> %X, <4 x float>* %X_addr
- %X_addr.upgrd.1 = bitcast <4 x float>* %X_addr to <4 x i32>* ; <<4 x i32>*> [#uses=1]
- %tmp = load <4 x i32>, <4 x i32>* %X_addr.upgrd.1 ; <<4 x i32>> [#uses=1]
- ret <4 x i32> %tmp
-}
-
diff --git a/test/Transforms/ScalarRepl/union-pointer.ll b/test/Transforms/ScalarRepl/union-pointer.ll
deleted file mode 100644
index 6a5db1c4d6bd..000000000000
--- a/test/Transforms/ScalarRepl/union-pointer.ll
+++ /dev/null
@@ -1,74 +0,0 @@
-; PR892
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
-
-
-target datalayout = "e-p:32:32-p1:16:16-n8:16:32"
-target triple = "i686-apple-darwin8.7.2"
-
-%struct.Val = type { i32*, i32 }
-
-define i8* @test(i16* %X) {
-; CHECK-LABEL: @test(
-; CHECK-NOT: alloca
-; CHECK: ret i8*
- %X_addr = alloca i16* ; <i16**> [#uses=2]
- store i16* %X, i16** %X_addr
- %X_addr.upgrd.1 = bitcast i16** %X_addr to i8** ; <i8**> [#uses=1]
- %tmp = load i8*, i8** %X_addr.upgrd.1 ; <i8*> [#uses=1]
- ret i8* %tmp
-}
-
-define i8 addrspace(1)* @test_as1(i16 addrspace(1)* %x) {
-; CHECK-LABEL: @test_as1(
-; CHECK-NEXT: %1 = ptrtoint i16 addrspace(1)* %x to i16
-; CHECK-NEXT: %2 = inttoptr i16 %1 to i8 addrspace(1)*
-; CHECK-NEXT: ret i8 addrspace(1)* %2
- %x_addr = alloca i16 addrspace(1)*
- store i16 addrspace(1)* %x, i16 addrspace(1)** %x_addr
- %x_addr.upgrd.1 = bitcast i16 addrspace(1)** %x_addr to i8 addrspace(1)**
- %tmp = load i8 addrspace(1)*, i8 addrspace(1)** %x_addr.upgrd.1
- ret i8 addrspace(1)* %tmp
-}
-
-define i8 addrspace(1)* @test_as1_array(i16 addrspace(1)* %x) {
-; CHECK-LABEL: @test_as1_array(
-; CHECK-NEXT: %1 = ptrtoint i16 addrspace(1)* %x to i16
-; CHECK-NEXT: %2 = inttoptr i16 %1 to i8 addrspace(1)*
-; CHECK-NEXT: ret i8 addrspace(1)* %2
- %as_ptr_array = alloca [4 x i16 addrspace(1)*]
- %elem1 = getelementptr [4 x i16 addrspace(1)*], [4 x i16 addrspace(1)*]* %as_ptr_array, i32 0, i32 1
- store i16 addrspace(1)* %x, i16 addrspace(1)** %elem1
- %elem1.cast = bitcast i16 addrspace(1)** %elem1 to i8 addrspace(1)**
- %tmp = load i8 addrspace(1)*, i8 addrspace(1)** %elem1.cast
- ret i8 addrspace(1)* %tmp
-}
-
-
-define void @test2(i64 %Op.0) {
-; CHECK-LABEL: @test2(
-; CHECK-NOT: alloca
-; CHECK: ret void
-
- %tmp = alloca %struct.Val, align 8 ; <%struct.Val*> [#uses=3]
- %tmp1 = alloca %struct.Val, align 8 ; <%struct.Val*> [#uses=3]
- %tmp.upgrd.2 = call i64 @_Z3foov( ) ; <i64> [#uses=1]
- %tmp1.upgrd.3 = bitcast %struct.Val* %tmp1 to i64* ; <i64*> [#uses=1]
- store i64 %tmp.upgrd.2, i64* %tmp1.upgrd.3
- %tmp.upgrd.4 = getelementptr %struct.Val, %struct.Val* %tmp, i32 0, i32 0 ; <i32**> [#uses=1]
- %tmp2 = getelementptr %struct.Val, %struct.Val* %tmp1, i32 0, i32 0 ; <i32**> [#uses=1]
- %tmp.upgrd.5 = load i32*, i32** %tmp2 ; <i32*> [#uses=1]
- store i32* %tmp.upgrd.5, i32** %tmp.upgrd.4
- %tmp3 = getelementptr %struct.Val, %struct.Val* %tmp, i32 0, i32 1 ; <i32*> [#uses=1]
- %tmp4 = getelementptr %struct.Val, %struct.Val* %tmp1, i32 0, i32 1 ; <i32*> [#uses=1]
- %tmp.upgrd.6 = load i32, i32* %tmp4 ; <i32> [#uses=1]
- store i32 %tmp.upgrd.6, i32* %tmp3
- %tmp7 = bitcast %struct.Val* %tmp to { i64 }* ; <{ i64 }*> [#uses=1]
- %tmp8 = getelementptr { i64 }, { i64 }* %tmp7, i32 0, i32 0 ; <i64*> [#uses=1]
- %tmp9 = load i64, i64* %tmp8 ; <i64> [#uses=1]
- call void @_Z3bar3ValS_( i64 %Op.0, i64 %tmp9 )
- ret void
-}
-
-declare i64 @_Z3foov()
-
-declare void @_Z3bar3ValS_(i64, i64)
diff --git a/test/Transforms/ScalarRepl/vector_memcpy.ll b/test/Transforms/ScalarRepl/vector_memcpy.ll
deleted file mode 100644
index 031ad5e38ca7..000000000000
--- a/test/Transforms/ScalarRepl/vector_memcpy.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; RUN: opt < %s -scalarrepl -S > %t
-; RUN: grep "ret <16 x float> %A" %t
-; RUN: grep "ret <16 x float> zeroinitializer" %t
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-
-define <16 x float> @foo(<16 x float> %A) nounwind {
- %tmp = alloca <16 x float>, align 16
- %tmp2 = alloca <16 x float>, align 16
- store <16 x float> %A, <16 x float>* %tmp
- %s = bitcast <16 x float>* %tmp to i8*
- %s2 = bitcast <16 x float>* %tmp2 to i8*
- call void @llvm.memcpy.p0i8.p0i8.i64(i8* %s2, i8* %s, i64 64, i32 16, i1 false)
- %R = load <16 x float>, <16 x float>* %tmp2
- ret <16 x float> %R
-}
-
-define <16 x float> @foo2(<16 x float> %A) nounwind {
- %tmp2 = alloca <16 x float>, align 16
-
- %s2 = bitcast <16 x float>* %tmp2 to i8*
- call void @llvm.memset.p0i8.i64(i8* %s2, i8 0, i64 64, i32 16, i1 false)
-
- %R = load <16 x float>, <16 x float>* %tmp2
- ret <16 x float> %R
-}
-
-declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture, i64, i32, i1) nounwind
-declare void @llvm.memset.p0i8.i64(i8* nocapture, i8, i64, i32, i1) nounwind
diff --git a/test/Transforms/ScalarRepl/vector_promote.ll b/test/Transforms/ScalarRepl/vector_promote.ll
deleted file mode 100644
index 3c2377fc0f28..000000000000
--- a/test/Transforms/ScalarRepl/vector_promote.ll
+++ /dev/null
@@ -1,137 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
-target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64"
-target triple = "x86_64-apple-darwin10.0.0"
-
-define void @test1(<4 x float>* %F, float %f) {
-entry:
- %G = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
- %tmp = load <4 x float>, <4 x float>* %F ; <<4 x float>> [#uses=2]
- %tmp3 = fadd <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
- store <4 x float> %tmp3, <4 x float>* %G
- %G.upgrd.1 = getelementptr <4 x float>, <4 x float>* %G, i32 0, i32 0 ; <float*> [#uses=1]
- store float %f, float* %G.upgrd.1
- %tmp4 = load <4 x float>, <4 x float>* %G ; <<4 x float>> [#uses=2]
- %tmp6 = fadd <4 x float> %tmp4, %tmp4 ; <<4 x float>> [#uses=1]
- store <4 x float> %tmp6, <4 x float>* %F
- ret void
-; CHECK-LABEL: @test1(
-; CHECK-NOT: alloca
-; CHECK: %tmp = load <4 x float>, <4 x float>* %F
-; CHECK: fadd <4 x float> %tmp, %tmp
-; CHECK-NEXT: insertelement <4 x float> %tmp3, float %f, i32 0
-}
-
-define void @test2(<4 x float>* %F, float %f) {
-entry:
- %G = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=3]
- %tmp = load <4 x float>, <4 x float>* %F ; <<4 x float>> [#uses=2]
- %tmp3 = fadd <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
- store <4 x float> %tmp3, <4 x float>* %G
- %tmp.upgrd.2 = getelementptr <4 x float>, <4 x float>* %G, i32 0, i32 2 ; <float*> [#uses=1]
- store float %f, float* %tmp.upgrd.2
- %tmp4 = load <4 x float>, <4 x float>* %G ; <<4 x float>> [#uses=2]
- %tmp6 = fadd <4 x float> %tmp4, %tmp4 ; <<4 x float>> [#uses=1]
- store <4 x float> %tmp6, <4 x float>* %F
- ret void
-; CHECK-LABEL: @test2(
-; CHECK-NOT: alloca
-; CHECK: %tmp = load <4 x float>, <4 x float>* %F
-; CHECK: fadd <4 x float> %tmp, %tmp
-; CHECK-NEXT: insertelement <4 x float> %tmp3, float %f, i32 2
-}
-
-define void @test3(<4 x float>* %F, float* %f) {
-entry:
- %G = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
- %tmp = load <4 x float>, <4 x float>* %F ; <<4 x float>> [#uses=2]
- %tmp3 = fadd <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
- store <4 x float> %tmp3, <4 x float>* %G
- %tmp.upgrd.3 = getelementptr <4 x float>, <4 x float>* %G, i32 0, i32 2 ; <float*> [#uses=1]
- %tmp.upgrd.4 = load float, float* %tmp.upgrd.3 ; <float> [#uses=1]
- store float %tmp.upgrd.4, float* %f
- ret void
-; CHECK-LABEL: @test3(
-; CHECK-NOT: alloca
-; CHECK: %tmp = load <4 x float>, <4 x float>* %F
-; CHECK: fadd <4 x float> %tmp, %tmp
-; CHECK-NEXT: extractelement <4 x float> %tmp3, i32 2
-}
-
-define void @test4(<4 x float>* %F, float* %f) {
-entry:
- %G = alloca <4 x float>, align 16 ; <<4 x float>*> [#uses=2]
- %tmp = load <4 x float>, <4 x float>* %F ; <<4 x float>> [#uses=2]
- %tmp3 = fadd <4 x float> %tmp, %tmp ; <<4 x float>> [#uses=1]
- store <4 x float> %tmp3, <4 x float>* %G
- %G.upgrd.5 = getelementptr <4 x float>, <4 x float>* %G, i32 0, i32 0 ; <float*> [#uses=1]
- %tmp.upgrd.6 = load float, float* %G.upgrd.5 ; <float> [#uses=1]
- store float %tmp.upgrd.6, float* %f
- ret void
-; CHECK-LABEL: @test4(
-; CHECK-NOT: alloca
-; CHECK: %tmp = load <4 x float>, <4 x float>* %F
-; CHECK: fadd <4 x float> %tmp, %tmp
-; CHECK-NEXT: extractelement <4 x float> %tmp3, i32 0
-}
-
-define i32 @test5(float %X) { ;; should turn into bitcast.
- %X_addr = alloca [4 x float]
- %X1 = getelementptr [4 x float], [4 x float]* %X_addr, i32 0, i32 2
- store float %X, float* %X1
- %a = bitcast float* %X1 to i32*
- %tmp = load i32, i32* %a
- ret i32 %tmp
-; CHECK-LABEL: @test5(
-; CHECK-NEXT: bitcast float %X to i32
-; CHECK-NEXT: ret i32
-}
-
-define i64 @test6(<2 x float> %X) {
- %X_addr = alloca <2 x float>
- store <2 x float> %X, <2 x float>* %X_addr
- %P = bitcast <2 x float>* %X_addr to i64*
- %tmp = load i64, i64* %P
- ret i64 %tmp
-; CHECK-LABEL: @test6(
-; CHECK: bitcast <2 x float> %X to i64
-; CHECK: ret i64
-}
-
-%struct.test7 = type { [6 x i32] }
-
-define void @test7() {
-entry:
- %memtmp = alloca %struct.test7, align 16
- %0 = bitcast %struct.test7* %memtmp to <4 x i32>*
- store <4 x i32> zeroinitializer, <4 x i32>* %0, align 16
- %1 = getelementptr inbounds %struct.test7, %struct.test7* %memtmp, i64 0, i32 0, i64 5
- store i32 0, i32* %1, align 4
- ret void
-; CHECK-LABEL: @test7(
-; CHECK-NOT: alloca
-; CHECK: and i192
-}
-
-; When promoting an alloca to a 1-element vector type, instructions that
-; produce that same vector type should not be changed to insert one element
-; into a new vector. <rdar://problem/14249078>
-define <1 x i64> @test8(<1 x i64> %a) {
-entry:
- %a.addr = alloca <1 x i64>, align 8
- %__a = alloca <1 x i64>, align 8
- %tmp = alloca <1 x i64>, align 8
- store <1 x i64> %a, <1 x i64>* %a.addr, align 8
- %0 = load <1 x i64>, <1 x i64>* %a.addr, align 8
- store <1 x i64> %0, <1 x i64>* %__a, align 8
- %1 = load <1 x i64>, <1 x i64>* %__a, align 8
- %2 = bitcast <1 x i64> %1 to <8 x i8>
- %3 = bitcast <8 x i8> %2 to <1 x i64>
- %vshl_n = shl <1 x i64> %3, <i64 4>
- store <1 x i64> %vshl_n, <1 x i64>* %tmp
- %4 = load <1 x i64>, <1 x i64>* %tmp
- ret <1 x i64> %4
-; CHECK-LABEL: @test8(
-; CHECK-NOT: alloca
-; CHECK-NOT: insertelement
-; CHECK: ret <1 x i64>
-}
diff --git a/test/Transforms/ScalarRepl/vectors-with-mismatched-elements.ll b/test/Transforms/ScalarRepl/vectors-with-mismatched-elements.ll
deleted file mode 100644
index 154883122df6..000000000000
--- a/test/Transforms/ScalarRepl/vectors-with-mismatched-elements.ll
+++ /dev/null
@@ -1,27 +0,0 @@
-; RUN: opt -scalarrepl -S < %s | FileCheck %s
-; rdar://9786827
-
-; SROA should be able to handle the mixed types and eliminate the allocas here.
-
-; TODO: Currently it does this by falling back to integer "bags of bits".
-; With enough cleverness, it should be possible to convert between <3 x i32>
-; and <2 x i64> by using a combination of a bitcast and a shuffle.
-
-; CHECK: {
-; CHECK-NOT: alloca
-; CHECK: }
-
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128-n8:16:32"
-target triple = "i386-apple-darwin11.0.0"
-
-define <2 x i64> @foo() nounwind {
-entry:
- %retval = alloca <3 x i32>, align 16
- %z = alloca <4 x i32>, align 16
- %tmp = load <4 x i32>, <4 x i32>* %z
- %tmp1 = shufflevector <4 x i32> %tmp, <4 x i32> undef, <3 x i32> <i32 0, i32 1, i32 2>
- store <3 x i32> %tmp1, <3 x i32>* %retval
- %0 = bitcast <3 x i32>* %retval to <2 x i64>*
- %1 = load <2 x i64>, <2 x i64>* %0, align 1
- ret <2 x i64> %1
-}
diff --git a/test/Transforms/ScalarRepl/volatile.ll b/test/Transforms/ScalarRepl/volatile.ll
deleted file mode 100644
index 2a600b3b2797..000000000000
--- a/test/Transforms/ScalarRepl/volatile.ll
+++ /dev/null
@@ -1,13 +0,0 @@
-; RUN: opt < %s -scalarrepl -S | FileCheck %s
-
-define i32 @voltest(i32 %T) {
- %A = alloca {i32, i32}
- %B = getelementptr {i32,i32}, {i32,i32}* %A, i32 0, i32 0
- store volatile i32 %T, i32* %B
-; CHECK: store volatile
-
- %C = getelementptr {i32,i32}, {i32,i32}* %A, i32 0, i32 1
- %X = load volatile i32, i32* %C
-; CHECK: load volatile
- ret i32 %X
-}
diff --git a/test/Transforms/Scalarizer/crash-bug.ll b/test/Transforms/Scalarizer/crash-bug.ll
new file mode 100644
index 000000000000..36430a93be71
--- /dev/null
+++ b/test/Transforms/Scalarizer/crash-bug.ll
@@ -0,0 +1,24 @@
+; RUN: opt %s -scalarizer -S -o - | FileCheck %s
+
+; Don't crash
+
+define void @foo() {
+ br label %bb1
+
+bb2: ; preds = %bb1
+ %bb2_vec = shufflevector <2 x i16> <i16 0, i16 10000>,
+ <2 x i16> %bb1_vec,
+ <2 x i32> <i32 0, i32 3>
+ br label %bb1
+
+bb1: ; preds = %bb2, %0
+ %bb1_vec = phi <2 x i16> [ <i16 100, i16 200>, %0 ], [ %bb2_vec, %bb2 ]
+;CHECK: bb1:
+;CHECK: %bb1_vec.i0 = phi i16 [ 100, %0 ], [ 0, %bb2 ]
+;CHECK: %bb1_vec.i1 = phi i16 [ 200, %0 ], [ %bb1_vec.i1, %bb2 ]
+ br i1 undef, label %bb3, label %bb2
+
+bb3:
+ ret void
+}
+
diff --git a/test/Transforms/Scalarizer/dbginfo.ll b/test/Transforms/Scalarizer/dbginfo.ll
index 09252a09d4b4..8b60d69df99c 100644
--- a/test/Transforms/Scalarizer/dbginfo.ll
+++ b/test/Transforms/Scalarizer/dbginfo.ll
@@ -57,11 +57,10 @@ attributes #1 = { nounwind readnone }
!llvm.module.flags = !{!18, !26}
!llvm.ident = !{!19}
-!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 194134) (llvm/trunk 194126)", isOptimized: true, emissionKind: 0, file: !1, enums: !2, retainedTypes: !2, subprograms: !3, globals: !2, imports: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 3.4 (trunk 194134) (llvm/trunk 194126)", isOptimized: true, emissionKind: FullDebug, file: !1, enums: !2, retainedTypes: !2, globals: !2, imports: !2)
!1 = !DIFile(filename: "/tmp/add.c", directory: "/home/richards/llvm/build")
!2 = !{}
-!3 = !{!4}
-!4 = distinct !DISubprogram(name: "f1", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, scopeLine: 4, file: !1, scope: !5, type: !6, variables: !14)
+!4 = distinct !DISubprogram(name: "f1", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: true, unit: !0, scopeLine: 4, file: !1, scope: !5, type: !6, variables: !14)
!5 = !DIFile(filename: "/tmp/add.c", directory: "/home/richards/llvm/build")
!6 = !DISubroutineType(types: !7)
!7 = !{null, !8, !8, !8}
diff --git a/test/Transforms/Scalarizer/dbgloc-bug.ll b/test/Transforms/Scalarizer/dbgloc-bug.ll
new file mode 100644
index 000000000000..b3666be1abee
--- /dev/null
+++ b/test/Transforms/Scalarizer/dbgloc-bug.ll
@@ -0,0 +1,43 @@
+; RUN: opt -S -march=x86 -scalarizer %s | FileCheck %s
+
+; Reproducer for pr27938
+; https://llvm.org/bugs/show_bug.cgi?id=27938
+
+define i16 @f1() !dbg !5 {
+ ret i16 undef, !dbg !9
+}
+
+define void @f2() !dbg !10 {
+bb1:
+ %_tmp7 = tail call i16 @f1(), !dbg !13
+; CHECK: call i16 @f1(), !dbg !13
+ %broadcast.splatinsert5 = insertelement <4 x i16> undef, i16 %_tmp7, i32 0
+ %broadcast.splat6 = shufflevector <4 x i16> %broadcast.splatinsert5, <4 x i16> undef, <4 x i32> zeroinitializer
+ br label %vector.body
+
+vector.body:
+ br i1 undef, label %middle.block, label %vector.body
+
+middle.block:
+ ret void, !dbg !15
+}
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!3, !4}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, enums: !2, retainedTypes: !2)
+!1 = !DIFile(filename: "dbgloc-bug.c", directory: ".")
+!2 = !{}
+!3 = !{i32 2, !"Dwarf Version", i32 4}
+!4 = !{i32 2, !"Debug Info Version", i32 3}
+!5 = distinct !DISubprogram(name: "f1", scope: !1, file: !1, line: 9, type: !6, isLocal: false, isDefinition: true, scopeLine: 10, isOptimized: true, unit: !0, variables: !2)
+!6 = !DISubroutineType(types: !7)
+!7 = !{!8}
+!8 = !DIBasicType(name: "short", size: 16, align: 16, encoding: DW_ATE_signed)
+!9 = !DILocation(line: 11, column: 5, scope: !5)
+!10 = distinct !DISubprogram(name: "f2", scope: !1, file: !1, line: 14, type: !11, isLocal: false, isDefinition: true, scopeLine: 15, isOptimized: true, unit: !0, variables: !2)
+!11 = !DISubroutineType(types: !12)
+!12 = !{null}
+!13 = !DILocation(line: 24, column: 9, scope: !14)
+!14 = !DILexicalBlock(scope: !10, file: !1, line: 17, column: 5)
+!15 = !DILocation(line: 28, column: 1, scope: !10)
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll b/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll
index 6f117697dded..5815ae627373 100644
--- a/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll
+++ b/test/Transforms/SeparateConstOffsetFromGEP/AMDGPU/split-gep-and-gvn-addrspace-addressing-modes.ll
@@ -5,7 +5,7 @@ target datalayout = "e-p:32:32-p1:64:64-p2:64:64-p3:32:32-p4:64:64-p5:32:32-p24:
@array = internal addrspace(2) constant [4096 x [32 x float]] zeroinitializer, align 4
; IR-LABEL: @sum_of_array(
-; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [4096 x [32 x float]], [4096 x [32 x float]] addrspace(2)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 1
; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 32
; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 33
@@ -37,7 +37,7 @@ define void @sum_of_array(i32 %x, i32 %y, float addrspace(1)* nocapture %output)
; Some of the indices go over the maximum mubuf offset, so don't split them.
; IR-LABEL: @sum_of_array_over_max_mubuf_offset(
-; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
; IR: getelementptr inbounds float, float addrspace(2)* [[BASE_PTR]], i64 255
; IR: add i32 %x, 256
; IR: getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(2)* @array2, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
@@ -70,7 +70,7 @@ define void @sum_of_array_over_max_mubuf_offset(i32 %x, i32 %y, float addrspace(
; DS instructions have a larger immediate offset, so make sure these are OK.
; IR-LABEL: @sum_of_lds_array_over_max_mubuf_offset(
-; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %{{[a-zA-Z0-9]+}}, i32 %{{[a-zA-Z0-9]+}}
+; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [4096 x [4 x float]], [4096 x [4 x float]] addrspace(3)* @lds_array, i32 0, i32 %{{[a-zA-Z0-9]+}}, i32 %{{[a-zA-Z0-9]+}}
; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i32 255
; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i32 16128
; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i32 16383
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
index e7b3545839c3..07004f90b0fe 100644
--- a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
+++ b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/split-gep-and-gvn.ll
@@ -51,7 +51,9 @@ define void @sum_of_array(i32 %x, i32 %y, float* nocapture %output) {
; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
; IR-LABEL: @sum_of_array(
-; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; TODO: GVN is unable to preserve the "inbounds" keyword on the first GEP. Need
+; some infrastructure changes to enable such optimizations.
+; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1
; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32
; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33
@@ -94,7 +96,7 @@ define void @sum_of_array2(i32 %x, i32 %y, float* nocapture %output) {
; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
; IR-LABEL: @sum_of_array2(
-; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1
; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32
; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33
@@ -144,7 +146,7 @@ define void @sum_of_array3(i32 %x, i32 %y, float* nocapture %output) {
; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
; IR-LABEL: @sum_of_array3(
-; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1
; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32
; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33
@@ -190,7 +192,7 @@ define void @sum_of_array4(i32 %x, i32 %y, float* nocapture %output) {
; PTX: ld.shared.f32 {{%f[0-9]+}}, {{\[}}[[BASE_REG]]+132{{\]}}
; IR-LABEL: @sum_of_array4(
-; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr inbounds [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
+; IR: [[BASE_PTR:%[a-zA-Z0-9]+]] = getelementptr [32 x [32 x float]], [32 x [32 x float]] addrspace(3)* @array, i64 0, i64 %{{[a-zA-Z0-9]+}}, i64 %{{[a-zA-Z0-9]+}}
; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 1
; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 32
; IR: getelementptr inbounds float, float addrspace(3)* [[BASE_PTR]], i64 33
diff --git a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/value-tracking-domtree.ll b/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/value-tracking-domtree.ll
deleted file mode 100644
index 601ca5291353..000000000000
--- a/test/Transforms/SeparateConstOffsetFromGEP/NVPTX/value-tracking-domtree.ll
+++ /dev/null
@@ -1,33 +0,0 @@
-; RUN: opt < %s -separate-const-offset-from-gep -value-tracking-dom-conditions -reassociate-geps-verify-no-dead-code -S | FileCheck %s
-
-target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
-target triple = "nvptx64-unknown-unknown"
-
-; if (i == 4)
-; p = &input[i | 3];
-;
-; =>
-;
-; if (i == 4) {
-; base = &input[i];
-; p = &base[3];
-; }
-;
-; We should treat (i | 3) as (i + 3) because i is guaranteed to be 4, which
-; does not share any set bits with 3.
-define float* @guarded_or(float* %input, i64 %i) {
-; CHECK-LABEL: @guarded_or(
-entry:
- %is4 = icmp eq i64 %i, 4
- br i1 %is4, label %then, label %exit
-
-then:
- %or = or i64 %i, 3
- %p = getelementptr inbounds float, float* %input, i64 %or
-; CHECK: [[base:[^ ]+]] = getelementptr float, float* %input, i64 %i
-; CHECK: getelementptr inbounds float, float* [[base]], i64 3
- ret float* %p
-
-exit:
- ret float* null
-}
diff --git a/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch.ll b/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch.ll
index 806659635e4b..a81e7a6caaa3 100644
--- a/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch.ll
+++ b/test/Transforms/SimplifyCFG/2003-08-17-FoldSwitch.ll
@@ -1,81 +1,104 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
; RUN: opt < %s -simplifycfg -S | FileCheck %s
-; CHECK-NOT: switch
-
; Test normal folding
define i32 @test1() {
- switch i32 5, label %Default [
- i32 0, label %Foo
- i32 1, label %Bar
- i32 2, label %Baz
- i32 5, label %TheDest
- ]
-Default: ; preds = %0
- ret i32 -1
-Foo: ; preds = %0
- ret i32 -2
-Bar: ; preds = %0
- ret i32 -3
-Baz: ; preds = %0
- ret i32 -4
-TheDest: ; preds = %0
- ret i32 1234
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: TheDest:
+; CHECK-NEXT: ret i32 1234
+;
+ switch i32 5, label %Default [
+ i32 0, label %Foo
+ i32 1, label %Bar
+ i32 2, label %Baz
+ i32 5, label %TheDest
+ ]
+Default:
+ ret i32 -1
+Foo:
+ ret i32 -2
+Bar:
+ ret i32 -3
+Baz:
+ ret i32 -4
+TheDest:
+ ret i32 1234
}
; Test folding to default dest
define i32 @test2() {
- switch i32 3, label %Default [
- i32 0, label %Foo
- i32 1, label %Bar
- i32 2, label %Baz
- i32 5, label %TheDest
- ]
-Default: ; preds = %0
- ret i32 1234
-Foo: ; preds = %0
- ret i32 -2
-Bar: ; preds = %0
- ret i32 -5
-Baz: ; preds = %0
- ret i32 -6
-TheDest: ; preds = %0
- ret i32 -8
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: Default:
+; CHECK-NEXT: ret i32 1234
+;
+ switch i32 3, label %Default [
+ i32 0, label %Foo
+ i32 1, label %Bar
+ i32 2, label %Baz
+ i32 5, label %TheDest
+ ]
+Default:
+ ret i32 1234
+Foo:
+ ret i32 -2
+Bar:
+ ret i32 -5
+Baz:
+ ret i32 -6
+TheDest:
+ ret i32 -8
}
; Test folding all to same dest
define i32 @test3(i1 %C) {
- br i1 %C, label %Start, label %TheDest
+; CHECK-LABEL: @test3(
+; CHECK-NEXT: TheDest:
+; CHECK-NEXT: ret i32 1234
+;
+ br i1 %C, label %Start, label %TheDest
Start: ; preds = %0
- switch i32 3, label %TheDest [
- i32 0, label %TheDest
- i32 1, label %TheDest
- i32 2, label %TheDest
- i32 5, label %TheDest
- ]
-TheDest: ; preds = %Start, %Start, %Start, %Start, %Start, %0
- ret i32 1234
+ switch i32 3, label %TheDest [
+ i32 0, label %TheDest
+ i32 1, label %TheDest
+ i32 2, label %TheDest
+ i32 5, label %TheDest
+ ]
+TheDest:
+ ret i32 1234
}
; Test folding switch -> branch
define i32 @test4(i32 %C) {
- switch i32 %C, label %L1 [
- i32 0, label %L2
- ]
-L1: ; preds = %0
- ret i32 0
-L2: ; preds = %0
- ret i32 1
+; CHECK-LABEL: @test4(
+; CHECK-NEXT: L1:
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 %C, 0
+; CHECK-NEXT: [[DOT:%.*]] = select i1 [[COND]], i32 1, i32 0
+; CHECK-NEXT: ret i32 [[DOT]]
+;
+ switch i32 %C, label %L1 [
+ i32 0, label %L2
+ ]
+L1:
+ ret i32 0
+L2:
+ ret i32 1
}
; Can fold into a cond branch!
define i32 @test5(i32 %C) {
- switch i32 %C, label %L1 [
- i32 0, label %L2
- i32 123, label %L1
- ]
-L1: ; preds = %0, %0
- ret i32 0
-L2: ; preds = %0
- ret i32 1
+; CHECK-LABEL: @test5(
+; CHECK-NEXT: L1:
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 %C, 0
+; CHECK-NEXT: [[DOT:%.*]] = select i1 [[COND]], i32 1, i32 0
+; CHECK-NEXT: ret i32 [[DOT]]
+;
+ switch i32 %C, label %L1 [
+ i32 0, label %L2
+ i32 123, label %L1
+ ]
+L1:
+ ret i32 0
+L2:
+ ret i32 1
}
diff --git a/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll b/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll
index 13ccad6a1eeb..21e9bc7b7f4e 100644
--- a/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll
+++ b/test/Transforms/SimplifyCFG/2008-05-16-PHIBlockMerge.ll
@@ -1,6 +1,6 @@
; RUN: opt < %s -simplifycfg -S > %t
; RUN: not grep "^BB.tomerge" %t
-; RUN: grep "^BB.nomerge" %t | count 2
+; RUN: grep "^BB.nomerge" %t | count 4
; ModuleID = '<stdin>'
declare i1 @foo()
@@ -54,24 +54,24 @@ Exit: ; preds = %Succ
ret void
}
-; This function can be merged
+; This function can't be merged (for keeping canonical loop structures)
define void @c() {
entry:
- br label %BB.tomerge
+ br label %BB.nomerge
-BB.tomerge: ; preds = %Common, %entry
+BB.nomerge: ; preds = %Common, %entry
br label %Succ
Succ: ; preds = %Common, %BB.tomerge, %Pre-Exit
; This phi has identical values for Common and (through BB) Common,
; blocks can't be merged
- %b = phi i32 [ 1, %BB.tomerge ], [ 1, %Common ], [ 2, %Pre-Exit ]
+ %b = phi i32 [ 1, %BB.nomerge ], [ 1, %Common ], [ 2, %Pre-Exit ]
%conde = call i1 @foo( ) ; <i1> [#uses=1]
br i1 %conde, label %Common, label %Pre-Exit
Common: ; preds = %Succ
%cond = call i1 @foo( ) ; <i1> [#uses=1]
- br i1 %cond, label %BB.tomerge, label %Succ
+ br i1 %cond, label %BB.nomerge, label %Succ
Pre-Exit: ; preds = %Succ
; This adds a backedge, so the %b phi node gets a third branch and is
@@ -83,25 +83,25 @@ Exit: ; preds = %Pre-Exit
ret void
}
-; This function can be merged
+; This function can't be merged (for keeping canonical loop structures)
define void @d() {
entry:
- br label %BB.tomerge
+ br label %BB.nomerge
-BB.tomerge: ; preds = %Common, %entry
+BB.nomerge: ; preds = %Common, %entry
; This phi has a matching value (0) with below phi (0), so blocks
; can be merged.
%a = phi i32 [ 1, %entry ], [ 0, %Common ] ; <i32> [#uses=1]
br label %Succ
Succ: ; preds = %Common, %BB.tomerge
- %b = phi i32 [ %a, %BB.tomerge ], [ 0, %Common ] ; <i32> [#uses=0]
+ %b = phi i32 [ %a, %BB.nomerge ], [ 0, %Common ] ; <i32> [#uses=0]
%conde = call i1 @foo( ) ; <i1> [#uses=1]
br i1 %conde, label %Common, label %Exit
Common: ; preds = %Succ
%cond = call i1 @foo( ) ; <i1> [#uses=1]
- br i1 %cond, label %BB.tomerge, label %Succ
+ br i1 %cond, label %BB.nomerge, label %Succ
Exit: ; preds = %Succ
ret void
@@ -110,21 +110,21 @@ Exit: ; preds = %Succ
; This function can be merged
define void @e() {
entry:
- br label %BB.tomerge
+ br label %Succ
-BB.tomerge: ; preds = %Use, %entry
+Succ: ; preds = %Use, %entry
; This phi is used somewhere else than Succ, but this should not prevent
; merging this block
%a = phi i32 [ 1, %entry ], [ 0, %Use ] ; <i32> [#uses=1]
- br label %Succ
+ br label %BB.tomerge
-Succ: ; preds = %BB.tomerge
+BB.tomerge: ; preds = %BB.tomerge
%conde = call i1 @foo( ) ; <i1> [#uses=1]
br i1 %conde, label %Use, label %Exit
Use: ; preds = %Succ
%cond = call i1 @bar( i32 %a ) ; <i1> [#uses=1]
- br i1 %cond, label %BB.tomerge, label %Exit
+ br i1 %cond, label %Succ, label %Exit
Exit: ; preds = %Use, %Succ
ret void
diff --git a/test/Transforms/SimplifyCFG/2008-07-13-InfLoopMiscompile.ll b/test/Transforms/SimplifyCFG/2008-07-13-InfLoopMiscompile.ll
index 154677b0747f..dee2e9b3294f 100644
--- a/test/Transforms/SimplifyCFG/2008-07-13-InfLoopMiscompile.ll
+++ b/test/Transforms/SimplifyCFG/2008-07-13-InfLoopMiscompile.ll
@@ -1,4 +1,5 @@
-; RUN: opt < %s -simplifycfg -S | grep "%outval = phi i32 .*mux"
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
; PR2540
; Outval should end up with a select from 0/2, not all constants.
@@ -8,29 +9,47 @@ target triple = "i386-pc-linux-gnu"
@.str = internal constant [4 x i8] c"%d\0A\00" ; <[4 x i8]*> [#uses=1]
define i32 @main() nounwind {
+; CHECK-LABEL: @main(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[L:%.*]] = load i32, i32* @g_37, align 4
+; CHECK-NEXT: [[CMPA:%.*]] = icmp ne i32 [[L]], 0
+; CHECK-NEXT: br i1 [[CMPA]], label %func_1.exit, label %mooseblock
+; CHECK: mooseblock:
+; CHECK-NEXT: [[CMPB:%.*]] = icmp eq i1 [[CMPA]], false
+; CHECK-NEXT: [[BRMERGE:%.*]] = or i1 [[CMPB]], [[CMPA]]
+; CHECK-NEXT: [[DOTMUX:%.*]] = select i1 [[CMPB]], i32 0, i32 2
+; CHECK-NEXT: br i1 [[BRMERGE]], label %func_1.exit, label %infloop
+; CHECK: func_1.exit:
+; CHECK-NEXT: [[OUTVAL:%.*]] = phi i32 [ 1, %entry ], [ [[DOTMUX]], %mooseblock ]
+; CHECK-NEXT: [[POUT:%.*]] = tail call i32 (i8*, ...) @printf
+; CHECK-NEXT: ret i32 0
+; CHECK: infloop:
+; CHECK-NEXT: br label %infloop
+;
entry:
- %l = load i32, i32* @g_37, align 4 ; <i32> [#uses=1]
- %cmpa = icmp ne i32 %l, 0 ; <i1> [#uses=3]
- br i1 %cmpa, label %func_1.exit, label %mooseblock
+ %l = load i32, i32* @g_37, align 4 ; <i32> [#uses=1]
+ %cmpa = icmp ne i32 %l, 0 ; <i1> [#uses=3]
+ br i1 %cmpa, label %func_1.exit, label %mooseblock
mooseblock: ; preds = %entry
- %cmpb = icmp eq i1 %cmpa, false ; <i1> [#uses=2]
- br i1 %cmpb, label %monkeyblock, label %beeblock
+ %cmpb = icmp eq i1 %cmpa, false ; <i1> [#uses=2]
+ br i1 %cmpb, label %monkeyblock, label %beeblock
monkeyblock: ; preds = %monkeyblock, %mooseblock
- br i1 %cmpb, label %cowblock, label %monkeyblock
+ br i1 %cmpb, label %cowblock, label %monkeyblock
beeblock: ; preds = %beeblock, %mooseblock
- br i1 %cmpa, label %cowblock, label %beeblock
+ br i1 %cmpa, label %cowblock, label %beeblock
cowblock: ; preds = %beeblock, %monkeyblock
- %cowval = phi i32 [ 2, %beeblock ], [ 0, %monkeyblock ] ; <i32> [#uses=1]
- br label %func_1.exit
+ %cowval = phi i32 [ 2, %beeblock ], [ 0, %monkeyblock ] ; <i32> [#uses=1]
+ br label %func_1.exit
func_1.exit: ; preds = %cowblock, %entry
- %outval = phi i32 [ %cowval, %cowblock ], [ 1, %entry ] ; <i32> [#uses=1]
- %pout = tail call i32 (i8*, ...) @printf( i8* noalias getelementptr ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %outval ) nounwind ; <i32> [#uses=0]
- ret i32 0
+ %outval = phi i32 [ %cowval, %cowblock ], [ 1, %entry ] ; <i32> [#uses=1]
+ %pout = tail call i32 (i8*, ...) @printf( i8* noalias getelementptr ([4 x i8], [4 x i8]* @.str, i32 0, i32 0), i32 %outval ) nounwind ; <i32> [#uses=0]
+ ret i32 0
}
-declare i32 @printf(i8*, ...) nounwind
+declare i32 @printf(i8*, ...) nounwind
+
diff --git a/test/Transforms/SimplifyCFG/EqualPHIEdgeBlockMerge.ll b/test/Transforms/SimplifyCFG/EqualPHIEdgeBlockMerge.ll
index b07ef970a20a..6e8593755c7d 100644
--- a/test/Transforms/SimplifyCFG/EqualPHIEdgeBlockMerge.ll
+++ b/test/Transforms/SimplifyCFG/EqualPHIEdgeBlockMerge.ll
@@ -5,7 +5,7 @@
; RUN: not grep X: %t
; RUN: not grep 'switch i32[^U]+%U' %t
; RUN: not grep "^BB.tomerge" %t
-; RUN: grep "^BB.nomerge" %t | count 2
+; RUN: grep "^BB.nomerge" %t | count 4
;
; ModuleID = '<stdin>'
@@ -179,24 +179,24 @@ Exit: ; preds = %Succ
ret void
}
-; This function can be merged
+; This function can't be merged (for keeping canonical loop structures)
define void @c() {
entry:
- br label %BB.tomerge
+ br label %BB.nomerge
-BB.tomerge: ; preds = %Common, %entry
+BB.nomerge: ; preds = %Common, %entry
br label %Succ
Succ: ; preds = %Common, %BB.tomerge, %Pre-Exit
; This phi has identical values for Common and (through BB) Common,
; blocks can't be merged
- %b = phi i32 [ 1, %BB.tomerge ], [ 1, %Common ], [ 2, %Pre-Exit ]
+ %b = phi i32 [ 1, %BB.nomerge ], [ 1, %Common ], [ 2, %Pre-Exit ]
%conde = call i1 @foo( ) ; <i1> [#uses=1]
br i1 %conde, label %Common, label %Pre-Exit
Common: ; preds = %Succ
%cond = call i1 @foo( ) ; <i1> [#uses=1]
- br i1 %cond, label %BB.tomerge, label %Succ
+ br i1 %cond, label %BB.nomerge, label %Succ
Pre-Exit: ; preds = %Succ
; This adds a backedge, so the %b phi node gets a third branch and is
@@ -208,25 +208,25 @@ Exit: ; preds = %Pre-Exit
ret void
}
-; This function can be merged
+; This function can't be merged (for keeping canonical loop structures)
define void @d() {
entry:
- br label %BB.tomerge
+ br label %BB.nomerge
-BB.tomerge: ; preds = %Common, %entry
+BB.nomerge: ; preds = %Common, %entry
; This phi has a matching value (0) with below phi (0), so blocks
; can be merged.
%a = phi i32 [ 1, %entry ], [ 0, %Common ] ; <i32> [#uses=1]
br label %Succ
Succ: ; preds = %Common, %BB.tomerge
- %b = phi i32 [ %a, %BB.tomerge ], [ 0, %Common ] ; <i32> [#uses=0]
+ %b = phi i32 [ %a, %BB.nomerge ], [ 0, %Common ] ; <i32> [#uses=0]
%conde = call i1 @foo( ) ; <i1> [#uses=1]
br i1 %conde, label %Common, label %Exit
Common: ; preds = %Succ
%cond = call i1 @foo( ) ; <i1> [#uses=1]
- br i1 %cond, label %BB.tomerge, label %Succ
+ br i1 %cond, label %BB.nomerge, label %Succ
Exit: ; preds = %Succ
ret void
@@ -235,21 +235,21 @@ Exit: ; preds = %Succ
; This function can be merged
define void @e() {
entry:
- br label %BB.tomerge
+ br label %Succ
-BB.tomerge: ; preds = %Use, %entry
+Succ: ; preds = %Use, %entry
; This phi is used somewhere else than Succ, but this should not prevent
; merging this block
%a = phi i32 [ 1, %entry ], [ 0, %Use ] ; <i32> [#uses=1]
- br label %Succ
+ br label %BB.tomerge
-Succ: ; preds = %BB.tomerge
+BB.tomerge: ; preds = %Succ
%conde = call i1 @foo( ) ; <i1> [#uses=1]
br i1 %conde, label %Use, label %Exit
Use: ; preds = %Succ
%cond = call i1 @bar( i32 %a ) ; <i1> [#uses=1]
- br i1 %cond, label %BB.tomerge, label %Exit
+ br i1 %cond, label %Succ, label %Exit
Exit: ; preds = %Use, %Succ
ret void
diff --git a/test/Transforms/SimplifyCFG/InfLoop.ll b/test/Transforms/SimplifyCFG/InfLoop.ll
new file mode 100644
index 000000000000..a56076e42ce1
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/InfLoop.ll
@@ -0,0 +1,101 @@
+; RUN: opt < %s -simplifycfg -disable-output
+; END.
+
+target datalayout = "e-m:o-p:32:32-f64:32:64-v64:32:64-v128:32:128-a:0:32-n32-S32"
+target triple = "thumbv7-apple-ios9.0.0"
+
+%struct.anon = type { %struct.anon.0, i32, i32, %union.T1 }
+%struct.anon.0 = type { i32, [256 x i32], [256 x i8] }
+%union.T1 = type { %struct.F}
+%struct.F = type { i32 }
+
+@U = internal global %struct.anon zeroinitializer, align 4
+
+define void @main() {
+entry:
+ %0 = load i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @U, i32 0, i32 2), align 4
+ %cmp.i = icmp eq i32 %0, -1
+ br i1 %cmp.i, label %if.then, label %if.end
+
+if.then: ; preds = %entry
+ br label %if.end
+
+if.end: ; preds = %entry, %if.then
+ %1 = load i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @U, i32 0, i32 2), align 4
+ %bf.load = load i32, i32* getelementptr inbounds (%struct.anon, %struct.anon* @U, i32 0, i32 3, i32 0, i32 0), align 4
+ %cmp = icmp slt i32 %0, 0
+ br i1 %cmp, label %if.end7, label %cond.false
+
+cond.false: ; preds = %if.end
+ %add = and i32 %bf.load, 30
+ %shl = add nuw nsw i32 %add, 2
+ br label %if.end7
+
+if.end7: ; preds = %if.end, %cond.false
+ %2 = icmp eq i32 %0, 1
+ br i1 %2, label %if.then9, label %if.else10
+
+if.then9: ; preds = %if.end7
+ br label %if.end29
+
+if.else10: ; preds = %if.end7
+ %cmp11 = icmp ugt i32 %0, 13
+ br i1 %cmp11, label %if.then12, label %if.else14
+
+if.then12: ; preds = %if.else10
+ br label %if.end26
+
+if.else14: ; preds = %if.else10
+ %tobool = icmp eq i1 %2, 0
+ br i1 %tobool, label %lor.rhs, label %if.then18
+
+lor.rhs: ; preds = %if.else14
+ %tobool.not.i = icmp eq i1 %2, 0
+ br i1 %tobool.not.i, label %if.else21, label %if.end.i54
+
+if.end.i54: ; preds = %lor.rhs
+ br label %for.cond.i
+
+for.cond.i: ; preds = %if.end6.i, %if.end.i54
+ %ix.0.i = phi i32 [ 0, %if.end.i54 ], [ %inc.i55, %if.end6.i ]
+ %ret.0.off0.i = phi i1 [ false, %if.end.i54 ], [ %.ret.0.off0.i, %if.end6.i ]
+ %cmp2.i = icmp ult i32 %ix.0.i, 2
+ br i1 %cmp2.i, label %for.body.i, label %TmpSimpleNeedExt.exit
+
+for.body.i: ; preds = %for.cond.i
+ %arrayidx.i = getelementptr inbounds %struct.anon, %struct.anon* @U, i32 0, i32 0, i32 2, i32 %ix.0.i
+ %elt = load i8, i8* %arrayidx.i, align 1
+ %cmp3.i = icmp sgt i8 %elt, 7
+ br i1 %cmp3.i, label %if.else21, label %if.end6.i
+
+if.end6.i: ; preds = %for.body.i
+ %cmp10.i = icmp ugt i8 %elt, 59
+ %.ret.0.off0.i = or i1 %ret.0.off0.i, %cmp10.i
+ %inc.i55 = add i32 %ix.0.i, 1
+ br label %for.cond.i
+
+TmpSimpleNeedExt.exit: ; preds = %for.body.i
+ br i1 %ret.0.off0.i, label %if.then18, label %if.else21
+
+if.then18: ; preds = %if.else14, %TmpSimpleNeedExt.exit
+ br label %if.end26
+
+if.else21: ; preds = %for.body.i, %lor.rhs, %TmpSimpleNeedExt.exit
+ br label %if.end26
+
+if.end26: ; preds = %if.then18, %if.else21, %if.then12
+ %cmp.i51 = icmp slt i32 %0, 7
+ br i1 %cmp.i51, label %if.then.i, label %if.end.i
+
+if.then.i: ; preds = %if.end26
+ br label %if.end.i
+
+if.end.i: ; preds = %if.then.i, %if.end26
+ br label %if.end29
+
+if.then2.i: ; preds = %if.end.i
+ br label %if.end29
+
+if.end29: ; preds = %if.end.i, %if.then2.i, %if.then9
+ ret void
+}
diff --git a/test/Transforms/SimplifyCFG/PR16069.ll b/test/Transforms/SimplifyCFG/PR16069.ll
index 0b3d67794513..9048b5680c75 100644
--- a/test/Transforms/SimplifyCFG/PR16069.ll
+++ b/test/Transforms/SimplifyCFG/PR16069.ll
@@ -1,9 +1,13 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
; RUN: opt < %s -simplifycfg -S | FileCheck %s
@b = extern_weak global i32
define i32 @foo(i1 %y) {
-; CHECK: define i32 @foo(i1 %y) {
+; CHECK-LABEL: @foo(
+; CHECK: [[COND_I:%.*]] = phi i32 [ srem (i32 1, i32 zext (i1 icmp eq (i32* @b, i32* null) to i32)), %bb2 ], [ 0, %0 ]
+; CHECK-NEXT: ret i32 [[COND_I]]
+;
br i1 %y, label %bb1, label %bb2
bb1:
br label %bb3
@@ -11,18 +15,19 @@ bb2:
br label %bb3
bb3:
%cond.i = phi i32 [ 0, %bb1 ], [ srem (i32 1, i32 zext (i1 icmp eq (i32* @b, i32* null) to i32)), %bb2 ]
-; CHECK: phi i32 {{.*}} srem (i32 1, i32 zext (i1 icmp eq (i32* @b, i32* null) to i32)), %bb2
ret i32 %cond.i
}
define i32 @foo2(i1 %x) {
-; CHECK: define i32 @foo2(i1 %x) {
+; CHECK-LABEL: @foo2(
+; CHECK: [[COND:%.*]] = phi i32 [ 0, %bb1 ], [ srem (i32 1, i32 zext (i1 icmp eq (i32* @b, i32* null) to i32)), %bb0 ]
+; CHECK-NEXT: ret i32 [[COND]]
+;
bb0:
br i1 %x, label %bb1, label %bb2
bb1:
br label %bb2
bb2:
%cond = phi i32 [ 0, %bb1 ], [ srem (i32 1, i32 zext (i1 icmp eq (i32* @b, i32* null) to i32)), %bb0 ]
-; CHECK: %cond = phi i32 [ 0, %bb1 ], [ srem (i32 1, i32 zext (i1 icmp eq (i32* @b, i32* null) to i32)), %bb0 ]
ret i32 %cond
}
diff --git a/test/Transforms/SimplifyCFG/PR27615-simplify-cond-br.ll b/test/Transforms/SimplifyCFG/PR27615-simplify-cond-br.ll
new file mode 100644
index 000000000000..872444d01655
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/PR27615-simplify-cond-br.ll
@@ -0,0 +1,68 @@
+; RUN: opt -S -simplifycfg -strip-debug < %s | FileCheck %s
+; RUN: opt -S -simplifycfg < %s | FileCheck %s
+
+; Test case for BUG-27615
+; Test that simplify cond branch produce same result for debug and non-debug builds
+; CHECK: select i1 %or.cond, i32 -1, i32 5
+; CHECK-NOT: bb1:
+
+; ModuleID = './csmith107.i.debug.ll'
+source_filename = "./csmith107.i.debug.ll"
+
+@a = global i16 0
+@b = global i32 0
+@c = global i16* null
+
+
+; Function Attrs: nounwind
+define i16 @fn1() #3 !dbg !15 {
+bb2:
+ store i32 -1, i32* @b, align 1
+ %_tmp1.pre = load i16, i16* @a, align 1, !dbg !19
+ %_tmp2.pre = load i16*, i16** @c, align 1
+ tail call void @llvm.dbg.value(metadata i16 6, i64 0, metadata !22, metadata !23), !dbg !24
+ tail call void @llvm.dbg.value(metadata i16 %_tmp1.pre, i64 0, metadata !25, metadata !23), !dbg !19
+ %_tmp3 = load i16, i16* %_tmp2.pre, align 1
+ %_tmp4 = icmp ne i16 %_tmp3, 0
+ %_tmp6 = icmp ne i16 %_tmp1.pre, 0
+ %or.cond = and i1 %_tmp6, %_tmp4
+ br i1 %or.cond, label %bb5, label %bb1
+
+bb1: ; preds = %bb2
+ store i32 5, i32* @b, align 1
+ br label %bb5
+
+bb5: ; preds = %bb1, %bb2
+ ret i16 0
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.value(metadata, i64, metadata, metadata) #4
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!12, !13}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C, file: !1, producer: "FlexC Compiler v6.36 (LLVM)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !2, retainedTypes: !2, globals: !3)
+!1 = !DIFile(filename: "csmith107.i.c", directory: "/tmp")
+!2 = !{}
+!3 = !{!4, !6, !10}
+!4 = !DIGlobalVariable(name: "a", scope: null, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true, variable: i16* @a)
+!5 = !DIBasicType(name: "int", size: 16, align: 16, encoding: DW_ATE_signed)
+!6 = !DIGlobalVariable(name: "b", scope: null, file: !1, line: 3, type: !7, isLocal: false, isDefinition: true, variable: i32* @b)
+!7 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint32_t", file: !1, line: 1, baseType: !8)
+!8 = !DIDerivedType(tag: DW_TAG_typedef, name: "__u32_t", file: !1, baseType: !9)
+!9 = !DIBasicType(name: "unsigned long", size: 32, align: 16, encoding: DW_ATE_unsigned)
+!10 = !DIGlobalVariable(name: "c", scope: null, file: !1, line: 4, type: !11, isLocal: false, isDefinition: true, variable: i16** @c)
+!11 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !5, size: 16, align: 16)
+!12 = !{i32 2, !"Dwarf Version", i32 4}
+!13 = !{i32 2, !"Debug Info Version", i32 3}
+!15 = distinct !DISubprogram(name: "fn1", scope: !1, file: !1, line: 5, type: !16, isLocal: false, isDefinition: true, scopeLine: 5, isOptimized: false, unit: !0, variables: !2)
+!16 = !DISubroutineType(types: !17)
+!17 = !{!5}
+!19 = !DILocation(line: 8, column: 16, scope: !20)
+!20 = !DILexicalBlock(scope: !15, file: !1, line: 7, column: 29)
+!22 = !DILocalVariable(name: "d", scope: !20, line: 8, type: !5)
+!23 = !DIExpression()
+!24 = !DILocation(line: 8, column: 9, scope: !20)
+!25 = !DILocalVariable(name: "e", scope: !20, line: 8, type: !5)
+
diff --git a/test/Transforms/SimplifyCFG/PhiBlockMerge.ll b/test/Transforms/SimplifyCFG/PhiBlockMerge.ll
index 555082921b96..85b987060181 100644
--- a/test/Transforms/SimplifyCFG/PhiBlockMerge.ll
+++ b/test/Transforms/SimplifyCFG/PhiBlockMerge.ll
@@ -1,24 +1,29 @@
+; NOTE: Assertions have been autogenerated by update_test_checks.py
; Test merging of blocks that only have PHI nodes in them
;
; RUN: opt < %s -simplifycfg -S | FileCheck %s
;
define i32 @test(i1 %a, i1 %b) {
- br i1 %a, label %M, label %O
+; CHECK-LABEL: @test(
+; CHECK: M:
+; CHECK-NEXT: [[DOT:%.*]] = select i1 %b, i32 0, i32 1
+; CHECK-NEXT: [[W:%.*]] = select i1 %a, i32 2, i32 [[DOT]]
+; CHECK-NEXT: [[R:%.*]] = add i32 [[W]], 1
+; CHECK-NEXT: ret i32 [[R]]
+;
+ br i1 %a, label %M, label %O
O: ; preds = %0
-; CHECK: select i1 %b, i32 0, i32 1
-; CHECK-NOT: phi
- br i1 %b, label %N, label %Q
+ br i1 %b, label %N, label %Q
Q: ; preds = %O
- br label %N
+ br label %N
N: ; preds = %Q, %O
- ; This block should be foldable into M
- %Wp = phi i32 [ 0, %O ], [ 1, %Q ] ; <i32> [#uses=1]
- br label %M
+ ; This block should be foldable into M
+ %Wp = phi i32 [ 0, %O ], [ 1, %Q ] ; <i32> [#uses=1]
+ br label %M
M: ; preds = %N, %0
- %W = phi i32 [ %Wp, %N ], [ 2, %0 ] ; <i32> [#uses=1]
- %R = add i32 %W, 1 ; <i32> [#uses=1]
- ret i32 %R
-; CHECK: ret
+ %W = phi i32 [ %Wp, %N ], [ 2, %0 ] ; <i32> [#uses=1]
+ %R = add i32 %W, 1 ; <i32> [#uses=1]
+ ret i32 %R
}
diff --git a/test/Transforms/SimplifyCFG/PhiEliminate2.ll b/test/Transforms/SimplifyCFG/PhiEliminate2.ll
index 0b3893d520db..0ca65286da35 100644
--- a/test/Transforms/SimplifyCFG/PhiEliminate2.ll
+++ b/test/Transforms/SimplifyCFG/PhiEliminate2.ll
@@ -1,17 +1,34 @@
-; RUN: opt < %s -simplifycfg -S | not grep br
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
-define i32 @test(i1 %C, i32 %V1, i32 %V2, i16 %V3) {
+; Use a select to make this a single BB.
+; Also, make sure the profile metadata is propagated to the select (PR26636).
+
+define i32 @FoldTwoEntryPHINode(i1 %C, i32 %V1, i32 %V2, i16 %V3) {
entry:
- br i1 %C, label %then, label %else
-then: ; preds = %entry
- %V4 = or i32 %V2, %V1 ; <i32> [#uses=1]
+ br i1 %C, label %then, label %else, !prof !0, !unpredictable !1
+then:
+ %V4 = or i32 %V2, %V1
br label %Cont
-else: ; preds = %entry
- %V5 = sext i16 %V3 to i32 ; <i32> [#uses=1]
+else:
+ %V5 = sext i16 %V3 to i32
br label %Cont
-Cont: ; preds = %then, %else
- %V6 = phi i32 [ %V5, %else ], [ %V4, %then ] ; <i32> [#uses=0]
- call i32 @test( i1 false, i32 0, i32 0, i16 0 ) ; <i32>:0 [#uses=0]
+Cont:
+ %V6 = phi i32 [ %V5, %else ], [ %V4, %then ]
+ call i32 @FoldTwoEntryPHINode( i1 false, i32 0, i32 0, i16 0 )
ret i32 %V1
+
+; CHECK-LABEL: @FoldTwoEntryPHINode(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: %V5 = sext i16 %V3 to i32
+; CHECK-NEXT: %V4 = or i32 %V2, %V1
+; CHECK-NEXT: %V6 = select i1 %C, i32 %V4, i32 %V5, !prof !0, !unpredictable !1
+; CHECK-NEXT: %0 = call i32 @FoldTwoEntryPHINode(i1 false, i32 0, i32 0, i16 0)
+; CHECK-NEXT: ret i32 %V1
}
+!0 = !{!"branch_weights", i32 3, i32 5}
+!1 = !{}
+
+; CHECK: !0 = !{!"branch_weights", i32 3, i32 5}
+; CHECK: !1 = !{}
+
diff --git a/test/Transforms/SimplifyCFG/UnreachableEliminate.ll b/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
index 87872a6a8a10..be612b288b77 100644
--- a/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
+++ b/test/Transforms/SimplifyCFG/UnreachableEliminate.ll
@@ -96,3 +96,34 @@ bb2:
store i8 2, i8* %ptr.2, align 8
ret void
}
+
+define i32 @test7(i1 %X) {
+entry:
+ br i1 %X, label %if, label %else
+
+if:
+ call void undef()
+ br label %else
+
+else:
+ %phi = phi i32 [ 0, %entry ], [ 1, %if ]
+ ret i32 %phi
+}
+; CHECK-LABEL: define i32 @test7(
+; CHECK-NOT: call
+; CHECK: ret i32 0
+
+define void @test8(i1 %X, void ()* %Y) {
+entry:
+ br i1 %X, label %if, label %else
+
+if:
+ br label %else
+
+else:
+ %phi = phi void ()* [ %Y, %entry ], [ null, %if ]
+ call void %phi()
+ ret void
+}
+; CHECK-LABEL: define void @test8(
+; CHECK: call void %Y(
diff --git a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
index 6953cf9c8b33..bae8c1dc5a4b 100644
--- a/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
+++ b/test/Transforms/SimplifyCFG/X86/switch_to_lookup_table.ll
@@ -1306,8 +1306,8 @@ l6:
; Speculation depth must be limited to avoid a zero-cost instruction cycle.
; CHECK-LABEL: @PR26308(
-; CHECK: cleanup4:
-; CHECK-NEXT: br label %cleanup4
+; CHECK: while.body:
+; CHECK-NEXT: br label %while.body
define i32 @PR26308(i1 %B, i64 %load) {
entry:
diff --git a/test/Transforms/SimplifyCFG/attr-convergent.ll b/test/Transforms/SimplifyCFG/attr-convergent.ll
new file mode 100644
index 000000000000..a5f363d055a6
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/attr-convergent.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
+
+; Checks that the SimplifyCFG pass won't duplicate a call to a function marked
+; convergent.
+;
+; CHECK: call void @barrier
+; CHECK-NOT: call void @barrier
+define void @check(i1 %cond, i32* %out) {
+entry:
+ br i1 %cond, label %if.then, label %if.end
+
+if.then:
+ store i32 5, i32* %out
+ br label %if.end
+
+if.end:
+ %x = phi i1 [ true, %entry ], [ false, %if.then ]
+ call void @barrier()
+ br i1 %x, label %cond.end, label %cond.false
+
+cond.false:
+ br label %cond.end
+
+cond.end:
+ ret void
+}
+
+declare void @barrier() convergent
diff --git a/test/Transforms/SimplifyCFG/basictest.ll b/test/Transforms/SimplifyCFG/basictest.ll
index d4a9c81e506d..686a535a32d9 100644
--- a/test/Transforms/SimplifyCFG/basictest.ll
+++ b/test/Transforms/SimplifyCFG/basictest.ll
@@ -25,6 +25,54 @@ define void @test3(i1 %T) {
; CHECK-NEXT: ret void
}
+; Folding branch to a common destination.
+; CHECK-LABEL: @test4_fold
+; CHECK: %cmp1 = icmp eq i32 %a, %b
+; CHECK: %cmp2 = icmp ugt i32 %a, 0
+; CHECK: %or.cond = and i1 %cmp1, %cmp2
+; CHECK: br i1 %or.cond, label %else, label %untaken
+; CHECK-NOT: taken:
+; CHECK: ret void
+define void @test4_fold(i32 %a, i32 %b) {
+ %cmp1 = icmp eq i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp ugt i32 %a, 0
+ br i1 %cmp2, label %else, label %untaken
+
+else:
+ call void @foo()
+ ret void
+
+untaken:
+ ret void
+}
+
+; Prefer a simplification based on a dominating condition rather than folding a
+; branch to a common destination.
+; CHECK-LABEL: @test4
+; CHECK-NOT: br
+; CHECK-NOT: br
+; CHECK-NOT: call
+; CHECK: ret void
+define void @test4_no_fold(i32 %a, i32 %b) {
+ %cmp1 = icmp eq i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp ugt i32 %a, %b
+ br i1 %cmp2, label %else, label %untaken
+
+else:
+ call void @foo()
+ ret void
+
+untaken:
+ ret void
+}
+
+declare void @foo()
; PR5795
define void @test5(i32 %A) {
@@ -75,10 +123,10 @@ declare i8 @test6g(i8*)
!0 = !{!1, !1, i64 0}
!1 = !{!"foo"}
!2 = !{i8 0, i8 2}
-!3 = distinct !DICompileUnit(language: DW_LANG_C99, file: !7, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: 1, enums: !4, subprograms: !4, globals: !4)
+!3 = distinct !DICompileUnit(language: DW_LANG_C99, file: !7, producer: "clang", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !4)
!4 = !{}
!5 = !DILocation(line: 23, scope: !6)
-!6 = distinct !DISubprogram(name: "foo", scope: !3, file: !7, line: 1, type: !DISubroutineType(types: !4), isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, variables: !4)
+!6 = distinct !DISubprogram(name: "foo", scope: !3, file: !7, line: 1, type: !DISubroutineType(types: !4), isLocal: false, isDefinition: true, scopeLine: 1, flags: DIFlagPrototyped, isOptimized: false, unit: !3, variables: !4)
!7 = !DIFile(filename: "foo.c", directory: "/")
!8 = !{i32 2, !"Dwarf Version", i32 2}
!9 = !{i32 2, !"Debug Info Version", i32 3}
diff --git a/test/Transforms/SimplifyCFG/branch-fold-dbg.ll b/test/Transforms/SimplifyCFG/branch-fold-dbg.ll
index 34871063bbcc..3c01e71f54a5 100644
--- a/test/Transforms/SimplifyCFG/branch-fold-dbg.ll
+++ b/test/Transforms/SimplifyCFG/branch-fold-dbg.ll
@@ -39,11 +39,11 @@ BB5: ; preds = %BB3, %BB2, %BB1, %E
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
-!llvm.dbg.sp = !{!0}
+!llvm.dbg.cu = !{!2}
-!0 = distinct !DISubprogram(name: "foo", line: 231, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !15, scope: !1, type: !3)
+!0 = distinct !DISubprogram(name: "foo", line: 231, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !2, file: !15, scope: !1, type: !3)
!1 = !DIFile(filename: "a.c", directory: "/private/tmp")
-!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang (trunk 129006)", isOptimized: true, emissionKind: 0, file: !15, enums: !4, retainedTypes: !4)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang (trunk 129006)", isOptimized: true, emissionKind: FullDebug, file: !15, enums: !4, retainedTypes: !4)
!3 = !DISubroutineType(types: !4)
!4 = !{null}
!5 = !DILocation(line: 131, column: 2, scope: !0)
diff --git a/test/Transforms/SimplifyCFG/combine-parallel-mem-md.ll b/test/Transforms/SimplifyCFG/combine-parallel-mem-md.ll
new file mode 100644
index 000000000000..7afde1ff8d9a
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/combine-parallel-mem-md.ll
@@ -0,0 +1,55 @@
+; RUN: opt -simplifycfg -S < %s | FileCheck %s
+target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+; Function Attrs: norecurse nounwind uwtable
+define void @Test(i32* nocapture %res, i32* nocapture readnone %c, i32* nocapture readonly %d, i32* nocapture readonly %p) #0 {
+entry:
+ br label %for.body
+
+; CHECK-LABEL: @Test
+; CHECK: load i32, i32* {{.*}}, align 4, !llvm.mem.parallel_loop_access !0
+; CHECK: load i32, i32* {{.*}}, align 4, !llvm.mem.parallel_loop_access !0
+; CHECK: store i32 {{.*}}, align 4, !llvm.mem.parallel_loop_access !0
+; CHECK-NOT: load
+; CHECK-NOT: store
+
+for.body: ; preds = %cond.end, %entry
+ %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %cond.end ]
+ %arrayidx = getelementptr inbounds i32, i32* %p, i64 %indvars.iv
+ %0 = load i32, i32* %arrayidx, align 4, !llvm.mem.parallel_loop_access !0
+ %cmp1 = icmp eq i32 %0, 0
+ br i1 %cmp1, label %cond.true, label %cond.false
+
+cond.false: ; preds = %for.body
+ %arrayidx3 = getelementptr inbounds i32, i32* %res, i64 %indvars.iv
+ %v = load i32, i32* %arrayidx3, align 4, !llvm.mem.parallel_loop_access !0
+ %arrayidx7 = getelementptr inbounds i32, i32* %d, i64 %indvars.iv
+ %1 = load i32, i32* %arrayidx7, align 4, !llvm.mem.parallel_loop_access !0
+ %add = add nsw i32 %1, %v
+ br label %cond.end
+
+cond.true: ; preds = %for.body
+ %arrayidx4 = getelementptr inbounds i32, i32* %res, i64 %indvars.iv
+ %w = load i32, i32* %arrayidx4, align 4, !llvm.mem.parallel_loop_access !0
+ %arrayidx8 = getelementptr inbounds i32, i32* %d, i64 %indvars.iv
+ %2 = load i32, i32* %arrayidx8, align 4, !llvm.mem.parallel_loop_access !0
+ %add2 = add nsw i32 %2, %w
+ br label %cond.end
+
+cond.end: ; preds = %for.body, %cond.false
+ %cond = phi i32 [ %add, %cond.false ], [ %add2, %cond.true ]
+ %arrayidx9 = getelementptr inbounds i32, i32* %res, i64 %indvars.iv
+ store i32 %cond, i32* %arrayidx9, align 4, !llvm.mem.parallel_loop_access !0
+ %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
+ %exitcond = icmp eq i64 %indvars.iv.next, 16
+ br i1 %exitcond, label %for.end, label %for.body, !llvm.loop !0
+
+for.end: ; preds = %cond.end
+ ret void
+}
+
+attributes #0 = { norecurse nounwind uwtable }
+
+!0 = distinct !{!0, !1}
+!1 = !{!"llvm.loop.vectorize.enable", i1 true}
diff --git a/test/Transforms/SimplifyCFG/empty-cleanuppad.ll b/test/Transforms/SimplifyCFG/empty-cleanuppad.ll
index 57b362889955..9f657a81a05b 100644
--- a/test/Transforms/SimplifyCFG/empty-cleanuppad.ll
+++ b/test/Transforms/SimplifyCFG/empty-cleanuppad.ll
@@ -404,6 +404,59 @@ catch.cont: ; preds = %catch
return: ; preds = %invoke.cont, %catch.cont
ret void
}
+; CHECK-LABEL: define i32 @f9()
+; CHECK: entry:
+; CHECK: invoke void @"\01??1S2@@QEAA@XZ"(
+; CHECK-NOT: cleanuppad
+; CHECK: catch.dispatch:
+; CHECK: }
+define i32 @f9() personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ %s = alloca i8, align 1
+ call void @llvm.lifetime.start(i64 1, i8* nonnull %s)
+ %bc = bitcast i8* %s to %struct.S2*
+ invoke void @"\01??1S2@@QEAA@XZ"(%struct.S2* %bc)
+ to label %try.cont unwind label %ehcleanup
+
+ehcleanup:
+ %cleanup.pad = cleanuppad within none []
+ call void @llvm.lifetime.end(i64 1, i8* nonnull %s)
+ cleanupret from %cleanup.pad unwind label %catch.dispatch
+
+catch.dispatch:
+ %catch.switch = catchswitch within none [label %catch] unwind to caller
+
+catch:
+ %catch.pad = catchpad within %catch.switch [i8* null, i32 0, i8* null]
+ catchret from %catch.pad to label %try.cont
+
+try.cont:
+ ret i32 0
+}
+
+; CHECK-LABEL: define void @f10(
+define void @f10(i32 %V) personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ invoke void @g()
+ to label %unreachable unwind label %cleanup
+; CHECK: call void @g()
+; CHECK-NEXT: unreachable
+
+unreachable:
+ unreachable
+
+cleanup:
+ %cp = cleanuppad within none []
+ switch i32 %V, label %cleanupret1 [
+ i32 0, label %cleanupret2
+ ]
+
+cleanupret1:
+ cleanupret from %cp unwind to caller
+
+cleanupret2:
+ cleanupret from %cp unwind to caller
+}
%struct.S = type { i8 }
%struct.S2 = type { i8 }
@@ -413,3 +466,5 @@ declare void @use_x(i32 %x)
declare i32 @__CxxFrameHandler3(...)
+declare void @llvm.lifetime.start(i64, i8* nocapture)
+declare void @llvm.lifetime.end(i64, i8* nocapture)
diff --git a/test/Transforms/SimplifyCFG/guards.ll b/test/Transforms/SimplifyCFG/guards.ll
new file mode 100644
index 000000000000..71144d7ac24f
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/guards.ll
@@ -0,0 +1,86 @@
+; RUN: opt -S -simplifycfg < %s | FileCheck %s
+
+declare void @llvm.experimental.guard(i1, ...)
+
+define i32 @f_0(i1 %c) {
+; CHECK-LABEL: @f_0(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+; CHECK-NEXT: unreachable
+entry:
+ call void(i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+ ret i32 10
+}
+
+define i32 @f_1(i1 %c) {
+; Demonstrate that we (intentionally) do not simplify a guard on undef
+
+; CHECK-LABEL: @f_1(
+; CHECK: ret i32 10
+; CHECK: ret i32 20
+
+entry:
+ br i1 %c, label %true, label %false
+
+true:
+ call void(i1, ...) @llvm.experimental.guard(i1 undef) [ "deopt"() ]
+ ret i32 10
+
+false:
+ ret i32 20
+}
+
+define i32 @f_2(i1 %c, i32* %buf) {
+; CHECK-LABEL: @f_2(
+entry:
+ br i1 %c, label %guard_block, label %merge_block
+
+guard_block:
+ call void(i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+ %val = load i32, i32* %buf
+ br label %merge_block
+
+merge_block:
+ %to.return = phi i32 [ %val, %guard_block ], [ 50, %entry ]
+ ret i32 %to.return
+; CHECK: guard_block:
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+; CHECK-NEXT: unreachable
+
+; CHECK: merge_block:
+; CHECK-NEXT: ret i32 50
+}
+
+define i32 @f_3(i1* %c, i32* %buf) {
+; CHECK-LABEL: @f_3(
+entry:
+ %c0 = load volatile i1, i1* %c
+ br i1 %c0, label %guard_block, label %merge_block
+
+guard_block:
+ call void(i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+ %val = load i32, i32* %buf
+ %c2 = load volatile i1, i1* %c
+ br i1 %c2, label %left, label %right
+
+merge_block:
+ %c1 = load volatile i1, i1* %c
+ br i1 %c1, label %left, label %right
+
+left:
+ %val.left = phi i32 [ %val, %guard_block ], [ 50, %merge_block ]
+ ret i32 %val.left
+
+right:
+ %val.right = phi i32 [ %val, %guard_block ], [ 100, %merge_block ]
+ ret i32 %val.right
+
+; CHECK: guard_block:
+; CHECK-NEXT: call void (i1, ...) @llvm.experimental.guard(i1 false) [ "deopt"() ]
+; CHECK-NEXT: unreachable
+
+; CHECK: merge_block:
+; CHECK-NEXT: %c1 = load volatile i1, i1* %c
+; CHECK-NEXT: [[VAL:%[^ ]]] = select i1 %c1, i32 50, i32 100
+; CHECK-NEXT: ret i32 [[VAL]]
+}
diff --git a/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll b/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll
index 887373a2d3db..e91fa731c59d 100644
--- a/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll
+++ b/test/Transforms/SimplifyCFG/hoist-dbgvalue.ll
@@ -30,17 +30,16 @@ declare i32 @bar(...)
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
!llvm.module.flags = !{!21}
-!llvm.dbg.sp = !{!0}
+!llvm.dbg.cu = !{!2}
-!0 = distinct !DISubprogram(name: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, file: !20, scope: !1, type: !3)
+!0 = distinct !DISubprogram(name: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, flags: DIFlagPrototyped, isOptimized: false, unit: !2, file: !20, scope: !1, type: !3)
!1 = !DIFile(filename: "b.c", directory: "/private/tmp")
-!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang", isOptimized: true, emissionKind: 0, file: !20, enums: !8, retainedTypes: !8)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang", isOptimized: true, emissionKind: FullDebug, file: !20)
!3 = !DISubroutineType(types: !4)
!4 = !{!5}
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
!6 = !DILocalVariable(name: "i", line: 2, arg: 1, scope: !0, file: !1, type: !5)
!7 = !DILocation(line: 2, column: 13, scope: !0)
-!8 = !{i32 0}
!9 = !DILocalVariable(name: "k", line: 3, scope: !10, file: !1, type: !5)
!10 = distinct !DILexicalBlock(line: 2, column: 16, file: !20, scope: !0)
!11 = !DILocation(line: 3, column: 12, scope: !10)
diff --git a/test/Transforms/SimplifyCFG/implied-cond-matching-false-dest.ll b/test/Transforms/SimplifyCFG/implied-cond-matching-false-dest.ll
new file mode 100644
index 000000000000..1d29813ecfa2
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/implied-cond-matching-false-dest.ll
@@ -0,0 +1,339 @@
+; RUN: opt %s -S -simplifycfg | FileCheck %s
+
+declare void @is(i1)
+
+; If A == B is false then A == B is implied false.
+; CHECK-LABEL: @test_eq_eq
+; CHECK-NOT: call void @is(i1 true)
+; CHECK: call void @is(i1 false)
+define void @test_eq_eq(i32 %a, i32 %b) {
+ %cmp1 = icmp eq i32 %a, %b
+ br i1 %cmp1, label %untaken, label %taken
+
+taken:
+ %cmp2 = icmp eq i32 %a, %b
+ br i1 %cmp2, label %istrue, label %isfalse
+
+istrue:
+ call void @is(i1 true)
+ ret void
+
+isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; If A == B is false then A != B is implied true.
+; CHECK-LABEL: @test_eq_ne
+; CHECK: call void @is(i1 true)
+; CHECK-NOT: call void @is(i1 false)
+define void @test_eq_ne(i32 %a, i32 %b) {
+ %cmp1 = icmp eq i32 %a, %b
+ br i1 %cmp1, label %untaken, label %taken
+
+taken:
+ %cmp2 = icmp ne i32 %a, %b
+ br i1 %cmp2, label %istrue, label %isfalse
+
+istrue:
+ call void @is(i1 true)
+ ret void
+
+isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; If A != B is false then A != B is implied false.
+; CHECK-LABEL: @test_ne_ne
+; CHECK-NOT: call void @is(i1 true)
+; CHECK: call void @is(i1 false)
+define void @test_ne_ne(i32 %a, i32 %b) {
+ %cmp1 = icmp ne i32 %a, %b
+ br i1 %cmp1, label %untaken, label %taken
+
+taken:
+ %cmp2 = icmp ne i32 %a, %b
+ br i1 %cmp2, label %istrue, label %isfalse
+
+istrue:
+ call void @is(i1 true)
+ ret void
+
+isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; If A != B is false then A >u B is implied false.
+; CHECK-LABEL: @test_ne_ugt
+; CHECK-NOT: call void @is(i1 true)
+; CHECK: call void @is(i1 false)
+define void @test_ne_ugt(i32 %a, i32 %b) {
+ %cmp1 = icmp ne i32 %a, %b
+ br i1 %cmp1, label %untaken, label %taken
+
+taken:
+ %cmp2 = icmp ugt i32 %a, %b
+ br i1 %cmp2, label %istrue, label %isfalse
+
+istrue:
+ call void @is(i1 true)
+ ret void
+
+isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; If A != B is false then A >=u B is implied true.
+; CHECK-LABEL: @test_ne_uge
+; CHECK: call void @is(i1 true)
+; CHECK-NOT: call void @is(i1 false)
+define void @test_ne_uge(i32 %a, i32 %b) {
+ %cmp1 = icmp ne i32 %a, %b
+ br i1 %cmp1, label %untaken, label %taken
+
+taken:
+ %cmp2 = icmp uge i32 %a, %b
+ br i1 %cmp2, label %istrue, label %isfalse
+
+istrue:
+ call void @is(i1 true)
+ ret void
+
+isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; If A != B is false then A <u B is implied false.
+; CHECK-LABEL: @test_ne_ult
+; CHECK-NOT: call void @is(i1 true)
+; CHECK: call void @is(i1 false)
+define void @test_ne_ult(i32 %a, i32 %b) {
+ %cmp1 = icmp ne i32 %a, %b
+ br i1 %cmp1, label %untaken, label %taken
+
+taken:
+ %cmp2 = icmp ult i32 %a, %b
+ br i1 %cmp2, label %istrue, label %isfalse
+
+istrue:
+ call void @is(i1 true)
+ ret void
+
+isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; If A != B is false then A <=u B is implied true.
+; CHECK-LABEL: @test_ne_ule
+; CHECK: call void @is(i1 true)
+; CHECK-NOT: call void @is(i1 false)
+define void @test_ne_ule(i32 %a, i32 %b) {
+ %cmp1 = icmp ne i32 %a, %b
+ br i1 %cmp1, label %untaken, label %taken
+
+taken:
+ %cmp2 = icmp ule i32 %a, %b
+ br i1 %cmp2, label %istrue, label %isfalse
+
+istrue:
+ call void @is(i1 true)
+ ret void
+
+isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; If A >u B is false then A >u B is implied false.
+; CHECK-LABEL: @test_ugt_ugt
+; CHECK-NOT: call void @is(i1 true)
+; CHECK: call void @is(i1 false)
+define void @test_ugt_ugt(i32 %a, i32 %b) {
+ %cmp1 = icmp ugt i32 %a, %b
+ br i1 %cmp1, label %untaken, label %taken
+
+taken:
+ %cmp2 = icmp ugt i32 %a, %b
+ br i1 %cmp2, label %istrue, label %isfalse
+
+istrue:
+ call void @is(i1 true)
+ ret void
+
+isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; If A >u B is false then A <=u B is implied true.
+; CHECK-LABEL: @test_ugt_ule
+; CHECK: call void @is(i1 true)
+; CHECK-NOT: call void @is(i1 false)
+define void @test_ugt_ule(i32 %a, i32 %b) {
+ %cmp1 = icmp ugt i32 %a, %b
+ br i1 %cmp1, label %untaken, label %taken
+
+taken:
+ %cmp2 = icmp ule i32 %a, %b
+ br i1 %cmp2, label %istrue, label %isfalse
+
+istrue:
+ call void @is(i1 true)
+ ret void
+
+isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; If A >=u B is false then A >=u B is implied false.
+; CHECK-LABEL: @test_uge_uge
+; CHECK-NOT: call void @is(i1 true)
+; CHECK: call void @is(i1 false)
+define void @test_uge_uge(i32 %a, i32 %b) {
+ %cmp1 = icmp uge i32 %a, %b
+ br i1 %cmp1, label %untaken, label %taken
+
+taken:
+ %cmp2 = icmp uge i32 %a, %b
+ br i1 %cmp2, label %istrue, label %isfalse
+
+istrue:
+ call void @is(i1 true)
+ ret void
+
+isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; If A >=u B is false then A <u B is implied true.
+; CHECK-LABEL: @test_uge_ult
+; CHECK: call void @is(i1 true)
+; CHECK-NOT: call void @is(i1 false)
+define void @test_uge_ult(i32 %a, i32 %b) {
+ %cmp1 = icmp uge i32 %a, %b
+ br i1 %cmp1, label %untaken, label %taken
+
+taken:
+ %cmp2 = icmp ult i32 %a, %b
+ br i1 %cmp2, label %istrue, label %isfalse
+
+istrue:
+ call void @is(i1 true)
+ ret void
+
+isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; If A >=u B is false then A <=u B is implied true.
+; CHECK-LABEL: @test_uge_ule
+; CHECK: call void @is(i1 true)
+; CHECK-NOT: call void @is(i1 false)
+define void @test_uge_ule(i32 %a, i32 %b) {
+ %cmp1 = icmp uge i32 %a, %b
+ br i1 %cmp1, label %untaken, label %taken
+
+taken:
+ %cmp2 = icmp ule i32 %a, %b
+ br i1 %cmp2, label %istrue, label %isfalse
+
+istrue:
+ call void @is(i1 true)
+ ret void
+
+isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; If A <u B is false then A <u B is implied false.
+; CHECK-LABEL: @test_ult_ult
+; CHECK-NOT: call void @is(i1 true)
+; CHECK: call void @is(i1 false)
+define void @test_ult_ult(i32 %a, i32 %b) {
+ %cmp1 = icmp ult i32 %a, %b
+ br i1 %cmp1, label %untaken, label %taken
+
+taken:
+ %cmp2 = icmp ult i32 %a, %b
+ br i1 %cmp2, label %istrue, label %isfalse
+
+istrue:
+ call void @is(i1 true)
+ ret void
+
+isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; If A <=u B is false then A <=u B is implied false.
+; CHECK-LABEL: @test_ule_ule
+; CHECK-NOT: call void @is(i1 true)
+; CHECK: call void @is(i1 false)
+define void @test_ule_ule(i32 %a, i32 %b) {
+ %cmp1 = icmp ule i32 %a, %b
+ br i1 %cmp1, label %untaken, label %taken
+
+taken:
+ %cmp2 = icmp ule i32 %a, %b
+ br i1 %cmp2, label %istrue, label %isfalse
+
+istrue:
+ call void @is(i1 true)
+ ret void
+
+isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
diff --git a/test/Transforms/SimplifyCFG/implied-cond-matching-imm.ll b/test/Transforms/SimplifyCFG/implied-cond-matching-imm.ll
new file mode 100644
index 000000000000..60ef81365982
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/implied-cond-matching-imm.ll
@@ -0,0 +1,123 @@
+; RUN: opt %s -S -simplifycfg | FileCheck %s
+
+; cmp1 implies cmp2 is false
+; CHECK-LABEL: @test1
+; CHECK-NOT: call void @is(i1 true)
+; CHECK: call void @is(i1 false)
+define void @test1(i32 %a) {
+ %cmp1 = icmp eq i32 %a, 0
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp eq i32 %a, 1
+ br i1 %cmp2, label %istrue, label %isfalse
+
+istrue:
+ call void @is(i1 true)
+ ret void
+
+isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; cmp1 implies cmp2 is false
+; CHECK-LABEL: @test2
+; CHECK-NOT: call void @is(i1 true)
+; CHECK: call void @is(i1 false)
+define void @test2(i32 %a) {
+ %cmp1 = icmp ugt i32 %a, 5
+ br i1 %cmp1, label %untaken, label %taken
+
+taken:
+ %cmp2 = icmp ugt i32 %a, 6
+ br i1 %cmp2, label %istrue, label %isfalse
+
+istrue:
+ call void @is(i1 true)
+ ret void
+
+isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; cmp1 implies cmp2 is false
+; CHECK-LABEL: @test3
+; CHECK-NOT: call void @is(i1 true)
+; CHECK: call void @is(i1 false)
+define void @test3(i32 %a) {
+ %cmp1 = icmp ugt i32 %a, 1
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp eq i32 %a, 0
+ br i1 %cmp2, label %istrue, label %isfalse
+
+istrue:
+ call void @is(i1 true)
+ ret void
+
+isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; cmp1 implies cmp2 is true
+; CHECK-LABEL: @test4
+; CHECK: call void @is(i1 true)
+; CHECK-NOT: call void @is(i1 false)
+define void @test4(i32 %a) {
+ %cmp1 = icmp sgt i32 %a, 1
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp ugt i32 %a, 0
+ br i1 %cmp2, label %istrue, label %isfalse
+
+istrue:
+ call void @is(i1 true)
+ ret void
+
+isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; cmp1 implies cmp2 is true
+; CHECK-LABEL: @test5
+; CHECK: call void @is(i1 true)
+; CHECK-NOT: call void @is(i1 false)
+define void @test5(i32 %a) {
+ %cmp1 = icmp sgt i32 %a, 5
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp sgt i32 %a, -1
+ br i1 %cmp2, label %istrue, label %isfalse
+
+istrue:
+ call void @is(i1 true)
+ ret void
+
+isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+declare void @is(i1)
diff --git a/test/Transforms/SimplifyCFG/implied-cond-matching.ll b/test/Transforms/SimplifyCFG/implied-cond-matching.ll
new file mode 100644
index 000000000000..33fc016bd386
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/implied-cond-matching.ll
@@ -0,0 +1,1029 @@
+; RUN: opt %s -S -simplifycfg | FileCheck %s
+
+declare void @dead()
+declare void @alive()
+declare void @is(i1)
+
+; Test same condition with swapped operands.
+; void test_swapped_ops(unsigned a, unsigned b) {
+; if (a > b) {
+; if (b > a) <- always false
+; dead();
+; alive();
+; }
+; }
+;
+; CHECK-LABEL: @test_swapped_ops
+; CHECK-NOT: call void @dead()
+; CHECK: call void @alive()
+; CHECK: ret
+define void @test_swapped_ops(i32 %a, i32 %b) {
+entry:
+ %cmp = icmp ugt i32 %a, %b
+ br i1 %cmp, label %if.then, label %if.end3
+
+if.then:
+ %cmp1 = icmp ugt i32 %b, %a
+ br i1 %cmp1, label %if.then2, label %if.end
+
+if.then2:
+ call void @dead()
+ br label %if.end
+
+if.end:
+ call void @alive()
+ br label %if.end3
+
+if.end3:
+ ret void
+}
+
+; void test_swapped_pred(unsigned a, unsigned b) {
+; if (a > b) {
+; alive();
+; if (b < a) <- always true; remove branch
+; alive();
+; }
+; }
+;
+; CHECK-LABEL: @test_swapped_pred
+; CHECK: call void @alive()
+; CHECK-NEXT: call void @alive()
+; CHECK: ret
+define void @test_swapped_pred(i32 %a, i32 %b) {
+entry:
+ %cmp = icmp ugt i32 %a, %b
+ br i1 %cmp, label %if.then, label %if.end3
+
+if.then:
+ call void @alive()
+ %cmp1 = icmp ult i32 %b, %a
+ br i1 %cmp1, label %if.then2, label %if.end3
+
+if.then2:
+ call void @alive()
+ br label %if.end3
+
+if.end3:
+ ret void
+}
+
+; A == B implies A == B is true.
+; CHECK-LABEL: @test_eq_eq
+; CHECK: call void @is(i1 true)
+; CHECK-NOT: call void @is(i1 false)
+define void @test_eq_eq(i32 %a, i32 %b) {
+ %cmp1 = icmp eq i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp eq i32 %a, %b
+ br i1 %cmp2, label %eq_eq_istrue, label %eq_eq_isfalse
+
+eq_eq_istrue:
+ call void @is(i1 true)
+ ret void
+
+eq_eq_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A == B implies A != B is false.
+; CHECK-LABEL: @test_eq_ne
+; CHECK-NOT: call void @is(i1 true)
+; CHECK: call void @is(i1 false)
+define void @test_eq_ne(i32 %a, i32 %b) {
+ %cmp1 = icmp eq i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp ne i32 %a, %b
+ br i1 %cmp2, label %eq_ne_istrue, label %eq_ne_isfalse
+
+eq_ne_istrue:
+ call void @is(i1 true)
+ ret void
+
+eq_ne_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A == B implies A >u B is false.
+; CHECK-LABEL: @test_eq_ugt
+; CHECK-NOT: call void @is(i1 true)
+; CHECK: call void @is(i1 false)
+define void @test_eq_ugt(i32 %a, i32 %b) {
+ %cmp1 = icmp eq i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp ugt i32 %a, %b
+ br i1 %cmp2, label %eq_ugt_istrue, label %eq_ugt_isfalse
+
+eq_ugt_istrue:
+ call void @is(i1 true)
+ ret void
+
+eq_ugt_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A == B implies A >=u B is true.
+; CHECK-LABEL: @test_eq_uge
+; CHECK: call void @is(i1 true)
+; CHECK-NOT: call void @is(i1 false)
+define void @test_eq_uge(i32 %a, i32 %b) {
+ %cmp1 = icmp eq i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp uge i32 %a, %b
+ br i1 %cmp2, label %eq_uge_istrue, label %eq_uge_isfalse
+
+eq_uge_istrue:
+ call void @is(i1 true)
+ ret void
+
+eq_uge_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A == B implies A <u B is false.
+; CHECK-LABEL: @test_eq_ult
+; CHECK-NOT: call void @is(i1 true)
+; CHECK: call void @is(i1 false)
+define void @test_eq_ult(i32 %a, i32 %b) {
+ %cmp1 = icmp eq i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp ult i32 %a, %b
+ br i1 %cmp2, label %eq_ult_istrue, label %eq_ult_isfalse
+
+eq_ult_istrue:
+ call void @is(i1 true)
+ ret void
+
+eq_ult_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A == B implies A <=u B is true.
+; CHECK-LABEL: @test_eq_ule
+; CHECK: call void @is(i1 true)
+; CHECK-NOT: call void @is(i1 false)
+define void @test_eq_ule(i32 %a, i32 %b) {
+ %cmp1 = icmp eq i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp ule i32 %a, %b
+ br i1 %cmp2, label %eq_ule_istrue, label %eq_ule_isfalse
+
+eq_ule_istrue:
+ call void @is(i1 true)
+ ret void
+
+eq_ule_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A == B implies A >s B is false.
+; CHECK-LABEL: @test_eq_sgt
+; CHECK-NOT: call void @is(i1 true)
+; CHECK: call void @is(i1 false)
+define void @test_eq_sgt(i32 %a, i32 %b) {
+ %cmp1 = icmp eq i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp sgt i32 %a, %b
+ br i1 %cmp2, label %eq_sgt_istrue, label %eq_sgt_isfalse
+
+eq_sgt_istrue:
+ call void @is(i1 true)
+ ret void
+
+eq_sgt_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A == B implies A >=s B is true.
+; CHECK-LABEL: @test_eq_sge
+; CHECK: call void @is(i1 true)
+; CHECK-NOT: call void @is(i1 false)
+define void @test_eq_sge(i32 %a, i32 %b) {
+ %cmp1 = icmp eq i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp sge i32 %a, %b
+ br i1 %cmp2, label %eq_sge_istrue, label %eq_sge_isfalse
+
+eq_sge_istrue:
+ call void @is(i1 true)
+ ret void
+
+eq_sge_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A == B implies A <s B is false.
+; CHECK-LABEL: @test_eq_slt
+; CHECK-NOT: call void @is(i1 true)
+; CHECK: call void @is(i1 false)
+define void @test_eq_slt(i32 %a, i32 %b) {
+ %cmp1 = icmp eq i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp slt i32 %a, %b
+ br i1 %cmp2, label %eq_slt_istrue, label %eq_slt_isfalse
+
+eq_slt_istrue:
+ call void @is(i1 true)
+ ret void
+
+eq_slt_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A == B implies A <=s B is true.
+; CHECK-LABEL: @test_eq_sle
+; CHECK: call void @is(i1 true)
+; CHECK-NOT: call void @is(i1 false)
+define void @test_eq_sle(i32 %a, i32 %b) {
+ %cmp1 = icmp eq i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp sle i32 %a, %b
+ br i1 %cmp2, label %eq_sle_istrue, label %eq_sle_isfalse
+
+eq_sle_istrue:
+ call void @is(i1 true)
+ ret void
+
+eq_sle_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A != B implies A != B is true.
+; CHECK-LABEL: @test_ne_ne
+; CHECK: call void @is(i1 true)
+; CHECK-NOT: call void @is(i1 false)
+define void @test_ne_ne(i32 %a, i32 %b) {
+ %cmp1 = icmp ne i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp ne i32 %a, %b
+ br i1 %cmp2, label %ne_ne_istrue, label %ne_ne_isfalse
+
+ne_ne_istrue:
+ call void @is(i1 true)
+ ret void
+
+ne_ne_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A != B implies A >u B is unknown to be true or false.
+; CHECK-LABEL: @test_ne_ugt
+; CHECK: call void @is(i1 true)
+; CHECK: call void @is(i1 false)
+define void @test_ne_ugt(i32 %a, i32 %b) {
+ %cmp1 = icmp ne i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp ugt i32 %a, %b
+ br i1 %cmp2, label %ne_ugt_istrue, label %ne_ugt_isfalse
+
+ne_ugt_istrue:
+ call void @is(i1 true)
+ ret void
+
+ne_ugt_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A != B implies A >=u B is unknown to be true or false.
+; CHECK-LABEL: @test_ne_uge
+; CHECK: call void @is(i1 true)
+; CHECK: call void @is(i1 false)
+define void @test_ne_uge(i32 %a, i32 %b) {
+ %cmp1 = icmp ne i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp uge i32 %a, %b
+ br i1 %cmp2, label %ne_uge_istrue, label %ne_uge_isfalse
+
+ne_uge_istrue:
+ call void @is(i1 true)
+ ret void
+
+ne_uge_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A != B implies A <u B is unknown to be true or false.
+; CHECK-LABEL: @test_ne_ult
+; CHECK: call void @is(i1 true)
+; CHECK: call void @is(i1 false)
+define void @test_ne_ult(i32 %a, i32 %b) {
+ %cmp1 = icmp ne i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp ult i32 %a, %b
+ br i1 %cmp2, label %ne_ult_istrue, label %ne_ult_isfalse
+
+ne_ult_istrue:
+ call void @is(i1 true)
+ ret void
+
+ne_ult_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A != B implies A <=u B is unknown to be true or false.
+; CHECK-LABEL: @test_ne_ule
+; CHECK: call void @is(i1 true)
+; CHECK: call void @is(i1 false)
+define void @test_ne_ule(i32 %a, i32 %b) {
+ %cmp1 = icmp ne i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp ule i32 %a, %b
+ br i1 %cmp2, label %ne_ule_istrue, label %ne_ule_isfalse
+
+ne_ule_istrue:
+ call void @is(i1 true)
+ ret void
+
+ne_ule_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A != B implies A >s B is unknown to be true or false.
+; CHECK-LABEL: @test_ne_sgt
+; CHECK: call void @is(i1 true)
+; CHECK: call void @is(i1 false)
+define void @test_ne_sgt(i32 %a, i32 %b) {
+ %cmp1 = icmp ne i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp sgt i32 %a, %b
+ br i1 %cmp2, label %ne_sgt_istrue, label %ne_sgt_isfalse
+
+ne_sgt_istrue:
+ call void @is(i1 true)
+ ret void
+
+ne_sgt_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A != B implies A >=s B is unknown to be true or false.
+; CHECK-LABEL: @test_ne_sge
+; CHECK: call void @is(i1 true)
+; CHECK: call void @is(i1 false)
+define void @test_ne_sge(i32 %a, i32 %b) {
+ %cmp1 = icmp ne i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp sge i32 %a, %b
+ br i1 %cmp2, label %ne_sge_istrue, label %ne_sge_isfalse
+
+ne_sge_istrue:
+ call void @is(i1 true)
+ ret void
+
+ne_sge_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A != B implies A <s B is unknown to be true or false.
+; CHECK-LABEL: @test_ne_slt
+; CHECK: call void @is(i1 true)
+; CHECK: call void @is(i1 false)
+define void @test_ne_slt(i32 %a, i32 %b) {
+ %cmp1 = icmp ne i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp slt i32 %a, %b
+ br i1 %cmp2, label %ne_slt_istrue, label %ne_slt_isfalse
+
+ne_slt_istrue:
+ call void @is(i1 true)
+ ret void
+
+ne_slt_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A != B implies A <=s B is unknown to be true or false.
+; CHECK-LABEL: @test_ne_sle
+; CHECK: call void @is(i1 true)
+; CHECK: call void @is(i1 false)
+define void @test_ne_sle(i32 %a, i32 %b) {
+ %cmp1 = icmp ne i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp sle i32 %a, %b
+ br i1 %cmp2, label %ne_sle_istrue, label %ne_sle_isfalse
+
+ne_sle_istrue:
+ call void @is(i1 true)
+ ret void
+
+ne_sle_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A >u B implies A >u B is true.
+; CHECK-LABEL: @test_ugt_ugt
+; CHECK: call void @is(i1 true)
+; CHECK-NOT: call void @is(i1 false)
+define void @test_ugt_ugt(i32 %a, i32 %b) {
+ %cmp1 = icmp ugt i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp ugt i32 %a, %b
+ br i1 %cmp2, label %ugt_ugt_istrue, label %ugt_ugt_isfalse
+
+ugt_ugt_istrue:
+ call void @is(i1 true)
+ ret void
+
+ugt_ugt_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A >u B implies A >=u B is true.
+; CHECK-LABEL: @test_ugt_uge
+; CHECK: call void @is(i1 true)
+; CHECK-NOT: call void @is(i1 false)
+define void @test_ugt_uge(i32 %a, i32 %b) {
+ %cmp1 = icmp ugt i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp uge i32 %a, %b
+ br i1 %cmp2, label %ugt_uge_istrue, label %ugt_uge_isfalse
+
+ugt_uge_istrue:
+ call void @is(i1 true)
+ ret void
+
+ugt_uge_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A >u B implies A <u B is false.
+; CHECK-LABEL: @test_ugt_ult
+; CHECK-NOT: call void @is(i1 true)
+; CHECK: call void @is(i1 false)
+define void @test_ugt_ult(i32 %a, i32 %b) {
+ %cmp1 = icmp ugt i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp ult i32 %a, %b
+ br i1 %cmp2, label %ugt_ult_istrue, label %ugt_ult_isfalse
+
+ugt_ult_istrue:
+ call void @is(i1 true)
+ ret void
+
+ugt_ult_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A >u B implies A <=u B is false.
+; CHECK-LABEL: @test_ugt_ule
+; CHECK-NOT: call void @is(i1 true)
+; CHECK: call void @is(i1 false)
+define void @test_ugt_ule(i32 %a, i32 %b) {
+ %cmp1 = icmp ugt i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp ule i32 %a, %b
+ br i1 %cmp2, label %ugt_ule_istrue, label %ugt_ule_isfalse
+
+ugt_ule_istrue:
+ call void @is(i1 true)
+ ret void
+
+ugt_ule_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A >=u B implies A >=u B is true.
+; CHECK-LABEL: @test_uge_uge
+; CHECK: call void @is(i1 true)
+; CHECK-NOT: call void @is(i1 false)
+define void @test_uge_uge(i32 %a, i32 %b) {
+ %cmp1 = icmp uge i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp uge i32 %a, %b
+ br i1 %cmp2, label %uge_uge_istrue, label %uge_uge_isfalse
+
+uge_uge_istrue:
+ call void @is(i1 true)
+ ret void
+
+uge_uge_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A >=u B implies A <u B is false.
+; CHECK-LABEL: @test_uge_ult
+; CHECK-NOT: call void @is(i1 true)
+; CHECK: call void @is(i1 false)
+define void @test_uge_ult(i32 %a, i32 %b) {
+ %cmp1 = icmp uge i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp ult i32 %a, %b
+ br i1 %cmp2, label %uge_ult_istrue, label %uge_ult_isfalse
+
+uge_ult_istrue:
+ call void @is(i1 true)
+ ret void
+
+uge_ult_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A >=u B implies A <=u B is unknown to be true or false.
+; CHECK-LABEL: @test_uge_ule
+; CHECK: call void @is(i1 true)
+; CHECK: call void @is(i1 false)
+define void @test_uge_ule(i32 %a, i32 %b) {
+ %cmp1 = icmp uge i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp ule i32 %a, %b
+ br i1 %cmp2, label %uge_ule_istrue, label %uge_ule_isfalse
+
+uge_ule_istrue:
+ call void @is(i1 true)
+ ret void
+
+uge_ule_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A <u B implies A <u B is true.
+; CHECK-LABEL: @test_ult_ult
+; CHECK: call void @is(i1 true)
+; CHECK-NOT: call void @is(i1 false)
+define void @test_ult_ult(i32 %a, i32 %b) {
+ %cmp1 = icmp ult i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp ult i32 %a, %b
+ br i1 %cmp2, label %ult_ult_istrue, label %ult_ult_isfalse
+
+ult_ult_istrue:
+ call void @is(i1 true)
+ ret void
+
+ult_ult_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A <u B implies A <=u B is true.
+; CHECK-LABEL: @test_ult_ule
+; CHECK: call void @is(i1 true)
+; CHECK-NOT: call void @is(i1 false)
+define void @test_ult_ule(i32 %a, i32 %b) {
+ %cmp1 = icmp ult i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp ule i32 %a, %b
+ br i1 %cmp2, label %ult_ule_istrue, label %ult_ule_isfalse
+
+ult_ule_istrue:
+ call void @is(i1 true)
+ ret void
+
+ult_ule_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A <=u B implies A <=u B is true.
+; CHECK-LABEL: @test_ule_ule
+; CHECK: call void @is(i1 true)
+; CHECK-NOT: call void @is(i1 false)
+define void @test_ule_ule(i32 %a, i32 %b) {
+ %cmp1 = icmp ule i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp ule i32 %a, %b
+ br i1 %cmp2, label %ule_ule_istrue, label %ule_ule_isfalse
+
+ule_ule_istrue:
+ call void @is(i1 true)
+ ret void
+
+ule_ule_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A >s B implies A >s B is true.
+; CHECK-LABEL: @test_sgt_sgt
+; CHECK: call void @is(i1 true)
+; CHECK-NOT: call void @is(i1 false)
+define void @test_sgt_sgt(i32 %a, i32 %b) {
+ %cmp1 = icmp sgt i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp sgt i32 %a, %b
+ br i1 %cmp2, label %sgt_sgt_istrue, label %sgt_sgt_isfalse
+
+sgt_sgt_istrue:
+ call void @is(i1 true)
+ ret void
+
+sgt_sgt_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A >s B implies A >=s B is true.
+; CHECK-LABEL: @test_sgt_sge
+; CHECK: call void @is(i1 true)
+; CHECK-NOT: call void @is(i1 false)
+define void @test_sgt_sge(i32 %a, i32 %b) {
+ %cmp1 = icmp sgt i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp sge i32 %a, %b
+ br i1 %cmp2, label %sgt_sge_istrue, label %sgt_sge_isfalse
+
+sgt_sge_istrue:
+ call void @is(i1 true)
+ ret void
+
+sgt_sge_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A >s B implies A <s B is false.
+; CHECK-LABEL: @test_sgt_slt
+; CHECK-NOT: call void @is(i1 true)
+; CHECK: call void @is(i1 false)
+define void @test_sgt_slt(i32 %a, i32 %b) {
+ %cmp1 = icmp sgt i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp slt i32 %a, %b
+ br i1 %cmp2, label %sgt_slt_istrue, label %sgt_slt_isfalse
+
+sgt_slt_istrue:
+ call void @is(i1 true)
+ ret void
+
+sgt_slt_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A >s B implies A <=s B is false.
+; CHECK-LABEL: @test_sgt_sle
+; CHECK-NOT: call void @is(i1 true)
+; CHECK: call void @is(i1 false)
+define void @test_sgt_sle(i32 %a, i32 %b) {
+ %cmp1 = icmp sgt i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp sle i32 %a, %b
+ br i1 %cmp2, label %sgt_sle_istrue, label %sgt_sle_isfalse
+
+sgt_sle_istrue:
+ call void @is(i1 true)
+ ret void
+
+sgt_sle_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A >=s B implies A >=s B is true.
+; CHECK-LABEL: @test_sge_sge
+; CHECK: call void @is(i1 true)
+; CHECK-NOT: call void @is(i1 false)
+define void @test_sge_sge(i32 %a, i32 %b) {
+ %cmp1 = icmp sge i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp sge i32 %a, %b
+ br i1 %cmp2, label %sge_sge_istrue, label %sge_sge_isfalse
+
+sge_sge_istrue:
+ call void @is(i1 true)
+ ret void
+
+sge_sge_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A >=s B implies A <s B is false.
+; CHECK-LABEL: @test_sge_slt
+; CHECK-NOT: call void @is(i1 true)
+; CHECK: call void @is(i1 false)
+define void @test_sge_slt(i32 %a, i32 %b) {
+ %cmp1 = icmp sge i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp slt i32 %a, %b
+ br i1 %cmp2, label %sge_slt_istrue, label %sge_slt_isfalse
+
+sge_slt_istrue:
+ call void @is(i1 true)
+ ret void
+
+sge_slt_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A >=s B implies A <=s B is unknown to be true or false.
+; CHECK-LABEL: @test_sge_sle
+; CHECK: call void @is(i1 true)
+; CHECK: call void @is(i1 false)
+define void @test_sge_sle(i32 %a, i32 %b) {
+ %cmp1 = icmp sge i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp sle i32 %a, %b
+ br i1 %cmp2, label %sge_sle_istrue, label %sge_sle_isfalse
+
+sge_sle_istrue:
+ call void @is(i1 true)
+ ret void
+
+sge_sle_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A <s B implies A <s B is true.
+; CHECK-LABEL: @test_slt_slt
+; CHECK: call void @is(i1 true)
+; CHECK-NOT: call void @is(i1 false)
+define void @test_slt_slt(i32 %a, i32 %b) {
+ %cmp1 = icmp slt i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp slt i32 %a, %b
+ br i1 %cmp2, label %slt_slt_istrue, label %slt_slt_isfalse
+
+slt_slt_istrue:
+ call void @is(i1 true)
+ ret void
+
+slt_slt_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A <s B implies A <=s B is true.
+; CHECK-LABEL: @test_slt_sle
+; CHECK: call void @is(i1 true)
+; CHECK-NOT: call void @is(i1 false)
+define void @test_slt_sle(i32 %a, i32 %b) {
+ %cmp1 = icmp slt i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp sle i32 %a, %b
+ br i1 %cmp2, label %slt_sle_istrue, label %slt_sle_isfalse
+
+slt_sle_istrue:
+ call void @is(i1 true)
+ ret void
+
+slt_sle_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A <=s B implies A <=s B is true.
+; CHECK-LABEL: @test_sle_sle
+; CHECK: call void @is(i1 true)
+; CHECK-NOT: call void @is(i1 false)
+define void @test_sle_sle(i32 %a, i32 %b) {
+ %cmp1 = icmp sle i32 %a, %b
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp sle i32 %a, %b
+ br i1 %cmp2, label %sle_sle_istrue, label %sle_sle_isfalse
+
+sle_sle_istrue:
+ call void @is(i1 true)
+ ret void
+
+sle_sle_isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
+
+; A >=u 5 implies A <u 5 is false.
+; CHECK-LABEL: @test_uge_ult_const
+; CHECK-NOT: call void @is(i1 true)
+; CHECK: call void @is(i1 false)
+define void @test_uge_ult_const(i32 %a, i32 %b) {
+ %cmp1 = icmp uge i32 %a, 5
+ br i1 %cmp1, label %taken, label %untaken
+
+taken:
+ %cmp2 = icmp ult i32 %a, 5
+ br i1 %cmp2, label %istrue, label %isfalse
+
+istrue:
+ call void @is(i1 true)
+ ret void
+
+isfalse:
+ call void @is(i1 false)
+ ret void
+
+untaken:
+ ret void
+}
diff --git a/test/Transforms/SimplifyCFG/merge-cleanuppads.ll b/test/Transforms/SimplifyCFG/merge-cleanuppads.ll
new file mode 100644
index 000000000000..23bbbca72346
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/merge-cleanuppads.ll
@@ -0,0 +1,39 @@
+; RUN: opt -S -simplifycfg < %s | FileCheck %s
+target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-pc-windows-msvc18.0.0"
+
+; Function Attrs: uwtable
+define void @test1() #0 personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ invoke void @may_throw(i32 3)
+ to label %invoke.cont unwind label %ehcleanup
+
+invoke.cont: ; preds = %entry
+ tail call void @may_throw(i32 2) #2
+ tail call void @may_throw(i32 1) #2
+ ret void
+
+ehcleanup: ; preds = %entry
+ %cp = cleanuppad within none []
+ tail call void @may_throw(i32 2) #2 [ "funclet"(token %cp) ]
+ cleanupret from %cp unwind label %ehcleanup2
+
+ehcleanup2:
+ %cp2 = cleanuppad within none []
+ tail call void @may_throw(i32 1) #2 [ "funclet"(token %cp2) ]
+ cleanupret from %cp2 unwind to caller
+}
+
+; CHECK-LABEL: define void @test1(
+; CHECK: %[[cp:.*]] = cleanuppad within none []
+; CHECK: tail call void @may_throw(i32 2) #2 [ "funclet"(token %[[cp]]) ]
+; CHECK: tail call void @may_throw(i32 1) #2 [ "funclet"(token %[[cp]]) ]
+; CHECK: cleanupret from %[[cp]] unwind to caller
+
+declare void @may_throw(i32) #1
+
+declare i32 @__CxxFrameHandler3(...)
+
+attributes #0 = { uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+fxsr,+mmx,+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
+attributes #2 = { nounwind }
diff --git a/test/Transforms/SimplifyCFG/merge-cond-stores-2.ll b/test/Transforms/SimplifyCFG/merge-cond-stores-2.ll
index fe498b5334e8..3d69d25f6c6d 100644
--- a/test/Transforms/SimplifyCFG/merge-cond-stores-2.ll
+++ b/test/Transforms/SimplifyCFG/merge-cond-stores-2.ll
@@ -27,10 +27,10 @@ target triple = "armv7--linux-gnueabihf"
; CHECK: select
; CHECK-NOT: select
; CHECK: br i1 {{.*}}, label %[[L:.*]], label %[[R:.*]]
-; CHECK: [[L]] ; preds =
+; CHECK: [[L]]: ; preds =
; CHECK-NEXT: store
; CHECK-NEXT: br label %[[R]]
-; CHECK: [[R]] ; preds =
+; CHECK: [[R]]: ; preds =
; CHECK-NEXT: ret i32 0
define i32 @f(i32* %b) {
diff --git a/test/Transforms/SimplifyCFG/preserve-branchweights.ll b/test/Transforms/SimplifyCFG/preserve-branchweights.ll
index ae1794b1c61a..dba5dcf68b0c 100644
--- a/test/Transforms/SimplifyCFG/preserve-branchweights.ll
+++ b/test/Transforms/SimplifyCFG/preserve-branchweights.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -simplifycfg -S -o - < %s | FileCheck %s
declare void @helper(i32)
@@ -21,11 +22,33 @@ Z:
ret void
}
+; Make sure the metadata name string is "branch_weights" before propagating it.
+
+define void @fake_weights(i1 %a, i1 %b) {
+; CHECK-LABEL: @fake_weights(
+entry:
+ br i1 %a, label %Y, label %X, !prof !12
+; CHECK: %or.cond = and i1 %a.not, %c
+; CHECK-NEXT: br i1 %or.cond, label %Z, label %Y, !prof !1
+; CHECK: Y:
+X:
+ %c = or i1 %b, false
+ br i1 %c, label %Z, label %Y, !prof !1
+
+Y:
+ call void @helper(i32 0)
+ ret void
+
+Z:
+ call void @helper(i32 1)
+ ret void
+}
+
define void @test2(i1 %a, i1 %b) {
; CHECK-LABEL: @test2(
entry:
br i1 %a, label %X, label %Y, !prof !1
-; CHECK: br i1 %or.cond, label %Z, label %Y, !prof !1
+; CHECK: br i1 %or.cond, label %Z, label %Y, !prof !2
; CHECK-NOT: !prof
X:
@@ -43,7 +66,7 @@ Z:
define void @test3(i1 %a, i1 %b) {
; CHECK-LABEL: @test3(
-; CHECK-NOT: !prof
+; CHECK: br i1 %or.cond, label %Z, label %Y, !prof !1
entry:
br i1 %a, label %X, label %Y, !prof !1
@@ -62,7 +85,7 @@ Z:
define void @test4(i1 %a, i1 %b) {
; CHECK-LABEL: @test4(
-; CHECK-NOT: !prof
+; CHECK: br i1 %or.cond, label %Z, label %Y, !prof !1
entry:
br i1 %a, label %X, label %Y
@@ -91,7 +114,7 @@ entry:
; CHECK: switch i32 %N, label %sw2 [
; CHECK: i32 3, label %sw.bb1
; CHECK: i32 2, label %sw.bb
-; CHECK: ], !prof !2
+; CHECK: ], !prof !3
sw.bb:
call void @helper(i32 0)
@@ -124,7 +147,7 @@ entry:
; CHECK: i32 3, label %sw.bb1
; CHECK: i32 2, label %sw.bb
; CHECK: i32 4, label %sw.bb5
-; CHECK: ], !prof !3
+; CHECK: ], !prof !4
sw.bb:
call void @helper(i32 0)
@@ -159,7 +182,7 @@ define void @test1_swap(i1 %a, i1 %b) {
; CHECK-LABEL: @test1_swap(
entry:
br i1 %a, label %Y, label %X, !prof !0
-; CHECK: br i1 %or.cond, label %Y, label %Z, !prof !4
+; CHECK: br i1 %or.cond, label %Y, label %Z, !prof !5
X:
%c = or i1 %b, false
@@ -179,7 +202,7 @@ define void @test7(i1 %a, i1 %b) {
entry:
%c = or i1 %b, false
br i1 %a, label %Y, label %X, !prof !0
-; CHECK: br i1 %brmerge, label %Y, label %Z, !prof !5
+; CHECK: br i1 %brmerge, label %Y, label %Z, !prof !6
X:
br i1 %c, label %Y, label %Z, !prof !6
@@ -198,7 +221,7 @@ define void @test8(i64 %x, i64 %y) nounwind {
; CHECK-LABEL: @test8(
entry:
%lt = icmp slt i64 %x, %y
-; CHECK: br i1 %lt, label %a, label %b, !prof !6
+; CHECK: br i1 %lt, label %a, label %b, !prof !7
%qux = select i1 %lt, i32 0, i32 2
switch i32 %qux, label %bees [
i32 0, label %a
@@ -231,7 +254,7 @@ entry:
; CHECK: i32 1, label %end
; CHECK: i32 2, label %end
; CHECK: i32 92, label %end
-; CHECK: ], !prof !7
+; CHECK: ], !prof !8
a:
call void @helper(i32 0) nounwind
@@ -269,7 +292,7 @@ lor.end:
; CHECK-LABEL: @test10(
; CHECK: %x.off = add i32 %x, -1
; CHECK: %switch = icmp ult i32 %x.off, 3
-; CHECK: br i1 %switch, label %lor.end, label %lor.rhs, !prof !8
+; CHECK: br i1 %switch, label %lor.end, label %lor.rhs, !prof !9
}
; Remove dead cases from the switch.
@@ -281,7 +304,7 @@ define void @test11(i32 %x) nounwind {
], !prof !8
; CHECK-LABEL: @test11(
; CHECK: %cond = icmp eq i32 %i, 24
-; CHECK: br i1 %cond, label %c, label %a, !prof !9
+; CHECK: br i1 %cond, label %c, label %a, !prof !10
a:
call void @helper(i32 0) nounwind
@@ -344,7 +367,7 @@ c:
@max_regno = common global i32 0, align 4
define void @test14(i32* %old, i32 %final) {
; CHECK-LABEL: @test14
-; CHECK: br i1 %or.cond, label %for.exit, label %for.inc, !prof !10
+; CHECK: br i1 %or.cond, label %for.exit, label %for.inc, !prof !11
for.cond:
br label %for.cond2
for.cond2:
@@ -364,6 +387,107 @@ for.exit:
ret void
}
+; Don't drop the metadata.
+
+define i32 @HoistThenElseCodeToIf(i32 %n) {
+; CHECK-LABEL: @HoistThenElseCodeToIf(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 %n, 0
+; CHECK-NEXT: [[DOT:%.*]] = select i1 [[TOBOOL]], i32 1, i32 234, !prof !12
+; CHECK-NEXT: ret i32 [[DOT]]
+;
+entry:
+ %tobool = icmp eq i32 %n, 0
+ br i1 %tobool, label %if, label %else, !prof !0
+
+if:
+ br label %return
+
+else:
+ br label %return
+
+return:
+ %retval.0 = phi i32 [ 1, %if ], [ 234, %else ]
+ ret i32 %retval.0
+}
+
+; The selects should have freshly calculated branch weights.
+
+define i32 @SimplifyCondBranchToCondBranch(i1 %cmpa, i1 %cmpb) {
+; CHECK-LABEL: @SimplifyCondBranchToCondBranch(
+; CHECK-NEXT: block1:
+; CHECK-NEXT: [[BRMERGE:%.*]] = or i1 %cmpa, %cmpb
+; CHECK-NEXT: [[DOTMUX:%.*]] = select i1 %cmpa, i32 0, i32 2, !prof !13
+; CHECK-NEXT: [[OUTVAL:%.*]] = select i1 [[BRMERGE]], i32 [[DOTMUX]], i32 1, !prof !14
+; CHECK-NEXT: ret i32 [[OUTVAL]]
+;
+block1:
+ br i1 %cmpa, label %block3, label %block2, !prof !13
+
+block2:
+ br i1 %cmpb, label %block3, label %exit, !prof !14
+
+block3:
+ %cowval = phi i32 [ 2, %block2 ], [ 0, %block1 ]
+ br label %exit
+
+exit:
+ %outval = phi i32 [ %cowval, %block3 ], [ 1, %block2 ]
+ ret i32 %outval
+}
+
+; Swap the operands of the compares to verify that the weights update correctly.
+
+define i32 @SimplifyCondBranchToCondBranchSwap(i1 %cmpa, i1 %cmpb) {
+; CHECK-LABEL: @SimplifyCondBranchToCondBranchSwap(
+; CHECK-NEXT: block1:
+; CHECK-NEXT: [[CMPA_NOT:%.*]] = xor i1 %cmpa, true
+; CHECK-NEXT: [[CMPB_NOT:%.*]] = xor i1 %cmpb, true
+; CHECK-NEXT: [[BRMERGE:%.*]] = or i1 [[CMPA_NOT]], [[CMPB_NOT]]
+; CHECK-NEXT: [[DOTMUX:%.*]] = select i1 [[CMPA_NOT]], i32 0, i32 2, !prof !15
+; CHECK-NEXT: [[OUTVAL:%.*]] = select i1 [[BRMERGE]], i32 [[DOTMUX]], i32 1, !prof !16
+; CHECK-NEXT: ret i32 [[OUTVAL]]
+;
+block1:
+ br i1 %cmpa, label %block2, label %block3, !prof !13
+
+block2:
+ br i1 %cmpb, label %exit, label %block3, !prof !14
+
+block3:
+ %cowval = phi i32 [ 2, %block2 ], [ 0, %block1 ]
+ br label %exit
+
+exit:
+ %outval = phi i32 [ %cowval, %block3 ], [ 1, %block2 ]
+ ret i32 %outval
+}
+
+define i32 @SimplifyCondBranchToCondBranchSwapMissingWeight(i1 %cmpa, i1 %cmpb) {
+; CHECK-LABEL: @SimplifyCondBranchToCondBranchSwapMissingWeight(
+; CHECK-NEXT: block1:
+; CHECK-NEXT: [[CMPA_NOT:%.*]] = xor i1 %cmpa, true
+; CHECK-NEXT: [[CMPB_NOT:%.*]] = xor i1 %cmpb, true
+; CHECK-NEXT: [[BRMERGE:%.*]] = or i1 [[CMPA_NOT]], [[CMPB_NOT]]
+; CHECK-NEXT: [[DOTMUX:%.*]] = select i1 [[CMPA_NOT]], i32 0, i32 2, !prof !17
+; CHECK-NEXT: [[OUTVAL:%.*]] = select i1 [[BRMERGE]], i32 [[DOTMUX]], i32 1, !prof !18
+; CHECK-NEXT: ret i32 [[OUTVAL]]
+;
+block1:
+ br i1 %cmpa, label %block2, label %block3, !prof !13
+
+block2:
+ br i1 %cmpb, label %exit, label %block3
+
+block3:
+ %cowval = phi i32 [ 2, %block2 ], [ 0, %block1 ]
+ br label %exit
+
+exit:
+ %outval = phi i32 [ %cowval, %block3 ], [ 1, %block2 ]
+ ret i32 %outval
+}
+
!0 = !{!"branch_weights", i32 3, i32 5}
!1 = !{!"branch_weights", i32 1, i32 1}
!2 = !{!"branch_weights", i32 1, i32 2}
@@ -376,17 +500,28 @@ for.exit:
!9 = !{!"branch_weights", i32 7, i32 6}
!10 = !{!"branch_weights", i32 672646, i32 21604207}
!11 = !{!"branch_weights", i32 6960, i32 21597248}
+!12 = !{!"these_are_not_the_branch_weights_you_are_looking_for", i32 3, i32 5}
+!13 = !{!"branch_weights", i32 2, i32 3}
+!14 = !{!"branch_weights", i32 4, i32 7}
; CHECK: !0 = !{!"branch_weights", i32 5, i32 11}
-; CHECK: !1 = !{!"branch_weights", i32 1, i32 5}
-; CHECK: !2 = !{!"branch_weights", i32 7, i32 1, i32 2}
-; CHECK: !3 = !{!"branch_weights", i32 49, i32 12, i32 24, i32 35}
-; CHECK: !4 = !{!"branch_weights", i32 11, i32 5}
-; CHECK: !5 = !{!"branch_weights", i32 17, i32 15}
-; CHECK: !6 = !{!"branch_weights", i32 9, i32 7}
-; CHECK: !7 = !{!"branch_weights", i32 17, i32 9, i32 8, i32 7, i32 17}
-; CHECK: !8 = !{!"branch_weights", i32 24, i32 33}
-; CHECK: !9 = !{!"branch_weights", i32 8, i32 33}
+; CHECK: !1 = !{!"branch_weights", i32 1, i32 3}
+; CHECK: !2 = !{!"branch_weights", i32 1, i32 5}
+; CHECK: !3 = !{!"branch_weights", i32 7, i32 1, i32 2}
+; CHECK: !4 = !{!"branch_weights", i32 49, i32 12, i32 24, i32 35}
+; CHECK: !5 = !{!"branch_weights", i32 11, i32 5}
+; CHECK: !6 = !{!"branch_weights", i32 17, i32 15}
+; CHECK: !7 = !{!"branch_weights", i32 9, i32 7}
+; CHECK: !8 = !{!"branch_weights", i32 17, i32 9, i32 8, i32 7, i32 17}
+; CHECK: !9 = !{!"branch_weights", i32 24, i32 33}
+; CHECK: !10 = !{!"branch_weights", i32 8, i32 33}
;; The false weight prints out as a negative integer here, but inside llvm, we
;; treat the weight as an unsigned integer.
-; CHECK: !10 = !{!"branch_weights", i32 112017436, i32 -735157296}
+; CHECK: !11 = !{!"branch_weights", i32 112017436, i32 -735157296}
+; CHECK: !12 = !{!"branch_weights", i32 3, i32 5}
+; CHECK: !13 = !{!"branch_weights", i32 22, i32 12}
+; CHECK: !14 = !{!"branch_weights", i32 34, i32 21}
+; CHECK: !15 = !{!"branch_weights", i32 33, i32 14}
+; CHECK: !16 = !{!"branch_weights", i32 47, i32 8}
+; CHECK: !17 = !{!"branch_weights", i32 6, i32 2}
+; CHECK: !18 = !{!"branch_weights", i32 8, i32 2}
diff --git a/test/Transforms/SimplifyCFG/speculate-store.ll b/test/Transforms/SimplifyCFG/speculate-store.ll
index c1ac7bcea249..497e024e2489 100644
--- a/test/Transforms/SimplifyCFG/speculate-store.ll
+++ b/test/Transforms/SimplifyCFG/speculate-store.ll
@@ -1,108 +1,90 @@
; RUN: opt -simplifycfg -S < %s | FileCheck %s
-define void @ifconvertstore(i32 %m, i32* %A, i32* %B, i32 %C, i32 %D) {
+define void @ifconvertstore(i32* %A, i32 %B, i32 %C, i32 %D) {
+; CHECK-LABEL: @ifconvertstore(
+; CHECK: store i32 %B, i32* %A
+; CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 %D, 42
+; CHECK-NEXT: [[C_B:%.*]] = select i1 [[CMP]], i32 %C, i32 %B, !prof !0
+; CHECK-NEXT: store i32 [[C_B]], i32* %A
+; CHECK-NEXT: ret void
+;
entry:
- %arrayidx = getelementptr inbounds i32, i32* %B, i64 0
- %0 = load i32, i32* %arrayidx, align 4
- %add = add nsw i32 %0, %C
- %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 0
-
; First store to the location.
- store i32 %add, i32* %arrayidx2, align 4
- %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 1
- %1 = load i32, i32* %arrayidx4, align 4
- %add5 = add nsw i32 %1, %D
- %cmp6 = icmp sgt i32 %add5, %C
- br i1 %cmp6, label %if.then, label %ret.end
+ store i32 %B, i32* %A
+ %cmp = icmp sgt i32 %D, 42
+ br i1 %cmp, label %if.then, label %ret.end, !prof !0
; Make sure we speculate stores like the following one. It is cheap compared to
; a mispredicated branch.
-; CHECK-LABEL: @ifconvertstore(
-; CHECK: %add5.add = select i1 %cmp6, i32 %add5, i32 %add
-; CHECK: store i32 %add5.add, i32* %arrayidx2, align 4
if.then:
- store i32 %add5, i32* %arrayidx2, align 4
+ store i32 %C, i32* %A
br label %ret.end
ret.end:
ret void
}
-define void @noifconvertstore1(i32 %m, i32* %A, i32* %B, i32 %C, i32 %D) {
-entry:
- %arrayidx = getelementptr inbounds i32, i32* %B, i64 0
- %0 = load i32, i32* %arrayidx, align 4
- %add = add nsw i32 %0, %C
- %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 0
-
; Store to a different location.
- store i32 %add, i32* %arrayidx, align 4
- %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 1
- %1 = load i32, i32* %arrayidx4, align 4
- %add5 = add nsw i32 %1, %D
- %cmp6 = icmp sgt i32 %add5, %C
- br i1 %cmp6, label %if.then, label %ret.end
+define void @noifconvertstore1(i32* %A1, i32* %A2, i32 %B, i32 %C, i32 %D) {
; CHECK-LABEL: @noifconvertstore1(
; CHECK-NOT: select
+;
+entry:
+ store i32 %B, i32* %A1
+ %cmp = icmp sgt i32 %D, 42
+ br i1 %cmp, label %if.then, label %ret.end
+
if.then:
- store i32 %add5, i32* %arrayidx2, align 4
+ store i32 %C, i32* %A2
br label %ret.end
ret.end:
ret void
}
+; This function could store to our address, so we can't repeat the first store a second time.
declare void @unknown_fun()
-define void @noifconvertstore2(i32 %m, i32* %A, i32* %B, i32 %C, i32 %D) {
+define void @noifconvertstore2(i32* %A, i32 %B, i32 %C, i32 %D) {
+; CHECK-LABEL: @noifconvertstore2(
+; CHECK-NOT: select
+;
entry:
- %arrayidx = getelementptr inbounds i32, i32* %B, i64 0
- %0 = load i32, i32* %arrayidx, align 4
- %add = add nsw i32 %0, %C
- %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 0
-
; First store to the location.
- store i32 %add, i32* %arrayidx2, align 4
+ store i32 %B, i32* %A
call void @unknown_fun()
- %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 1
- %1 = load i32, i32* %arrayidx4, align 4
- %add5 = add nsw i32 %1, %D
- %cmp6 = icmp sgt i32 %add5, %C
+ %cmp6 = icmp sgt i32 %D, 42
br i1 %cmp6, label %if.then, label %ret.end
-; CHECK-LABEL: @noifconvertstore2(
-; CHECK-NOT: select
if.then:
- store i32 %add5, i32* %arrayidx2, align 4
+ store i32 %C, i32* %A
br label %ret.end
ret.end:
ret void
}
-define void @noifconvertstore_volatile(i32 %m, i32* %A, i32* %B, i32 %C, i32 %D) {
-entry:
- %arrayidx = getelementptr inbounds i32, i32* %B, i64 0
- %0 = load i32, i32* %arrayidx, align 4
- %add = add nsw i32 %0, %C
- %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 0
+; Make sure we don't speculate volatile stores.
+define void @noifconvertstore_volatile(i32* %A, i32 %B, i32 %C, i32 %D) {
+; CHECK-LABEL: @noifconvertstore_volatile(
+; CHECK-NOT: select
+;
+entry:
; First store to the location.
- store i32 %add, i32* %arrayidx2, align 4
- %arrayidx4 = getelementptr inbounds i32, i32* %B, i64 1
- %1 = load i32, i32* %arrayidx4, align 4
- %add5 = add nsw i32 %1, %D
- %cmp6 = icmp sgt i32 %add5, %C
+ store i32 %B, i32* %A
+ %cmp6 = icmp sgt i32 %D, 42
br i1 %cmp6, label %if.then, label %ret.end
-; Make sure we don't speculate volatile stores.
-; CHECK-LABEL: @noifconvertstore_volatile(
-; CHECK-NOT: select
if.then:
- store volatile i32 %add5, i32* %arrayidx2, align 4
+ store volatile i32 %C, i32* %A
br label %ret.end
ret.end:
ret void
}
+
+; CHECK: !0 = !{!"branch_weights", i32 3, i32 5}
+!0 = !{!"branch_weights", i32 3, i32 5}
+
diff --git a/test/Transforms/SimplifyCFG/switch-masked-bits.ll b/test/Transforms/SimplifyCFG/switch-masked-bits.ll
index 692973c362bf..2d46aac23f61 100644
--- a/test/Transforms/SimplifyCFG/switch-masked-bits.ll
+++ b/test/Transforms/SimplifyCFG/switch-masked-bits.ll
@@ -1,10 +1,18 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -S -simplifycfg < %s | FileCheck %s
define i32 @test1(i32 %x) nounwind {
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: a:
+; CHECK-NEXT: [[I:%.*]] = shl i32 %x, 1
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 [[I]], 24
+; CHECK-NEXT: [[DOT:%.*]] = select i1 [[COND]], i32 5, i32 0
+; CHECK-NEXT: ret i32 [[DOT]]
+;
%i = shl i32 %x, 1
switch i32 %i, label %a [
- i32 21, label %b
- i32 24, label %c
+ i32 21, label %b
+ i32 24, label %c
]
a:
@@ -13,18 +21,18 @@ b:
ret i32 3
c:
ret i32 5
-; CHECK-LABEL: @test1(
-; CHECK: %cond = icmp eq i32 %i, 24
-; CHECK: %. = select i1 %cond, i32 5, i32 0
-; CHECK: ret i32 %.
}
define i32 @test2(i32 %x) nounwind {
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: a:
+; CHECK-NEXT: ret i32 0
+;
%i = shl i32 %x, 1
switch i32 %i, label %a [
- i32 21, label %b
- i32 23, label %c
+ i32 21, label %b
+ i32 23, label %c
]
a:
@@ -33,6 +41,37 @@ b:
ret i32 3
c:
ret i32 5
-; CHECK-LABEL: @test2(
-; CHECK: ret i32 0
}
+
+; We're sign extending an 8-bit value.
+; The switch condition must be in the range [-128, 127], so any cases outside of that range must be dead.
+
+define i1 @repeated_signbits(i8 %condition) {
+; CHECK-LABEL: @repeated_signbits(
+; CHECK: switch i32
+; CHECK-DAG: i32 -128, label %a
+; CHECK-DAG: i32 -1, label %a
+; CHECK-DAG: i32 0, label %a
+; CHECK-DAG: i32 127, label %a
+; CHECK-NEXT: ]
+;
+entry:
+ %sext = sext i8 %condition to i32
+ switch i32 %sext, label %default [
+ i32 -2147483648, label %a
+ i32 -129, label %a
+ i32 -128, label %a
+ i32 -1, label %a
+ i32 0, label %a
+ i32 127, label %a
+ i32 128, label %a
+ i32 2147483647, label %a
+ ]
+
+a:
+ ret i1 1
+
+default:
+ ret i1 0
+}
+
diff --git a/test/Transforms/SimplifyCFG/switch_create.ll b/test/Transforms/SimplifyCFG/switch_create.ll
index 490b7513a944..29d3a34a05e6 100644
--- a/test/Transforms/SimplifyCFG/switch_create.ll
+++ b/test/Transforms/SimplifyCFG/switch_create.ll
@@ -1,4 +1,4 @@
-; RUN: opt -S -simplifycfg < %s | FileCheck -check-prefix=CHECK %s
+; RUN: opt -S -simplifycfg < %s | FileCheck %s
; RUN: opt -S -default-data-layout="p:32:32-p1:16:16" -simplifycfg < %s | FileCheck -check-prefix=CHECK -check-prefix=DL %s
declare void @foo1()
@@ -554,3 +554,107 @@ bb20: ; preds = %bb19, %bb8
; CHECK: %arg.off = add i32 %arg, -8
; CHECK: icmp ult i32 %arg.off, 11
}
+
+define void @PR26323(i1 %tobool23, i32 %tmp3) {
+entry:
+ %tobool5 = icmp ne i32 %tmp3, 0
+ %neg14 = and i32 %tmp3, -2
+ %cmp17 = icmp ne i32 %neg14, -1
+ %or.cond = and i1 %tobool5, %tobool23
+ %or.cond1 = and i1 %cmp17, %or.cond
+ br i1 %or.cond1, label %if.end29, label %if.then27
+
+if.then27: ; preds = %entry
+ call void @foo1()
+ unreachable
+
+if.end29: ; preds = %entry
+ ret void
+}
+
+; CHECK-LABEL: define void @PR26323(
+; CHECK: %tobool5 = icmp ne i32 %tmp3, 0
+; CHECK: %neg14 = and i32 %tmp3, -2
+; CHECK: %cmp17 = icmp ne i32 %neg14, -1
+; CHECK: %or.cond = and i1 %tobool5, %tobool23
+; CHECK: %or.cond1 = and i1 %cmp17, %or.cond
+; CHECK: br i1 %or.cond1, label %if.end29, label %if.then27
+
+; Form a switch when and'ing a negated power of two
+; CHECK-LABEL: define void @test19
+; CHECK: switch i32 %arg, label %else [
+; CHECK: i32 32, label %if
+; CHECK: i32 13, label %if
+; CHECK: i32 12, label %if
+define void @test19(i32 %arg) {
+ %and = and i32 %arg, -2
+ %cmp1 = icmp eq i32 %and, 12
+ %cmp2 = icmp eq i32 %arg, 32
+ %pred = or i1 %cmp1, %cmp2
+ br i1 %pred, label %if, label %else
+
+if:
+ call void @foo1()
+ ret void
+
+else:
+ ret void
+}
+
+; Since %cmp1 is always false, a switch is never formed
+; CHECK-LABEL: define void @test20
+; CHECK-NOT: switch
+; CHECK: ret void
+define void @test20(i32 %arg) {
+ %and = and i32 %arg, -2
+ %cmp1 = icmp eq i32 %and, 13
+ %cmp2 = icmp eq i32 %arg, 32
+ %pred = or i1 %cmp1, %cmp2
+ br i1 %pred, label %if, label %else
+
+if:
+ call void @foo1()
+ ret void
+
+else:
+ ret void
+}
+
+; Form a switch when or'ing a power of two
+; CHECK-LABEL: define void @test21
+; CHECK: i32 32, label %else
+; CHECK: i32 13, label %else
+; CHECK: i32 12, label %else
+define void @test21(i32 %arg) {
+ %and = or i32 %arg, 1
+ %cmp1 = icmp ne i32 %and, 13
+ %cmp2 = icmp ne i32 %arg, 32
+ %pred = and i1 %cmp1, %cmp2
+ br i1 %pred, label %if, label %else
+
+if:
+ call void @foo1()
+ ret void
+
+else:
+ ret void
+}
+
+; Since %cmp1 is always false, a switch is never formed
+; CHECK-LABEL: define void @test22
+; CHECK-NOT: switch
+; CHECK: ret void
+define void @test22(i32 %arg) {
+ %and = or i32 %arg, 1
+ %cmp1 = icmp ne i32 %and, 12
+ %cmp2 = icmp ne i32 %arg, 32
+ %pred = and i1 %cmp1, %cmp2
+ br i1 %pred, label %if, label %else
+
+if:
+ call void @foo1()
+ ret void
+
+else:
+ ret void
+} \ No newline at end of file
diff --git a/test/Transforms/SimplifyCFG/switch_switch_fold.ll b/test/Transforms/SimplifyCFG/switch_switch_fold.ll
index 2e2e31014017..7f6f1c94bd4a 100644
--- a/test/Transforms/SimplifyCFG/switch_switch_fold.ll
+++ b/test/Transforms/SimplifyCFG/switch_switch_fold.ll
@@ -1,8 +1,7 @@
-; RUN: opt < %s -simplifycfg -S | \
-; RUN: grep switch | count 1
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
-; Test that a switch going to a switch on the same value can be merged. All
-; three switches in this example can be merged into one big one.
+; Test that a switch going to a switch on the same value can be merged.
+; All three switches in this example can be merged into one big one.
declare void @foo1()
@@ -43,5 +42,24 @@ F: ; preds = %F, %T, %0, %0
D: ; preds = %F
call void @foo4( )
ret void
+
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: switch i32 %V, label %infloop [
+; CHECK-NEXT: i32 4, label %A
+; CHECK-NEXT: i32 17, label %B
+; CHECK-NEXT: i32 18, label %B
+; CHECK-NEXT: i32 42, label %D
+; CHECK-NEXT: ]
+; CHECK: A:
+; CHECK-NEXT: call void @foo1()
+; CHECK-NEXT: ret void
+; CHECK: B:
+; CHECK-NEXT: call void @foo2()
+; CHECK-NEXT: ret void
+; CHECK: D:
+; CHECK-NEXT: call void @foo4()
+; CHECK-NEXT: ret void
+; CHECK: infloop:
+; CHECK-NEXT: br label %infloop
}
diff --git a/test/Transforms/SimplifyCFG/switch_thread.ll b/test/Transforms/SimplifyCFG/switch_thread.ll
index 93966841a425..32e0325df7f2 100644
--- a/test/Transforms/SimplifyCFG/switch_thread.ll
+++ b/test/Transforms/SimplifyCFG/switch_thread.ll
@@ -1,5 +1,4 @@
-; RUN: opt < %s -simplifycfg -S | \
-; RUN: not grep "call void @DEAD"
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
; Test that we can thread a simple known condition through switch statements.
@@ -45,6 +44,21 @@ B: ; preds = %T
C: ; preds = %B, %A, %A2, %T, %T
call void @DEAD( )
ret void
+
+; CHECK-LABEL: @test1(
+; CHECK-NEXT: switch i32 %V, label %A [
+; CHECK-NEXT: i32 4, label %T
+; CHECK-NEXT: i32 17, label %Done
+; CHECK-NEXT: ]
+; CHECK: T:
+; CHECK-NEXT: call void @foo1()
+; CHECK-NEXT: call void @foo2()
+; CHECK-NEXT: br label %Done
+; CHECK: A:
+; CHECK-NEXT: call void @foo1()
+; CHECK-NEXT: br label %Done
+; CHECK: Done:
+; CHECK-NEXT: ret void
}
define void @test2(i32 %V) {
@@ -75,5 +89,25 @@ D: ; preds = %A, %0
ret void
E: ; preds = %A, %0
ret void
+
+; CHECK-LABEL: @test2(
+; CHECK-NEXT: switch i32 %V, label %A [
+; CHECK-NEXT: i32 4, label %T
+; CHECK-NEXT: i32 17, label %D
+; CHECK-NEXT: i32 1234, label %E
+; CHECK-NEXT: ]
+; CHECK: A:
+; CHECK-NEXT: call void @foo1()
+; CHECK-NEXT: [[COND:%.*]] = icmp eq i32 %V, 42
+; CHECK-NEXT: br i1 [[COND]], label %D, label %E
+; CHECK: T:
+; CHECK-NEXT: call void @foo1()
+; CHECK-NEXT: call void @foo1()
+; CHECK-NEXT: ret void
+; CHECK: D:
+; CHECK-NEXT: call void @foo1()
+; CHECK-NEXT: ret void
+; CHECK: E:
+; CHECK-NEXT: ret void
}
diff --git a/test/Transforms/SimplifyCFG/trap-debugloc.ll b/test/Transforms/SimplifyCFG/trap-debugloc.ll
index 2887aaf52eee..a912dc561a4f 100644
--- a/test/Transforms/SimplifyCFG/trap-debugloc.ll
+++ b/test/Transforms/SimplifyCFG/trap-debugloc.ll
@@ -9,16 +9,14 @@ define void @foo() nounwind ssp !dbg !0 {
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!10}
-!llvm.dbg.sp = !{!0}
-!0 = distinct !DISubprogram(name: "foo", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !8, scope: !1, type: !3)
+!0 = distinct !DISubprogram(name: "foo", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, unit: !2, file: !8, scope: !1, type: !3)
!1 = !DIFile(filename: "foo.c", directory: "/private/tmp")
-!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "Apple clang version 3.0 (tags/Apple/clang-206.1) (based on LLVM 3.0svn)", isOptimized: true, emissionKind: 0, file: !8, enums: !{}, retainedTypes: !{}, subprograms: !9)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "Apple clang version 3.0 (tags/Apple/clang-206.1) (based on LLVM 3.0svn)", isOptimized: true, emissionKind: FullDebug, file: !8, enums: !{}, retainedTypes: !{})
!3 = !DISubroutineType(types: !4)
!4 = !{null}
!5 = !DILocation(line: 4, column: 2, scope: !6)
!6 = distinct !DILexicalBlock(line: 3, column: 12, file: !8, scope: !0)
!7 = !DILocation(line: 5, column: 1, scope: !6)
!8 = !DIFile(filename: "foo.c", directory: "/private/tmp")
-!9 = !{!0}
!10 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/Transforms/SimplifyCFG/two-entry-phi-return.ll b/test/Transforms/SimplifyCFG/two-entry-phi-return.ll
index fb18624c71f7..1e9aa6b48f6b 100644
--- a/test/Transforms/SimplifyCFG/two-entry-phi-return.ll
+++ b/test/Transforms/SimplifyCFG/two-entry-phi-return.ll
@@ -1,15 +1,26 @@
-; RUN: opt < %s -simplifycfg -S | not grep br
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
+; RUN: opt < %s -simplifycfg -S | FileCheck %s
define i1 @qux(i8* %m, i8* %n, i8* %o, i8* %p) nounwind {
+; CHECK-LABEL: @qux(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i8* %m, %n
+; CHECK-NEXT: [[TMP15:%.*]] = icmp eq i8* %o, %p
+; CHECK-NEXT: [[TMP15_:%.*]] = select i1 [[TMP7]], i1 [[TMP15]], i1 false, !prof !0
+; CHECK-NEXT: ret i1 [[TMP15_]]
+;
entry:
- %tmp7 = icmp eq i8* %m, %n
- br i1 %tmp7, label %bb, label %UnifiedReturnBlock
+ %tmp7 = icmp eq i8* %m, %n
+ br i1 %tmp7, label %bb, label %UnifiedReturnBlock, !prof !0
bb:
- %tmp15 = icmp eq i8* %o, %p
- br label %UnifiedReturnBlock
+ %tmp15 = icmp eq i8* %o, %p
+ br label %UnifiedReturnBlock
UnifiedReturnBlock:
- %result = phi i1 [ 0, %entry ], [ %tmp15, %bb ]
- ret i1 %result
+ %result = phi i1 [ 0, %entry ], [ %tmp15, %bb ]
+ ret i1 %result
+
}
+
+!0 = !{!"branch_weights", i32 4, i32 64}
diff --git a/test/Transforms/SimplifyCFG/unreachable-cleanuppad.ll b/test/Transforms/SimplifyCFG/unreachable-cleanuppad.ll
new file mode 100644
index 000000000000..9198b2a6ea4a
--- /dev/null
+++ b/test/Transforms/SimplifyCFG/unreachable-cleanuppad.ll
@@ -0,0 +1,40 @@
+; RUN: opt -simplifycfg -S < %s | FileCheck %s
+target datalayout = "e-m:x-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
+target triple = "i686-pc-win32"
+
+declare i32 @__CxxFrameHandler3(...)
+
+declare void @fn_2()
+
+define void @fn_1(i1 %B) personality i32 (...)* @__CxxFrameHandler3 {
+entry:
+ br i1 %B, label %__Ea.exit, label %lor.lhs.false.i.i
+
+lor.lhs.false.i.i:
+ br i1 %B, label %if.end.i.i, label %__Ea.exit
+
+if.end.i.i:
+ invoke void @fn_2()
+ to label %__Ea.exit unwind label %ehcleanup.i
+
+ehcleanup.i:
+ %t4 = cleanuppad within none []
+ br label %arraydestroy.body.i
+
+arraydestroy.body.i:
+ %gep = getelementptr i8, i8* null, i32 -1
+ br label %dtor.exit.i
+
+dtor.exit.i:
+ br i1 %B, label %arraydestroy.done3.i, label %arraydestroy.body.i
+
+arraydestroy.done3.i:
+ cleanupret from %t4 unwind to caller
+
+__Ea.exit:
+ ret void
+}
+
+; CHECK-LABEL: define void @fn_1(
+; CHECK-NEXT: entry:
+; CHECK-NEXT: ret void
diff --git a/test/Transforms/Sink/basic.ll b/test/Transforms/Sink/basic.ll
index 1bbf161921d1..8ff41262014d 100644
--- a/test/Transforms/Sink/basic.ll
+++ b/test/Transforms/Sink/basic.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -basicaa -sink -S | FileCheck %s
+; RUN: opt < %s -aa-pipeline='basic-aa' -passes='sink' -S | FileCheck %s
@A = external global i32
@B = external global i32
diff --git a/test/Transforms/Sink/call.ll b/test/Transforms/Sink/call.ll
new file mode 100644
index 000000000000..5aaad4499f59
--- /dev/null
+++ b/test/Transforms/Sink/call.ll
@@ -0,0 +1,112 @@
+; RUN: opt < %s -basicaa -sink -S | FileCheck %s
+
+declare i32 @f_load_global() nounwind readonly
+declare i32 @f_load_arg(i32*) nounwind readonly argmemonly
+declare void @f_store_global(i32) nounwind
+declare void @f_store_arg(i32*) nounwind argmemonly
+declare void @f_readonly_arg(i32* readonly, i32*) nounwind argmemonly
+declare i32 @f_readnone(i32) nounwind readnone
+
+@A = external global i32
+@B = external global i32
+
+; Sink readonly call if no stores are in the way.
+;
+; CHECK-LABEL: @test_sink_no_stores(
+; CHECK: true:
+; CHECK-NEXT: %l = call i32 @f_load_global
+; CHECK-NEXT: ret i32 %l
+define i32 @test_sink_no_stores(i1 %z) {
+ %l = call i32 @f_load_global()
+ br i1 %z, label %true, label %false
+true:
+ ret i32 %l
+false:
+ ret i32 0
+}
+
+; CHECK-LABEL: @test_sink_argmem_store(
+; CHECK: true:
+; CHECK-NEXT: %l = call i32 @f_load_arg
+; CHECK-NEXT: ret i32 %l
+define i32 @test_sink_argmem_store(i1 %z) {
+ %l = call i32 @f_load_arg(i32* @A)
+ store i32 0, i32* @B
+ br i1 %z, label %true, label %false
+true:
+ ret i32 %l
+false:
+ ret i32 0
+}
+
+; CHECK-LABEL: @test_sink_argmem_call(
+; CHECK: true:
+; CHECK-NEXT: %l = call i32 @f_load_arg
+; CHECK-NEXT: ret i32 %l
+define i32 @test_sink_argmem_call(i1 %z) {
+ %l = call i32 @f_load_arg(i32* @A)
+ call void @f_store_arg(i32* @B)
+ br i1 %z, label %true, label %false
+true:
+ ret i32 %l
+false:
+ ret i32 0
+}
+
+; CHECK-LABEL: @test_sink_argmem_multiple(
+; CHECK: true:
+; CHECK-NEXT: %l = call i32 @f_load_arg
+; CHECK-NEXT: ret i32 %l
+define i32 @test_sink_argmem_multiple(i1 %z) {
+ %l = call i32 @f_load_arg(i32* @A)
+ call void @f_readonly_arg(i32* @A, i32* @B)
+ br i1 %z, label %true, label %false
+true:
+ ret i32 %l
+false:
+ ret i32 0
+}
+
+; But don't sink if there is a store.
+;
+; CHECK-LABEL: @test_nosink_store(
+; CHECK: call i32 @f_load_global
+; CHECK-NEXT: store i32
+define i32 @test_nosink_store(i1 %z) {
+ %l = call i32 @f_load_global()
+ store i32 0, i32* @A
+ br i1 %z, label %true, label %false
+true:
+ ret i32 %l
+false:
+ ret i32 0
+}
+
+; CHECK-LABEL: @test_nosink_call(
+; CHECK: call i32 @f_load_global
+; CHECK-NEXT: call void @f_store_global
+define i32 @test_nosink_call(i1 %z) {
+ %l = call i32 @f_load_global()
+ call void @f_store_global(i32 0)
+ br i1 %z, label %true, label %false
+true:
+ ret i32 %l
+false:
+ ret i32 0
+}
+
+; readnone calls are sunk across stores.
+;
+; CHECK-LABEL: @test_sink_readnone(
+; CHECK: true:
+; CHECK-NEXT: %l = call i32 @f_readnone(
+; CHECK-NEXT: ret i32 %l
+define i32 @test_sink_readnone(i1 %z) {
+ %l = call i32 @f_readnone(i32 0)
+ store i32 0, i32* @A
+ br i1 %z, label %true, label %false
+true:
+ ret i32 %l
+false:
+ ret i32 0
+}
diff --git a/test/Transforms/Sink/convergent.ll b/test/Transforms/Sink/convergent.ll
index 49207dbc9927..b209e6705468 100644
--- a/test/Transforms/Sink/convergent.ll
+++ b/test/Transforms/Sink/convergent.ll
@@ -21,4 +21,3 @@ end:
}
declare i32 @bar() readonly convergent
-
diff --git a/test/Transforms/StraightLineStrengthReduce/slsr-add.ll b/test/Transforms/StraightLineStrengthReduce/slsr-add.ll
index e25ddc2888a3..b4f448ace2ae 100644
--- a/test/Transforms/StraightLineStrengthReduce/slsr-add.ll
+++ b/test/Transforms/StraightLineStrengthReduce/slsr-add.ll
@@ -98,4 +98,19 @@ define void @simple_enough(i32 %b, i32 %s) {
ret void
}
+define void @slsr_strided_add_128bit(i128 %b, i128 %s) {
+; CHECK-LABEL: @slsr_strided_add_128bit(
+ %s125 = shl i128 %s, 125
+ %s126 = shl i128 %s, 126
+ %1 = add i128 %b, %s125
+; CHECK: [[t1:%[a-zA-Z0-9]+]] = add i128 %b, %s125
+ call void @bar(i128 %1)
+ %2 = add i128 %b, %s126
+; CHECK: [[t2:%[a-zA-Z0-9]+]] = add i128 [[t1]], %s125
+ call void @bar(i128 %2)
+; CHECK: call void @bar(i128 [[t2]])
+ ret void
+}
+
declare void @foo(i32)
+declare void @bar(i128)
diff --git a/test/Transforms/StraightLineStrengthReduce/slsr-gep.ll b/test/Transforms/StraightLineStrengthReduce/slsr-gep.ll
index bd92780a036c..b9bb4faf1b41 100644
--- a/test/Transforms/StraightLineStrengthReduce/slsr-gep.ll
+++ b/test/Transforms/StraightLineStrengthReduce/slsr-gep.ll
@@ -1,6 +1,6 @@
; RUN: opt < %s -slsr -gvn -S | FileCheck %s
-target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64"
+target datalayout = "e-i64:64-v16:16-v32:32-n16:32:64-p:64:64:64-p1:32:32:32"
; foo(input[0]);
; foo(input[s]);
@@ -16,21 +16,18 @@ define void @slsr_gep(i32* %input, i64 %s) {
; CHECK-LABEL: @slsr_gep(
; v0 = input[0];
%p0 = getelementptr inbounds i32, i32* %input, i64 0
- %v0 = load i32, i32* %p0
- call void @foo(i32 %v0)
+ call void @foo(i32* %p0)
; v1 = input[s];
%p1 = getelementptr inbounds i32, i32* %input, i64 %s
; CHECK: %p1 = getelementptr inbounds i32, i32* %input, i64 %s
- %v1 = load i32, i32* %p1
- call void @foo(i32 %v1)
+ call void @foo(i32* %p1)
; v2 = input[s * 2];
%s2 = shl nsw i64 %s, 1
%p2 = getelementptr inbounds i32, i32* %input, i64 %s2
; CHECK: %p2 = getelementptr inbounds i32, i32* %p1, i64 %s
- %v2 = load i32, i32* %p2
- call void @foo(i32 %v2)
+ call void @foo(i32* %p2)
ret void
}
@@ -49,23 +46,20 @@ define void @slsr_gep_sext(i32* %input, i32 %s) {
; CHECK-LABEL: @slsr_gep_sext(
; v0 = input[0];
%p0 = getelementptr inbounds i32, i32* %input, i64 0
- %v0 = load i32, i32* %p0
- call void @foo(i32 %v0)
+ call void @foo(i32* %p0)
; v1 = input[s];
%t = sext i32 %s to i64
%p1 = getelementptr inbounds i32, i32* %input, i64 %t
; CHECK: %p1 = getelementptr inbounds i32, i32* %input, i64 %t
- %v1 = load i32, i32* %p1
- call void @foo(i32 %v1)
+ call void @foo(i32* %p1)
; v2 = input[s * 2];
%s2 = shl nsw i32 %s, 1
%t2 = sext i32 %s2 to i64
%p2 = getelementptr inbounds i32, i32* %input, i64 %t2
; CHECK: %p2 = getelementptr inbounds i32, i32* %p1, i64 %t
- %v2 = load i32, i32* %p2
- call void @foo(i32 %v2)
+ call void @foo(i32* %p2)
ret void
}
@@ -85,23 +79,20 @@ define void @slsr_gep_2d([10 x [5 x i32]]* %input, i64 %s, i64 %t) {
; CHECK-LABEL: @slsr_gep_2d(
; v0 = input[s][t];
%p0 = getelementptr inbounds [10 x [5 x i32]], [10 x [5 x i32]]* %input, i64 0, i64 %s, i64 %t
- %v0 = load i32, i32* %p0
- call void @foo(i32 %v0)
+ call void @foo(i32* %p0)
; v1 = input[s * 2][t];
%s2 = shl nsw i64 %s, 1
; CHECK: [[BUMP:%[a-zA-Z0-9]+]] = mul i64 %s, 5
%p1 = getelementptr inbounds [10 x [5 x i32]], [10 x [5 x i32]]* %input, i64 0, i64 %s2, i64 %t
; CHECK: %p1 = getelementptr inbounds i32, i32* %p0, i64 [[BUMP]]
- %v1 = load i32, i32* %p1
- call void @foo(i32 %v1)
+ call void @foo(i32* %p1)
; v3 = input[s * 3][t];
%s3 = mul nsw i64 %s, 3
%p2 = getelementptr inbounds [10 x [5 x i32]], [10 x [5 x i32]]* %input, i64 0, i64 %s3, i64 %t
; CHECK: %p2 = getelementptr inbounds i32, i32* %p1, i64 [[BUMP]]
- %v2 = load i32, i32* %p2
- call void @foo(i32 %v2)
+ call void @foo(i32* %p2)
ret void
}
@@ -118,23 +109,20 @@ define void @slsr_gep_uglygep([10 x [5 x %struct.S]]* %input, i64 %s, i64 %t) {
; CHECK-LABEL: @slsr_gep_uglygep(
; v0 = input[s][t].f1;
%p0 = getelementptr inbounds [10 x [5 x %struct.S]], [10 x [5 x %struct.S]]* %input, i64 0, i64 %s, i64 %t, i32 0
- %v0 = load i64, i64* %p0
- call void @bar(i64 %v0)
+ call void @bar(i64* %p0)
; v1 = input[s * 2][t].f1;
%s2 = shl nsw i64 %s, 1
; CHECK: [[BUMP:%[a-zA-Z0-9]+]] = mul i64 %s, 60
%p1 = getelementptr inbounds [10 x [5 x %struct.S]], [10 x [5 x %struct.S]]* %input, i64 0, i64 %s2, i64 %t, i32 0
; CHECK: getelementptr inbounds i8, i8* %{{[0-9]+}}, i64 [[BUMP]]
- %v1 = load i64, i64* %p1
- call void @bar(i64 %v1)
+ call void @bar(i64* %p1)
; v2 = input[s * 3][t].f1;
%s3 = mul nsw i64 %s, 3
%p2 = getelementptr inbounds [10 x [5 x %struct.S]], [10 x [5 x %struct.S]]* %input, i64 0, i64 %s3, i64 %t, i32 0
; CHECK: getelementptr inbounds i8, i8* %{{[0-9]+}}, i64 [[BUMP]]
- %v2 = load i64, i64* %p2
- call void @bar(i64 %v2)
+ call void @bar(i64* %p2)
ret void
}
@@ -143,26 +131,61 @@ define void @slsr_out_of_bounds_gep(i32* %input, i32 %s) {
; CHECK-LABEL: @slsr_out_of_bounds_gep(
; v0 = input[0];
%p0 = getelementptr i32, i32* %input, i64 0
- %v0 = load i32, i32* %p0
- call void @foo(i32 %v0)
+ call void @foo(i32* %p0)
; v1 = input[(long)s];
%t = sext i32 %s to i64
%p1 = getelementptr i32, i32* %input, i64 %t
; CHECK: %p1 = getelementptr i32, i32* %input, i64 %t
- %v1 = load i32, i32* %p1
- call void @foo(i32 %v1)
+ call void @foo(i32* %p1)
; v2 = input[(long)(s * 2)];
%s2 = shl nsw i32 %s, 1
%t2 = sext i32 %s2 to i64
%p2 = getelementptr i32, i32* %input, i64 %t2
; CHECK: %p2 = getelementptr i32, i32* %p1, i64 %t
- %v2 = load i32, i32* %p2
- call void @foo(i32 %v2)
+ call void @foo(i32* %p2)
ret void
}
-declare void @foo(i32)
-declare void @bar(i64)
+define void @slsr_gep_128bit_index(i32* %input, i128 %s) {
+; CHECK-LABEL: @slsr_gep_128bit_index(
+ ; p0 = &input[0]
+ %p0 = getelementptr inbounds i32, i32* %input, i128 0
+ call void @foo(i32* %p0)
+
+ ; p1 = &input[s << 125]
+ %s125 = shl nsw i128 %s, 125
+ %p1 = getelementptr inbounds i32, i32* %input, i128 %s125
+; CHECK: %p1 = getelementptr inbounds i32, i32* %input, i128 %s125
+ call void @foo(i32* %p1)
+
+ ; p2 = &input[s << 126]
+ %s126 = shl nsw i128 %s, 126
+ %p2 = getelementptr inbounds i32, i32* %input, i128 %s126
+; CHECK: %p2 = getelementptr inbounds i32, i32* %input, i128 %s126
+ call void @foo(i32* %p2)
+
+ ret void
+}
+
+define void @slsr_gep_32bit_pointer(i32 addrspace(1)* %input, i64 %s) {
+; CHECK-LABEL: @slsr_gep_32bit_pointer(
+ ; p1 = &input[s]
+ %p1 = getelementptr inbounds i32, i32 addrspace(1)* %input, i64 %s
+ call void @baz(i32 addrspace(1)* %p1)
+
+ ; p2 = &input[s * 2]
+ %s2 = mul nsw i64 %s, 2
+ %p2 = getelementptr inbounds i32, i32 addrspace(1)* %input, i64 %s2
+ ; %s2 is wider than the pointer size of addrspace(1), so do not factor it.
+; CHECK: %p2 = getelementptr inbounds i32, i32 addrspace(1)* %input, i64 %s2
+ call void @baz(i32 addrspace(1)* %p2)
+
+ ret void
+}
+
+declare void @foo(i32*)
+declare void @bar(i64*)
+declare void @baz(i32 addrspace(1)*)
diff --git a/test/Transforms/StripSymbols/2010-06-30-StripDebug.ll b/test/Transforms/StripSymbols/2010-06-30-StripDebug.ll
index 32d7e77b20df..cb6c26e9aa49 100644
--- a/test/Transforms/StripSymbols/2010-06-30-StripDebug.ll
+++ b/test/Transforms/StripSymbols/2010-06-30-StripDebug.ll
@@ -1,6 +1,6 @@
; RUN: opt -strip-debug < %s -S | FileCheck %s
-; CHECK-NOT: llvm.dbg
+; CHECK-NOT: call void @llvm.dbg.value
@x = common global i32 0 ; <i32*> [#uses=0]
@@ -12,15 +12,12 @@ entry:
declare void @llvm.dbg.value(metadata, i64, metadata, metadata) nounwind readnone
-!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!13}
-!llvm.dbg.sp = !{!0}
-!llvm.dbg.lv.foo = !{!5}
-!llvm.dbg.gv = !{!8}
+!llvm.dbg.cu = !{!2}
-!0 = distinct !DISubprogram(name: "foo", linkageName: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !12, scope: !1, type: !3)
+!0 = distinct !DISubprogram(name: "foo", linkageName: "foo", line: 2, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, unit: !2, file: !12, scope: !1, type: !3)
!1 = !DIFile(filename: "b.c", directory: "/tmp")
-!2 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 0, file: !12)
+!2 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: FullDebug, file: !12, globals: !{!8})
!3 = !DISubroutineType(types: !4)
!4 = !{null}
!5 = !DILocalVariable(name: "y", line: 3, scope: !6, file: !1, type: !7)
diff --git a/test/Transforms/StripSymbols/2010-08-25-crash.ll b/test/Transforms/StripSymbols/2010-08-25-crash.ll
index ba8979c9772f..4e454ba2c8bf 100644
--- a/test/Transforms/StripSymbols/2010-08-25-crash.ll
+++ b/test/Transforms/StripSymbols/2010-08-25-crash.ll
@@ -7,9 +7,9 @@ entry:
!llvm.dbg.cu = !{!2}
!llvm.module.flags = !{!14}
-!0 = distinct !DISubprogram(name: "foo", linkageName: "foo", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, file: !10, scope: !1, type: !3)
+!0 = distinct !DISubprogram(name: "foo", linkageName: "foo", line: 3, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: false, unit: !2, file: !10, scope: !1, type: !3)
!1 = !DIFile(filename: "/tmp/a.c", directory: "/Volumes/Lalgate/clean/D.CW")
-!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.8 (trunk 112062)", isOptimized: true, emissionKind: 1, file: !10, enums: !11, retainedTypes: !11, subprograms: !12, globals: !13)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, producer: "clang version 2.8 (trunk 112062)", isOptimized: true, emissionKind: FullDebug, file: !10, enums: !11, retainedTypes: !11, globals: !13)
!3 = !DISubroutineType(types: !4)
!4 = !{!5}
!5 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
@@ -19,6 +19,5 @@ entry:
!9 = distinct !DILexicalBlock(line: 3, column: 11, file: !10, scope: !0)
!10 = !DIFile(filename: "/tmp/a.c", directory: "/Volumes/Lalgate/clean/D.CW")
!11 = !{}
-!12 = !{!0}
!13 = !{!6}
!14 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/Transforms/StripSymbols/strip-dead-debug-info.ll b/test/Transforms/StripSymbols/strip-dead-debug-info.ll
index 39038c955617..91074808df4d 100644
--- a/test/Transforms/StripSymbols/strip-dead-debug-info.ll
+++ b/test/Transforms/StripSymbols/strip-dead-debug-info.ll
@@ -1,8 +1,8 @@
; RUN: opt -strip-dead-debug-info -verify %s -S | FileCheck %s
; CHECK: ModuleID = '{{.*}}'
-; CHECK-NOT: bar
-; CHECK-NOT: abcd
+; CHECK-NOT: "bar"
+; CHECK-NOT: "abcd"
@xyz = global i32 2
@@ -30,17 +30,17 @@ attributes #2 = { nounwind readonly ssp }
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!25}
-!0 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: 1, file: !1, enums: !{}, retainedTypes: !{}, subprograms: !23, globals: !24)
+!0 = distinct !DICompileUnit(language: DW_LANG_C89, producer: "4.2.1 (Based on Apple Inc. build 5658) (LLVM build)", isOptimized: true, emissionKind: FullDebug, file: !1, enums: !{}, retainedTypes: !{}, globals: !24)
!1 = !DIFile(filename: "g.c", directory: "/tmp/")
!2 = !{null}
-!3 = distinct !DISubprogram(name: "bar", line: 5, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !1, scope: null, type: !4)
+!3 = distinct !DISubprogram(name: "bar", line: 5, isLocal: true, isDefinition: true, virtualIndex: 6, isOptimized: true, unit: !0, file: !1, scope: null, type: !4)
!4 = !DISubroutineType(types: !2)
!5 = !DIFile(filename: "g.c", directory: "/tmp/")
-!6 = distinct !DISubprogram(name: "fn", linkageName: "fn", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !1, scope: null, type: !7)
+!6 = distinct !DISubprogram(name: "fn", linkageName: "fn", line: 6, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, unit: !0, file: !1, scope: null, type: !7)
!7 = !DISubroutineType(types: !8)
!8 = !{!9}
!9 = !DIBasicType(tag: DW_TAG_base_type, name: "int", size: 32, align: 32, encoding: DW_ATE_signed)
-!10 = distinct !DISubprogram(name: "foo", linkageName: "foo", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, file: !1, scope: null, type: !11)
+!10 = distinct !DISubprogram(name: "foo", linkageName: "foo", line: 7, isLocal: false, isDefinition: true, virtualIndex: 6, isOptimized: true, unit: !0, file: !1, scope: null, type: !11)
!11 = !DISubroutineType(types: !12)
!12 = !{!9, !9}
!13 = !DILocalVariable(name: "bb", line: 5, scope: !14, file: !5, type: !9)
@@ -53,6 +53,5 @@ attributes #2 = { nounwind readonly ssp }
!20 = !DILocation(line: 7, scope: !10)
!21 = !DILocation(line: 10, scope: !22)
!22 = distinct !DILexicalBlock(line: 7, column: 0, file: !1, scope: !10)
-!23 = !{!3, !6, !10}
!24 = !{!16, !17}
!25 = !{i32 1, !"Debug Info Version", i32 3}
diff --git a/test/Transforms/StructurizeCFG/invert-constantexpr.ll b/test/Transforms/StructurizeCFG/invert-constantexpr.ll
new file mode 100644
index 000000000000..fc50a5a90a7f
--- /dev/null
+++ b/test/Transforms/StructurizeCFG/invert-constantexpr.ll
@@ -0,0 +1,30 @@
+; RUN: opt -S -o - -structurizecfg < %s | FileCheck %s
+
+; CHECK-LABEL: @invert_constantexpr_condition(
+; CHECK: %tmp5 = or i1 %tmp4, icmp eq (i32 bitcast (float fadd (float undef, float undef) to i32), i32 0)
+; CHECK: [ icmp ne (i32 bitcast (float fadd (float undef, float undef) to i32), i32 0), %bb ]
+define void @invert_constantexpr_condition(i32 %arg, i32 %arg1) #0 {
+bb:
+ %tmp = icmp eq i32 %arg, 0
+ br i1 icmp eq (i32 bitcast (float fadd (float undef, float undef) to i32), i32 0), label %bb2, label %bb6
+
+bb2:
+ br i1 %tmp, label %bb3, label %bb6
+
+bb3:
+ %tmp4 = phi i1 [ %tmp7, %bb6 ], [ undef, %bb2 ]
+ %tmp5 = or i1 %tmp4, icmp eq (i32 bitcast (float fadd (float undef, float undef) to i32), i32 0)
+ br i1 %tmp5, label %bb8, label %bb8
+
+bb6:
+ %tmp7 = icmp slt i32 %arg, %arg1
+ br label %bb3
+
+bb8:
+ ret void
+}
+
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+
+attributes #0 = { nounwind }
+attributes #1 = { nounwind readnone }
diff --git a/test/Transforms/StructurizeCFG/nested-loop-order.ll b/test/Transforms/StructurizeCFG/nested-loop-order.ll
index 8a506c3e3962..58634d0d37db 100644
--- a/test/Transforms/StructurizeCFG/nested-loop-order.ll
+++ b/test/Transforms/StructurizeCFG/nested-loop-order.ll
@@ -63,17 +63,6 @@ ENDIF28: ; preds = %ENDIF
br i1 %tmp36, label %ENDLOOP, label %LOOP.outer
}
-; Function Attrs: nounwind readnone
-declare <4 x float> @llvm.SI.vs.load.input(<16 x i8>, i32, i32) #1
-
-; Function Attrs: readnone
-declare float @llvm.AMDIL.clamp.(float, float, float) #2
-
-declare void @llvm.SI.export(i32, i32, i32, i32, i32, float, float, float, float)
-
-attributes #0 = { "ShaderType"="1" "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" }
+attributes #0 = { "enable-no-nans-fp-math"="true" "unsafe-fp-math"="true" }
attributes #1 = { nounwind readnone }
attributes #2 = { readnone }
-
-!0 = !{!1, !1, i64 0, i32 1}
-!1 = !{!"const", null}
diff --git a/test/Transforms/TailCallElim/accum_recursion.ll b/test/Transforms/TailCallElim/accum_recursion.ll
index c95bfe6aeed1..1175d581722f 100644
--- a/test/Transforms/TailCallElim/accum_recursion.ll
+++ b/test/Transforms/TailCallElim/accum_recursion.ll
@@ -1,4 +1,5 @@
; RUN: opt < %s -tailcallelim -S | FileCheck %s
+; RUN: opt < %s -passes=tailcallelim -S | FileCheck %s
define i32 @test1_factorial(i32 %x) {
entry:
diff --git a/test/Transforms/TailCallElim/dont_reorder_load.ll b/test/Transforms/TailCallElim/dont_reorder_load.ll
index ac399a1bf5a9..f8542799cc64 100644
--- a/test/Transforms/TailCallElim/dont_reorder_load.ll
+++ b/test/Transforms/TailCallElim/dont_reorder_load.ll
@@ -1,4 +1,4 @@
-; RUN: opt < %s -tailcallelim -S | grep call | count 3
+; RUN: opt < %s -tailcallelim -S | grep call | count 4
; PR4323
; Several cases where tail call elimination should not move the load above the
@@ -62,3 +62,21 @@ else: ; preds = %entry
%tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
ret i32 %tmp10
}
+
+; This load can NOT be moved above the call because the a_arg is not
+; sufficiently dereferenceable.
+define fastcc i32 @no_tailrecelim_4(i32* dereferenceable(2) %a_arg, i32 %a_len_arg, i32 %start_arg) readonly {
+entry:
+ %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
+ br i1 %tmp2, label %if, label %else
+
+if: ; preds = %entry
+ ret i32 0
+
+else: ; preds = %entry
+ %tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
+ %tmp8 = call fastcc i32 @no_tailrecelim_4(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
+ %tmp9 = load i32, i32* %a_arg ; <i32> [#uses=1]
+ %tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
+ ret i32 %tmp10
+}
diff --git a/test/Transforms/TailCallElim/reorder_load.ll b/test/Transforms/TailCallElim/reorder_load.ll
index b989bbf9547a..2f9b692d0991 100644
--- a/test/Transforms/TailCallElim/reorder_load.ll
+++ b/test/Transforms/TailCallElim/reorder_load.ll
@@ -122,3 +122,26 @@ recurse: ; preds = %else
%tmp10 = add i32 %second, %tmp8 ; <i32> [#uses=1]
ret i32 %tmp10
}
+
+; This load can be moved above the call because the function won't write to it
+; and the a_arg is dereferenceable.
+define fastcc i32 @raise_load_5(i32* dereferenceable(4) %a_arg, i32 %a_len_arg, i32 %start_arg) readonly {
+; CHECK-LABEL: @raise_load_5(
+; CHECK-NOT: call
+; CHECK: load i32, i32*
+; CHECK-NOT: call
+; CHECK: }
+entry:
+ %tmp2 = icmp sge i32 %start_arg, %a_len_arg ; <i1> [#uses=1]
+ br i1 %tmp2, label %if, label %else
+
+if: ; preds = %entry
+ ret i32 0
+
+else: ; preds = %entry
+ %tmp7 = add i32 %start_arg, 1 ; <i32> [#uses=1]
+ %tmp8 = call fastcc i32 @raise_load_5(i32* %a_arg, i32 %a_len_arg, i32 %tmp7) ; <i32> [#uses=1]
+ %tmp9 = load i32, i32* %a_arg ; <i32> [#uses=1]
+ %tmp10 = add i32 %tmp9, %tmp8 ; <i32> [#uses=1]
+ ret i32 %tmp10
+}
diff --git a/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll b/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll
deleted file mode 100644
index 4f7a3ca8ce6a..000000000000
--- a/test/Transforms/TailDup/2008-06-11-AvoidDupLoopHeader.ll
+++ /dev/null
@@ -1,28 +0,0 @@
-; REQUIRES: asserts
-; RUN: opt < %s -tailduplicate -taildup-threshold=3 -stats -disable-output 2>&1 | not grep tailduplicate
-; XFAIL: *
-
-define i32 @foo(i32 %l) nounwind {
-entry:
- %cond = icmp eq i32 %l, 1 ; <i1> [#uses=1]
- br i1 %cond, label %bb, label %bb9
-
-bb: ; preds = %entry
- br label %bb9
-
-bb5: ; preds = %bb9
- %tmp7 = call i32 (...) @bar( i32 %x.0 ) nounwind ; <i32> [#uses=1]
- br label %bb9
-
-bb9: ; preds = %bb5, %bb, %entry
- %x.0 = phi i32 [ 0, %entry ], [ %tmp7, %bb5 ], [ 1525, %bb ] ; <i32> [#uses=2]
- %l_addr.0 = phi i32 [ %l, %entry ], [ %tmp11, %bb5 ], [ %l, %bb ] ; <i32> [#uses=1]
- %tmp11 = add i32 %l_addr.0, -1 ; <i32> [#uses=2]
- %tmp13 = icmp eq i32 %tmp11, -1 ; <i1> [#uses=1]
- br i1 %tmp13, label %bb15, label %bb5
-
-bb15: ; preds = %bb9
- ret i32 %x.0
-}
-
-declare i32 @bar(...)
diff --git a/test/Transforms/Util/MemorySSA/assume.ll b/test/Transforms/Util/MemorySSA/assume.ll
new file mode 100644
index 000000000000..d771c78eb1cf
--- /dev/null
+++ b/test/Transforms/Util/MemorySSA/assume.ll
@@ -0,0 +1,19 @@
+; RUN: opt -basicaa -memoryssa -analyze < %s 2>&1 | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='print<memoryssa>,verify<memoryssa>' -disable-output < %s 2>&1 | FileCheck %s
+;
+; Ensures that assumes are treated as not reading or writing memory.
+
+declare void @llvm.assume(i1)
+
+define i32 @foo(i32* %a, i32* %b, i1 %c) {
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: store i32 4
+ store i32 4, i32* %a, align 4
+; CHECK-NOT: MemoryDef
+; CHECK: call void @llvm.assume
+ call void @llvm.assume(i1 %c)
+; CHECK: MemoryUse(1)
+; CHECK-NEXT: %1 = load i32
+ %1 = load i32, i32* %a, align 4
+ ret i32 %1
+}
diff --git a/test/Transforms/Util/MemorySSA/atomic-clobber.ll b/test/Transforms/Util/MemorySSA/atomic-clobber.ll
new file mode 100644
index 000000000000..217d5f65d787
--- /dev/null
+++ b/test/Transforms/Util/MemorySSA/atomic-clobber.ll
@@ -0,0 +1,18 @@
+; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='print<memoryssa>,verify<memoryssa>' -disable-output < %s 2>&1 | FileCheck %s
+;
+; Ensures that atomic loads count as MemoryDefs
+
+define i32 @foo(i32* %a, i32* %b) {
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: store i32 4
+ store i32 4, i32* %a, align 4
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: %1 = load atomic i32
+ %1 = load atomic i32, i32* %b acquire, align 4
+; CHECK: MemoryUse(2)
+; CHECK-NEXT: %2 = load i32
+ %2 = load i32, i32* %a, align 4
+ %3 = add i32 %1, %2
+ ret i32 %3
+}
diff --git a/test/Transforms/Util/MemorySSA/cyclicphi.ll b/test/Transforms/Util/MemorySSA/cyclicphi.ll
new file mode 100644
index 000000000000..c9a5422e0a18
--- /dev/null
+++ b/test/Transforms/Util/MemorySSA/cyclicphi.ll
@@ -0,0 +1,124 @@
+; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='print<memoryssa>,verify<memoryssa>' -disable-output < %s 2>&1 | FileCheck %s
+
+%struct.hoge = type { i32, %struct.widget }
+%struct.widget = type { i64 }
+
+define hidden void @quux(%struct.hoge *%f) align 2 {
+ %tmp = getelementptr inbounds %struct.hoge, %struct.hoge* %f, i64 0, i32 1, i32 0
+ %tmp24 = getelementptr inbounds %struct.hoge, %struct.hoge* %f, i64 0, i32 1
+ %tmp25 = bitcast %struct.widget* %tmp24 to i64**
+ br label %bb26
+
+bb26: ; preds = %bb77, %0
+; CHECK: 3 = MemoryPhi({%0,liveOnEntry},{bb77,2})
+; CHECK-NEXT: br i1 undef, label %bb68, label %bb77
+ br i1 undef, label %bb68, label %bb77
+
+bb68: ; preds = %bb26
+; CHECK: MemoryUse(liveOnEntry)
+; CHECK-NEXT: %tmp69 = load i64, i64* null, align 8
+ %tmp69 = load i64, i64* null, align 8
+; CHECK: 1 = MemoryDef(3)
+; CHECK-NEXT: store i64 %tmp69, i64* %tmp, align 8
+ store i64 %tmp69, i64* %tmp, align 8
+ br label %bb77
+
+bb77: ; preds = %bb68, %bb26
+; CHECK: 2 = MemoryPhi({bb26,3},{bb68,1})
+; CHECK: MemoryUse(2)
+; CHECK-NEXT: %tmp78 = load i64*, i64** %tmp25, align 8
+ %tmp78 = load i64*, i64** %tmp25, align 8
+ %tmp79 = getelementptr inbounds i64, i64* %tmp78, i64 undef
+ br label %bb26
+}
+
+; CHECK-LABEL: define void @quux_skip
+define void @quux_skip(%struct.hoge* noalias %f, i64* noalias %g) align 2 {
+ %tmp = getelementptr inbounds %struct.hoge, %struct.hoge* %f, i64 0, i32 1, i32 0
+ %tmp24 = getelementptr inbounds %struct.hoge, %struct.hoge* %f, i64 0, i32 1
+ %tmp25 = bitcast %struct.widget* %tmp24 to i64**
+ br label %bb26
+
+bb26: ; preds = %bb77, %0
+; CHECK: 3 = MemoryPhi({%0,liveOnEntry},{bb77,2})
+; CHECK-NEXT: br i1 undef, label %bb68, label %bb77
+ br i1 undef, label %bb68, label %bb77
+
+bb68: ; preds = %bb26
+; CHECK: MemoryUse(3)
+; CHECK-NEXT: %tmp69 = load i64, i64* %g, align 8
+ %tmp69 = load i64, i64* %g, align 8
+; CHECK: 1 = MemoryDef(3)
+; CHECK-NEXT: store i64 %tmp69, i64* %g, align 8
+ store i64 %tmp69, i64* %g, align 8
+ br label %bb77
+
+bb77: ; preds = %bb68, %bb26
+; CHECK: 2 = MemoryPhi({bb26,3},{bb68,1})
+; FIXME: This should be MemoryUse(liveOnEntry)
+; CHECK: MemoryUse(3)
+; CHECK-NEXT: %tmp78 = load i64*, i64** %tmp25, align 8
+ %tmp78 = load i64*, i64** %tmp25, align 8
+ br label %bb26
+}
+
+; CHECK-LABEL: define void @quux_dominated
+define void @quux_dominated(%struct.hoge* noalias %f, i64* noalias %g) align 2 {
+ %tmp = getelementptr inbounds %struct.hoge, %struct.hoge* %f, i64 0, i32 1, i32 0
+ %tmp24 = getelementptr inbounds %struct.hoge, %struct.hoge* %f, i64 0, i32 1
+ %tmp25 = bitcast %struct.widget* %tmp24 to i64**
+ br label %bb26
+
+bb26: ; preds = %bb77, %0
+; CHECK: 4 = MemoryPhi({%0,liveOnEntry},{bb77,2})
+; CHECK: MemoryUse(4)
+; CHECK-NEXT: load i64*, i64** %tmp25, align 8
+ load i64*, i64** %tmp25, align 8
+ br i1 undef, label %bb68, label %bb77
+
+bb68: ; preds = %bb26
+; CHECK: MemoryUse(4)
+; CHECK-NEXT: %tmp69 = load i64, i64* %g, align 8
+ %tmp69 = load i64, i64* %g, align 8
+; CHECK: 1 = MemoryDef(4)
+; CHECK-NEXT: store i64 %tmp69, i64* %g, align 8
+ store i64 %tmp69, i64* %g, align 8
+ br label %bb77
+
+bb77: ; preds = %bb68, %bb26
+; CHECK: 3 = MemoryPhi({bb26,4},{bb68,1})
+; CHECK: 2 = MemoryDef(3)
+; CHECK-NEXT: store i64* null, i64** %tmp25, align 8
+ store i64* null, i64** %tmp25, align 8
+ br label %bb26
+}
+
+; CHECK-LABEL: define void @quux_nodominate
+define void @quux_nodominate(%struct.hoge* noalias %f, i64* noalias %g) align 2 {
+ %tmp = getelementptr inbounds %struct.hoge, %struct.hoge* %f, i64 0, i32 1, i32 0
+ %tmp24 = getelementptr inbounds %struct.hoge, %struct.hoge* %f, i64 0, i32 1
+ %tmp25 = bitcast %struct.widget* %tmp24 to i64**
+ br label %bb26
+
+bb26: ; preds = %bb77, %0
+; CHECK: 3 = MemoryPhi({%0,liveOnEntry},{bb77,2})
+; CHECK: MemoryUse(liveOnEntry)
+; CHECK-NEXT: load i64*, i64** %tmp25, align 8
+ load i64*, i64** %tmp25, align 8
+ br i1 undef, label %bb68, label %bb77
+
+bb68: ; preds = %bb26
+; CHECK: MemoryUse(3)
+; CHECK-NEXT: %tmp69 = load i64, i64* %g, align 8
+ %tmp69 = load i64, i64* %g, align 8
+; CHECK: 1 = MemoryDef(3)
+; CHECK-NEXT: store i64 %tmp69, i64* %g, align 8
+ store i64 %tmp69, i64* %g, align 8
+ br label %bb77
+
+bb77: ; preds = %bb68, %bb26
+; CHECK: 2 = MemoryPhi({bb26,3},{bb68,1})
+; CHECK-NEXT: br label %bb26
+ br label %bb26
+}
diff --git a/test/Transforms/Util/MemorySSA/forward-unreachable.ll b/test/Transforms/Util/MemorySSA/forward-unreachable.ll
new file mode 100644
index 000000000000..2bbf399daae4
--- /dev/null
+++ b/test/Transforms/Util/MemorySSA/forward-unreachable.ll
@@ -0,0 +1,23 @@
+; RUN: opt -aa-pipeline=basic-aa -passes='print<memoryssa>,verify<memoryssa>' -disable-output < %s 2>&1 | FileCheck %s
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128"
+
+define void @test() {
+entry:
+ br i1 undef, label %split1, label %split2
+
+split1:
+ store i16 undef, i16* undef, align 2
+ br label %merge
+split2:
+ br label %merge
+forwardunreachable:
+ br label %merge
+merge:
+; The forwardunreachable block still needs an entry in the phi node,
+; because it is reverse reachable, so the CFG still has it as a
+; predecessor of the block
+; CHECK: 3 = MemoryPhi({split1,1},{split2,liveOnEntry},{forwardunreachable,liveOnEntry})
+ store i16 undef, i16* undef, align 2
+ ret void
+}
+
diff --git a/test/Transforms/Util/MemorySSA/function-clobber.ll b/test/Transforms/Util/MemorySSA/function-clobber.ll
new file mode 100644
index 000000000000..a01893a5b954
--- /dev/null
+++ b/test/Transforms/Util/MemorySSA/function-clobber.ll
@@ -0,0 +1,54 @@
+; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='print<memoryssa>,verify<memoryssa>' -disable-output < %s 2>&1 | FileCheck %s
+;
+; Ensuring that external functions without attributes are MemoryDefs
+
+@g = external global i32
+declare void @modifyG()
+
+define i32 @foo() {
+; CHECK: MemoryUse(liveOnEntry)
+; CHECK-NEXT: %1 = load i32
+ %1 = load i32, i32* @g
+
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: store i32 4
+ store i32 4, i32* @g, align 4
+
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: call void @modifyG()
+ call void @modifyG()
+
+; CHECK: MemoryUse(2)
+; CHECK-NEXT: %2 = load i32
+ %2 = load i32, i32* @g
+ %3 = add i32 %2, %1
+ ret i32 %3
+}
+
+declare void @readEverything() readonly
+declare void @clobberEverything()
+
+; CHECK-LABEL: define void @bar
+define void @bar() {
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: call void @clobberEverything()
+ call void @clobberEverything()
+ br i1 undef, label %if.end, label %if.then
+
+if.then:
+; CHECK: MemoryUse(1)
+; CHECK-NEXT: call void @readEverything()
+ call void @readEverything()
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: call void @clobberEverything()
+ call void @clobberEverything()
+ br label %if.end
+
+if.end:
+; CHECK: 3 = MemoryPhi({%0,1},{if.then,2})
+; CHECK: MemoryUse(3)
+; CHECK-NEXT: call void @readEverything()
+ call void @readEverything()
+ ret void
+}
diff --git a/test/Transforms/Util/MemorySSA/function-mem-attrs.ll b/test/Transforms/Util/MemorySSA/function-mem-attrs.ll
new file mode 100644
index 000000000000..11383771a413
--- /dev/null
+++ b/test/Transforms/Util/MemorySSA/function-mem-attrs.ll
@@ -0,0 +1,59 @@
+; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='print<memoryssa>,verify<memoryssa>' -disable-output < %s 2>&1 | FileCheck %s
+;
+; Test that various function attributes give us sane results.
+
+@g = external global i32
+
+declare void @readonlyFunction() readonly
+declare void @noattrsFunction()
+
+define void @readonlyAttr() {
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: store i32 0
+ store i32 0, i32* @g, align 4
+
+ %1 = alloca i32, align 4
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: store i32 0
+ store i32 0, i32* %1, align 4
+
+; CHECK: MemoryUse(1)
+; CHECK-NEXT: call void @readonlyFunction()
+ call void @readonlyFunction()
+
+; CHECK: MemoryUse(1)
+; CHECK-NEXT: call void @noattrsFunction() #
+; Assume that #N is readonly
+ call void @noattrsFunction() readonly
+
+ ; Sanity check that noattrsFunction is otherwise a MemoryDef
+; CHECK: 3 = MemoryDef(2)
+; CHECK-NEXT: call void @noattrsFunction()
+ call void @noattrsFunction()
+ ret void
+}
+
+declare void @argMemOnly(i32*) argmemonly
+
+define void @inaccessableOnlyAttr() {
+ %1 = alloca i32, align 4
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: store i32 0
+ store i32 0, i32* %1, align 4
+
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: store i32 0
+ store i32 0, i32* @g, align 4
+
+; CHECK: MemoryUse(1)
+; CHECK-NEXT: call void @argMemOnly(i32* %1) #
+; Assume that #N is readonly
+ call void @argMemOnly(i32* %1) readonly
+
+; CHECK: 3 = MemoryDef(2)
+; CHECK-NEXT: call void @argMemOnly(i32* %1)
+ call void @argMemOnly(i32* %1)
+
+ ret void
+}
diff --git a/test/Transforms/Util/MemorySSA/livein.ll b/test/Transforms/Util/MemorySSA/livein.ll
new file mode 100644
index 000000000000..93072ea97daf
--- /dev/null
+++ b/test/Transforms/Util/MemorySSA/livein.ll
@@ -0,0 +1,53 @@
+; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='print<memoryssa>,verify<memoryssa>' -disable-output < %s 2>&1 | FileCheck %s
+define void @F(i8*) {
+ br i1 true, label %left, label %right
+left:
+; CHECK: 1 = MemoryDef(liveOnEntry)
+ store i8 16, i8* %0
+ br label %merge
+right:
+ br label %merge
+
+merge:
+; CHECK-NOT: 2 = MemoryPhi
+ret void
+}
+
+define void @F2(i8*) {
+ br i1 true, label %left, label %right
+left:
+; CHECK: 1 = MemoryDef(liveOnEntry)
+ store i8 16, i8* %0
+ br label %merge
+right:
+ br label %merge
+
+merge:
+; CHECK: 2 = MemoryPhi({left,1},{right,liveOnEntry})
+%c = load i8, i8* %0
+ret void
+}
+
+; Ensure we treat def-only blocks as though they have uses for phi placement.
+; CHECK-LABEL: define void @F3
+define void @F3() {
+ %a = alloca i8
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: store i8 0, i8* %a
+ store i8 0, i8* %a
+ br i1 undef, label %if.then, label %if.end
+
+if.then:
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: store i8 1, i8* %a
+ store i8 1, i8* %a
+ br label %if.end
+
+if.end:
+; CHECK: 4 = MemoryPhi({%0,1},{if.then,2})
+; CHECK: 3 = MemoryDef(4)
+; CHECK-NEXT: store i8 2, i8* %a
+ store i8 2, i8* %a
+ ret void
+}
diff --git a/test/Transforms/Util/MemorySSA/load-invariant.ll b/test/Transforms/Util/MemorySSA/load-invariant.ll
new file mode 100644
index 000000000000..e387ff4c5302
--- /dev/null
+++ b/test/Transforms/Util/MemorySSA/load-invariant.ll
@@ -0,0 +1,25 @@
+; XFAIL: *
+; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='print<memoryssa>' -verify-memoryssa -disable-output < %s 2>&1 | FileCheck %s
+;
+; Invariant loads should be considered live on entry, because, once the
+; location is known to be dereferenceable, the value can never change.
+;
+; Currently XFAILed because this optimization was held back from the initial
+; commit.
+
+@g = external global i32
+
+declare void @clobberAllTheThings()
+
+define i32 @foo() {
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: call void @clobberAllTheThings()
+ call void @clobberAllTheThings()
+; CHECK: MemoryUse(liveOnEntry)
+; CHECK-NEXT: %1 = load i32
+ %1 = load i32, i32* @g, align 4, !invariant.load !0
+ ret i32 %1
+}
+
+!0 = !{}
diff --git a/test/Transforms/Util/MemorySSA/many-dom-backedge.ll b/test/Transforms/Util/MemorySSA/many-dom-backedge.ll
new file mode 100644
index 000000000000..3d76f4af2d61
--- /dev/null
+++ b/test/Transforms/Util/MemorySSA/many-dom-backedge.ll
@@ -0,0 +1,77 @@
+; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='print<memoryssa>,verify<memoryssa>' -disable-output < %s 2>&1 | FileCheck %s
+;
+; many-dom.ll, with an added back-edge back into the switch.
+; Because people love their gotos.
+
+declare i1 @getBool() readnone
+
+define i32 @foo(i32* %p) {
+entry:
+ br label %loopbegin
+
+loopbegin:
+; CHECK: 9 = MemoryPhi({entry,liveOnEntry},{sw.epilog,6})
+; CHECK-NEXT: %n =
+ %n = phi i32 [ 0, %entry ], [ %1, %sw.epilog ]
+ %m = alloca i32, align 4
+ switch i32 %n, label %sw.default [
+ i32 0, label %sw.bb
+ i32 1, label %sw.bb1
+ i32 2, label %sw.bb2
+ i32 3, label %sw.bb3
+ ]
+
+sw.bb:
+; CHECK: 1 = MemoryDef(9)
+; CHECK-NEXT: store i32 1
+ store i32 1, i32* %m, align 4
+ br label %sw.epilog
+
+sw.bb1:
+; CHECK: 2 = MemoryDef(9)
+; CHECK-NEXT: store i32 2
+ store i32 2, i32* %m, align 4
+ br label %sw.epilog
+
+sw.bb2:
+; CHECK: 3 = MemoryDef(9)
+; CHECK-NEXT: store i32 3
+ store i32 3, i32* %m, align 4
+ br label %sw.epilog
+
+sw.bb3:
+; CHECK: 10 = MemoryPhi({loopbegin,9},{sw.almostexit,6})
+; CHECK: 4 = MemoryDef(10)
+; CHECK-NEXT: store i32 4
+ store i32 4, i32* %m, align 4
+ br label %sw.epilog
+
+sw.default:
+; CHECK: 5 = MemoryDef(9)
+; CHECK-NEXT: store i32 5
+ store i32 5, i32* %m, align 4
+ br label %sw.epilog
+
+sw.epilog:
+; CHECK: 8 = MemoryPhi({sw.default,5},{sw.bb3,4},{sw.bb,1},{sw.bb1,2},{sw.bb2,3})
+; CHECK-NEXT: MemoryUse(8)
+; CHECK-NEXT: %0 =
+ %0 = load i32, i32* %m, align 4
+; CHECK: 6 = MemoryDef(8)
+; CHECK-NEXT: %1 =
+ %1 = load volatile i32, i32* %p, align 4
+ %2 = icmp eq i32 %0, %1
+ br i1 %2, label %sw.almostexit, label %loopbegin
+
+sw.almostexit:
+ %3 = icmp eq i32 0, %1
+ br i1 %3, label %exit, label %sw.bb3
+
+exit:
+; CHECK: 7 = MemoryDef(6)
+; CHECK-NEXT: %4 = load volatile i32
+ %4 = load volatile i32, i32* %p, align 4
+ %5 = add i32 %4, %1
+ ret i32 %5
+}
diff --git a/test/Transforms/Util/MemorySSA/many-doms.ll b/test/Transforms/Util/MemorySSA/many-doms.ll
new file mode 100644
index 000000000000..d2e6c6fa1e43
--- /dev/null
+++ b/test/Transforms/Util/MemorySSA/many-doms.ll
@@ -0,0 +1,67 @@
+; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='print<memoryssa>,verify<memoryssa>' -disable-output < %s 2>&1 | FileCheck %s
+;
+; Testing many dominators, specifically from a switch statement in C.
+
+declare i1 @getBool() readnone
+
+define i32 @foo(i32* %p) {
+entry:
+ br label %loopbegin
+
+loopbegin:
+; CHECK: 8 = MemoryPhi({entry,liveOnEntry},{sw.epilog,6})
+; CHECK-NEXT: %n =
+ %n = phi i32 [ 0, %entry ], [ %1, %sw.epilog ]
+ %m = alloca i32, align 4
+ switch i32 %n, label %sw.default [
+ i32 0, label %sw.bb
+ i32 1, label %sw.bb1
+ i32 2, label %sw.bb2
+ i32 3, label %sw.bb3
+ ]
+
+sw.bb:
+; CHECK: 1 = MemoryDef(8)
+; CHECK-NEXT: store i32 1
+ store i32 1, i32* %m, align 4
+ br label %sw.epilog
+
+sw.bb1:
+; CHECK: 2 = MemoryDef(8)
+; CHECK-NEXT: store i32 2
+ store i32 2, i32* %m, align 4
+ br label %sw.epilog
+
+sw.bb2:
+; CHECK: 3 = MemoryDef(8)
+; CHECK-NEXT: store i32 3
+ store i32 3, i32* %m, align 4
+ br label %sw.epilog
+
+sw.bb3:
+; CHECK: 4 = MemoryDef(8)
+; CHECK-NEXT: store i32 4
+ store i32 4, i32* %m, align 4
+ br label %sw.epilog
+
+sw.default:
+; CHECK: 5 = MemoryDef(8)
+; CHECK-NEXT: store i32 5
+ store i32 5, i32* %m, align 4
+ br label %sw.epilog
+
+sw.epilog:
+; CHECK: 7 = MemoryPhi({sw.default,5},{sw.bb,1},{sw.bb1,2},{sw.bb2,3},{sw.bb3,4})
+; CHECK-NEXT: MemoryUse(7)
+; CHECK-NEXT: %0 =
+ %0 = load i32, i32* %m, align 4
+; CHECK: 6 = MemoryDef(7)
+; CHECK-NEXT: %1 =
+ %1 = load volatile i32, i32* %p, align 4
+ %2 = icmp eq i32 %0, %1
+ br i1 %2, label %exit, label %loopbegin
+
+exit:
+ ret i32 %1
+}
diff --git a/test/Transforms/Util/MemorySSA/multi-edges.ll b/test/Transforms/Util/MemorySSA/multi-edges.ll
new file mode 100644
index 000000000000..c13fc016b2c7
--- /dev/null
+++ b/test/Transforms/Util/MemorySSA/multi-edges.ll
@@ -0,0 +1,32 @@
+; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='print<memoryssa>,verify<memoryssa>' -disable-output < %s 2>&1 | FileCheck %s
+;
+; Makes sure we have a sane model if both successors of some block is the same
+; block.
+
+define i32 @foo(i1 %a) {
+entry:
+ %0 = alloca i32, align 4
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: store i32 4
+ store i32 4, i32* %0
+ br i1 %a, label %Loop.Body, label %Loop.End
+
+Loop.Body:
+; CHECK: 4 = MemoryPhi({entry,1},{Loop.End,3})
+; CHECK-NEXT: 2 = MemoryDef(4)
+; CHECK-NEXT: store i32 5
+ store i32 5, i32* %0, align 4
+ br i1 %a, label %Loop.End, label %Loop.End ; WhyDoWeEvenHaveThatLever.gif
+
+Loop.End:
+; CHECK: 3 = MemoryPhi({entry,1},{Loop.Body,2},{Loop.Body,2})
+; CHECK-NEXT: MemoryUse(3)
+; CHECK-NEXT: %1 = load
+ %1 = load i32, i32* %0, align 4
+ %2 = icmp eq i32 5, %1
+ br i1 %2, label %Ret, label %Loop.Body
+
+Ret:
+ ret i32 %1
+}
diff --git a/test/Transforms/Util/MemorySSA/multiple-backedges-hal.ll b/test/Transforms/Util/MemorySSA/multiple-backedges-hal.ll
new file mode 100644
index 000000000000..473b3685801c
--- /dev/null
+++ b/test/Transforms/Util/MemorySSA/multiple-backedges-hal.ll
@@ -0,0 +1,73 @@
+; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='print<memoryssa>,verify<memoryssa>' -disable-output < %s 2>&1 | FileCheck %s
+
+; hfinkel's case
+; [entry]
+; |
+; .....
+; (clobbering access - b)
+; |
+; .... ________________________________
+; \ / |
+; (x) |
+; ...... |
+; | |
+; | ______________________ |
+; \ / | |
+; (starting access) | |
+; ... | |
+; (clobbering access - a) | |
+; ... | |
+; | | | |
+; | |_______________________| |
+; | |
+; |_________________________________|
+;
+; More specifically, one access, with multiple clobbering accesses. One of
+; which strictly dominates the access, the other of which has a backedge
+
+; readnone so we don't have a 1:1 mapping of MemorySSA edges to Instructions.
+declare void @doThingWithoutReading() readnone
+declare i8 @getValue() readnone
+declare i1 @getBool() readnone
+
+define hidden void @testcase(i8* %Arg) {
+Entry:
+ call void @doThingWithoutReading()
+ %Val.Entry = call i8 @getValue()
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: store i8 %Val.Entry
+ store i8 %Val.Entry, i8* %Arg
+ call void @doThingWithoutReading()
+ br label %OuterLoop
+
+OuterLoop:
+; CHECK: 5 = MemoryPhi({Entry,1},{InnerLoop.Tail,3})
+; CHECK-NEXT: %Val.Outer =
+ %Val.Outer = call i8 @getValue()
+; CHECK: 2 = MemoryDef(5)
+; CHECK-NEXT: store i8 %Val.Outer
+ store i8 %Val.Outer, i8* %Arg
+ call void @doThingWithoutReading()
+ br label %InnerLoop
+
+InnerLoop:
+; CHECK: 4 = MemoryPhi({OuterLoop,2},{InnerLoop,3})
+; CHECK-NEXT: ; MemoryUse(4)
+; CHECK-NEXT: %StartingAccess = load
+ %StartingAccess = load i8, i8* %Arg, align 4
+ %Val.Inner = call i8 @getValue()
+; CHECK: 3 = MemoryDef(4)
+; CHECK-NEXT: store i8 %Val.Inner
+ store i8 %Val.Inner, i8* %Arg
+ call void @doThingWithoutReading()
+ %KeepGoing = call i1 @getBool()
+ br i1 %KeepGoing, label %InnerLoop.Tail, label %InnerLoop
+
+InnerLoop.Tail:
+ %KeepGoing.Tail = call i1 @getBool()
+ br i1 %KeepGoing.Tail, label %End, label %OuterLoop
+
+End:
+ ret void
+}
diff --git a/test/Transforms/Util/MemorySSA/multiple-locations.ll b/test/Transforms/Util/MemorySSA/multiple-locations.ll
new file mode 100644
index 000000000000..9a3e87e4ab6d
--- /dev/null
+++ b/test/Transforms/Util/MemorySSA/multiple-locations.ll
@@ -0,0 +1,25 @@
+; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='print<memoryssa>,verify<memoryssa>' -disable-output < %s 2>&1 | FileCheck %s
+;
+; Checks that basicAA is doing some amount of disambiguation for us
+
+define i32 @foo(i1 %cond) {
+ %a = alloca i32, align 4
+ %b = alloca i32, align 4
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: store i32 0
+ store i32 0, i32* %a, align 4
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: store i32 1
+ store i32 1, i32* %b, align 4
+
+; CHECK: MemoryUse(1)
+; CHECK-NEXT: %1 = load i32
+ %1 = load i32, i32* %a, align 4
+; CHECK: MemoryUse(2)
+; CHECK-NEXT: %2 = load i32
+ %2 = load i32, i32* %b, align 4
+
+ %3 = add i32 %1, %2
+ ret i32 %3
+}
diff --git a/test/Transforms/Util/MemorySSA/no-disconnected.ll b/test/Transforms/Util/MemorySSA/no-disconnected.ll
new file mode 100644
index 000000000000..d1dcb15893ad
--- /dev/null
+++ b/test/Transforms/Util/MemorySSA/no-disconnected.ll
@@ -0,0 +1,43 @@
+; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='print<memoryssa>,verify<memoryssa>' -disable-output < %s 2>&1 | FileCheck %s
+;
+; This test ensures we don't end up with multiple reaching defs for a single
+; use/phi edge If we were to optimize defs, we would end up with 2=
+; MemoryDef(liveOnEntry) and 4 = MemoryDef(liveOnEntry) Both would mean both
+; 1,2, and 3,4 would reach the phi node. Because the phi node can only have one
+; entry on each edge, it would choose 2, 4 and disconnect 1 and 3 completely
+; from the SSA graph, even though they are not dead
+
+define void @sink_store(i32 %index, i32* %foo, i32* %bar) {
+entry:
+ %cmp = trunc i32 %index to i1
+ br i1 %cmp, label %if.then, label %if.else
+
+if.then: ; preds = %entry
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: store i32 %index, i32* %foo, align 4
+ store i32 %index, i32* %foo, align 4
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: store i32 %index, i32* %bar, align 4
+ store i32 %index, i32* %bar, align 4
+ br label %if.end
+
+if.else: ; preds = %entry
+; CHECK: 3 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: store i32 %index, i32* %foo, align 4
+ store i32 %index, i32* %foo, align 4
+; CHECK: 4 = MemoryDef(3)
+; CHECK-NEXT: store i32 %index, i32* %bar, align 4
+ store i32 %index, i32* %bar, align 4
+ br label %if.end
+
+if.end: ; preds = %if.else, %if.then
+; CHECK: 5 = MemoryPhi({if.then,2},{if.else,4})
+; CHECK: MemoryUse(5)
+; CHECK-NEXT: %c = load i32, i32* %foo
+ %c = load i32, i32* %foo
+; CHECK: MemoryUse(5)
+; CHECK-NEXT: %d = load i32, i32* %bar
+ %d = load i32, i32* %bar
+ ret void
+}
diff --git a/test/Transforms/Util/MemorySSA/optimize-use.ll b/test/Transforms/Util/MemorySSA/optimize-use.ll
new file mode 100644
index 000000000000..8a8f2dd50959
--- /dev/null
+++ b/test/Transforms/Util/MemorySSA/optimize-use.ll
@@ -0,0 +1,37 @@
+; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='print<memoryssa>,verify<memoryssa>' -disable-output < %s 2>&1 | FileCheck %s
+
+; Function Attrs: ssp uwtable
+define i32 @main() {
+entry:
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: %call = call noalias i8* @_Znwm(i64 4)
+ %call = call noalias i8* @_Znwm(i64 4)
+ %0 = bitcast i8* %call to i32*
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: %call1 = call noalias i8* @_Znwm(i64 4)
+ %call1 = call noalias i8* @_Znwm(i64 4)
+ %1 = bitcast i8* %call1 to i32*
+; CHECK: 3 = MemoryDef(2)
+; CHECK-NEXT: store i32 5, i32* %0, align 4
+ store i32 5, i32* %0, align 4
+; CHECK: 4 = MemoryDef(3)
+; CHECK-NEXT: store i32 7, i32* %1, align 4
+ store i32 7, i32* %1, align 4
+; CHECK: MemoryUse(3)
+; CHECK-NEXT: %2 = load i32, i32* %0, align 4
+ %2 = load i32, i32* %0, align 4
+; CHECK: MemoryUse(4)
+; CHECK-NEXT: %3 = load i32, i32* %1, align 4
+ %3 = load i32, i32* %1, align 4
+; CHECK: MemoryUse(3)
+; CHECK-NEXT: %4 = load i32, i32* %0, align 4
+ %4 = load i32, i32* %0, align 4
+; CHECK: MemoryUse(4)
+; CHECK-NEXT: %5 = load i32, i32* %1, align 4
+ %5 = load i32, i32* %1, align 4
+ %add = add nsw i32 %3, %5
+ ret i32 %add
+}
+
+declare noalias i8* @_Znwm(i64)
diff --git a/test/Transforms/Util/MemorySSA/phi-translation.ll b/test/Transforms/Util/MemorySSA/phi-translation.ll
new file mode 100644
index 000000000000..30cd011a119c
--- /dev/null
+++ b/test/Transforms/Util/MemorySSA/phi-translation.ll
@@ -0,0 +1,182 @@
+; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='print<memoryssa>,verify<memoryssa>' -disable-output < %s 2>&1 | FileCheck %s
+
+; %ptr can't alias %local, so we should be able to optimize the use of %local to
+; point to the store to %local.
+; CHECK-LABEL: define void @check
+define void @check(i8* %ptr, i1 %bool) {
+entry:
+ %local = alloca i8, align 1
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: store i8 0, i8* %local, align 1
+ store i8 0, i8* %local, align 1
+ br i1 %bool, label %if.then, label %if.end
+
+if.then:
+ %p2 = getelementptr inbounds i8, i8* %ptr, i32 1
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: store i8 0, i8* %p2, align 1
+ store i8 0, i8* %p2, align 1
+ br label %if.end
+
+if.end:
+; CHECK: 3 = MemoryPhi({entry,1},{if.then,2})
+; CHECK: MemoryUse(1)
+; CHECK-NEXT: load i8, i8* %local, align 1
+ load i8, i8* %local, align 1
+ ret void
+}
+
+; CHECK-LABEL: define void @check2
+define void @check2(i1 %val1, i1 %val2, i1 %val3) {
+entry:
+ %local = alloca i8, align 1
+ %local2 = alloca i8, align 1
+
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: store i8 0, i8* %local
+ store i8 0, i8* %local
+ br i1 %val1, label %if.then, label %phi.3
+
+if.then:
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: store i8 2, i8* %local2
+ store i8 2, i8* %local2
+ br i1 %val2, label %phi.2, label %phi.3
+
+phi.3:
+; CHECK: 6 = MemoryPhi({entry,1},{if.then,2})
+; CHECK: 3 = MemoryDef(6)
+; CHECK-NEXT: store i8 3, i8* %local2
+ store i8 3, i8* %local2
+ br i1 %val3, label %phi.2, label %phi.1
+
+phi.2:
+; CHECK: 5 = MemoryPhi({if.then,2},{phi.3,3})
+; CHECK: 4 = MemoryDef(5)
+; CHECK-NEXT: store i8 4, i8* %local2
+ store i8 4, i8* %local2
+ br label %phi.1
+
+phi.1:
+; Order matters here; phi.2 needs to come before phi.3, because that's the order
+; they're visited in.
+; CHECK: 7 = MemoryPhi({phi.2,4},{phi.3,3})
+; CHECK: MemoryUse(1)
+; CHECK-NEXT: load i8, i8* %local
+ load i8, i8* %local
+ ret void
+}
+
+; CHECK-LABEL: define void @cross_phi
+define void @cross_phi(i8* noalias %p1, i8* noalias %p2) {
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: store i8 0, i8* %p1
+ store i8 0, i8* %p1
+; CHECK: MemoryUse(1)
+; CHECK-NEXT: load i8, i8* %p1
+ load i8, i8* %p1
+ br i1 undef, label %a, label %b
+
+a:
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: store i8 0, i8* %p2
+ store i8 0, i8* %p2
+ br i1 undef, label %c, label %d
+
+b:
+; CHECK: 3 = MemoryDef(1)
+; CHECK-NEXT: store i8 1, i8* %p2
+ store i8 1, i8* %p2
+ br i1 undef, label %c, label %d
+
+c:
+; CHECK: 6 = MemoryPhi({a,2},{b,3})
+; CHECK: 4 = MemoryDef(6)
+; CHECK-NEXT: store i8 2, i8* %p2
+ store i8 2, i8* %p2
+ br label %e
+
+d:
+; CHECK: 7 = MemoryPhi({a,2},{b,3})
+; CHECK: 5 = MemoryDef(7)
+; CHECK-NEXT: store i8 3, i8* %p2
+ store i8 3, i8* %p2
+ br label %e
+
+e:
+; 8 = MemoryPhi({c,4},{d,5})
+; CHECK: MemoryUse(1)
+; CHECK-NEXT: load i8, i8* %p1
+ load i8, i8* %p1
+ ret void
+}
+
+; CHECK-LABEL: define void @looped
+define void @looped(i8* noalias %p1, i8* noalias %p2) {
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: store i8 0, i8* %p1
+ store i8 0, i8* %p1
+ br label %loop.1
+
+loop.1:
+; CHECK: 7 = MemoryPhi({%0,1},{loop.3,4})
+; CHECK: 2 = MemoryDef(7)
+; CHECK-NEXT: store i8 0, i8* %p2
+ store i8 0, i8* %p2
+ br i1 undef, label %loop.2, label %loop.3
+
+loop.2:
+; CHECK: 6 = MemoryPhi({loop.1,2},{loop.3,4})
+; CHECK: 3 = MemoryDef(6)
+; CHECK-NEXT: store i8 1, i8* %p2
+ store i8 1, i8* %p2
+ br label %loop.3
+
+loop.3:
+; CHECK: 5 = MemoryPhi({loop.1,2},{loop.2,3})
+; CHECK: 4 = MemoryDef(5)
+; CHECK-NEXT: store i8 2, i8* %p2
+ store i8 2, i8* %p2
+; FIXME: This should be MemoryUse(1)
+; CHECK: MemoryUse(5)
+; CHECK-NEXT: load i8, i8* %p1
+ load i8, i8* %p1
+ br i1 undef, label %loop.2, label %loop.1
+}
+
+; CHECK-LABEL: define void @looped_visitedonlyonce
+define void @looped_visitedonlyonce(i8* noalias %p1, i8* noalias %p2) {
+ br label %while.cond
+
+while.cond:
+; CHECK: 5 = MemoryPhi({%0,liveOnEntry},{if.end,3})
+; CHECK-NEXT: br i1 undef, label %if.then, label %if.end
+ br i1 undef, label %if.then, label %if.end
+
+if.then:
+; CHECK: 1 = MemoryDef(5)
+; CHECK-NEXT: store i8 0, i8* %p1
+ store i8 0, i8* %p1
+ br i1 undef, label %if.end, label %if.then2
+
+if.then2:
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: store i8 1, i8* %p2
+ store i8 1, i8* %p2
+ br label %if.end
+
+if.end:
+; CHECK: 4 = MemoryPhi({while.cond,5},{if.then,1},{if.then2,2})
+; CHECK: MemoryUse(4)
+; CHECK-NEXT: load i8, i8* %p1
+ load i8, i8* %p1
+; CHECK: 3 = MemoryDef(4)
+; CHECK-NEXT: store i8 2, i8* %p2
+ store i8 2, i8* %p2
+; CHECK: MemoryUse(4)
+; CHECK-NEXT: load i8, i8* %p1
+ load i8, i8* %p1
+ br label %while.cond
+}
+
diff --git a/test/Transforms/Util/MemorySSA/volatile-clobber.ll b/test/Transforms/Util/MemorySSA/volatile-clobber.ll
new file mode 100644
index 000000000000..baad8d8c2d14
--- /dev/null
+++ b/test/Transforms/Util/MemorySSA/volatile-clobber.ll
@@ -0,0 +1,22 @@
+; RUN: opt -basicaa -print-memoryssa -verify-memoryssa -analyze < %s 2>&1 | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes='print<memoryssa>,verify<memoryssa>' -disable-output < %s 2>&1 | FileCheck %s
+;
+; Ensures that volatile stores/loads count as MemoryDefs
+
+define i32 @foo() {
+ %1 = alloca i32, align 4
+; CHECK: 1 = MemoryDef(liveOnEntry)
+; CHECK-NEXT: store volatile i32 4
+ store volatile i32 4, i32* %1, align 4
+; CHECK: 2 = MemoryDef(1)
+; CHECK-NEXT: store volatile i32 8
+ store volatile i32 8, i32* %1, align 4
+; CHECK: 3 = MemoryDef(2)
+; CHECK-NEXT: %2 = load volatile i32
+ %2 = load volatile i32, i32* %1, align 4
+; CHECK: 4 = MemoryDef(3)
+; CHECK-NEXT: %3 = load volatile i32
+ %3 = load volatile i32, i32* %1, align 4
+ %4 = add i32 %3, %2
+ ret i32 %4
+}
diff --git a/test/Transforms/Util/simplify-dbg-declare-load.ll b/test/Transforms/Util/simplify-dbg-declare-load.ll
index 0357a5e6facb..21d305450860 100644
--- a/test/Transforms/Util/simplify-dbg-declare-load.ll
+++ b/test/Transforms/Util/simplify-dbg-declare-load.ll
@@ -19,9 +19,9 @@ fail: ; preds = %top
unreachable
idxend: ; preds = %top
-; CHECK-NOT call void @llvm.dbg.value(metadata %foo* %cp, i64 0, metadata !1, metadata !16), !dbg !17
+; CHECK-NOT call void @llvm.dbg.value(metadata %foo* %cp,
%0 = load volatile %foo, %foo* %cp, align 8
-; CHECK: call void @llvm.dbg.value(metadata %foo %0, i64 0, metadata !1, metadata !16), !dbg !17
+; CHECK: call void @llvm.dbg.value(metadata %foo %0,
store volatile %foo %0, %foo* undef, align 8
ret void
}
@@ -30,11 +30,11 @@ attributes #0 = { nounwind readnone }
attributes #1 = { sspreq }
!llvm.module.flags = !{!0}
-!llvm.dbg.cu = !{}
+!llvm.dbg.cu = !{!18}
!0 = !{i32 1, !"Debug Info Version", i32 3}
!1 = !DILocalVariable(name: "cp", scope: !2, file: !3, line: 106, type: !12)
-!2 = distinct !DISubprogram(name: "fastshortest", linkageName: "julia_fastshortest_6256", scope: null, file: !3, type: !4, isLocal: false, isDefinition: true, isOptimized: true, variables: !11)
+!2 = distinct !DISubprogram(name: "fastshortest", linkageName: "julia_fastshortest_6256", scope: null, file: !3, type: !4, isLocal: false, isDefinition: true, isOptimized: true, unit: !18, variables: !11)
!3 = !DIFile(filename: "grisu/fastshortest.jl", directory: ".")
!4 = !DISubroutineType(types: !5)
!5 = !{!6, !7}
@@ -50,3 +50,4 @@ attributes #1 = { sspreq }
!15 = !DIBasicType(name: "Int32", size: 32, align: 32, encoding: DW_ATE_unsigned)
!16 = !DIExpression()
!17 = !DILocation(line: 106, scope: !2)
+!18 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !3)
diff --git a/test/Transforms/Util/split-bit-piece.ll b/test/Transforms/Util/split-bit-piece.ll
index 6945beca84b1..9343214cd991 100644
--- a/test/Transforms/Util/split-bit-piece.ll
+++ b/test/Transforms/Util/split-bit-piece.ll
@@ -29,7 +29,7 @@ attributes #0 = { nounwind readnone }
!llvm.module.flags = !{!7}
!llvm.ident = !{!8}
-!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 256979) (llvm/trunk 257107)", isOptimized: false, runtimeVersion: 0, emissionKind: 1, retainedTypes: !2)
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (trunk 256979) (llvm/trunk 257107)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, retainedTypes: !2)
!1 = !DIFile(filename: "tsan_shadow_test.cc", directory: "/tmp")
!2 = !{!3, !5}
!3 = !DICompositeType(tag: DW_TAG_class_type, name: "FastState", file: !4, line: 91, size: 64, align: 64, identifier: "_ZTSN6__tsan9FastStateE")
@@ -40,6 +40,6 @@ attributes #0 = { nounwind readnone }
!8 = !{!"clang version 3.8.0 (trunk 256979) (llvm/trunk 257107)"}
!9 = !DILocalVariable(name: "v1", scope: !10, file: !4, line: 136, type: !5)
!10 = distinct !DILexicalBlock(scope: !11, file: !4, line: 136, column: 5)
-!11 = distinct !DISubprogram(name: "SetHistorySize", linkageName: "_ZN6__tsan9FastState14SetHistorySizeEi", scope: !"_ZTSN6__tsan9FastStateE", file: !4, line: 135, isLocal: false, isDefinition: true, scopeLine: 135, flags: DIFlagPrototyped, isOptimized: false)
+!11 = distinct !DISubprogram(name: "SetHistorySize", linkageName: "_ZN6__tsan9FastState14SetHistorySizeEi", scope: !3, file: !4, line: 135, isLocal: false, isDefinition: true, scopeLine: 135, flags: DIFlagPrototyped, isOptimized: false, unit: !0)
!12 = !DIExpression()
!13 = !DILocation(line: 136, column: 5, scope: !10)
diff --git a/test/Transforms/Util/store-first-op.ll b/test/Transforms/Util/store-first-op.ll
new file mode 100644
index 000000000000..08efbe47ccfe
--- /dev/null
+++ b/test/Transforms/Util/store-first-op.ll
@@ -0,0 +1,36 @@
+; RUN: opt -instcombine -S %s | FileCheck %s
+
+%foo = type { i8 }
+
+; Function Attrs: nounwind uwtable
+define void @_ZN4llvm13ScaledNumbers10multiply64Emm() {
+entry:
+ %getU = alloca %foo, align 1
+; This is supposed to make sure that the declare conversion, does not accidentally think the store OF
+; %getU is a store TO %getU. There are valid reasons to have an llvm.dbg.value here, but if the pass
+; is changed to emit such, a more specific check should be added to make sure that any llvm.dbg.value
+; is correct.
+; CHECK-NOT: @llvm.dbg.value(metadata %foo* %getU
+ call void @llvm.dbg.declare(metadata %foo* %getU, metadata !3, metadata !6), !dbg !7
+ store %foo* %getU, %foo** undef, align 8, !tbaa !8
+ unreachable
+}
+
+; Function Attrs: nounwind readnone
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+attributes #1 = { nounwind readnone }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !1, producer: "clang version 3.8.0 (https://github.com/llvm-mirror/clang 89dda3855cda574f355e6defa1d77bdae5053994) (llvm/trunk 257597)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug)
+!1 = !DIFile(filename: "none", directory: ".")
+!2 = !{i32 2, !"Debug Info Version", i32 3}
+!3 = !DILocalVariable(name: "getU", scope: !4, file: !1, line: 25, type: !5)
+!4 = distinct !DISubprogram(name: "multiply64", linkageName: "_ZN4llvm13ScaledNumbers10multiply64Emm", scope: null, file: !1, line: 22, isLocal: false, isDefinition: true, scopeLine: 23, flags: DIFlagPrototyped, isOptimized: true, unit: !0)
+!5 = !DICompositeType(tag: DW_TAG_class_type, scope: !4, file: !1, line: 25, size: 8, align: 8)
+!6 = !DIExpression()
+!7 = !DILocation(line: 25, column: 8, scope: !4)
+!8 = !{!9, !9, i64 0}
+!9 = !{i64 0}
diff --git a/test/Transforms/WholeProgramDevirt/bad-read-from-vtable.ll b/test/Transforms/WholeProgramDevirt/bad-read-from-vtable.ll
new file mode 100644
index 000000000000..97445efb101e
--- /dev/null
+++ b/test/Transforms/WholeProgramDevirt/bad-read-from-vtable.ll
@@ -0,0 +1,63 @@
+; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@vt = global [2 x i8*] [i8* zeroinitializer, i8* bitcast (void (i8*)* @vf to i8*)], !type !0
+
+define void @vf(i8* %this) {
+ ret void
+}
+
+; CHECK: define void @unaligned
+define void @unaligned(i8* %obj) {
+ %vtableptr = bitcast i8* %obj to [1 x i8*]**
+ %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr
+ %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+ %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"typeid")
+ call void @llvm.assume(i1 %p)
+ %fptrptr = getelementptr i8, i8* %vtablei8, i32 1
+ %fptrptr_casted = bitcast i8* %fptrptr to i8**
+ %fptr = load i8*, i8** %fptrptr_casted
+ %fptr_casted = bitcast i8* %fptr to void (i8*)*
+ ; CHECK: call void %
+ call void %fptr_casted(i8* %obj)
+ ret void
+}
+
+; CHECK: define void @outofbounds
+define void @outofbounds(i8* %obj) {
+ %vtableptr = bitcast i8* %obj to [1 x i8*]**
+ %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr
+ %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+ %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"typeid")
+ call void @llvm.assume(i1 %p)
+ %fptrptr = getelementptr i8, i8* %vtablei8, i32 16
+ %fptrptr_casted = bitcast i8* %fptrptr to i8**
+ %fptr = load i8*, i8** %fptrptr_casted
+ %fptr_casted = bitcast i8* %fptr to void (i8*)*
+ ; CHECK: call void %
+ call void %fptr_casted(i8* %obj)
+ ret void
+}
+
+; CHECK: define void @nonfunction
+define void @nonfunction(i8* %obj) {
+ %vtableptr = bitcast i8* %obj to [1 x i8*]**
+ %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr
+ %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+ %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"typeid")
+ call void @llvm.assume(i1 %p)
+ %fptrptr = getelementptr i8, i8* %vtablei8, i32 0
+ %fptrptr_casted = bitcast i8* %fptrptr to i8**
+ %fptr = load i8*, i8** %fptrptr_casted
+ %fptr_casted = bitcast i8* %fptr to void (i8*)*
+ ; CHECK: call void %
+ call void %fptr_casted(i8* %obj)
+ ret void
+}
+
+declare i1 @llvm.type.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{i32 0, !"typeid"}
diff --git a/test/Transforms/WholeProgramDevirt/constant-arg.ll b/test/Transforms/WholeProgramDevirt/constant-arg.ll
new file mode 100644
index 000000000000..f65e41327382
--- /dev/null
+++ b/test/Transforms/WholeProgramDevirt/constant-arg.ll
@@ -0,0 +1,77 @@
+; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
+; RUN: opt -S -passes=wholeprogramdevirt %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: private constant { [8 x i8], [1 x i8*], [0 x i8] } { [8 x i8] c"\00\00\00\00\00\00\00\01", [1 x i8*] [i8* bitcast (i1 (i8*, i32)* @vf1 to i8*)], [0 x i8] zeroinitializer }, !type [[T8:![0-9]+]]
+; CHECK: private constant { [8 x i8], [1 x i8*], [0 x i8] } { [8 x i8] c"\00\00\00\00\00\00\00\02", [1 x i8*] [i8* bitcast (i1 (i8*, i32)* @vf2 to i8*)], [0 x i8] zeroinitializer }, !type [[T8]]
+; CHECK: private constant { [8 x i8], [1 x i8*], [0 x i8] } { [8 x i8] c"\00\00\00\00\00\00\00\01", [1 x i8*] [i8* bitcast (i1 (i8*, i32)* @vf4 to i8*)], [0 x i8] zeroinitializer }, !type [[T8]]
+; CHECK: private constant { [8 x i8], [1 x i8*], [0 x i8] } { [8 x i8] c"\00\00\00\00\00\00\00\02", [1 x i8*] [i8* bitcast (i1 (i8*, i32)* @vf8 to i8*)], [0 x i8] zeroinitializer }, !type [[T8]]
+
+@vt1 = constant [1 x i8*] [i8* bitcast (i1 (i8*, i32)* @vf1 to i8*)], !type !0
+@vt2 = constant [1 x i8*] [i8* bitcast (i1 (i8*, i32)* @vf2 to i8*)], !type !0
+@vt4 = constant [1 x i8*] [i8* bitcast (i1 (i8*, i32)* @vf4 to i8*)], !type !0
+@vt8 = constant [1 x i8*] [i8* bitcast (i1 (i8*, i32)* @vf8 to i8*)], !type !0
+
+define i1 @vf1(i8* %this, i32 %arg) readnone {
+ %and = and i32 %arg, 1
+ %cmp = icmp ne i32 %and, 0
+ ret i1 %cmp
+}
+
+define i1 @vf2(i8* %this, i32 %arg) readnone {
+ %and = and i32 %arg, 2
+ %cmp = icmp ne i32 %and, 0
+ ret i1 %cmp
+}
+
+define i1 @vf4(i8* %this, i32 %arg) readnone {
+ %and = and i32 %arg, 4
+ %cmp = icmp ne i32 %and, 0
+ ret i1 %cmp
+}
+
+define i1 @vf8(i8* %this, i32 %arg) readnone {
+ %and = and i32 %arg, 8
+ %cmp = icmp ne i32 %and, 0
+ ret i1 %cmp
+}
+
+; CHECK: define i1 @call1
+define i1 @call1(i8* %obj) {
+ %vtableptr = bitcast i8* %obj to [1 x i8*]**
+ %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr
+ %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+ %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"typeid")
+ call void @llvm.assume(i1 %p)
+ %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0
+ %fptr = load i8*, i8** %fptrptr
+ %fptr_casted = bitcast i8* %fptr to i1 (i8*, i32)*
+ ; CHECK: getelementptr {{.*}} -1
+ ; CHECK: and {{.*}}, 1
+ %result = call i1 %fptr_casted(i8* %obj, i32 5)
+ ret i1 %result
+}
+
+; CHECK: define i1 @call2
+define i1 @call2(i8* %obj) {
+ %vtableptr = bitcast i8* %obj to [1 x i8*]**
+ %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr
+ %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+ %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"typeid")
+ call void @llvm.assume(i1 %p)
+ %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0
+ %fptr = load i8*, i8** %fptrptr
+ %fptr_casted = bitcast i8* %fptr to i1 (i8*, i32)*
+ ; CHECK: getelementptr {{.*}} -1
+ ; CHECK: and {{.*}}, 2
+ %result = call i1 %fptr_casted(i8* %obj, i32 10)
+ ret i1 %result
+}
+
+declare i1 @llvm.type.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+; CHECK: [[T8]] = !{i32 8, !"typeid"}
+!0 = !{i32 0, !"typeid"}
diff --git a/test/Transforms/WholeProgramDevirt/devirt-single-impl-check.ll b/test/Transforms/WholeProgramDevirt/devirt-single-impl-check.ll
new file mode 100644
index 000000000000..e5e64ab05a26
--- /dev/null
+++ b/test/Transforms/WholeProgramDevirt/devirt-single-impl-check.ll
@@ -0,0 +1,41 @@
+; RUN: opt -S -wholeprogramdevirt -pass-remarks=wholeprogramdevirt %s 2>&1 | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: remark: <unknown>:0:0: devirtualized call
+; CHECK-NOT: devirtualized call
+
+@vt1 = constant [1 x i8*] [i8* bitcast (void (i8*)* @vf to i8*)], !type !0
+@vt2 = constant [1 x i8*] [i8* bitcast (void (i8*)* @vf to i8*)], !type !0
+
+define void @vf(i8* %this) {
+ ret void
+}
+
+; CHECK: define void @call
+define void @call(i8* %obj) {
+ %vtableptr = bitcast i8* %obj to [1 x i8*]**
+ %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr
+ %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+ %pair = call {i8*, i1} @llvm.type.checked.load(i8* %vtablei8, i32 0, metadata !"typeid")
+ %fptr = extractvalue {i8*, i1} %pair, 0
+ %p = extractvalue {i8*, i1} %pair, 1
+ ; CHECK: br i1 true,
+ br i1 %p, label %cont, label %trap
+
+cont:
+ %fptr_casted = bitcast i8* %fptr to void (i8*)*
+ ; CHECK: call void @vf(
+ call void %fptr_casted(i8* %obj)
+ ret void
+
+trap:
+ call void @llvm.trap()
+ unreachable
+}
+
+declare {i8*, i1} @llvm.type.checked.load(i8*, i32, metadata)
+declare void @llvm.trap()
+
+!0 = !{i32 0, !"typeid"}
diff --git a/test/Transforms/WholeProgramDevirt/devirt-single-impl.ll b/test/Transforms/WholeProgramDevirt/devirt-single-impl.ll
new file mode 100644
index 000000000000..7d665f534a5d
--- /dev/null
+++ b/test/Transforms/WholeProgramDevirt/devirt-single-impl.ll
@@ -0,0 +1,31 @@
+; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@vt1 = constant [1 x i8*] [i8* bitcast (void (i8*)* @vf to i8*)], !type !0
+@vt2 = constant [1 x i8*] [i8* bitcast (void (i8*)* @vf to i8*)], !type !0
+
+define void @vf(i8* %this) {
+ ret void
+}
+
+; CHECK: define void @call
+define void @call(i8* %obj) {
+ %vtableptr = bitcast i8* %obj to [1 x i8*]**
+ %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr
+ %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+ %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"typeid")
+ call void @llvm.assume(i1 %p)
+ %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0
+ %fptr = load i8*, i8** %fptrptr
+ %fptr_casted = bitcast i8* %fptr to void (i8*)*
+ ; CHECK: call void @vf(
+ call void %fptr_casted(i8* %obj)
+ ret void
+}
+
+declare i1 @llvm.type.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{i32 0, !"typeid"}
diff --git a/test/Transforms/WholeProgramDevirt/expand-check.ll b/test/Transforms/WholeProgramDevirt/expand-check.ll
new file mode 100644
index 000000000000..4effaba08b25
--- /dev/null
+++ b/test/Transforms/WholeProgramDevirt/expand-check.ll
@@ -0,0 +1,63 @@
+; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
+
+; Test that we correctly expand the llvm.type.checked.load intrinsic in cases
+; where we cannot devirtualize.
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@vt1 = constant [1 x i8*] [i8* bitcast (void (i8*)* @vf1 to i8*)], !type !0
+@vt2 = constant [1 x i8*] [i8* bitcast (void (i8*)* @vf2 to i8*)], !type !0
+
+define void @vf1(i8* %this) {
+ ret void
+}
+
+define void @vf2(i8* %this) {
+ ret void
+}
+
+; CHECK: define void @call
+define void @call(i8* %obj) {
+ %vtableptr = bitcast i8* %obj to [1 x i8*]**
+ %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr
+ %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+ %pair = call {i8*, i1} @llvm.type.checked.load(i8* %vtablei8, i32 0, metadata !"typeid")
+ %p = extractvalue {i8*, i1} %pair, 1
+ ; CHECK: [[TT:%[^ ]*]] = call i1 @llvm.type.test(i8* [[VT:%[^,]*]], metadata !"typeid")
+ ; CHECK: br i1 [[TT]],
+ br i1 %p, label %cont, label %trap
+
+cont:
+ ; CHECK: [[GEP:%[^ ]*]] = getelementptr i8, i8* [[VT]], i32 0
+ ; CHECK: [[BC:%[^ ]*]] = bitcast i8* [[GEP]] to i8**
+ ; CHECK: [[LOAD:%[^ ]*]] = load i8*, i8** [[BC]]
+ ; CHECK: [[FPC:%[^ ]*]] = bitcast i8* [[LOAD]] to void (i8*)*
+ ; CHECK: call void [[FPC]]
+ %fptr = extractvalue {i8*, i1} %pair, 0
+ %fptr_casted = bitcast i8* %fptr to void (i8*)*
+ call void %fptr_casted(i8* %obj)
+ ret void
+
+trap:
+ call void @llvm.trap()
+ unreachable
+}
+
+; CHECK: define { i8*, i1 } @ret
+define {i8*, i1} @ret(i8* %vtablei8) {
+ ; CHECK: [[GEP2:%[^ ]*]] = getelementptr i8, i8* [[VT2:%[^,]*]], i32 1
+ ; CHECK: [[BC2:%[^ ]*]] = bitcast i8* [[GEP2]] to i8**
+ ; CHECK: [[LOAD2:%[^ ]*]] = load i8*, i8** [[BC2]]
+ ; CHECK: [[TT2:%[^ ]*]] = call i1 @llvm.type.test(i8* [[VT2]], metadata !"typeid")
+ ; CHECK: [[I1:%[^ ]*]] = insertvalue { i8*, i1 } undef, i8* [[LOAD2]], 0
+ ; CHECK: [[I2:%[^ ]*]] = insertvalue { i8*, i1 } %5, i1 [[TT2]], 1
+ %pair = call {i8*, i1} @llvm.type.checked.load(i8* %vtablei8, i32 1, metadata !"typeid")
+ ; CHECK: ret { i8*, i1 } [[I2]]
+ ret {i8*, i1} %pair
+}
+
+declare {i8*, i1} @llvm.type.checked.load(i8*, i32, metadata)
+declare void @llvm.trap()
+
+!0 = !{i32 0, !"typeid"}
diff --git a/test/Transforms/WholeProgramDevirt/non-array-vtable.ll b/test/Transforms/WholeProgramDevirt/non-array-vtable.ll
new file mode 100644
index 000000000000..e9c2db79fcbb
--- /dev/null
+++ b/test/Transforms/WholeProgramDevirt/non-array-vtable.ll
@@ -0,0 +1,30 @@
+; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@vt = constant i8* bitcast (void (i8*)* @vf to i8*), !type !0
+
+define void @vf(i8* %this) {
+ ret void
+}
+
+; CHECK: define void @call
+define void @call(i8* %obj) {
+ %vtableptr = bitcast i8* %obj to [1 x i8*]**
+ %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr
+ %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+ %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"typeid")
+ call void @llvm.assume(i1 %p)
+ %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0
+ %fptr = load i8*, i8** %fptrptr
+ %fptr_casted = bitcast i8* %fptr to void (i8*)*
+ ; CHECK: call void %
+ call void %fptr_casted(i8* %obj)
+ ret void
+}
+
+declare i1 @llvm.type.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{i32 0, !"typeid"}
diff --git a/test/Transforms/WholeProgramDevirt/non-constant-vtable.ll b/test/Transforms/WholeProgramDevirt/non-constant-vtable.ll
new file mode 100644
index 000000000000..ecc8ad0e7c73
--- /dev/null
+++ b/test/Transforms/WholeProgramDevirt/non-constant-vtable.ll
@@ -0,0 +1,32 @@
+; RUN: opt -S -wholeprogramdevirt -pass-remarks=wholeprogramdevirt %s 2>&1 | FileCheck %s
+
+; CHECK-NOT: devirtualized call
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@vt = global [1 x i8*] [i8* bitcast (void (i8*)* @vf to i8*)], !type !0
+
+define void @vf(i8* %this) {
+ ret void
+}
+
+; CHECK: define void @call
+define void @call(i8* %obj) {
+ %vtableptr = bitcast i8* %obj to [1 x i8*]**
+ %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr
+ %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+ %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"typeid")
+ call void @llvm.assume(i1 %p)
+ %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0
+ %fptr = load i8*, i8** %fptrptr
+ %fptr_casted = bitcast i8* %fptr to void (i8*)*
+ ; CHECK: call void %
+ call void %fptr_casted(i8* %obj)
+ ret void
+}
+
+declare i1 @llvm.type.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{i32 0, !"typeid"}
diff --git a/test/Transforms/WholeProgramDevirt/uniform-retval-invoke.ll b/test/Transforms/WholeProgramDevirt/uniform-retval-invoke.ll
new file mode 100644
index 000000000000..8fea9bc7b240
--- /dev/null
+++ b/test/Transforms/WholeProgramDevirt/uniform-retval-invoke.ll
@@ -0,0 +1,43 @@
+; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@vt1 = constant [1 x i8*] [i8* bitcast (i32 (i8*)* @vf1 to i8*)], !type !0
+@vt2 = constant [1 x i8*] [i8* bitcast (i32 (i8*)* @vf2 to i8*)], !type !0
+
+define i32 @vf1(i8* %this) readnone {
+ ret i32 123
+}
+
+define i32 @vf2(i8* %this) readnone {
+ ret i32 123
+}
+
+; CHECK: define i32 @call
+define i32 @call(i8* %obj) personality i8* undef {
+ %vtableptr = bitcast i8* %obj to [1 x i8*]**
+ %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr
+ %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+ %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"typeid")
+ call void @llvm.assume(i1 %p)
+ %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0
+ %fptr = load i8*, i8** %fptrptr
+ %fptr_casted = bitcast i8* %fptr to i32 (i8*)*
+ ; CHECK: br label %[[RET:[0-9A-Za-z]*]]
+ %result = invoke i32 %fptr_casted(i8* %obj) to label %ret unwind label %unwind
+
+unwind:
+ %x = landingpad i32 cleanup
+ unreachable
+
+ret:
+ ; CHECK: [[RET]]:
+ ; CHECK-NEXT: ret i32 123
+ ret i32 %result
+}
+
+declare i1 @llvm.type.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{i32 0, !"typeid"}
diff --git a/test/Transforms/WholeProgramDevirt/uniform-retval.ll b/test/Transforms/WholeProgramDevirt/uniform-retval.ll
new file mode 100644
index 000000000000..ef3a7e49b52c
--- /dev/null
+++ b/test/Transforms/WholeProgramDevirt/uniform-retval.ll
@@ -0,0 +1,36 @@
+; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@vt1 = constant [1 x i8*] [i8* bitcast (i32 (i8*)* @vf1 to i8*)], !type !0
+@vt2 = constant [1 x i8*] [i8* bitcast (i32 (i8*)* @vf2 to i8*)], !type !0
+
+define i32 @vf1(i8* %this) readnone {
+ ret i32 123
+}
+
+define i32 @vf2(i8* %this) readnone {
+ ret i32 123
+}
+
+; CHECK: define i32 @call
+define i32 @call(i8* %obj) {
+ %vtableptr = bitcast i8* %obj to [1 x i8*]**
+ %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr
+ %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+ %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"typeid")
+ call void @llvm.assume(i1 %p)
+ %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0
+ %fptr = load i8*, i8** %fptrptr
+ %fptr_casted = bitcast i8* %fptr to i32 (i8*)*
+ %result = call i32 %fptr_casted(i8* %obj)
+ ; CHECK-NOT: call
+ ; CHECK: ret i32 123
+ ret i32 %result
+}
+
+declare i1 @llvm.type.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{i32 0, !"typeid"}
diff --git a/test/Transforms/WholeProgramDevirt/unique-retval.ll b/test/Transforms/WholeProgramDevirt/unique-retval.ll
new file mode 100644
index 000000000000..50b938c43e4a
--- /dev/null
+++ b/test/Transforms/WholeProgramDevirt/unique-retval.ll
@@ -0,0 +1,56 @@
+; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@vt1 = constant [1 x i8*] [i8* bitcast (i1 (i8*)* @vf0 to i8*)], !type !0
+@vt2 = constant [1 x i8*] [i8* bitcast (i1 (i8*)* @vf0 to i8*)], !type !0, !type !1
+@vt3 = constant [1 x i8*] [i8* bitcast (i1 (i8*)* @vf1 to i8*)], !type !0, !type !1
+@vt4 = constant [1 x i8*] [i8* bitcast (i1 (i8*)* @vf1 to i8*)], !type !1
+
+define i1 @vf0(i8* %this) readnone {
+ ret i1 0
+}
+
+define i1 @vf1(i8* %this) readnone {
+ ret i1 1
+}
+
+; CHECK: define i1 @call1
+define i1 @call1(i8* %obj) {
+ %vtableptr = bitcast i8* %obj to [1 x i8*]**
+ %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr
+ ; CHECK: [[VT1:%[^ ]*]] = bitcast [1 x i8*]* {{.*}} to i8*
+ %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+ %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"typeid1")
+ call void @llvm.assume(i1 %p)
+ %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0
+ %fptr = load i8*, i8** %fptrptr
+ %fptr_casted = bitcast i8* %fptr to i1 (i8*)*
+ ; CHECK: [[RES1:%[^ ]*]] = icmp eq i8* [[VT1]], bitcast ([1 x i8*]* @vt3 to i8*)
+ %result = call i1 %fptr_casted(i8* %obj)
+ ; CHECK: ret i1 [[RES1]]
+ ret i1 %result
+}
+
+; CHECK: define i1 @call2
+define i1 @call2(i8* %obj) {
+ %vtableptr = bitcast i8* %obj to [1 x i8*]**
+ %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr
+ ; CHECK: [[VT2:%[^ ]*]] = bitcast [1 x i8*]* {{.*}} to i8*
+ %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+ %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"typeid2")
+ call void @llvm.assume(i1 %p)
+ %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0
+ %fptr = load i8*, i8** %fptrptr
+ %fptr_casted = bitcast i8* %fptr to i1 (i8*)*
+ ; CHECK: [[RES1:%[^ ]*]] = icmp ne i8* [[VT1]], bitcast ([1 x i8*]* @vt2 to i8*)
+ %result = call i1 %fptr_casted(i8* %obj)
+ ret i1 %result
+}
+
+declare i1 @llvm.type.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{i32 0, !"typeid1"}
+!1 = !{i32 0, !"typeid2"}
diff --git a/test/Transforms/WholeProgramDevirt/vcp-accesses-memory.ll b/test/Transforms/WholeProgramDevirt/vcp-accesses-memory.ll
new file mode 100644
index 000000000000..b5d51f2d4637
--- /dev/null
+++ b/test/Transforms/WholeProgramDevirt/vcp-accesses-memory.ll
@@ -0,0 +1,35 @@
+; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@vt1 = global [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf1 to i8*)], !type !0
+@vt2 = global [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf2 to i8*)], !type !0
+
+define i32 @vf1(i8* %this, i32 %arg) {
+ ret i32 %arg
+}
+
+define i32 @vf2(i8* %this, i32 %arg) {
+ ret i32 %arg
+}
+
+; CHECK: define i32 @call
+define i32 @call(i8* %obj) {
+ %vtableptr = bitcast i8* %obj to [1 x i8*]**
+ %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr
+ %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+ %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"typeid")
+ call void @llvm.assume(i1 %p)
+ %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0
+ %fptr = load i8*, i8** %fptrptr
+ %fptr_casted = bitcast i8* %fptr to i32 (i8*, i32)*
+ ; CHECK: call i32 %
+ %result = call i32 %fptr_casted(i8* %obj, i32 1)
+ ret i32 %result
+}
+
+declare i1 @llvm.type.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{i32 0, !"typeid"}
diff --git a/test/Transforms/WholeProgramDevirt/vcp-no-this.ll b/test/Transforms/WholeProgramDevirt/vcp-no-this.ll
new file mode 100644
index 000000000000..c564665471cf
--- /dev/null
+++ b/test/Transforms/WholeProgramDevirt/vcp-no-this.ll
@@ -0,0 +1,35 @@
+; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@vt1 = global [1 x i8*] [i8* bitcast (i32 ()* @vf1 to i8*)], !type !0
+@vt2 = global [1 x i8*] [i8* bitcast (i32 ()* @vf2 to i8*)], !type !0
+
+define i32 @vf1() readnone {
+ ret i32 1
+}
+
+define i32 @vf2() readnone {
+ ret i32 2
+}
+
+; CHECK: define i32 @call
+define i32 @call(i8* %obj) {
+ %vtableptr = bitcast i8* %obj to [1 x i8*]**
+ %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr
+ %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+ %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"typeid")
+ call void @llvm.assume(i1 %p)
+ %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0
+ %fptr = load i8*, i8** %fptrptr
+ %fptr_casted = bitcast i8* %fptr to i32 ()*
+ ; CHECK: call i32 %
+ %result = call i32 %fptr_casted()
+ ret i32 %result
+}
+
+declare i1 @llvm.type.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{i32 0, !"typeid"}
diff --git a/test/Transforms/WholeProgramDevirt/vcp-non-constant-arg.ll b/test/Transforms/WholeProgramDevirt/vcp-non-constant-arg.ll
new file mode 100644
index 000000000000..197c923c3a1c
--- /dev/null
+++ b/test/Transforms/WholeProgramDevirt/vcp-non-constant-arg.ll
@@ -0,0 +1,35 @@
+; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@vt1 = global [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf1 to i8*)], !type !0
+@vt2 = global [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf2 to i8*)], !type !0
+
+define i32 @vf1(i8* %this, i32 %arg) readnone {
+ ret i32 %arg
+}
+
+define i32 @vf2(i8* %this, i32 %arg) readnone {
+ ret i32 %arg
+}
+
+; CHECK: define void @call
+define void @call(i8* %obj, i32 %arg) {
+ %vtableptr = bitcast i8* %obj to [1 x i8*]**
+ %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr
+ %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+ %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"typeid")
+ call void @llvm.assume(i1 %p)
+ %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0
+ %fptr = load i8*, i8** %fptrptr
+ %fptr_casted = bitcast i8* %fptr to i32 (i8*, i32)*
+ ; CHECK: call i32 %
+ %result = call i32 %fptr_casted(i8* %obj, i32 %arg)
+ ret void
+}
+
+declare i1 @llvm.type.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{i32 0, !"typeid"}
diff --git a/test/Transforms/WholeProgramDevirt/vcp-too-wide-ints.ll b/test/Transforms/WholeProgramDevirt/vcp-too-wide-ints.ll
new file mode 100644
index 000000000000..93936d5e1d27
--- /dev/null
+++ b/test/Transforms/WholeProgramDevirt/vcp-too-wide-ints.ll
@@ -0,0 +1,35 @@
+; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@vt1 = global [1 x i8*] [i8* bitcast (i128 (i8*, i128)* @vf1 to i8*)], !type !0
+@vt2 = global [1 x i8*] [i8* bitcast (i128 (i8*, i128)* @vf2 to i8*)], !type !0
+
+define i128 @vf1(i8* %this, i128 %arg) readnone {
+ ret i128 %arg
+}
+
+define i128 @vf2(i8* %this, i128 %arg) readnone {
+ ret i128 %arg
+}
+
+; CHECK: define i128 @call
+define i128 @call(i8* %obj) {
+ %vtableptr = bitcast i8* %obj to [1 x i8*]**
+ %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr
+ %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+ %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"typeid")
+ call void @llvm.assume(i1 %p)
+ %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0
+ %fptr = load i8*, i8** %fptrptr
+ %fptr_casted = bitcast i8* %fptr to i128 (i8*, i128)*
+ ; CHECK: call i128 %
+ %result = call i128 %fptr_casted(i8* %obj, i128 1)
+ ret i128 %result
+}
+
+declare i1 @llvm.type.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{i32 0, !"typeid"}
diff --git a/test/Transforms/WholeProgramDevirt/vcp-type-mismatch.ll b/test/Transforms/WholeProgramDevirt/vcp-type-mismatch.ll
new file mode 100644
index 000000000000..3124889a7070
--- /dev/null
+++ b/test/Transforms/WholeProgramDevirt/vcp-type-mismatch.ll
@@ -0,0 +1,65 @@
+; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@vt1 = global [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf1 to i8*)], !type !0
+@vt2 = global [1 x i8*] [i8* bitcast (i32 (i8*, i32)* @vf2 to i8*)], !type !0
+
+define i32 @vf1(i8* %this, i32 %arg) readnone {
+ ret i32 %arg
+}
+
+define i32 @vf2(i8* %this, i32 %arg) readnone {
+ ret i32 %arg
+}
+
+; CHECK: define i32 @bad_arg_type
+define i32 @bad_arg_type(i8* %obj) {
+ %vtableptr = bitcast i8* %obj to [1 x i8*]**
+ %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr
+ %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+ %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"typeid")
+ call void @llvm.assume(i1 %p)
+ %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0
+ %fptr = load i8*, i8** %fptrptr
+ %fptr_casted = bitcast i8* %fptr to i32 (i8*, i64)*
+ ; CHECK: call i32 %
+ %result = call i32 %fptr_casted(i8* %obj, i64 1)
+ ret i32 %result
+}
+
+; CHECK: define i32 @bad_arg_count
+define i32 @bad_arg_count(i8* %obj) {
+ %vtableptr = bitcast i8* %obj to [1 x i8*]**
+ %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr
+ %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+ %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"typeid")
+ call void @llvm.assume(i1 %p)
+ %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0
+ %fptr = load i8*, i8** %fptrptr
+ %fptr_casted = bitcast i8* %fptr to i32 (i8*, i64, i64)*
+ ; CHECK: call i32 %
+ %result = call i32 %fptr_casted(i8* %obj, i64 1, i64 2)
+ ret i32 %result
+}
+
+; CHECK: define i64 @bad_return_type
+define i64 @bad_return_type(i8* %obj) {
+ %vtableptr = bitcast i8* %obj to [1 x i8*]**
+ %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr
+ %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+ %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"typeid")
+ call void @llvm.assume(i1 %p)
+ %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0
+ %fptr = load i8*, i8** %fptrptr
+ %fptr_casted = bitcast i8* %fptr to i64 (i8*, i32)*
+ ; CHECK: call i64 %
+ %result = call i64 %fptr_casted(i8* %obj, i32 1)
+ ret i64 %result
+}
+
+declare i1 @llvm.type.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{i32 0, !"typeid"}
diff --git a/test/Transforms/WholeProgramDevirt/vcp-uses-this.ll b/test/Transforms/WholeProgramDevirt/vcp-uses-this.ll
new file mode 100644
index 000000000000..fc4dee37dba7
--- /dev/null
+++ b/test/Transforms/WholeProgramDevirt/vcp-uses-this.ll
@@ -0,0 +1,37 @@
+; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+@vt1 = global [1 x i8*] [i8* bitcast (i32 (i8*)* @vf1 to i8*)], !type !0
+@vt2 = global [1 x i8*] [i8* bitcast (i32 (i8*)* @vf2 to i8*)], !type !0
+
+define i32 @vf1(i8* %this) readnone {
+ %this_int = ptrtoint i8* %this to i32
+ ret i32 %this_int
+}
+
+define i32 @vf2(i8* %this) readnone {
+ %this_int = ptrtoint i8* %this to i32
+ ret i32 %this_int
+}
+
+; CHECK: define i32 @call
+define i32 @call(i8* %obj) {
+ %vtableptr = bitcast i8* %obj to [1 x i8*]**
+ %vtable = load [1 x i8*]*, [1 x i8*]** %vtableptr
+ %vtablei8 = bitcast [1 x i8*]* %vtable to i8*
+ %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"typeid")
+ call void @llvm.assume(i1 %p)
+ %fptrptr = getelementptr [1 x i8*], [1 x i8*]* %vtable, i32 0, i32 0
+ %fptr = load i8*, i8** %fptrptr
+ %fptr_casted = bitcast i8* %fptr to i32 (i8*)*
+ ; CHECK: call i32 %
+ %result = call i32 %fptr_casted(i8* %obj)
+ ret i32 %result
+}
+
+declare i1 @llvm.type.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+!0 = !{i32 0, !"typeid"}
diff --git a/test/Transforms/WholeProgramDevirt/virtual-const-prop-begin.ll b/test/Transforms/WholeProgramDevirt/virtual-const-prop-begin.ll
new file mode 100644
index 000000000000..530fe8aa89d0
--- /dev/null
+++ b/test/Transforms/WholeProgramDevirt/virtual-const-prop-begin.ll
@@ -0,0 +1,136 @@
+; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: [[VT1DATA:@[^ ]*]] = private constant { [8 x i8], [3 x i8*], [0 x i8] } { [8 x i8] c"\00\00\00\01\01\00\00\00", [3 x i8*] [i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i32 (i8*)* @vf1i32 to i8*)], [0 x i8] zeroinitializer }, section "vt1sec", !type [[T8:![0-9]+]]
+@vt1 = constant [3 x i8*] [
+i8* bitcast (i1 (i8*)* @vf0i1 to i8*),
+i8* bitcast (i1 (i8*)* @vf1i1 to i8*),
+i8* bitcast (i32 (i8*)* @vf1i32 to i8*)
+], section "vt1sec", !type !0
+
+; CHECK: [[VT2DATA:@[^ ]*]] = private constant { [8 x i8], [3 x i8*], [0 x i8] } { [8 x i8] c"\00\00\00\02\02\00\00\00", [3 x i8*] [i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i32 (i8*)* @vf2i32 to i8*)], [0 x i8] zeroinitializer }, !type [[T8]]
+@vt2 = constant [3 x i8*] [
+i8* bitcast (i1 (i8*)* @vf1i1 to i8*),
+i8* bitcast (i1 (i8*)* @vf0i1 to i8*),
+i8* bitcast (i32 (i8*)* @vf2i32 to i8*)
+], !type !0
+
+; CHECK: [[VT3DATA:@[^ ]*]] = private constant { [8 x i8], [3 x i8*], [0 x i8] } { [8 x i8] c"\00\00\00\01\03\00\00\00", [3 x i8*] [i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i32 (i8*)* @vf3i32 to i8*)], [0 x i8] zeroinitializer }, !type [[T8]]
+@vt3 = constant [3 x i8*] [
+i8* bitcast (i1 (i8*)* @vf0i1 to i8*),
+i8* bitcast (i1 (i8*)* @vf1i1 to i8*),
+i8* bitcast (i32 (i8*)* @vf3i32 to i8*)
+], !type !0
+
+; CHECK: [[VT4DATA:@[^ ]*]] = private constant { [8 x i8], [3 x i8*], [0 x i8] } { [8 x i8] c"\00\00\00\02\04\00\00\00", [3 x i8*] [i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i32 (i8*)* @vf4i32 to i8*)], [0 x i8] zeroinitializer }, !type [[T8]]
+@vt4 = constant [3 x i8*] [
+i8* bitcast (i1 (i8*)* @vf1i1 to i8*),
+i8* bitcast (i1 (i8*)* @vf0i1 to i8*),
+i8* bitcast (i32 (i8*)* @vf4i32 to i8*)
+], !type !0
+
+; CHECK: @vt5 = {{.*}}, !type [[T0:![0-9]+]]
+@vt5 = constant [3 x i8*] [
+i8* bitcast (void ()* @__cxa_pure_virtual to i8*),
+i8* bitcast (void ()* @__cxa_pure_virtual to i8*),
+i8* bitcast (void ()* @__cxa_pure_virtual to i8*)
+], !type !0
+
+; CHECK: @vt1 = alias [3 x i8*], getelementptr inbounds ({ [8 x i8], [3 x i8*], [0 x i8] }, { [8 x i8], [3 x i8*], [0 x i8] }* [[VT1DATA]], i32 0, i32 1)
+; CHECK: @vt2 = alias [3 x i8*], getelementptr inbounds ({ [8 x i8], [3 x i8*], [0 x i8] }, { [8 x i8], [3 x i8*], [0 x i8] }* [[VT2DATA]], i32 0, i32 1)
+; CHECK: @vt3 = alias [3 x i8*], getelementptr inbounds ({ [8 x i8], [3 x i8*], [0 x i8] }, { [8 x i8], [3 x i8*], [0 x i8] }* [[VT3DATA]], i32 0, i32 1)
+; CHECK: @vt4 = alias [3 x i8*], getelementptr inbounds ({ [8 x i8], [3 x i8*], [0 x i8] }, { [8 x i8], [3 x i8*], [0 x i8] }* [[VT4DATA]], i32 0, i32 1)
+
+define i1 @vf0i1(i8* %this) readnone {
+ ret i1 0
+}
+
+define i1 @vf1i1(i8* %this) readnone {
+ ret i1 1
+}
+
+define i32 @vf1i32(i8* %this) readnone {
+ ret i32 1
+}
+
+define i32 @vf2i32(i8* %this) readnone {
+ ret i32 2
+}
+
+define i32 @vf3i32(i8* %this) readnone {
+ ret i32 3
+}
+
+define i32 @vf4i32(i8* %this) readnone {
+ ret i32 4
+}
+
+; CHECK: define i1 @call1(
+define i1 @call1(i8* %obj) {
+ %vtableptr = bitcast i8* %obj to [3 x i8*]**
+ %vtable = load [3 x i8*]*, [3 x i8*]** %vtableptr
+ ; CHECK: [[VT1:%[^ ]*]] = bitcast [3 x i8*]* {{.*}} to i8*
+ %vtablei8 = bitcast [3 x i8*]* %vtable to i8*
+ %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"typeid")
+ call void @llvm.assume(i1 %p)
+ %fptrptr = getelementptr [3 x i8*], [3 x i8*]* %vtable, i32 0, i32 0
+ %fptr = load i8*, i8** %fptrptr
+ %fptr_casted = bitcast i8* %fptr to i1 (i8*)*
+ ; CHECK: [[VTGEP1:%[^ ]*]] = getelementptr i8, i8* [[VT1]], i64 -5
+ ; CHECK: [[VTLOAD1:%[^ ]*]] = load i8, i8* [[VTGEP1]]
+ ; CHECK: [[VTAND1:%[^ ]*]] = and i8 [[VTLOAD1]], 2
+ ; CHECK: [[VTCMP1:%[^ ]*]] = icmp ne i8 [[VTAND1]], 0
+ %result = call i1 %fptr_casted(i8* %obj)
+ ; CHECK: ret i1 [[VTCMP1]]
+ ret i1 %result
+}
+
+; CHECK: define i1 @call2(
+define i1 @call2(i8* %obj) {
+ %vtableptr = bitcast i8* %obj to [3 x i8*]**
+ %vtable = load [3 x i8*]*, [3 x i8*]** %vtableptr
+ ; CHECK: [[VT2:%[^ ]*]] = bitcast [3 x i8*]* {{.*}} to i8*
+ %vtablei8 = bitcast [3 x i8*]* %vtable to i8*
+ %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"typeid")
+ call void @llvm.assume(i1 %p)
+ %fptrptr = getelementptr [3 x i8*], [3 x i8*]* %vtable, i32 0, i32 1
+ %fptr = load i8*, i8** %fptrptr
+ %fptr_casted = bitcast i8* %fptr to i1 (i8*)*
+ ; CHECK: [[VTGEP2:%[^ ]*]] = getelementptr i8, i8* [[VT2]], i64 -5
+ ; CHECK: [[VTLOAD2:%[^ ]*]] = load i8, i8* [[VTGEP2]]
+ ; CHECK: [[VTAND2:%[^ ]*]] = and i8 [[VTLOAD2]], 1
+ ; CHECK: [[VTCMP2:%[^ ]*]] = icmp ne i8 [[VTAND2]], 0
+ %result = call i1 %fptr_casted(i8* %obj)
+ ; CHECK: ret i1 [[VTCMP2]]
+ ret i1 %result
+}
+
+; CHECK: define i32 @call3(
+define i32 @call3(i8* %obj) {
+ %vtableptr = bitcast i8* %obj to [3 x i8*]**
+ %vtable = load [3 x i8*]*, [3 x i8*]** %vtableptr
+ ; CHECK: [[VT3:%[^ ]*]] = bitcast [3 x i8*]* {{.*}} to i8*
+ %vtablei8 = bitcast [3 x i8*]* %vtable to i8*
+ %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"typeid")
+ call void @llvm.assume(i1 %p)
+ %fptrptr = getelementptr [3 x i8*], [3 x i8*]* %vtable, i32 0, i32 2
+ %fptr = load i8*, i8** %fptrptr
+ %fptr_casted = bitcast i8* %fptr to i32 (i8*)*
+ ; CHECK: [[VTGEP3:%[^ ]*]] = getelementptr i8, i8* [[VT3]], i64 -4
+ ; CHECK: [[VTBC3:%[^ ]*]] = bitcast i8* [[VTGEP3]] to i32*
+ ; CHECK: [[VTLOAD3:%[^ ]*]] = load i32, i32* [[VTBC3]]
+ %result = call i32 %fptr_casted(i8* %obj)
+ ; CHECK: ret i32 [[VTLOAD3]]
+ ret i32 %result
+}
+
+declare i1 @llvm.type.test(i8*, metadata)
+declare void @llvm.assume(i1)
+declare void @__cxa_pure_virtual()
+
+; CHECK: [[T8]] = !{i32 8, !"typeid"}
+; CHECK: [[T0]] = !{i32 0, !"typeid"}
+
+!0 = !{i32 0, !"typeid"}
diff --git a/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll b/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll
new file mode 100644
index 000000000000..d2eff163a840
--- /dev/null
+++ b/test/Transforms/WholeProgramDevirt/virtual-const-prop-check.ll
@@ -0,0 +1,141 @@
+; RUN: opt -S -wholeprogramdevirt -pass-remarks=wholeprogramdevirt %s 2>&1 | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: remark: <unknown>:0:0: devirtualized call
+; CHECK: remark: <unknown>:0:0: devirtualized call
+; CHECK: remark: <unknown>:0:0: devirtualized call
+; CHECK-NOT: devirtualized call
+
+; CHECK: [[VT1DATA:@[^ ]*]] = private constant { [8 x i8], [3 x i8*], [0 x i8] } { [8 x i8] c"\00\00\00\01\01\00\00\00", [3 x i8*] [i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i32 (i8*)* @vf1i32 to i8*)], [0 x i8] zeroinitializer }, section "vt1sec", !type [[T8:![0-9]+]]
+@vt1 = constant [3 x i8*] [
+i8* bitcast (i1 (i8*)* @vf0i1 to i8*),
+i8* bitcast (i1 (i8*)* @vf1i1 to i8*),
+i8* bitcast (i32 (i8*)* @vf1i32 to i8*)
+], section "vt1sec", !type !0
+
+; CHECK: [[VT2DATA:@[^ ]*]] = private constant { [8 x i8], [3 x i8*], [0 x i8] } { [8 x i8] c"\00\00\00\02\02\00\00\00", [3 x i8*] [i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i32 (i8*)* @vf2i32 to i8*)], [0 x i8] zeroinitializer }, !type [[T8]]
+@vt2 = constant [3 x i8*] [
+i8* bitcast (i1 (i8*)* @vf1i1 to i8*),
+i8* bitcast (i1 (i8*)* @vf0i1 to i8*),
+i8* bitcast (i32 (i8*)* @vf2i32 to i8*)
+], !type !0
+
+; CHECK: [[VT3DATA:@[^ ]*]] = private constant { [8 x i8], [3 x i8*], [0 x i8] } { [8 x i8] c"\00\00\00\01\03\00\00\00", [3 x i8*] [i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i32 (i8*)* @vf3i32 to i8*)], [0 x i8] zeroinitializer }, !type [[T8]]
+@vt3 = constant [3 x i8*] [
+i8* bitcast (i1 (i8*)* @vf0i1 to i8*),
+i8* bitcast (i1 (i8*)* @vf1i1 to i8*),
+i8* bitcast (i32 (i8*)* @vf3i32 to i8*)
+], !type !0
+
+; CHECK: [[VT4DATA:@[^ ]*]] = private constant { [8 x i8], [3 x i8*], [0 x i8] } { [8 x i8] c"\00\00\00\02\04\00\00\00", [3 x i8*] [i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i32 (i8*)* @vf4i32 to i8*)], [0 x i8] zeroinitializer }, !type [[T8]]
+@vt4 = constant [3 x i8*] [
+i8* bitcast (i1 (i8*)* @vf1i1 to i8*),
+i8* bitcast (i1 (i8*)* @vf0i1 to i8*),
+i8* bitcast (i32 (i8*)* @vf4i32 to i8*)
+], !type !0
+
+; CHECK: @vt5 = {{.*}}, !type [[T0:![0-9]+]]
+@vt5 = constant [3 x i8*] [
+i8* bitcast (void ()* @__cxa_pure_virtual to i8*),
+i8* bitcast (void ()* @__cxa_pure_virtual to i8*),
+i8* bitcast (void ()* @__cxa_pure_virtual to i8*)
+], !type !0
+
+; CHECK: @vt1 = alias [3 x i8*], getelementptr inbounds ({ [8 x i8], [3 x i8*], [0 x i8] }, { [8 x i8], [3 x i8*], [0 x i8] }* [[VT1DATA]], i32 0, i32 1)
+; CHECK: @vt2 = alias [3 x i8*], getelementptr inbounds ({ [8 x i8], [3 x i8*], [0 x i8] }, { [8 x i8], [3 x i8*], [0 x i8] }* [[VT2DATA]], i32 0, i32 1)
+; CHECK: @vt3 = alias [3 x i8*], getelementptr inbounds ({ [8 x i8], [3 x i8*], [0 x i8] }, { [8 x i8], [3 x i8*], [0 x i8] }* [[VT3DATA]], i32 0, i32 1)
+; CHECK: @vt4 = alias [3 x i8*], getelementptr inbounds ({ [8 x i8], [3 x i8*], [0 x i8] }, { [8 x i8], [3 x i8*], [0 x i8] }* [[VT4DATA]], i32 0, i32 1)
+
+define i1 @vf0i1(i8* %this) readnone {
+ ret i1 0
+}
+
+define i1 @vf1i1(i8* %this) readnone {
+ ret i1 1
+}
+
+define i32 @vf1i32(i8* %this) readnone {
+ ret i32 1
+}
+
+define i32 @vf2i32(i8* %this) readnone {
+ ret i32 2
+}
+
+define i32 @vf3i32(i8* %this) readnone {
+ ret i32 3
+}
+
+define i32 @vf4i32(i8* %this) readnone {
+ ret i32 4
+}
+
+; CHECK: define i1 @call1(
+define i1 @call1(i8* %obj) {
+ %vtableptr = bitcast i8* %obj to [3 x i8*]**
+ %vtable = load [3 x i8*]*, [3 x i8*]** %vtableptr
+ ; CHECK: [[VT1:%[^ ]*]] = bitcast [3 x i8*]* {{.*}} to i8*
+ %vtablei8 = bitcast [3 x i8*]* %vtable to i8*
+ %pair = call {i8*, i1} @llvm.type.checked.load(i8* %vtablei8, i32 0, metadata !"typeid")
+ %fptr = extractvalue {i8*, i1} %pair, 0
+ %fptr_casted = bitcast i8* %fptr to i1 (i8*)*
+ ; CHECK: [[VTGEP1:%[^ ]*]] = getelementptr i8, i8* [[VT1]], i64 -5
+ ; CHECK: [[VTLOAD1:%[^ ]*]] = load i8, i8* [[VTGEP1]]
+ ; CHECK: [[VTAND1:%[^ ]*]] = and i8 [[VTLOAD1]], 2
+ ; CHECK: [[VTCMP1:%[^ ]*]] = icmp ne i8 [[VTAND1]], 0
+ %result = call i1 %fptr_casted(i8* %obj)
+ ; CHECK: [[AND1:%[^ ]*]] = and i1 [[VTCMP1]], true
+ %p = extractvalue {i8*, i1} %pair, 1
+ %and = and i1 %result, %p
+ ; CHECK: ret i1 [[AND1]]
+ ret i1 %and
+}
+
+; CHECK: define i1 @call2(
+define i1 @call2(i8* %obj) {
+ %vtableptr = bitcast i8* %obj to [3 x i8*]**
+ %vtable = load [3 x i8*]*, [3 x i8*]** %vtableptr
+ ; CHECK: [[VT2:%[^ ]*]] = bitcast [3 x i8*]* {{.*}} to i8*
+ %vtablei8 = bitcast [3 x i8*]* %vtable to i8*
+ %pair = call {i8*, i1} @llvm.type.checked.load(i8* %vtablei8, i32 8, metadata !"typeid")
+ %fptr = extractvalue {i8*, i1} %pair, 0
+ %fptr_casted = bitcast i8* %fptr to i1 (i8*)*
+ ; CHECK: [[VTGEP2:%[^ ]*]] = getelementptr i8, i8* [[VT2]], i64 -5
+ ; CHECK: [[VTLOAD2:%[^ ]*]] = load i8, i8* [[VTGEP2]]
+ ; CHECK: [[VTAND2:%[^ ]*]] = and i8 [[VTLOAD2]], 1
+ ; CHECK: [[VTCMP2:%[^ ]*]] = icmp ne i8 [[VTAND2]], 0
+ %result = call i1 %fptr_casted(i8* %obj)
+ ; CHECK: [[AND2:%[^ ]*]] = and i1 [[VTCMP2]], true
+ %p = extractvalue {i8*, i1} %pair, 1
+ %and = and i1 %result, %p
+ ; CHECK: ret i1 [[AND2]]
+ ret i1 %and
+}
+
+; CHECK: define i32 @call3(
+define i32 @call3(i8* %obj) {
+ %vtableptr = bitcast i8* %obj to [3 x i8*]**
+ %vtable = load [3 x i8*]*, [3 x i8*]** %vtableptr
+ ; CHECK: [[VT3:%[^ ]*]] = bitcast [3 x i8*]* {{.*}} to i8*
+ %vtablei8 = bitcast [3 x i8*]* %vtable to i8*
+ %pair = call {i8*, i1} @llvm.type.checked.load(i8* %vtablei8, i32 16, metadata !"typeid")
+ %fptr = extractvalue {i8*, i1} %pair, 0
+ %fptr_casted = bitcast i8* %fptr to i32 (i8*)*
+ ; CHECK: [[VTGEP3:%[^ ]*]] = getelementptr i8, i8* [[VT3]], i64 -4
+ ; CHECK: [[VTBC3:%[^ ]*]] = bitcast i8* [[VTGEP3]] to i32*
+ ; CHECK: [[VTLOAD3:%[^ ]*]] = load i32, i32* [[VTBC3]]
+ %result = call i32 %fptr_casted(i8* %obj)
+ ; CHECK: ret i32 [[VTLOAD3]]
+ ret i32 %result
+}
+
+declare {i8*, i1} @llvm.type.checked.load(i8*, i32, metadata)
+declare void @llvm.assume(i1)
+declare void @__cxa_pure_virtual()
+
+; CHECK: [[T8]] = !{i32 8, !"typeid"}
+; CHECK: [[T0]] = !{i32 0, !"typeid"}
+
+!0 = !{i32 0, !"typeid"}
diff --git a/test/Transforms/WholeProgramDevirt/virtual-const-prop-end.ll b/test/Transforms/WholeProgramDevirt/virtual-const-prop-end.ll
new file mode 100644
index 000000000000..75ec6ba95ef1
--- /dev/null
+++ b/test/Transforms/WholeProgramDevirt/virtual-const-prop-end.ll
@@ -0,0 +1,131 @@
+; RUN: opt -S -wholeprogramdevirt %s | FileCheck %s
+
+target datalayout = "e-p:64:64"
+target triple = "x86_64-unknown-linux-gnu"
+
+; CHECK: [[VT1DATA:@[^ ]*]] = private constant { [0 x i8], [4 x i8*], [8 x i8] } { [0 x i8] zeroinitializer, [4 x i8*] [i8* null, i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i32 (i8*)* @vf1i32 to i8*)], [8 x i8] c"\01\00\00\00\01\00\00\00" }, !type [[T8:![0-9]+]]
+@vt1 = constant [4 x i8*] [
+i8* null,
+i8* bitcast (i1 (i8*)* @vf0i1 to i8*),
+i8* bitcast (i1 (i8*)* @vf1i1 to i8*),
+i8* bitcast (i32 (i8*)* @vf1i32 to i8*)
+], !type !1
+
+; CHECK: [[VT2DATA:@[^ ]*]] = private constant { [0 x i8], [3 x i8*], [8 x i8] } { [0 x i8] zeroinitializer, [3 x i8*] [i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i32 (i8*)* @vf2i32 to i8*)], [8 x i8] c"\02\00\00\00\02\00\00\00" }, !type [[T0:![0-9]+]]
+@vt2 = constant [3 x i8*] [
+i8* bitcast (i1 (i8*)* @vf1i1 to i8*),
+i8* bitcast (i1 (i8*)* @vf0i1 to i8*),
+i8* bitcast (i32 (i8*)* @vf2i32 to i8*)
+], !type !0
+
+; CHECK: [[VT3DATA:@[^ ]*]] = private constant { [0 x i8], [4 x i8*], [8 x i8] } { [0 x i8] zeroinitializer, [4 x i8*] [i8* null, i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i32 (i8*)* @vf3i32 to i8*)], [8 x i8] c"\03\00\00\00\01\00\00\00" }, !type [[T8]]
+@vt3 = constant [4 x i8*] [
+i8* null,
+i8* bitcast (i1 (i8*)* @vf0i1 to i8*),
+i8* bitcast (i1 (i8*)* @vf1i1 to i8*),
+i8* bitcast (i32 (i8*)* @vf3i32 to i8*)
+], !type !1
+
+; CHECK: [[VT4DATA:@[^ ]*]] = private constant { [0 x i8], [3 x i8*], [8 x i8] } { [0 x i8] zeroinitializer, [3 x i8*] [i8* bitcast (i1 (i8*)* @vf1i1 to i8*), i8* bitcast (i1 (i8*)* @vf0i1 to i8*), i8* bitcast (i32 (i8*)* @vf4i32 to i8*)], [8 x i8] c"\04\00\00\00\02\00\00\00" }, !type [[T0]]
+@vt4 = constant [3 x i8*] [
+i8* bitcast (i1 (i8*)* @vf1i1 to i8*),
+i8* bitcast (i1 (i8*)* @vf0i1 to i8*),
+i8* bitcast (i32 (i8*)* @vf4i32 to i8*)
+], !type !0
+
+; CHECK: @vt1 = alias [4 x i8*], getelementptr inbounds ({ [0 x i8], [4 x i8*], [8 x i8] }, { [0 x i8], [4 x i8*], [8 x i8] }* [[VT1DATA]], i32 0, i32 1)
+; CHECK: @vt2 = alias [3 x i8*], getelementptr inbounds ({ [0 x i8], [3 x i8*], [8 x i8] }, { [0 x i8], [3 x i8*], [8 x i8] }* [[VT2DATA]], i32 0, i32 1)
+; CHECK: @vt3 = alias [4 x i8*], getelementptr inbounds ({ [0 x i8], [4 x i8*], [8 x i8] }, { [0 x i8], [4 x i8*], [8 x i8] }* [[VT3DATA]], i32 0, i32 1)
+; CHECK: @vt4 = alias [3 x i8*], getelementptr inbounds ({ [0 x i8], [3 x i8*], [8 x i8] }, { [0 x i8], [3 x i8*], [8 x i8] }* [[VT4DATA]], i32 0, i32 1)
+
+define i1 @vf0i1(i8* %this) readnone {
+ ret i1 0
+}
+
+define i1 @vf1i1(i8* %this) readnone {
+ ret i1 1
+}
+
+define i32 @vf1i32(i8* %this) readnone {
+ ret i32 1
+}
+
+define i32 @vf2i32(i8* %this) readnone {
+ ret i32 2
+}
+
+define i32 @vf3i32(i8* %this) readnone {
+ ret i32 3
+}
+
+define i32 @vf4i32(i8* %this) readnone {
+ ret i32 4
+}
+
+; CHECK: define i1 @call1(
+define i1 @call1(i8* %obj) {
+ %vtableptr = bitcast i8* %obj to [3 x i8*]**
+ %vtable = load [3 x i8*]*, [3 x i8*]** %vtableptr
+ ; CHECK: [[VT1:%[^ ]*]] = bitcast [3 x i8*]* {{.*}} to i8*
+ %vtablei8 = bitcast [3 x i8*]* %vtable to i8*
+ %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"typeid")
+ call void @llvm.assume(i1 %p)
+ %fptrptr = getelementptr [3 x i8*], [3 x i8*]* %vtable, i32 0, i32 0
+ %fptr = load i8*, i8** %fptrptr
+ %fptr_casted = bitcast i8* %fptr to i1 (i8*)*
+ ; CHECK: [[VTGEP1:%[^ ]*]] = getelementptr i8, i8* [[VT1]], i64 28
+ ; CHECK: [[VTLOAD1:%[^ ]*]] = load i8, i8* [[VTGEP1]]
+ ; CHECK: [[VTAND1:%[^ ]*]] = and i8 [[VTLOAD1]], 2
+ ; CHECK: [[VTCMP1:%[^ ]*]] = icmp ne i8 [[VTAND1]], 0
+ %result = call i1 %fptr_casted(i8* %obj)
+ ; CHECK: ret i1 [[VTCMP1]]
+ ret i1 %result
+}
+
+; CHECK: define i1 @call2(
+define i1 @call2(i8* %obj) {
+ %vtableptr = bitcast i8* %obj to [3 x i8*]**
+ %vtable = load [3 x i8*]*, [3 x i8*]** %vtableptr
+ ; CHECK: [[VT2:%[^ ]*]] = bitcast [3 x i8*]* {{.*}} to i8*
+ %vtablei8 = bitcast [3 x i8*]* %vtable to i8*
+ %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"typeid")
+ call void @llvm.assume(i1 %p)
+ %fptrptr = getelementptr [3 x i8*], [3 x i8*]* %vtable, i32 0, i32 1
+ %fptr = load i8*, i8** %fptrptr
+ %fptr_casted = bitcast i8* %fptr to i1 (i8*)*
+ ; CHECK: [[VTGEP2:%[^ ]*]] = getelementptr i8, i8* [[VT2]], i64 28
+ ; CHECK: [[VTLOAD2:%[^ ]*]] = load i8, i8* [[VTGEP2]]
+ ; CHECK: [[VTAND2:%[^ ]*]] = and i8 [[VTLOAD2]], 1
+ ; CHECK: [[VTCMP2:%[^ ]*]] = icmp ne i8 [[VTAND2]], 0
+ %result = call i1 %fptr_casted(i8* %obj)
+ ; CHECK: ret i1 [[VTCMP2]]
+ ret i1 %result
+}
+
+; CHECK: define i32 @call3(
+define i32 @call3(i8* %obj) {
+ %vtableptr = bitcast i8* %obj to [3 x i8*]**
+ %vtable = load [3 x i8*]*, [3 x i8*]** %vtableptr
+ ; CHECK: [[VT3:%[^ ]*]] = bitcast [3 x i8*]* {{.*}} to i8*
+ %vtablei8 = bitcast [3 x i8*]* %vtable to i8*
+ %p = call i1 @llvm.type.test(i8* %vtablei8, metadata !"typeid")
+ call void @llvm.assume(i1 %p)
+ %fptrptr = getelementptr [3 x i8*], [3 x i8*]* %vtable, i32 0, i32 2
+ %fptr = load i8*, i8** %fptrptr
+ %fptr_casted = bitcast i8* %fptr to i32 (i8*)*
+ ; CHECK: [[VTGEP3:%[^ ]*]] = getelementptr i8, i8* [[VT3]], i64 24
+ ; CHECK: [[VTBC3:%[^ ]*]] = bitcast i8* [[VTGEP3]] to i32*
+ ; CHECK: [[VTLOAD3:%[^ ]*]] = load i32, i32* [[VTBC3]]
+ %result = call i32 %fptr_casted(i8* %obj)
+ ; CHECK: ret i32 [[VTLOAD3]]
+ ret i32 %result
+}
+
+declare i1 @llvm.type.test(i8*, metadata)
+declare void @llvm.assume(i1)
+
+; CHECK: [[T8]] = !{i32 8, !"typeid"}
+; CHECK: [[T0]] = !{i32 0, !"typeid"}
+
+!0 = !{i32 0, !"typeid"}
+!1 = !{i32 8, !"typeid"}